/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * Overview of the RSM Kernel Agent:
 * ---------------------------------
 *
 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
 * kernel agent is a pseudo device driver which makes use of the RSMPI
 * interface on behalf of the RSMAPI user library.
 *
 * The kernel agent functionality can be categorized into the following
 * components:
 * 1. Driver Infrastructure
 * 2. Export/Import Segment Management
 * 3. Internal resource allocation/deallocation
 *
 * The driver infrastructure includes the basic module loading entry points
 * like _init, _info, _fini to load, unload and report information about
 * the driver module. The driver infrastructure also includes the
 * autoconfiguration entry points, namely attach, detach and getinfo, for
 * the device autoconfiguration.
 *
 * The kernel agent is a pseudo character device driver and exports
 * a cb_ops structure which defines the driver entry points for character
 * device access. This includes the open and close entry points. The
 * other entry points provided include ioctl, devmap, segmap and chpoll.
 * The read and write entry points are not used since the device is memory
 * mapped. Also, ddi_prop_op is used for the prop_op entry point.
 *
 * The ioctl entry point supports a number of commands, which are used by
 * the RSMAPI library in order to export and import segments. These
 * include commands for binding and rebinding the physical pages
 * allocated to the virtual address range, publishing the export segment,
 * unpublishing and republishing an export segment, creating an
 * import segment and a virtual connection from this import segment to
 * an export segment, and performing scatter-gather data transfer and
 * barrier operations.
 *
 *
 * Export and Import segments:
 * ---------------------------
 *
 * In order to create an RSM export segment a process allocates a range in its
 * virtual address space for the segment using standard Solaris interfaces.
 * The process then calls RSMAPI, which in turn makes an ioctl call to the
 * RSM kernel agent for an allocation of physical memory pages and for
 * creation of the export segment by binding these pages to the virtual
 * address range. These pages are locked in memory so that remote accesses
 * are always applied to the correct page. Then the RSM segment is published,
 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
 * is assigned to it.
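 *
 * For illustration, the user-level side of this sequence looks roughly
 * as follows (a sketch in terms of the public RSMAPI calls; error
 * handling and the exact argument lists are elided here):
 *
 *	caddr_t va;
 *	rsm_memseg_export_handle_t seg;
 *	rsm_memseg_id_t segid = 0;
 *
 *	va = mmap(NULL, len, ...);			   allocate VA range
 *	rsm_memseg_export_create(ctrl, &seg, va, len, 0);  bind/lock pages
 *	rsm_memseg_export_publish(seg, &segid, acl, acl_len);  assign segid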
 *
 * In order to import a published RSM segment, RSMAPI creates an import
 * segment and forms a virtual connection across the interconnect to the
 * export segment, via an ioctl into the kernel agent with the connect
 * command. The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
 * mapping of the import segment is handled by the segmap/devmap
 * infrastructure described as follows.
 *
 * Segmap and Devmap interfaces:
 *
 * The RSM kernel agent allows device memory to be directly accessed by user
 * threads via memory mapping. In order to do so, the RSM kernel agent
 * supports the devmap and segmap entry points.
 *
 * The segmap entry point (rsm_segmap) is responsible for setting up a memory
 * mapping as requested by mmap. The devmap entry point (rsm_devmap) is
 * responsible for exporting the device memory to the user applications.
 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then
 * control is transferred to the devmap_setup call, which calls rsm_devmap.
 *
 * rsm_devmap validates the user mapping to the device or kernel memory
 * and passes the information to the system for setting up the mapping. The
 * actual setting up of the mapping is done by devmap_devmem_setup (for
 * device memory) or devmap_umem_setup (for kernel memory). Callbacks are
 * registered for device context management via the devmap_devmem_setup
 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 * rsmmap_access and rsmmap_dup. They are called when a new mapping
 * is created, a mapping is freed, a mapping is accessed or an existing
 * mapping is duplicated, respectively. These callbacks allow the RSM kernel
 * agent to maintain state information associated with the mappings.
 * The state information is mainly in the form of a cookie list for the import
 * segment for which mapping has been done.
 *
 * Forced disconnect of import segments:
 *
 * When an exported segment is unpublished, the exporter sends a forced
 * disconnect message to all its importers. The importer segments are
 * unloaded and disconnected. This involves unloading the original
 * mappings and remapping to a preallocated kernel trash page. This is
 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 * preallocated by the kernel agent during attach using ddi_umem_alloc with
 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 * due to unloading of the original mappings.
 *
 * Additionally, every segment has a mapping generation number associated
 * with it. This is an entry in the barrier generation page, created
 * at attach time. This mapping generation number for the import
 * segments is incremented on a force disconnect to notify the application
 * of the force disconnect. On this notification, the application needs
 * to reconnect the segment to establish a new legitimate mapping.
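 *
 * For reference, the callbacks are registered through the standard
 * devmap_callback_ctl(9S) mechanism; a minimal sketch (the cookie,
 * offsets, flags and access attributes are elided here) looks like:
 *
 *	static struct devmap_callback_ctl rsmmap_ops = {
 *		DEVMAP_OPS_REV,
 *		rsmmap_map,		new mapping created
 *		rsmmap_access,		mapping accessed
 *		rsmmap_dup,		existing mapping duplicated
 *		rsmmap_unmap		mapping freed
 *	};
 *	(void) devmap_umem_setup(dhp, rsm_dip, &rsmmap_ops, cookie,
 *	    off, len, maxprot, flags, NULL);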
 *
 *
 * Locks used in the kernel agent:
 * -------------------------------
 *
 * The kernel agent uses a variety of mutexes and condition variables for
 * mutual exclusion of the shared data structures and for synchronization
 * between the various threads. Some of the locks are described as follows.
 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it. The lock is the resource mutex,
 * rsmrc_lock. This is used directly by the RSMRC_LOCK and RSMRC_UNLOCK
 * macros and in the rsmseglock_acquire and rsmseglock_release macros. An
 * additional lock called the rsmsi_lock is used for the shared import data
 * structure that is relevant for resources representing import segments.
 * There is also a condition variable associated with the resource called
 * s_cv. This is used to wait for events like a segment state change.
 *
 * The resource structures are allocated from a pool of resource structures,
 * called rsm_resource. This pool is protected via a reader-writer lock,
 * called rsmrc_lock.
 *
 * There are two separate hash tables, one for the export segments and
 * one for the import segments. The export segments are inserted into the
 * export segment hash table only after they have been published and the
 * import segments are inserted in the import segments list only after they
 * have successfully connected to an exported segment. These tables are
 * protected via reader-writer locks.
 *
 * Debug Support in the kernel agent:
 * ----------------------------------
 *
 * Debugging support in the kernel agent is provided by the following
 * macros.
 *
 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 * on the definition of the category and level. All messages that belong to
 * the specified category (rsmdbg_category) and are of an equal or greater
 * severity than the specified level (rsmdbg_level) are logged. The message
 * is a string which uses the same formatting rules as the strings used in
 * printf.
 *
 * The category defines which component of the kernel agent has logged this
 * message. There are a number of categories that have been defined, such as
 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT and RSM_EXPORT. A macro,
 * DBG_ADDCATEGORY, is used to add another category to the currently
 * specified category value so that the component using this new category
 * can also effectively log debug messages. Thus, the category of a specific
 * message is some combination of the available categories and we can define
 * sub-categories if we want a finer level of granularity.
 *
 * The level defines the severity of the message. Different level values are
 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 * the least severe (debug level 0).
 *
 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 * variable or a string respectively.
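 *
 * For example, an import-path message at debug severity could be logged
 * as follows (an illustrative invocation, not a line from this file):
 *
 *	DBG_PRINTF((RSM_IMPORT, RSM_DEBUG,
 *	    "rsm_connect: segid=%u node=%d\n", segid, node));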
 *
 *
 * NOTES:
 *
 * Special Fork and Exec Handling:
 * -------------------------------
 *
 * The backing physical pages of an exported segment are always locked down.
 * Thus, there are two cases in which a process having exported segments
 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 * forks and invokes exit before the duped file descriptors for the export
 * segments are closed in the child process. The hang is caused because the
 * address space release algorithm in the Solaris VM subsystem is based on a
 * non-blocking loop which does not terminate while segments are locked
 * down. In addition to this, the Solaris VM subsystem lacks a callback
 * mechanism to the RSM kernel agent to allow unlocking these export
 * segment pages.
 *
 * In order to circumvent this problem, the kernel agent does the following.
 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page (special_exit_offset) is allocated
 * by the kernel agent and is mmapped into the heap area of the process
 * address space (the mmap is done by the RSMAPI library). During the mmap
 * processing of this special page by the devmap infrastructure, a callback
 * (the same devmap context management callbacks discussed above) is
 * registered for an unmap.
 *
 * As discussed above, this page is processed by the Solaris address space
 * release code before any of the exported segment pages (which are allocated
 * from high memory). It is during this processing that the unmap callback
 * gets called and this callback is responsible for force destroying the
 * exported segments and thus eliminating the problem of locked pages.
 *
 * Flow-control:
 * ------------
 *
 * A credit based flow control algorithm is used for messages whose
 * processing cannot be done in the interrupt context, because it might
 * involve invoking rsmpi calls, might take a long time to complete,
 * or might need to allocate resources. The algorithm operates on a per
 * path basis. To send a message the pathend needs to have a credit and
 * it consumes one for every message that is flow controlled. On the
 * receiving pathend the message is put on a msgbuf_queue and a task is
 * dispatched on the worker thread - recv_taskq - where it is processed.
 * After processing the message, the receiving pathend dequeues the message,
 * and if it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages it
 * sends credits back to the sender pathend.
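 *
 * In outline, the credit scheme described above behaves roughly as
 * follows (a pseudo-code sketch of the algorithm; the names used here
 * are illustrative, not fields from this file):
 *
 *	sender:
 *		if (credits == 0)
 *			defer the send;		flow controlled
 *		credits--;			one credit per message
 *		send the message;
 *	receiver:
 *		enqueue on msgbuf_queue, dispatch task on recv_taskq;
 *		process and dequeue the message;
 *		if (++processed > RSMIPC_LOTSFREE_MSGBUFS)
 *			send credits back to the sender pathend;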
 *
 * RSM_DRTEST:
 * -----------
 *
 * This is used to enable DR testing using a test driver on test
 * platforms which do not support DR.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>

#include <sys/time.h>
#include <sys/errno.h>

#include <sys/file.h>
#include <sys/uio.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/open.h>
#include <sys/atomic.h>
#include <sys/mem_config.h>


#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/ddidevmap.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi_impldefs.h>

#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi_impldefs.h>

#include <sys/modctl.h>

#include <sys/policy.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>

#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/modctl.h>
#include <sys/debug.h>

#include <sys/tuneable.h>

#ifdef RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
    void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
    void *arg);
#endif

extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token();
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

extern kmutex_t rsmka_buf_lock;

extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);

static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,
	0,
	0
};

static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0,
	ddi_quiesce_not_needed,	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */

static struct modldrv modldrv = {
	&mod_driverops,	/* Type of module. This one is a pseudo driver */
	"Remote Shared Memory Driver",
	&rsm_ops,	/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define	MAX_NODES 64

static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t *bar_va;
static ddi_umem_cookie_t bar_cookie;
static off_t barrier_offset;
static size_t barrier_size;
static int max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t remap_cookie;

static rsm_memseg_id_t rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
rsmhash_table_t rsm_import_segs;		/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;	/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);

void rsmka_init_loopback();

int rsmka_null_seg_create(
    rsm_controller_handle_t,
    rsm_memseg_export_handle_t *,
    size_t,
    uint_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);
int rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t);

int rsmka_null_bind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unbind(
    rsm_memseg_export_handle_t,
    off_t,
    size_t);

int rsmka_null_rebind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_publish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_memseg_id_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);


int rsmka_null_republish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unpublish(
    rsm_memseg_export_handle_t);

rsm_ops_t null_rsmpi_ops;

/*
 * data and locks to keep track of total amount of exported memory
 */
static pgcnt_t rsm_pgcnt;
static pgcnt_t rsm_pgcnt_max;	/* max allowed */
static kmutex_t rsm_pgcnt_lock;

static int rsm_enable_dr;

static char loopback_str[] = "loopback";

int rsm_hash_size;

/*
 * The locking model is as follows:
 *
 * Local operations:
 *	find resource - grab reader lock on resource list
 *	insert rc - grab writer lock
 *	delete rc - grab writer lock and resource mutex
 *	read/write - no lock
 *
 * Remote invocations:
 *	find resource - grab read lock and resource mutex
 *
 * State:
 *	resource state - grab resource mutex
 */
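
/*
 * As a concrete instance of this model, a lookup by resource number
 * proceeds roughly as follows (a sketch; compare rsmresource_lookup()
 * below):
 *
 *	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
 *	p = ...locate the resource...;
 *	mutex_enter(&p->rsmrc_lock);	(only when the caller asks for it)
 *	rw_exit(&rsm_resource.rsmrc_lock);
 */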

int
_init(void)
{
	int e;

	e = mod_install(&modlinkage);
	if (e != 0) {
		return (e);
	}

	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);


	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);

	rsm_hash_size = RSM_HASHSZ;

	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);

	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);

	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);

	rsm_ipc.count = RSMIPC_SZ;
	rsm_ipc.wanted = 0;
	rsm_ipc.sequence = 0;

	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);

	for (e = 0; e < RSMIPC_SZ; e++) {
		rsmipc_slot_t *slot = &rsm_ipc.slots[e];

		RSMIPC_SET(slot, RSMIPC_FREE);
		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
	}

	/*
	 * Initialize the suspend message list
	 */
	rsm_suspend_list.list_head = NULL;
	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * It is assumed here that configuration data is available
	 * during system boot since _init may be called at that time.
	 */

	rsmka_pathmanager_init();

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
	    "rsm: _init done\n"));

	return (DDI_SUCCESS);

}

int
_info(struct modinfo *modinfop)
{

	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int e;

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
	    "rsm: _fini enter\n"));

	/*
	 * The rsmka_modunloadok flag is simply used to help with
	 * the PIT testing. Make this flag 0 to disallow modunload.
	 */
	if (rsmka_modunloadok == 0)
		return (EBUSY);

	/* rsm_detach will be called as a result of mod_remove */
	e = mod_remove(&modlinkage);
	if (e) {
		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
		    "Unable to fini RSM %x\n", e));
		return (e);
	}

	rsmka_pathmanager_cleanup();

	rw_destroy(&rsm_resource.rsmrc_lock);

	rw_destroy(&rsm_export_segs.rsmhash_rw);
	rw_destroy(&rsm_import_segs.rsmhash_rw);
	rw_destroy(&rsm_event_queues.rsmhash_rw);

	mutex_destroy(&importer_list.lock);

	mutex_destroy(&rsm_ipc.lock);
	cv_destroy(&rsm_ipc.cv);

	(void) mutex_destroy(&rsm_suspend_list.list_lock);

	(void) mutex_destroy(&rsm_pgcnt_lock);

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));

	return (DDI_SUCCESS);

}

/*ARGSUSED1*/
static int
rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	minor_t rnum;
	int percent;
	int ret;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach - cmd not supported\n"));
		return (DDI_FAILURE);
	}

	if (rsm_dip != NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach - supports only "
		    "one instance\n"));
		return (DDI_FAILURE);
	}

	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "enable-dynamic-reconfiguration", 1);

	mutex_enter(&rsm_drv_data.drv_lock);
	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
	mutex_exit(&rsm_drv_data.drv_lock);

	if (rsm_enable_dr) {
#ifdef RSM_DRTEST
		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
		    (void *)NULL);
#else
		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
		    (void *)NULL);
#endif
		if (ret != 0) {
			/* drv_lock is not held here, so do not drop it */
			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
			    "reconfiguration setup failed\n");
			return (DDI_FAILURE);
		}
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
	rsm_drv_data.drv_state = RSM_DRV_OK;
	cv_broadcast(&rsm_drv_data.drv_cv);
	mutex_exit(&rsm_drv_data.drv_lock);

	/*
	 * page_list_read_lock();
	 * xx_setup();
	 * page_list_read_unlock();
	 */

	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "segment-hashtable-size", RSM_HASHSZ);
	if (rsm_hash_size == 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm: segment-hashtable-size in rsm.conf "
		    "must be greater than 0, defaulting to 128\n"));
		rsm_hash_size = RSM_HASHSZ;
	}

	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
	    rsm_hash_size));

	rsm_pgcnt = 0;

	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "max-exported-memory", 0);
	if (percent < 0) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach not enough memory available to "
		    "export, or max-exported-memory set incorrectly.\n"));
		return (DDI_FAILURE);
	}
	/* 0 indicates no fixed upper limit. maxmem is the max */
	/* available pageable physical mem */
	rsm_pgcnt_max = (percent * maxmem) / 100;

	if (rsm_pgcnt_max > 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm: Available physical memory = %lu pages, "
		    "Max exportable memory = %lu pages",
		    maxmem, rsm_pgcnt_max));
	}

	/*
	 * Create minor number
	 */
	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_attach - Unable to get "
		    "minor number\n"));
		return (DDI_FAILURE);
	}

	ASSERT(rnum == RSM_DRIVER_MINOR);

	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_attach - unable to allocate "
		    "minor #\n"));
		return (DDI_FAILURE);
	}

	rsm_dip = devi;
	/*
	 * Allocate the hashtables
	 */
	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);

	importer_list.bucket = (importing_token_t **)
	    kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *),
	    KM_SLEEP);

	/*
	 * Allocate a resource struct
	 */
	{
		rsmresource_t *p;

		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);

		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *)NULL);

		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
	}

	/*
	 * Based on the rsm.conf property max-segments, determine the maximum
	 * number of segments that can be exported/imported. This is then used
	 * to determine the size for barrier failure pages.
	 */

	/* First get the max number of segments from the rsm.conf file */
	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "max-segments", 0);
	if (max_segs == 0) {
		/* Use default number of segments */
		max_segs = RSM_MAX_NUM_SEG;
	}

	/*
	 * Based on the max number of segments allowed, determine the barrier
	 * page size. Add 1 to max_segs since the barrier page itself uses
	 * a slot.
	 */
	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
	    PAGESIZE);

	/*
	 * Allocate the barrier failure page
	 */
	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
	    DDI_UMEM_SLEEP, &bar_cookie);

	/*
	 * Set the barrier_offset
	 */
	barrier_offset = 0;

	/*
	 * Allocate trash memory and get a cookie for it. This will be used
	 * when remapping segments during force disconnects. Allocate the
	 * trash memory with a large size which is page aligned.
	 */
	(void) ddi_umem_alloc((size_t)TRASHSIZE,
	    DDI_UMEM_TRASH, &remap_cookie);

	/* initialize user segment id allocation variable */
	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;

	/*
	 * initialize the null_rsmpi_ops vector and the loopback adapter
	 */
	rsmka_init_loopback();


	ddi_report_dev(devi);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));

	return (DDI_SUCCESS);
}

/*
 * The call to mod_remove in the _fini routine will cause the system
 * to call rsm_detach
 */
/*ARGSUSED*/
static int
rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));

	switch (cmd) {
	case DDI_DETACH:
		break;
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_detach - cmd %x not supported\n",
		    cmd));
		return (DDI_FAILURE);
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	while (rsm_drv_data.drv_state != RSM_DRV_OK)
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
	mutex_exit(&rsm_drv_data.drv_lock);

	/*
	 * Unregister the DR callback functions
	 */
	if (rsm_enable_dr) {
#ifdef RSM_DRTEST
		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
		    (void *)NULL);
#else
		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
		    (void *)NULL);
#endif
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
	rsm_drv_data.drv_state = RSM_DRV_NEW;
	mutex_exit(&rsm_drv_data.drv_lock);

	ASSERT(rsm_suspend_list.list_head == NULL);

	/*
	 * Release all resources, seglist, controller, ...
	 */

	/* remove intersend queues */
	/* remove registered services */


	ddi_remove_minor_node(dip, DRIVER_NAME);
	rsm_dip = NULL;

	/*
	 * Free minor zero resource
	 */
	{
		rsmresource_t *p;

		p = rsmresource_free(RSM_DRIVER_MINOR);
		if (p) {
			mutex_destroy(&p->rsmrc_lock);
			kmem_free((void *)p, sizeof (*p));
		}
	}

	/*
	 * Free resource table
	 */

	rsmresource_destroy();

	/*
	 * Free the hash tables
	 */
	rsmhash_free(&rsm_export_segs, rsm_hash_size);
	rsmhash_free(&rsm_import_segs, rsm_hash_size);

	kmem_free((void *)importer_list.bucket,
	    rsm_hash_size * sizeof (importing_token_t *));
	importer_list.bucket = NULL;


	/* free barrier page */
	if (bar_cookie != NULL) {
		ddi_umem_free(bar_cookie);
	}
	bar_va = NULL;
	bar_cookie = NULL;

	/*
	 * Free the memory allocated for the trash
	 */
	if (remap_cookie != NULL) {
		ddi_umem_free(remap_cookie);
	}
	remap_cookie = NULL;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (rsm_dip == NULL)
			error = DDI_FAILURE;
		else {
			*result = (void *)rsm_dip;
			error = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
	return (error);
}

adapter_t *
rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
{
	adapter_t *adapter;
	char adapter_devname[MAXNAMELEN];
	int instance;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));

	instance = msg->cnum;

	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
		return (NULL);
	}

	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
		return (NULL);

	if (strcmp(adapter_devname, "loopback") == 0)
		return (&loopback_adapter);

	adapter = rsmka_lookup_adapter(adapter_devname, instance);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));

	return (adapter);
}


/*
 * *********************** Resource Number Management ********************
 * All resources are stored in a simple hash table. The table is an array
 * of pointers to resource blks. Each blk contains:
 *	base	- base number of this blk
 *	used	- number of used slots in this blk.
 *	blks	- array of pointers to resource items.
 * An entry in a resource blk is empty if it's NULL.
 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a new larger array, copy the pointers into the new array,
 * and a new resource blk is allocated and added to the hash table.
 *
 * The resource control block contains:
 *	root	- array of pointers to resource blks
 *	sz	- current size of array.
 *	len	- last valid entry in array.
 *
 * A search operation based on a resource number is as follows:
 *	index = rnum / RESOURCE_BLKSZ;
 *	ASSERT(index < resource_block.len);
 *	ASSERT(index < resource_block.sz);
 *	offset = rnum % RESOURCE_BLKSZ;
 *	ASSERT(offset >= resource_block.root[index]->base);
 *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
 *	return resource_block.root[index]->blks[offset];
 *
 * A resource blk is freed when its used count reaches zero.
 */
static int
rsmresource_alloc(minor_t *rnum)
{

	/* search for available resource slot */
	int i, j, empty = -1;
	rsmresource_blk_t *blk;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_alloc enter\n"));

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	/* Try to find an empty slot */
	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		blk = rsm_resource.rsmrc_root[i];
		if (blk != NULL && blk->rsmrcblk_avail > 0) {
			/* found an empty slot in this blk */
			for (j = 0; j < RSMRC_BLKSZ; j++) {
				if (blk->rsmrcblk_blks[j] == NULL) {
					*rnum = (minor_t)
					    (j + (i * RSMRC_BLKSZ));
					/*
					 * obey gen page limits
					 */
					if (*rnum >= max_segs + 1) {
						if (empty < 0) {
							rw_exit(&rsm_resource.
							    rsmrc_lock);
							DBG_PRINTF((
							    RSM_KERNEL_ALL,
							    RSM_ERR,
							    "rsmresource"
							    "_alloc failed:"
							    "not enough res"
							    "%d\n", *rnum));
					return (RSMERR_INSUFFICIENT_RESOURCES);
						} else {
							/* use empty slot */
							break;
						}

					}

					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
					blk->rsmrcblk_avail--;
					rw_exit(&rsm_resource.rsmrc_lock);
					DBG_PRINTF((RSM_KERNEL_ALL,
					    RSM_DEBUG_VERBOSE,
					    "rsmresource_alloc done\n"));
					return (RSM_SUCCESS);
				}
			}
		} else if (blk == NULL && empty < 0) {
			/* remember first empty slot */
			empty = i;
		}
	}

	/* Couldn't find anything, allocate a new blk */
	/*
	 * Do we need to reallocate the root array
	 */
	if (empty < 0) {
		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
			/*
			 * Allocate new array and copy current stuff into it
			 */
			rsmresource_blk_t **p;
			uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
			    RSMRC_BLKSZ;
			/*
			 * Don't allocate more than the max valid rnum
			 */
			if (rsm_resource.rsmrc_len * RSMRC_BLKSZ >=
			    max_segs + 1) {
				rw_exit(&rsm_resource.rsmrc_lock);
				return (RSMERR_INSUFFICIENT_RESOURCES);
			}

			p = (rsmresource_blk_t **)kmem_zalloc(
			    newsz * sizeof (*p),
			    KM_SLEEP);

			if (rsm_resource.rsmrc_root) {
				uint_t oldsz;

				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
				    (int)sizeof (*p));

				/*
				 * Copy old data into new space and
				 * free old stuff
				 */
				bcopy(rsm_resource.rsmrc_root, p, oldsz);
				kmem_free(rsm_resource.rsmrc_root, oldsz);
			}

			rsm_resource.rsmrc_root = p;
			rsm_resource.rsmrc_sz = (int)newsz;
		}

		empty = rsm_resource.rsmrc_len;
		rsm_resource.rsmrc_len++;
	}

	/*
	 * Allocate a new blk
	 */
	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
	rsm_resource.rsmrc_root[empty] = blk;
	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;

	/*
	 * Allocate slot
	 */

	*rnum = (minor_t)(empty * RSMRC_BLKSZ);

	/*
	 * watch out not to exceed bounds of barrier page
	 */
	if (*rnum >= max_segs + 1) {
		rw_exit(&rsm_resource.rsmrc_lock);
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
		    "rsmresource_alloc failed %d\n", *rnum));

		return (RSMERR_INSUFFICIENT_RESOURCES);
	}
	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;


	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_alloc done\n"));

	return (RSM_SUCCESS);
}

static rsmresource_t *
rsmresource_free(minor_t rnum)
{

	/* search for available resource slot */
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_free enter\n"));

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	if (i >= rsm_resource.rsmrc_len) {
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_free done\n"));
		return (NULL);
	}

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	ASSERT(rsm_resource.rsmrc_root);
	ASSERT(i < rsm_resource.rsmrc_len);
	ASSERT(i < rsm_resource.rsmrc_sz);
	blk = rsm_resource.rsmrc_root[i];
	if (blk == NULL) {
		rw_exit(&rsm_resource.rsmrc_lock);
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_free done\n"));
		return (NULL);
	}

	ASSERT(blk->rsmrcblk_blks[j]);	/* reserved or full */

	p = blk->rsmrcblk_blks[j];
	if (p == RSMRC_RESERVED) {
		p = NULL;
	}

	blk->rsmrcblk_blks[j] = NULL;
	blk->rsmrcblk_avail++;
	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
		/* free this blk */
		kmem_free(blk, sizeof (*blk));
		rsm_resource.rsmrc_root[i] = NULL;
	}

	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_free done\n"));

	return (p);
}

static rsmresource_t *
rsmresource_lookup(minor_t rnum, int lock)
{
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_lookup enter\n"));

	/* Find resource and lock it in READER mode */
	/* search for available resource slot */

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	if (i >= rsm_resource.rsmrc_len) {
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_lookup done\n"));
		return (NULL);
	}

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	blk = rsm_resource.rsmrc_root[i];
	if (blk != NULL) {
		ASSERT(i < rsm_resource.rsmrc_len);
		ASSERT(i < rsm_resource.rsmrc_sz);

		p = blk->rsmrcblk_blks[j];
		if (lock == RSM_LOCK) {
			if (p != RSMRC_RESERVED) {
				mutex_enter(&p->rsmrc_lock);
			} else {
				p = NULL;
			}
		}
	} else {
		p = NULL;
	}
	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_lookup done\n"));

	return (p);
}

static void
rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
{
	/* Find resource and lock it in READER mode */
	/* Caller can upgrade if need be */
	/* search for available resource slot */
	int i, j;
	rsmresource_blk_t *blk;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_insert enter\n"));

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	p->rsmrc_type = type;
	p->rsmrc_num = rnum;

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	ASSERT(rsm_resource.rsmrc_root);
	ASSERT(i < rsm_resource.rsmrc_len);
	ASSERT(i < rsm_resource.rsmrc_sz);

	blk = rsm_resource.rsmrc_root[i];
	ASSERT(blk);

	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);

	blk->rsmrcblk_blks[j] = p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_insert done\n"));

	rw_exit(&rsm_resource.rsmrc_lock);
}

static void
rsmresource_destroy()
{
	int i, j;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_destroy enter\n"));

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		rsmresource_blk_t *blk;

		blk = rsm_resource.rsmrc_root[i];
		if (blk == NULL) {
			continue;
		}
		for (j = 0; j < RSMRC_BLKSZ; j++) {
			if (blk->rsmrcblk_blks[j] != NULL) {
				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
				    "Not null slot %d, %lx\n", j,
				    (size_t)blk->rsmrcblk_blks[j]));
			}
		}
		kmem_free(blk, sizeof (*blk));
		rsm_resource.rsmrc_root[i] = NULL;
	}
	if (rsm_resource.rsmrc_root) {
		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
		rsm_resource.rsmrc_root = NULL;
		rsm_resource.rsmrc_len = 0;
		rsm_resource.rsmrc_sz = 0;
	}

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_destroy done\n"));

	rw_exit(&rsm_resource.rsmrc_lock);
}


/* ******************** Generic Key Hash Table Management ********* */
static rsmresource_t *
rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
    rsm_resource_state_t state)
{
	rsmresource_t *p;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));

	hashval = rsmhash(key);

	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
	    key, hashval));

	rw_enter(&rhash->rsmhash_rw, RW_READER);

	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);

	for (; p; p = p->rsmrc_next) {
		if (p->rsmrc_key == key) {
			/* acquire resource lock */
			RSMRC_LOCK(p);
			break;
		}
	}

	rw_exit(&rhash->rsmhash_rw);

	if (p != NULL && p->rsmrc_state != state) {
		/* state changed, release lock and return null */
		RSMRC_UNLOCK(p);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmhash_lookup done: state changed\n"));
		return (NULL);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));

	return (p);
}

static void
rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
{
	rsmresource_t *p, **back;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));

	hashval = rsmhash(rcelm->rsmrc_key);

	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
	    rcelm->rsmrc_key, hashval));

	/*
	 * It's ok not to find the segment.
	 */
	rw_enter(&rhash->rsmhash_rw, RW_WRITER);

	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);

	for (; (p = *back) != NULL; back = &p->rsmrc_next) {
		if (p == rcelm) {
			*back = rcelm->rsmrc_next;
			break;
		}
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));

	rw_exit(&rhash->rsmhash_rw);
}

static int
rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
    int dup_check, rsm_resource_state_t state)
{
	rsmresource_t *p = NULL, **bktp;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));

	/* lock table */
	rw_enter(&rhash->rsmhash_rw, RW_WRITER);

	/*
	 * If the current resource state is other than the state passed in
	 * then the resource is (probably) already on the list. E.g. for an
	 * import segment, if the state is not RSM_STATE_NEW then it's on the
	 * list already.
	 */
	RSMRC_LOCK(new);
	if (new->rsmrc_state != state) {
		RSMRC_UNLOCK(new);
		rw_exit(&rhash->rsmhash_rw);
		return (RSMERR_BAD_SEG_HNDL);
	}

	hashval = rsmhash(key);
	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));

	if (dup_check) {
		/*
		 * Used for checking export segments; don't want to have
		 * the same key used for multiple segments.
		 */

		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);

		for (; p; p = p->rsmrc_next) {
			if (p->rsmrc_key == key) {
				RSMRC_UNLOCK(new);
				break;
			}
		}
	}

	if (p == NULL) {
		/* Key doesn't exist, add it */

		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);

		new->rsmrc_key = key;
		new->rsmrc_next = *bktp;
		*bktp = new;
	}

	rw_exit(&rhash->rsmhash_rw);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));

	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
}

/*
 * XOR each byte of the key.
 */
static uint_t
rsmhash(rsm_memseg_id_t key)
{
	uint_t hash = key;

	hash ^= (key >> 8);
	hash ^= (key >> 16);
	hash ^= (key >> 24);

	return (hash % rsm_hash_size);

}
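
/*
 * For example, with the default 128-bucket table (RSM_HASHSZ), key
 * 0x11223344 folds to 0x11223344 ^ 0x00112233 ^ 0x00001122 ^ 0x00000011
 * = 0x11330044, and 0x11330044 % 128 selects bucket 0x44 (68).
 */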

/*
 * generic function to get a specific bucket
 */
static void *
rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
{

	if (rhash->bucket == NULL)
		return (NULL);
	else
		return ((void *)rhash->bucket[hashval]);
}

/*
 * generic function to get a specific bucket's address
 */
static void **
rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
{
	if (rhash->bucket == NULL)
		return (NULL);
	else
		return ((void **)&(rhash->bucket[hashval]));
}

/*
 * generic function to alloc a hash table
 */
static void
rsmhash_alloc(rsmhash_table_t *rhash, int size)
{
	rhash->bucket = (rsmresource_t **)
	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
}

/*
 * generic function to free a hash table
 */
static void
rsmhash_free(rsmhash_table_t *rhash, int size)
{

	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
	rhash->bucket = NULL;

}
/* *********************** Exported Segment Key Management ************ */

#define	rsmexport_add(new, key)	\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	(rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))

/*
 *	#define	rsmimport_lookup(key) \
 *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
 */

/*
 * increase the ref count and make the import segment point to the
 * shared data structure. Return a pointer to the shared data struct;
 * the shared data struct is locked upon return.
 */
static rsm_import_share_t *
rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
    rsmseg_t *segp)
{
	uint_t hash;
	rsmresource_t *p;
	rsm_import_share_t *shdatap;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));

	hash = rsmhash(key);
	/* lock table */
	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
	    key, hash));

	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);

	for (; p; p = p->rsmrc_next) {
		/*
		 * Look for an entry that is importing the same exporter
		 * with the share data structure allocated.
		 */
		if ((p->rsmrc_key == key) &&
		    (p->rsmrc_node == node) &&
		    (p->rsmrc_adapter == adapter) &&
		    (((rsmseg_t *)p)->s_share != NULL)) {
			shdatap = ((rsmseg_t *)p)->s_share;
			break;
		}
	}

	if (p == NULL) {
		/* we are the first importer, create the shared data struct */
		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
		shdatap->rsmsi_state = RSMSI_STATE_NEW;
		shdatap->rsmsi_segid = key;
		shdatap->rsmsi_node = node;
		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
	}

	rsmseglock_acquire(segp);

	/* we grab the shared lock before returning from this function */
	mutex_enter(&shdatap->rsmsi_lock);

	shdatap->rsmsi_refcnt++;
	segp->s_share = shdatap;

	rsmseglock_release(segp);

	rw_exit(&rsm_import_segs.rsmhash_rw);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));

	return (shdatap);
}

/*
 * the shared data structure should be locked before calling
 * rsmsharecv_signal().
 * Change the state and signal any waiting segments.
 */
void
rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
{
	ASSERT(rsmsharelock_held(seg));

	if (seg->s_share->rsmsi_state == oldstate) {
		seg->s_share->rsmsi_state = newstate;
		cv_broadcast(&seg->s_share->rsmsi_cv);
	}
}

/*
 * Add to the hash table
 */
static void
importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
    void *cookie)
{

	importing_token_t *head;
	importing_token_t *new_token;
	int index;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));

	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
	new_token->importing_node = node;
	new_token->key = key;
	new_token->import_segment_cookie = cookie;
	new_token->importing_adapter_hwaddr = hwaddr;

	index = rsmhash(key);

	mutex_enter(&importer_list.lock);

	head = importer_list.bucket[index];
	importer_list.bucket[index] = new_token;
	new_token->next = head;
	mutex_exit(&importer_list.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
}

static void
importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
{

	importing_token_t *prev, *token = NULL;
	int index;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));

	index = rsmhash(key);

	mutex_enter(&importer_list.lock);

	token = importer_list.bucket[index];

	prev = token;
	while (token != NULL) {
		if (token->importing_node == node &&
		    token->import_segment_cookie == cookie) {
			if (prev == token)
				importer_list.bucket[index] = token->next;
			else
				prev->next = token->next;
			kmem_free((void *)token, sizeof (*token));
			break;
		} else {
			prev = token;
			token = token->next;
		}
	}

	mutex_exit(&importer_list.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));


}

/* ************************** Segment Structure Management ************* */

/*
 * Free segment structure
 */
static void
rsmseg_free(rsmseg_t *seg)
{

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));

	/* need to take seglock here to avoid race with rsmmap_unmap() */
	rsmseglock_acquire(seg);
	if (seg->s_ckl != NULL) {
		/* Segment is still busy */
		seg->s_state = RSM_STATE_END;
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_free done\n"));
		return;
	}

	rsmseglock_release(seg);

	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);

	/*
	 * If it's an importer, decrement the refcount
	 * and if it is down to zero free the shared data structure.
	 * This is where failures during rsm_connect() are unrefcounted.
	 */
	if (seg->s_share != NULL) {

		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);

		rsmsharelock_acquire(seg);

		ASSERT(seg->s_share->rsmsi_refcnt > 0);

		seg->s_share->rsmsi_refcnt--;

		if (seg->s_share->rsmsi_refcnt == 0) {
			rsmsharelock_release(seg);
			mutex_destroy(&seg->s_share->rsmsi_lock);
			cv_destroy(&seg->s_share->rsmsi_cv);
			kmem_free((void *)(seg->s_share),
			    sizeof (rsm_import_share_t));
		} else {
			rsmsharelock_release(seg);
		}
		/*
		 * The following needs to be done after any
		 * rsmsharelock calls which use seg->s_share.
		 */
		seg->s_share = NULL;
	}

	cv_destroy(&seg->s_cv);
	mutex_destroy(&seg->s_lock);
	rsmacl_free(seg->s_acl, seg->s_acl_len);
	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
	if (seg->s_adapter)
		rsmka_release_adapter(seg->s_adapter);

	kmem_free((void *)seg, sizeof (*seg));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));

}


static rsmseg_t *
rsmseg_alloc(minor_t num, struct cred *cred)
{
	rsmseg_t *new;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
	/*
	 * allocate memory for new segment. This should be a segkmem cache.
	 */
	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);

	new->s_state = RSM_STATE_NEW;
	new->s_minor = num;
	new->s_acl_len = 0;
	new->s_cookie = NULL;
	new->s_adapter = NULL;

	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
	/* we don't have a key yet, will set at export/connect */
	new->s_uid = crgetuid(cred);
	new->s_gid = crgetgid(cred);

	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));

	return (new);
}

/* ******************************** Driver Open/Close/Poll *************** */

/*ARGSUSED1*/
static int
rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
{
	minor_t rnum;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
		return (EINVAL);
	}

	/*
	 * Only minor zero can be opened; clones are used for resources.
	 */
     */
    if (getminor(*devp) != RSM_DRIVER_MINOR) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_open: bad minor %d\n", getminor(*devp)));
        return (ENODEV);
    }

    if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
        DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
        return (EPERM);
    }

    if (!(flag & FWRITE)) {
        /*
         * The library function _rsm_librsm_init calls open for
         * /dev/rsm with flag set to O_RDONLY.  We want a valid
         * file descriptor to be returned for minor device zero.
         */

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_open RDONLY done\n"));
        return (DDI_SUCCESS);
    }

    /*
     * - allocate new minor number and segment.
     * - add segment to list of all segments.
     * - set minordev data to segment
     * - update devp argument to new device
     * - update s_cred to cred; make sure you do crhold(cred);
     */

    /* allocate a new resource number */
    if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
        /*
         * We will bind this minor to a specific resource in first
         * ioctl
         */
        *devp = makedevice(getmajor(*devp), rnum);
    } else {
        return (EAGAIN);
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
    return (DDI_SUCCESS);
}

static void
rsmseg_close(rsmseg_t *seg, int force_flag)
{
    int e = RSM_SUCCESS;

    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));

    rsmseglock_acquire(seg);
    if (!force_flag && (seg->s_hdr.rsmrc_type ==
        RSM_RESOURCE_EXPORT_SEGMENT)) {
        /*
         * If we are processing rsm_close, wait for force_destroy
         * processing to complete, since force_destroy processing
         * needs to finish before we can free the segment.
         * force_destroy is only for export segments.
         */
        while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
            cv_wait(&seg->s_cv, &seg->s_lock);
        }
    }
    rsmseglock_release(seg);

    /* It's ok to read the state without a lock */
    switch (seg->s_state) {
    case RSM_STATE_EXPORT:
    case RSM_STATE_EXPORT_QUIESCING:
    case RSM_STATE_EXPORT_QUIESCED:
        e = rsm_unpublish(seg, 1);
        /* FALLTHRU */
    case RSM_STATE_BIND_QUIESCED:
        /* FALLTHRU */
    case RSM_STATE_BIND:
        e = rsm_unbind(seg);
        if (e != RSM_SUCCESS && force_flag == 1)
            return;
        ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
        /* FALLTHRU */
    case RSM_STATE_NEW_QUIESCED:
        rsmseglock_acquire(seg);
        seg->s_state = RSM_STATE_NEW;
        cv_broadcast(&seg->s_cv);
        rsmseglock_release(seg);
        break;
    case RSM_STATE_NEW:
        break;
    case RSM_STATE_ZOMBIE:
        /*
         * Segments in this state have been removed from the
         * exported segments list and have been unpublished and
         * unbound. They were removed during a callback to
         * rsm_export_force_destroy, which is called to unlock
         * exported memory segments when a process exits but leaves
         * the segments locked down because rsm_close is not called
         * for them. This can happen when a process calls fork or
         * exec and then exits. Once the segments are in the ZOMBIE
         * state, all that remains is to destroy them when rsm_close
         * is called. This is done here.
         * Thus, for such segments the state is changed to NEW so
         * that later in this function rsmseg_free is called.
         */
        rsmseglock_acquire(seg);
        seg->s_state = RSM_STATE_NEW;
        rsmseglock_release(seg);
        break;
    case RSM_STATE_MAP_QUIESCE:
    case RSM_STATE_ACTIVE:
        /* Disconnect will handle the unmap */
    case RSM_STATE_CONN_QUIESCE:
    case RSM_STATE_CONNECT:
    case RSM_STATE_DISCONNECT:
        ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
        (void) rsm_disconnect(seg);
        break;
    case RSM_STATE_MAPPING:
        /*FALLTHRU*/
    case RSM_STATE_END:
        DBG_PRINTF((category, RSM_ERR,
            "Invalid segment state %d in rsm_close\n", seg->s_state));
        break;
    default:
        DBG_PRINTF((category, RSM_ERR,
            "Invalid segment state %d in rsm_close\n", seg->s_state));
        break;
    }

    /*
     * check state.
     * - make sure you do crfree(s_cred);
     * release segment and minor number
     */
    ASSERT(seg->s_state == RSM_STATE_NEW);

    /*
     * The export_force_destroy callback exists to unlock the exported
     * segments of a process when that process does a fork or exec and
     * then exits. It calls this function with the force flag set to 1,
     * which indicates that the segment state must be converted to
     * ZOMBIE. This state means that the segments still exist and have
     * been unlocked, and, most importantly, that the only operation
     * allowed on them is a destroy on an rsm_close.
     */
    if (force_flag) {
        rsmseglock_acquire(seg);
        seg->s_state = RSM_STATE_ZOMBIE;
        rsmseglock_release(seg);
    } else {
        rsmseg_free(seg);
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
}

static int
rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
    minor_t rnum = getminor(dev);
    rsmresource_t *res;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));

    flag = flag; cred = cred;

    if (otyp != OTYP_CHR)
        return (EINVAL);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));

    /*
     * At this point we are the last reference to the resource.
     * Free resource number from resource table.
     * It's ok to remove number before we free the segment.
     * We need to lock the resource to protect against remote calls.
     */
    if (rnum == RSM_DRIVER_MINOR ||
        (res = rsmresource_free(rnum)) == NULL) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
        return (DDI_SUCCESS);
    }

    switch (res->rsmrc_type) {
    case RSM_RESOURCE_EXPORT_SEGMENT:
    case RSM_RESOURCE_IMPORT_SEGMENT:
        rsmseg_close((rsmseg_t *)res, 0);
        break;
    case RSM_RESOURCE_BAR:
        DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
        break;
    default:
        break;
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));

    return (DDI_SUCCESS);
}
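
/*
 * The teardown ladder in rsmseg_close() above can be condensed into a
 * small model: an exported segment is unwound one layer at a time
 * (unpublish, then unbind, then back to NEW), while a forced close
 * parks the segment in ZOMBIE so the final rsm_close can finish the
 * destruction. This sketch uses hypothetical names and stub calls;
 * only the state logic mirrors the driver.
 *
 *    enum st { ST_NEW, ST_BIND, ST_EXPORT, ST_ZOMBIE };
 *    struct xseg { enum st state; };
 *
 *    void
 *    xseg_close(struct xseg *s, int force)
 *    {
 *        switch (s->state) {
 *        case ST_EXPORT:
 *            unpublish(s);           // stub for rsm_unpublish
 *            // FALLTHRU
 *        case ST_BIND:
 *            unbind(s);              // stub for rsm_unbind
 *            // FALLTHRU
 *        case ST_ZOMBIE:
 *            s->state = ST_NEW;
 *            break;
 *        case ST_NEW:
 *            break;
 *        }
 *        if (force)
 *            s->state = ST_ZOMBIE;   // destroyed on the final close
 *        else
 *            xseg_free(s);           // stub for rsmseg_free
 *    }
 */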
/*
 * rsm_inc_pgcnt
 *
 * Description: increment rsm page counter.
 *
 * Parameters:  pgcnt_t pnum; number of pages to be used
 *
 * Returns:     RSM_SUCCESS if memory limit not exceeded
 *              RSMERR_INSUFFICIENT_MEM if memory limit exceeded. In
 *              this case, the page counter remains unchanged.
 *
 */
static int
rsm_inc_pgcnt(pgcnt_t pnum)
{
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
    if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
        return (RSM_SUCCESS);
    }

    mutex_enter(&rsm_pgcnt_lock);

    if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
        /* ensure that limits have not been exceeded */
        mutex_exit(&rsm_pgcnt_lock);
        return (RSMERR_INSUFFICIENT_MEM);
    }

    rsm_pgcnt += pnum;
    DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
        rsm_pgcnt));
    mutex_exit(&rsm_pgcnt_lock);

    return (RSM_SUCCESS);
}

/*
 * rsm_dec_pgcnt
 *
 * Description: decrement rsm page counter.
 *
 * Parameters:  pgcnt_t pnum; number of pages freed
 *
 */
static void
rsm_dec_pgcnt(pgcnt_t pnum)
{
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
        return;
    }

    mutex_enter(&rsm_pgcnt_lock);
    ASSERT(rsm_pgcnt >= pnum);
    rsm_pgcnt -= pnum;
    DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
        rsm_pgcnt));
    mutex_exit(&rsm_pgcnt_lock);
}

static struct umem_callback_ops rsm_as_ops = {
    UMEM_CALLBACK_VERSION, /* version number */
    rsm_export_force_destroy,
};
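
/*
 * The page accounting implemented by rsm_inc_pgcnt()/rsm_dec_pgcnt()
 * above is a capped counter guarded by a mutex; a self-contained
 * user-level model (hypothetical names, a pthread lock in place of
 * rsm_pgcnt_lock) is:
 *
 *    #include <assert.h>
 *    #include <pthread.h>
 *
 *    static pthread_mutex_t pg_lock = PTHREAD_MUTEX_INITIALIZER;
 *    static unsigned long pg_used, pg_max;   // pg_max == 0: no limit
 *
 *    int
 *    pg_reserve(unsigned long n)
 *    {
 *        if (pg_max == 0)
 *            return (0);
 *        pthread_mutex_lock(&pg_lock);
 *        if (pg_used + n > pg_max) {
 *            pthread_mutex_unlock(&pg_lock);
 *            return (-1);        // counter left unchanged
 *        }
 *        pg_used += n;
 *        pthread_mutex_unlock(&pg_lock);
 *        return (0);
 *    }
 *
 *    void
 *    pg_release(unsigned long n)
 *    {
 *        if (pg_max == 0)
 *            return;
 *        pthread_mutex_lock(&pg_lock);
 *        assert(pg_used >= n);
 *        pg_used -= n;
 *        pthread_mutex_unlock(&pg_lock);
 *    }
 */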
static int
rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
    proc_t *procp)
{
    int error = RSM_SUCCESS;
    ulong_t pnum;
    struct umem_callback_ops *callbackops = &rsm_as_ops;

    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));

    /*
     * Make sure vaddr and len are aligned on a page boundary
     */
    if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
        return (RSMERR_BAD_ADDR);
    }

    if (len & (PAGESIZE - 1)) {
        return (RSMERR_BAD_LENGTH);
    }

    /*
     * Find number of pages
     */
    pnum = btopr(len);
    error = rsm_inc_pgcnt(pnum);
    if (error != RSM_SUCCESS) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_bind_pages:mem limit exceeded\n"));
        return (RSMERR_INSUFFICIENT_MEM);
    }

    error = umem_lockmemory(vaddr, len,
        DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
        cookie,
        callbackops, procp);

    if (error) {
        rsm_dec_pgcnt(pnum);
        DBG_PRINTF((category, RSM_ERR,
            "rsm_bind_pages:umem_lockmemory failed\n"));
        /*
         * umem_lockmemory, in the case of failure, returns one of
         * the following three errors. These are translated into
         * the RSMERR namespace and returned.
         */
        if (error == EFAULT)
            return (RSMERR_BAD_ADDR);
        else if (error == EACCES)
            return (RSMERR_PERM_DENIED);
        else
            return (RSMERR_INSUFFICIENT_MEM);
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));

    return (error);

}

static int
rsm_unbind_pages(rsmseg_t *seg)
{
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));

    ASSERT(rsmseglock_held(seg));

    if (seg->s_cookie != NULL) {
        /* unlock address range */
        ddi_umem_unlock(seg->s_cookie);
        rsm_dec_pgcnt(btopr(seg->s_len));
        seg->s_cookie = NULL;
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));

    return (RSM_SUCCESS);
}
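
/*
 * The tail of rsm_bind_pages() above translates the locking errors
 * into the RSMERR namespace; written as a table, the mapping is simply
 * (hypothetical helper, same constants as the driver):
 *
 *    static int
 *    xlate_lock_err(int err)
 *    {
 *        switch (err) {
 *        case EFAULT:
 *            return (RSMERR_BAD_ADDR);
 *        case EACCES:
 *            return (RSMERR_PERM_DENIED);
 *        default:
 *            return (RSMERR_INSUFFICIENT_MEM);
 *        }
 *    }
 */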

static int
rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
{
    int e;
    adapter_t *adapter;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));

    adapter = rsm_getadapter(msg, mode);
    if (adapter == NULL) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_bind done:no adapter\n"));
        return (RSMERR_CTLR_NOT_PRESENT);
    }

    /* lock address range */
    if (msg->vaddr == NULL) {
        rsmka_release_adapter(adapter);
        DBG_PRINTF((category, RSM_ERR,
            "rsm: rsm_bind done: invalid vaddr\n"));
        return (RSMERR_BAD_ADDR);
    }
    if (msg->len <= 0) {
        rsmka_release_adapter(adapter);
        DBG_PRINTF((category, RSM_ERR,
            "rsm_bind: invalid length\n"));
        return (RSMERR_BAD_LENGTH);
    }

    /* Lock segment */
    rsmseglock_acquire(seg);

    while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_bind done: cv_wait INTERRUPTED"));
            rsmka_release_adapter(adapter);
            rsmseglock_release(seg);
            return (RSMERR_INTERRUPTED);
        }
    }

    ASSERT(seg->s_state == RSM_STATE_NEW);

    ASSERT(seg->s_cookie == NULL);

    e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
    if (e == RSM_SUCCESS) {
        seg->s_flags |= RSM_USER_MEMORY;
        if (msg->perm & RSM_ALLOW_REBIND) {
            seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
        }
        if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
            seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
        }
        seg->s_region.r_vaddr = msg->vaddr;
        /*
         * Set the s_pid value in the segment structure. This is used
         * to identify exported segments belonging to a particular
         * process so that when the process exits, these segments can
         * be unlocked forcefully even if rsm_close is not called on
         * process exit, since there may be other processes
         * referencing them (for example on a fork or exec).
         * The s_pid value is also used to authenticate the process
         * doing a publish or unpublish on the export segment. Only
         * the creator of the export segment has the right to do a
         * publish, unpublish or unbind on the segment.
         */
        seg->s_pid = ddi_get_pid();
        seg->s_len = msg->len;
        seg->s_state = RSM_STATE_BIND;
        seg->s_adapter = adapter;
        seg->s_proc = curproc;
    } else {
        rsmka_release_adapter(adapter);
        DBG_PRINTF((category, RSM_WARNING,
            "unable to lock down pages\n"));
    }

    msg->rnum = seg->s_minor;
    /* Unlock segment */
    rsmseglock_release(seg);

    if (e == RSM_SUCCESS) {
        /* copyout the resource number */
#ifdef _MULTI_DATAMODEL
        if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
            rsm_ioctlmsg32_t msg32;

            msg32.rnum = msg->rnum;
            if (ddi_copyout((caddr_t)&msg32.rnum,
                (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
                sizeof (minor_t), mode)) {
                rsmka_release_adapter(adapter);
                e = RSMERR_BAD_ADDR;
            }
        } else
#endif
        if (ddi_copyout((caddr_t)&msg->rnum,
            (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
            sizeof (minor_t), mode)) {
            rsmka_release_adapter(adapter);
            e = RSMERR_BAD_ADDR;
        }
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));

    return (e);
}

static void
rsm_remap_local_importers(rsm_node_id_t src_nodeid,
    rsm_memseg_id_t ex_segid,
    ddi_umem_cookie_t cookie)

{
    rsmresource_t   *p = NULL;
    rsmhash_table_t *rhash = &rsm_import_segs;
    uint_t          index;

    DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
        "rsm_remap_local_importers enter\n"));

    index = rsmhash(ex_segid);

    rw_enter(&rhash->rsmhash_rw, RW_READER);

    p = rsmhash_getbkt(rhash, index);

    for (; p; p = p->rsmrc_next) {
        rsmseg_t *seg = (rsmseg_t *)p;
        rsmseglock_acquire(seg);
        /*
         * Change the s_cookie value of only the local importers
         * which have been mapped (in state RSM_STATE_ACTIVE).
         * Note that there is no need to change the s_cookie value
         * if the imported segment is in RSM_STATE_MAPPING since
         * eventually the s_cookie will be updated via the mapping
         * functionality.
         */
        if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
            (seg->s_state == RSM_STATE_ACTIVE)) {
            seg->s_cookie = cookie;
        }
        rsmseglock_release(seg);
    }
    rw_exit(&rhash->rsmhash_rw);

    DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
        "rsm_remap_local_importers done\n"));
}
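
/*
 * rsm_remap_local_importers() above is an instance of the generic
 * "update matching entries under a reader lock" walk: the hash table
 * is held as reader so the bucket chain cannot change, and each
 * entry's own lock is taken only long enough to test and update it.
 * A compact user-level model with hypothetical types:
 *
 *    struct ent {
 *        struct ent      *next;
 *        pthread_mutex_t lock;
 *        unsigned        segid, node, active;
 *        void            *cookie;
 *    };
 *
 *    void
 *    remap_importers(pthread_rwlock_t *rw, struct ent *bucket,
 *        unsigned segid, unsigned node, void *new_cookie)
 *    {
 *        struct ent *p;
 *
 *        pthread_rwlock_rdlock(rw);
 *        for (p = bucket; p != NULL; p = p->next) {
 *            pthread_mutex_lock(&p->lock);
 *            if (p->segid == segid && p->node == node && p->active)
 *                p->cookie = new_cookie;
 *            pthread_mutex_unlock(&p->lock);
 *        }
 *        pthread_rwlock_unlock(rw);
 *    }
 */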

static int
rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
{
    int e;
    adapter_t *adapter;
    ddi_umem_cookie_t cookie;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));

    /* Check for permissions to rebind */
    if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
        return (RSMERR_REBIND_NOT_ALLOWED);
    }

    if (seg->s_pid != ddi_get_pid() &&
        ddi_get_pid() != 0) {
        DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
        return (RSMERR_NOT_CREATOR);
    }

    /*
     * Partial rebind is not allowed, hence the length passed in
     * must be the same as the segment length.
     */
    if (msg->vaddr == NULL) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_rebind done: null msg->vaddr\n"));
        return (RSMERR_BAD_ADDR);
    }
    if (msg->len != seg->s_len) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_rebind: invalid length\n"));
        return (RSMERR_BAD_LENGTH);
    }

    /* Lock segment */
    rsmseglock_acquire(seg);

    while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
        (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
        (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            rsmseglock_release(seg);
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_rebind done: cv_wait INTERRUPTED"));
            return (RSMERR_INTERRUPTED);
        }
    }

    /* verify segment state */
    if ((seg->s_state != RSM_STATE_BIND) &&
        (seg->s_state != RSM_STATE_EXPORT)) {
        /* Unlock segment */
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_rebind done: invalid state\n"));
        return (RSMERR_BAD_SEG_HNDL);
    }

    ASSERT(seg->s_cookie != NULL);

    if (msg->vaddr == seg->s_region.r_vaddr) {
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
        return (RSM_SUCCESS);
    }

    e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
    if (e == RSM_SUCCESS) {
        struct buf *xbuf;
        dev_t sdev = 0;
        rsm_memory_local_t mem;

        xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
            sdev, 0, NULL, DDI_UMEM_SLEEP);
        ASSERT(xbuf != NULL);

        mem.ms_type = RSM_MEM_BUF;
        mem.ms_bp = xbuf;

        adapter = seg->s_adapter;
        e = adapter->rsmpi_ops->rsm_rebind(
            seg->s_handle.out, 0, &mem,
            RSM_RESOURCE_DONTWAIT, NULL);

        if (e == RSM_SUCCESS) {
            /*
             * unbind the older pages, and unload local importers;
             * but don't disconnect importers
             */
            (void) rsm_unbind_pages(seg);
            seg->s_cookie = cookie;
            seg->s_region.r_vaddr = msg->vaddr;
            rsm_remap_local_importers(my_nodeid, seg->s_segid,
                cookie);
        } else {
            /*
             * Unbind the pages associated with "cookie" by the
             * rsm_bind_pages call prior to this. This is similar
             * to what is done in the rsm_unbind_pages routine for
             * seg->s_cookie.
             */
            ddi_umem_unlock(cookie);
            rsm_dec_pgcnt(btopr(msg->len));
            DBG_PRINTF((category, RSM_ERR,
                "rsm_rebind failed with %d\n", e));
        }
        /*
         * At present there is no dependency on the existence of xbuf.
         * So we can free it here. If in the future this changes, it
         * can be freed sometime during the segment destroy.
         */
        freerbuf(xbuf);
    }

    /* Unlock segment */
    rsmseglock_release(seg);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));

    return (e);
}

static int
rsm_unbind(rsmseg_t *seg)
{
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));

    rsmseglock_acquire(seg);

    /* verify segment state */
    if ((seg->s_state != RSM_STATE_BIND) &&
        (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_unbind: invalid state\n"));
        return (RSMERR_BAD_SEG_HNDL);
    }

    /* unlock current range */
    (void) rsm_unbind_pages(seg);

    if (seg->s_state == RSM_STATE_BIND) {
        seg->s_state = RSM_STATE_NEW;
    } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
        seg->s_state = RSM_STATE_NEW_QUIESCED;
    }

    rsmseglock_release(seg);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));

    return (RSM_SUCCESS);
}

/* **************************** Exporter Access List Management ******* */
static void
rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
{
    int acl_sz;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));

    /* acl could be NULL */

    if (acl != NULL && acl_len > 0) {
        acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
        kmem_free((void *)acl, acl_sz);
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
}

static void
rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
{
    int acl_sz;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));

    if (acl != NULL && acl_len > 0) {
        acl_sz = acl_len * sizeof (rsm_access_entry_t);
        kmem_free((void *)acl, acl_sz);
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));

}

static int
rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
    rsmapi_access_entry_t **list, int *len, int loopback)
{
    rsmapi_access_entry_t *acl;
    int acl_len;
    int i;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));

    *len = 0;
    *list = NULL;

    acl_len = msg->acl_len;
    if ((loopback && acl_len > 1) || (acl_len < 0) ||
        (acl_len > MAX_NODES)) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsmacl_build done: acl invalid\n"));
        return (RSMERR_BAD_ACL);
    }

    if (acl_len > 0 && acl_len <= MAX_NODES) {
        size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);

        acl = kmem_alloc(acl_size, KM_SLEEP);

        if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
            acl_size, mode)) {
            kmem_free((void *) acl, acl_size);
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsmacl_build done: BAD_ADDR\n"));
            return (RSMERR_BAD_ADDR);
        }

        /*
         * Verify access list
         */
        for (i = 0; i < acl_len; i++) {
            if (acl[i].ae_node > MAX_NODES ||
                (loopback && (acl[i].ae_node != my_nodeid)) ||
                acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
                /* invalid entry */
                kmem_free((void *) acl, acl_size);
                DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                    "rsmacl_build done: EINVAL\n"));
                return (RSMERR_BAD_ACL);
            }
        }

        *len = acl_len;
        *list = acl;
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));

    return (DDI_SUCCESS);
}

static int
rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
    int acl_len, adapter_t *adapter)
{
    rsm_access_entry_t *acl;
    rsm_addr_t hwaddr;
    int i;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));

    if (src != NULL) {
        size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
        acl = kmem_alloc(acl_size, KM_SLEEP);

        /*
         * translate access list
         */
        for (i = 0; i < acl_len; i++) {
            if (src[i].ae_node == my_nodeid) {
                acl[i].ae_addr = adapter->hwaddr;
            } else {
                hwaddr = get_remote_hwaddr(adapter,
                    src[i].ae_node);
                if ((int64_t)hwaddr < 0) {
                    /* invalid hwaddr */
                    kmem_free((void *) acl, acl_size);
                    DBG_PRINTF((category,
                        RSM_DEBUG_VERBOSE,
                        "rsmpiacl_create done:"
                        "EINVAL hwaddr\n"));
                    return (RSMERR_INTERNAL_ERROR);
                }
                acl[i].ae_addr = hwaddr;
            }
            /* rsmpi understands only RSM_PERM_XXXX */
            acl[i].ae_permission =
                src[i].ae_permission & RSM_PERM_RDWR;
        }
        *dest = acl;
    } else {
        *dest = NULL;
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));

    return (RSM_SUCCESS);
}

static int
rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
    rsmipc_reply_t *reply)
{

    int i;
    rsmseg_t *seg;
    rsm_memseg_id_t key = req->rsmipc_key;
    rsm_permission_t perm = req->rsmipc_perm;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsmsegacl_validate enter\n"));

    /*
     * Find segment and grab its lock. The reason why we grab the segment
     * lock inside the search is to avoid the race when the segment is
     * being deleted and we already have a pointer to it.
     */
    seg = rsmexport_lookup(key);
    if (!seg) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsmsegacl_validate done: %u ENXIO\n", key));
        return (RSMERR_SEG_NOT_PUBLISHED);
    }

    ASSERT(rsmseglock_held(seg));
    ASSERT(seg->s_state == RSM_STATE_EXPORT);

    /*
     * We implement a 2-level protection scheme.
     * First, we check if the local/remote host has access rights.
     * Second, we check if the user has access rights.
     *
     * This routine only validates the rnode access_list
     */
    if (seg->s_acl_len > 0) {
        /*
         * Check host access list
         */
        ASSERT(seg->s_acl != NULL);
        for (i = 0; i < seg->s_acl_len; i++) {
            if (seg->s_acl[i].ae_node == rnode) {
                perm &= seg->s_acl[i].ae_permission;
                goto found;
            }
        }
        /* rnode is not found in the list */
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsmsegacl_validate done: EPERM\n"));
        return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
    } else {
        /* use default owner creation umask */
        perm &= seg->s_mode;
    }

found:
    /* update perm for this node */
    reply->rsmipc_mode = perm;
    reply->rsmipc_uid = seg->s_uid;
    reply->rsmipc_gid = seg->s_gid;
    reply->rsmipc_segid = seg->s_segid;
    reply->rsmipc_seglen = seg->s_len;

    /*
     * Perm of requesting node is valid; source will validate user
     */
    rsmseglock_release(seg);

    /*
     * Add the importer to the list right away, if connect fails
     * the importer will ask the exporter to remove it.
     */
    importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
        req->rsmipc_segment_cookie);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));

    return (RSM_SUCCESS);
}
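
/*
 * The host-level check in rsmsegacl_validate() above reduces to an
 * intersection of the requested permission with the exporter's grant
 * for that node, falling back to the owner's creation mode when no
 * ACL was supplied. A pure-function sketch with hypothetical types:
 *
 *    struct ace { unsigned node; unsigned perm; };
 *
 *    // returns -1 if a non-empty ACL does not list the node,
 *    // otherwise the effective permission for this request
 *    int
 *    effective_perm(const struct ace *acl, int n, unsigned node,
 *        unsigned requested, unsigned default_mode)
 *    {
 *        int i;
 *
 *        if (n == 0)
 *            return (requested & default_mode);
 *        for (i = 0; i < n; i++)
 *            if (acl[i].node == node)
 *                return (requested & acl[i].perm);
 *        return (-1);    // not published to this node
 *    }
 */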


/* ************************** Exporter Calls ************************* */

static int
rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
{
    int e;
    int acl_len;
    rsmapi_access_entry_t *acl;
    rsm_access_entry_t *rsmpi_acl;
    rsm_memory_local_t mem;
    struct buf *xbuf;
    dev_t sdev = 0;
    adapter_t *adapter;
    rsm_memseg_id_t segment_id = 0;
    int loopback_flag = 0;
    int create_flags = 0;
    rsm_resource_callback_t callback_flag;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));

    if (seg->s_adapter == &loopback_adapter)
        loopback_flag = 1;

    if (seg->s_pid != ddi_get_pid() &&
        ddi_get_pid() != 0) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_publish: Not creator\n"));
        return (RSMERR_NOT_CREATOR);
    }

    /*
     * Get per node access list
     */
    e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
    if (e != DDI_SUCCESS) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_publish done: rsmacl_build failed\n"));
        return (e);
    }

    /*
     * The application-provided msg->key is used for resolving a
     * segment id according to the following:
     *  key = 0                     Kernel Agent selects the segment id
     *  key <= RSM_DLPI_ID_END      Reserved for system usage except
     *                              RSMLIB range
     *  key < RSM_USER_APP_ID_BASE  segment id = key
     *  key >= RSM_USER_APP_ID_BASE Reserved for KA selections
     *
     * rsm_nextavail_segmentid is initialized to 0x80000000 and
     * overflows to zero after 0x80000000 allocations.
     * An algorithm is needed which allows reinitialization and provides
     * for reallocation after overflow.  For now, ENOMEM is returned
     * once the overflow condition has occurred.
     */
    if (msg->key == 0) {
        mutex_enter(&rsm_lock);
        segment_id = rsm_nextavail_segmentid;
        if (segment_id != 0) {
            rsm_nextavail_segmentid++;
            mutex_exit(&rsm_lock);
        } else {
            mutex_exit(&rsm_lock);
            DBG_PRINTF((category, RSM_ERR,
                "rsm_publish done: no more keys avlbl\n"));
            return (RSMERR_INSUFFICIENT_RESOURCES);
        }
    } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
        /* range reserved for internal use by base/ndi libraries */
        segment_id = msg->key;
    else if (msg->key <= RSM_DLPI_ID_END)
        return (RSMERR_RESERVED_SEGID);
    else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE - 1)
        segment_id = msg->key;
    else {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_publish done: invalid key %u\n", msg->key));
        return (RSMERR_RESERVED_SEGID);
    }

    /* Add key to exportlist; the segment lock is held on success */
    e = rsmexport_add(seg, segment_id);
    if (e) {
        rsmacl_free(acl, acl_len);
        DBG_PRINTF((category, RSM_ERR,
            "rsm_publish done: export_add failed: %d\n", e));
        return (e);
    }

    seg->s_segid = segment_id;

    if ((seg->s_state != RSM_STATE_BIND) &&
        (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
        /* state changed since then, free acl and return */
        rsmseglock_release(seg);
        rsmexport_rm(seg);
        rsmacl_free(acl, acl_len);
        DBG_PRINTF((category, RSM_ERR,
            "rsm_publish done: segment in wrong state: %d\n",
            seg->s_state));
        return (RSMERR_BAD_SEG_HNDL);
    }

    /*
     * If this is for a local memory handle and permissions are zero,
     * then the surrogate segment is very large and we want to skip
     * allocation of DVMA space.
     *
     * Careful! If the user didn't use an ACL list, acl will be a NULL
     * pointer. Check that before dereferencing it.
     */
    if (acl != (rsmapi_access_entry_t *)NULL) {
        if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
            goto skipdriver;
    }

    /* create segment */
    xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
        sdev, 0, NULL, DDI_UMEM_SLEEP);
    ASSERT(xbuf != NULL);

    mem.ms_type = RSM_MEM_BUF;
    mem.ms_bp = xbuf;

    /* This call includes a bind operation */

    adapter = seg->s_adapter;
    /*
     * create an acl list with hwaddr for RSMPI publish
     */
    e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);

    if (e != RSM_SUCCESS) {
        rsmseglock_release(seg);
        rsmexport_rm(seg);
        rsmacl_free(acl, acl_len);
        freerbuf(xbuf);
        DBG_PRINTF((category, RSM_ERR,
            "rsm_publish done: rsmpiacl_create failed: %d\n", e));
        return (e);
    }

    if (seg->s_state == RSM_STATE_BIND) {
        /* create segment */

        /* This call includes a bind operation */

        if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
            create_flags = RSM_ALLOW_UNBIND_REBIND;
        }

        if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
            callback_flag = RSM_RESOURCE_DONTWAIT;
        } else {
            callback_flag = RSM_RESOURCE_SLEEP;
        }

        e = adapter->rsmpi_ops->rsm_seg_create(
            adapter->rsmpi_handle,
            &seg->s_handle.out, seg->s_len,
            create_flags, &mem,
            callback_flag, NULL);
        /*
         * At present there is no dependency on the existence of xbuf.
         * So we can free it here. If in the future this changes, it
         * can be freed sometime during the segment destroy.
         */
        freerbuf(xbuf);

        if (e != RSM_SUCCESS) {
            rsmseglock_release(seg);
            rsmexport_rm(seg);
            rsmacl_free(acl, acl_len);
            rsmpiacl_free(rsmpi_acl, acl_len);
            DBG_PRINTF((category, RSM_ERR,
                "rsm_publish done: export_create failed: %d\n", e));
            /*
             * The following assertion ensures that the two errors
             * related to the length and its alignment do not occur
             * since they have been checked during export_create.
             */
            ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
                e != RSMERR_BAD_LENGTH);
            if (e == RSMERR_NOT_MEM)
                e = RSMERR_INSUFFICIENT_MEM;

            return (e);
        }
        /* export segment, this should create an IMMU mapping */
        e = adapter->rsmpi_ops->rsm_publish(
            seg->s_handle.out,
            rsmpi_acl, acl_len,
            seg->s_segid,
            RSM_RESOURCE_DONTWAIT, NULL);

        if (e != RSM_SUCCESS) {
            adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
            rsmseglock_release(seg);
            rsmexport_rm(seg);
            rsmacl_free(acl, acl_len);
            rsmpiacl_free(rsmpi_acl, acl_len);
            DBG_PRINTF((category, RSM_ERR,
                "rsm_publish done: export_publish failed: %d\n",
                e));
            return (e);
        }
    }

    seg->s_acl_in = rsmpi_acl;

skipdriver:
    /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
    seg->s_acl_len = acl_len;
    seg->s_acl = acl;

    if (seg->s_state == RSM_STATE_BIND) {
        seg->s_state = RSM_STATE_EXPORT;
    } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
        seg->s_state = RSM_STATE_EXPORT_QUIESCED;
        cv_broadcast(&seg->s_cv);
    }

    rsmseglock_release(seg);

    /*
     * If the segment id was solicited, then return it in
     * the original incoming message.
     */
    if (msg->key == 0) {
        msg->key = segment_id;
#ifdef _MULTI_DATAMODEL
        if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
            rsm_ioctlmsg32_t msg32;

            msg32.key = msg->key;
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_publish done\n"));
            return (ddi_copyout((caddr_t)&msg32,
                (caddr_t)dataptr, sizeof (msg32), mode));
        }
#endif
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_publish done\n"));
        return (ddi_copyout((caddr_t)msg,
            (caddr_t)dataptr, sizeof (*msg), mode));
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
    return (DDI_SUCCESS);
}
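
/*
 * The key-to-segment-id policy at the top of rsm_publish() above can
 * be read as one small function. The range constants are the driver's;
 * next_id stands in for rsm_nextavail_segmentid and the function name
 * is hypothetical.
 *
 *    // returns 0 with *idp set on success, -1 for a reserved key,
 *    // -2 when the id counter has wrapped to zero
 *    int
 *    resolve_segid(uint32_t key, uint32_t *next_id, uint32_t *idp)
 *    {
 *        if (key == 0) {                 // kernel agent picks the id
 *            if (*next_id == 0)
 *                return (-2);
 *            *idp = (*next_id)++;
 *            return (0);
 *        }
 *        if (key >= RSM_RSMLIB_ID_BASE && key <= RSM_RSMLIB_ID_END) {
 *            *idp = key;                 // reserved library range
 *            return (0);
 *        }
 *        if (key <= RSM_DLPI_ID_END)
 *            return (-1);                // reserved for the system
 *        if (key < RSM_USER_APP_ID_BASE) {
 *            *idp = key;                 // application-chosen id
 *            return (0);
 *        }
 *        return (-1);                    // reserved for KA selection
 *    }
 */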

/*
 * This function modifies the access control list of an already published
 * segment. There is no effect on import segments which are already
 * connected.
 */
static int
rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
{
    rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
    rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
    int new_acl_len, old_acl_len, tmp_acl_len;
    int e, i;
    adapter_t *adapter;
    int loopback_flag = 0;
    rsm_memseg_id_t key;
    rsm_permission_t permission;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));

    if ((seg->s_state != RSM_STATE_EXPORT) &&
        (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
        (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
        return (RSMERR_SEG_NOT_PUBLISHED);

    if (seg->s_pid != ddi_get_pid() &&
        ddi_get_pid() != 0) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_republish: Not owner\n"));
        return (RSMERR_NOT_CREATOR);
    }

    if (seg->s_adapter == &loopback_adapter)
        loopback_flag = 1;

    /*
     * Build new list first
     */
    e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
    if (e) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_republish done: rsmacl_build failed %d", e));
        return (e);
    }

    /* Lock segment */
    rsmseglock_acquire(seg);
    /*
     * A republish is in progress - the REPUBLISH message is being
     * sent to the importers, so wait for it to complete, OR
     * wait till DR completes.
     */
    while (((seg->s_state == RSM_STATE_EXPORT) &&
        (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
        (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
        (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_republish done: cv_wait INTERRUPTED"));
            rsmseglock_release(seg);
            rsmacl_free(new_acl, new_acl_len);
            return (RSMERR_INTERRUPTED);
        }
    }

    /* recheck if state is valid */
    if (seg->s_state != RSM_STATE_EXPORT) {
        rsmseglock_release(seg);
        rsmacl_free(new_acl, new_acl_len);
        return (RSMERR_SEG_NOT_PUBLISHED);
    }

    key = seg->s_key;
    old_acl = seg->s_acl;
    old_acl_len = seg->s_acl_len;

    seg->s_acl = new_acl;
    seg->s_acl_len = new_acl_len;

    /*
     * This call will only be meaningful if and when the interconnect
     * layer makes use of the access list
     */
    adapter = seg->s_adapter;
    /*
     * create an acl list with hwaddr for RSMPI publish
     */
    e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);

    if (e != RSM_SUCCESS) {
        seg->s_acl = old_acl;
        seg->s_acl_len = old_acl_len;
        rsmseglock_release(seg);
        rsmacl_free(new_acl, new_acl_len);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_republish done: rsmpiacl_create failed %d", e));
        return (e);
    }
    rsmpi_old_acl = seg->s_acl_in;
    seg->s_acl_in = rsmpi_new_acl;

    e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
        seg->s_acl_in, seg->s_acl_len,
        RSM_RESOURCE_DONTWAIT, NULL);

    if (e != RSM_SUCCESS) {
        seg->s_acl = old_acl;
        seg->s_acl_in = rsmpi_old_acl;
        seg->s_acl_len = old_acl_len;
        rsmseglock_release(seg);
        rsmacl_free(new_acl, new_acl_len);
        rsmpiacl_free(rsmpi_new_acl, new_acl_len);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_republish done: rsmpi republish failed %d\n", e));
        return (e);
    }

    /* create a tmp copy of the new acl */
    tmp_acl_len = new_acl_len;
    if (tmp_acl_len > 0) {
        tmp_acl = kmem_zalloc(new_acl_len * sizeof (*tmp_acl), KM_SLEEP);
        for (i = 0; i < tmp_acl_len; i++) {
            tmp_acl[i].ae_node = new_acl[i].ae_node;
            tmp_acl[i].ae_permission = new_acl[i].ae_permission;
        }
        /*
         * The default permission of a node which was in the old
         * ACL but not in the new ACL is 0, i.e. no access.
         */
        permission = 0;
    } else {
        /*
         * NULL acl means all importers can connect and
         * default permission will be owner creation umask
         */
        tmp_acl = NULL;
        permission = seg->s_mode;
    }

    /* make other republishers wait until this republish completes */
    seg->s_flags |= RSM_REPUBLISH_WAIT;

    rsmseglock_release(seg);

    /* send the new perms to the importing nodes */
    rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);

    rsmseglock_acquire(seg);
    seg->s_flags &= ~RSM_REPUBLISH_WAIT;
    /* wake up anyone waiting for republish to complete */
    cv_broadcast(&seg->s_cv);
    rsmseglock_release(seg);

    rsmacl_free(tmp_acl, tmp_acl_len);
    rsmacl_free(old_acl, old_acl_len);
    rsmpiacl_free(rsmpi_old_acl, old_acl_len);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
    return (DDI_SUCCESS);
}

static int
rsm_unpublish(rsmseg_t *seg, int mode)
{
    rsmapi_access_entry_t *acl;
    rsm_access_entry_t *rsmpi_acl;
    int acl_len;
    int e;
    adapter_t *adapter;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));

    if (seg->s_pid != ddi_get_pid() &&
        ddi_get_pid() != 0) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_unpublish: Not creator\n"));
        return (RSMERR_NOT_CREATOR);
    }

    rsmseglock_acquire(seg);
    /*
     * Wait for QUIESCING to complete here before rsmexport_rm is
     * called, because the SUSPEND_COMPLETE message, which changes
     * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
     * signals the cv_wait, needs to find it in the hashtable.
     */
    while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
        ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            rsmseglock_release(seg);
            DBG_PRINTF((category, RSM_ERR,
                "rsm_unpublish done: cv_wait INTR qscing"
                "getv/putv in progress"));
            return (RSMERR_INTERRUPTED);
        }
    }

    /* verify segment state */
    if ((seg->s_state != RSM_STATE_EXPORT) &&
        (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_ERR,
            "rsm_unpublish done: bad state %x\n", seg->s_state));
        return (RSMERR_SEG_NOT_PUBLISHED);
    }

    rsmseglock_release(seg);

    rsmexport_rm(seg);

    rsm_send_importer_disconnects(seg->s_segid, my_nodeid);

    rsmseglock_acquire(seg);
    /*
     * wait for republish to complete
     */
    while ((seg->s_state == RSM_STATE_EXPORT) &&
        (seg->s_flags & RSM_REPUBLISH_WAIT)) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            DBG_PRINTF((category, RSM_ERR,
                "rsm_unpublish done: cv_wait INTR repubing"));
            rsmseglock_release(seg);
            return (RSMERR_INTERRUPTED);
        }
    }

    if ((seg->s_state != RSM_STATE_EXPORT) &&
        (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_unpublish done: invalid state"));
        rsmseglock_release(seg);
        return (RSMERR_SEG_NOT_PUBLISHED);
    }

    /*
     * Check for a putv/getv surrogate segment which was not published
     * to the driver.
     *
     * Be certain to see if there is an ACL first!  If this segment was
     * not published with an ACL, acl will be a null pointer.  Check
     * that before dereferencing it.
     */
    acl = seg->s_acl;
    if (acl != (rsmapi_access_entry_t *)NULL) {
        if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
            goto bypass;
    }

    /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
    if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
        goto bypass;

    adapter = seg->s_adapter;
    for (;;) {
        if (seg->s_state != RSM_STATE_EXPORT) {
            rsmseglock_release(seg);
            DBG_PRINTF((category, RSM_ERR,
                "rsm_unpublish done: bad state %x\n",
                seg->s_state));
            return (RSMERR_SEG_NOT_PUBLISHED);
        }

        /* unpublish from adapter */
        e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);

        if (e == RSM_SUCCESS) {
            break;
        }

        if (e == RSMERR_SEG_IN_USE && mode == 1) {
            /*
             * wait for unpublish to succeed, it's busy.
             */
            seg->s_flags |= RSM_EXPORT_WAIT;

            /* wait for a max of 1 ms - this is an empirical */
            /* value that was found by some minimal testing */
            /* can be fine tuned when we have better numbers */
            /* A long term fix would be to send cv_signal */
            /* from the intr callback routine */
            /* currently nobody signals this wait */
            (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
                drv_usectohz(1000), TR_CLOCK_TICK);

            DBG_PRINTF((category, RSM_ERR,
                "rsm_unpublish: SEG_IN_USE\n"));

            seg->s_flags &= ~RSM_EXPORT_WAIT;
        } else {
            if (mode == 1) {
                DBG_PRINTF((category, RSM_ERR,
                    "rsm:rsmpi unpublish err %x\n", e));
                seg->s_state = RSM_STATE_BIND;
            }
            rsmseglock_release(seg);
            return (e);
        }
    }

    /* Free segment */
    e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);

    if (e != RSM_SUCCESS) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
            seg->s_key, e));
    }

bypass:
    acl = seg->s_acl;
    rsmpi_acl = seg->s_acl_in;
    acl_len = seg->s_acl_len;

    seg->s_acl = NULL;
    seg->s_acl_in = NULL;
    seg->s_acl_len = 0;

    if (seg->s_state == RSM_STATE_EXPORT) {
        seg->s_state = RSM_STATE_BIND;
    } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
        seg->s_state = RSM_STATE_BIND_QUIESCED;
        cv_broadcast(&seg->s_cv);
    }

    rsmseglock_release(seg);

    rsmacl_free(acl, acl_len);
    rsmpiacl_free(rsmpi_acl, acl_len);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));

    return (DDI_SUCCESS);
}
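
/*
 * The SEG_IN_USE loop in rsm_unpublish() above is a bounded-wait
 * retry: when the RSMPI layer reports the segment busy, sleep for
 * about a millisecond on the segment cv (nobody currently signals it,
 * so the timeout does the work) and try again. A generic POSIX model
 * of the same shape, with hypothetical names and a stubbed
 * try_release():
 *
 *    int
 *    retry_until_free(struct res *r)
 *    {
 *        struct timespec ts;
 *        int e;
 *
 *        pthread_mutex_lock(&r->lock);
 *        while ((e = try_release(r)) == EBUSY) {
 *            clock_gettime(CLOCK_REALTIME, &ts);
 *            ts.tv_nsec += 1000000;          // ~1 ms, as in the driver
 *            if (ts.tv_nsec >= 1000000000L) {
 *                ts.tv_sec++;
 *                ts.tv_nsec -= 1000000000L;
 *            }
 *            (void) pthread_cond_timedwait(&r->cv, &r->lock, &ts);
 *        }
 *        pthread_mutex_unlock(&r->lock);
 *        return (e);
 *    }
 */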

/*
 * Called from rsm_unpublish to force an unload and disconnection of all
 * importers of the unpublished segment.
 *
 * First build the list of segments requiring a force disconnect, then
 * send a request for each.
 */
static void
rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
    rsm_node_id_t ex_nodeid)
{
    rsmipc_request_t    request;
    importing_token_t   *prev_token, *token, *tmp_token, *tokp;
    importing_token_t   *force_disconnect_list = NULL;
    int                 index;

    DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
        "rsm_send_importer_disconnects enter\n"));

    index = rsmhash(ex_segid);

    mutex_enter(&importer_list.lock);

    prev_token = NULL;
    token = importer_list.bucket[index];

    while (token != NULL) {
        if (token->key == ex_segid) {
            /*
             * take it off the importer list and add it
             * to the force disconnect list.
             */
            if (prev_token == NULL)
                importer_list.bucket[index] = token->next;
            else
                prev_token->next = token->next;
            tmp_token = token;
            token = token->next;
            if (force_disconnect_list == NULL) {
                force_disconnect_list = tmp_token;
                tmp_token->next = NULL;
            } else {
                tokp = force_disconnect_list;
                /*
                 * make sure that the tmp_token's node
                 * is not already on the force disconnect
                 * list.
                 */
                while (tokp != NULL) {
                    if (tokp->importing_node ==
                        tmp_token->importing_node) {
                        break;
                    }
                    tokp = tokp->next;
                }
                if (tokp == NULL) {
                    tmp_token->next =
                        force_disconnect_list;
                    force_disconnect_list = tmp_token;
                } else {
                    kmem_free((void *)tmp_token,
                        sizeof (*token));
                }
            }

        } else {
            prev_token = token;
            token = token->next;
        }
    }
    mutex_exit(&importer_list.lock);

    token = force_disconnect_list;
    while (token != NULL) {
        if (token->importing_node == my_nodeid) {
            rsm_force_unload(ex_nodeid, ex_segid,
                DISCONNECT);
        } else {
            request.rsmipc_hdr.rsmipc_type =
                RSMIPC_MSG_DISCONNECT;
            request.rsmipc_key = token->key;
            for (;;) {
                if (rsmipc_send(token->importing_node,
                    &request,
                    RSM_NO_REPLY) == RSM_SUCCESS) {
                    break;
                } else {
                    delay(drv_usectohz(10000));
                }
            }
        }
        tmp_token = token;
        token = token->next;
        kmem_free((void *)tmp_token, sizeof (*token));
    }

    DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
        "rsm_send_importer_disconnects done\n"));
}

/*
 * This function is used as a callback for unlocking the pages locked
 * down by a process which then does a fork or an exec.
 * It marks the export segment corresponding to the umem cookie given
 * by *arg to be in a ZOMBIE state (by calling rsmseg_close); the
 * segment is destroyed later when an rsm_close occurs.
 */
static void
rsm_export_force_destroy(ddi_umem_cookie_t *ck)
{
    rsmresource_blk_t *blk;
    rsmresource_t *p;
    rsmseg_t *eseg = NULL;
    int i, j;
    int found = 0;

    DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
        "rsm_export_force_destroy enter\n"));

    /*
     * Walk the resource list and locate the export segment (either
     * in the BIND or the EXPORT state) which corresponds to the
     * ddi_umem_cookie_t being freed up, and call rsmseg_close.
     * Change the state to ZOMBIE by calling rsmseg_close with the
     * force_flag argument (the second argument) set to 1.  Also,
     * unpublish and unbind the segment, but don't free it.  Free it
     * only on an rsm_close call for the segment.
     */
    rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

    for (i = 0; i < rsm_resource.rsmrc_len; i++) {
        blk = rsm_resource.rsmrc_root[i];
        if (blk == NULL) {
            continue;
        }

        for (j = 0; j < RSMRC_BLKSZ; j++) {
            p = blk->rsmrcblk_blks[j];
            if ((p != NULL) && (p != RSMRC_RESERVED) &&
                (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
                eseg = (rsmseg_t *)p;
                if (eseg->s_cookie != ck)
                    continue; /* continue searching */
                /*
                 * Found the segment, set flag to indicate
                 * force destroy processing is in progress
                 */
                rsmseglock_acquire(eseg);
                eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
                rsmseglock_release(eseg);
                found = 1;
                break;
            }
        }

        if (found)
            break;
    }

    rw_exit(&rsm_resource.rsmrc_lock);

    if (found) {
        ASSERT(eseg != NULL);
        /* call rsmseg_close with force flag set to 1 */
        rsmseg_close(eseg, 1);
        /*
         * force destroy processing done, clear flag and signal any
         * thread waiting in rsmseg_close.
         */
        rsmseglock_acquire(eseg);
        eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
        cv_broadcast(&eseg->s_cv);
        rsmseglock_release(eseg);
    }

    DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
        "rsm_export_force_destroy done\n"));
}

/* ******************************* Remote Calls *********************** */
static void
rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
{
    rsmipc_reply_t reply;
    DBG_DEFINE(category,
        RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_intr_segconnect enter\n"));

    reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);

    reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
    reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;

    (void) rsmipc_send(src, NULL, &reply);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_intr_segconnect done\n"));
}


/*
 * When an exported segment is unpublished the exporter sends an ipc
 * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
 * calls this function.  The import list is scanned; segments which match the
 * exported segment id are unloaded and disconnected.
 *
 * Will also be called from rsm_rebind with disconnect_flag FALSE.
 *
 */
static void
rsm_force_unload(rsm_node_id_t src_nodeid,
    rsm_memseg_id_t ex_segid,
    boolean_t disconnect_flag)

{
    rsmresource_t   *p = NULL;
    rsmhash_table_t *rhash = &rsm_import_segs;
    uint_t          index;
    DBG_DEFINE(category,
        RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));

    index = rsmhash(ex_segid);

    rw_enter(&rhash->rsmhash_rw, RW_READER);

    p = rsmhash_getbkt(rhash, index);

    for (; p; p = p->rsmrc_next) {
        rsmseg_t *seg = (rsmseg_t *)p;
        if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
            /*
             * In order to make rsmseg_unload and rsm_force_unload
             * thread safe, acquire the segment lock here.
             * rsmseg_unload is responsible for releasing the lock.
             * rsmseg_unload releases the lock just before a call
             * to rsmipc_send or in case of an early exit which
             * occurs if the segment was in the state
             * RSM_STATE_CONNECTING or RSM_STATE_NEW.
             */
            rsmseglock_acquire(seg);
            if (disconnect_flag)
                seg->s_flags |= RSM_FORCE_DISCONNECT;
            rsmseg_unload(seg);
        }
    }
    rw_exit(&rhash->rsmhash_rw);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
}

static void
rsm_intr_reply(rsmipc_msghdr_t *msg)
{
    /*
     * Find slot for cookie in reply.
     * Match sequence with sequence in cookie.
     * If no match, return.
     * Try to grab the lock of the slot; if locked, return.
     * Copy data into the reply slot area.
     * Signal the waiter.
     */
    rsmipc_slot_t   *slot;
    rsmipc_cookie_t *cookie;
    void *data = (void *) msg;
    size_t size = sizeof (rsmipc_reply_t);
    DBG_DEFINE(category,
        RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));

    cookie = &msg->rsmipc_cookie;
    if (cookie->ic.index >= RSMIPC_SZ) {
        DBG_PRINTF((category, RSM_ERR,
            "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
        return;
    }

    ASSERT(cookie->ic.index < RSMIPC_SZ);
    slot = &rsm_ipc.slots[cookie->ic.index];
    mutex_enter(&slot->rsmipc_lock);
    if (slot->rsmipc_cookie.value == cookie->value) {
        /* found a match */
        if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
            bcopy(data, slot->rsmipc_data, size);
            RSMIPC_CLEAR(slot, RSMIPC_PENDING);
            cv_signal(&slot->rsmipc_cv);
        }
    } else {
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm: rsm_intr_reply mismatched reply %d\n",
            cookie->ic.index));
    }
    mutex_exit(&slot->rsmipc_lock);
    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
}
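
/*
 * The reply path above works because the cookie carries both a slot
 * index and a sequence component: the index locates the slot in O(1),
 * and the full-cookie compare rejects a stale reply whose slot has
 * since been reused by a newer request. In outline (hypothetical
 * types loosely mirroring rsmipc_slot_t):
 *
 *    void
 *    deliver_reply(struct slot *slots, unsigned nslots,
 *        const struct cookie *ck, const void *data, size_t len)
 *    {
 *        struct slot *s;
 *
 *        if (ck->index >= nslots)
 *            return;                     // malformed cookie
 *        s = &slots[ck->index];
 *        pthread_mutex_lock(&s->lock);
 *        if (s->cookie_value == ck->value && s->pending) {
 *            memcpy(s->data, data, len); // stash the reply
 *            s->pending = 0;
 *            pthread_cond_signal(&s->cv);
 *        }
 *        pthread_mutex_unlock(&s->lock);
 *    }
 */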

/*
 * This function gets dispatched on the worker thread when we receive
 * the SQREADY message. This function sends the SQREADY_ACK message.
 */
static void
rsm_sqready_ack_deferred(void *arg)
{
    path_t  *path = (path_t *)arg;
    DBG_DEFINE(category,
        RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_sqready_ack_deferred enter\n"));

    mutex_enter(&path->mutex);

    /*
     * If the path is not active there is no point in sending the ACK,
     * because the whole SQREADY protocol will start again when the
     * path becomes active.
     */
    if (path->state != RSMKA_PATH_ACTIVE) {
        /*
         * decrement the path refcnt incremented in rsm_proc_sqready
         */
        PATH_RELE_NOLOCK(path);
        mutex_exit(&path->mutex);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_sqready_ack_deferred done:!ACTIVE\n"));
        return;
    }

    /* send an SQREADY_ACK message */
    (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);

    /* initialize credits to the max level */
    path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;

    /* wake up any send that is waiting for credits */
    cv_broadcast(&path->sendq_token.sendq_cv);

    /*
     * decrement the path refcnt since we incremented it in
     * rsm_proc_sqready
     */
    PATH_RELE_NOLOCK(path);

    mutex_exit(&path->mutex);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_sqready_ack_deferred done\n"));
}

/*
 * Process the SQREADY message
 */
static void
rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
    rsm_intr_hand_arg_t arg)
{
    rsmipc_msghdr_t     *msghdr = (rsmipc_msghdr_t *)msg;
    srv_handler_arg_t   *hdlr_argp = (srv_handler_arg_t *)arg;
    path_t              *path;
    DBG_DEFINE(category,
        RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));

    /* look up the path - incr the path refcnt */
    path = rsm_find_path(hdlr_argp->adapter_name,
        hdlr_argp->adapter_instance, src_hwaddr);

    /*
     * No path exists or path is not active - drop the message
     */
    if (path == NULL) {
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_proc_sqready done: msg dropped no path\n"));
        return;
    }

    mutex_exit(&path->mutex);

    /* drain any tasks from the previous incarnation */
    taskq_wait(path->recv_taskq);

    mutex_enter(&path->mutex);
    /*
     * If we had sent an SQREADY message and were waiting for an
     * SQREADY_ACK, and in the meanwhile we received an SQREADY
     * message, blindly reset the WAIT_FOR_SQACK flag because we'll
     * just send SQREADY_ACK and forget about the SQREADY that we
     * sent.
     */
    path->flags &= ~RSMKA_WAIT_FOR_SQACK;

    if (path->state != RSMKA_PATH_ACTIVE) {
        /* decr refcnt and drop the mutex */
        PATH_RELE_NOLOCK(path);
        mutex_exit(&path->mutex);
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
        return;
    }

    DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
        " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));

    /*
     * The sender's local incarnation number is our remote incarnation
     * number; save it in the path data structure.
     */
    path->remote_incn = msg->rsmipc_local_incn;
    path->sendq_token.msgbuf_avail = 0;
    path->procmsg_cnt = 0;

    /*
     * The path is active - dispatch a task to send SQREADY_ACK -
     * remember that RSMPI calls can't be done in interrupt context.
     *
     * We can use the recv_taskq to send because the remote endpoint
     * cannot start sending messages till it receives SQREADY_ACK,
     * hence at this point there are no tasks on recv_taskq.
     *
     * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3857 */ 3858 (void) taskq_dispatch(path->recv_taskq, 3859 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3860 3861 mutex_exit(&path->mutex); 3862 3863 3864 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3865 } 3866 3867 /* 3868 * Process the SQREADY_ACK message 3869 */ 3870 static void 3871 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3872 rsm_intr_hand_arg_t arg) 3873 { 3874 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3875 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3876 path_t *path; 3877 DBG_DEFINE(category, 3878 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3879 3880 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3881 "rsm_proc_sqready_ack enter\n")); 3882 3883 /* look up the path - incr the path refcnt */ 3884 path = rsm_find_path(hdlr_argp->adapter_name, 3885 hdlr_argp->adapter_instance, src_hwaddr); 3886 3887 /* 3888 * drop the message if no path exists, the path is not active, 3889 * or it is not waiting for an SQREADY_ACK message 3890 */ 3891 if (path == NULL) { 3892 DBG_PRINTF((category, RSM_DEBUG, 3893 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3894 return; 3895 } 3896 3897 if ((path->state != RSMKA_PATH_ACTIVE) || 3898 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3899 /* decrement the refcnt */ 3900 PATH_RELE_NOLOCK(path); 3901 mutex_exit(&path->mutex); 3902 DBG_PRINTF((category, RSM_DEBUG, 3903 "rsm_proc_sqready_ack done: msg dropped\n")); 3904 return; 3905 } 3906 3907 /* 3908 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3909 * sent; if not, drop it. 3910 */ 3911 if (path->local_incn != msghdr->rsmipc_incn) { 3912 /* decrement the refcnt */ 3913 PATH_RELE_NOLOCK(path); 3914 mutex_exit(&path->mutex); 3915 DBG_PRINTF((category, RSM_DEBUG, 3916 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3917 msghdr->rsmipc_incn)); 3918 return; 3919 } 3920 3921 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3922 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3923 3924 /* 3925 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3926 */ 3927 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3928 3929 /* save the remote sendq incn number */ 3930 path->remote_incn = msg->rsmipc_local_incn; 3931 3932 /* initialize credits to the max level */ 3933 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3934 3935 /* wake up any send that is waiting for credits */ 3936 cv_broadcast(&path->sendq_token.sendq_cv); 3937 3938 /* decrement the refcnt */ 3939 PATH_RELE_NOLOCK(path); 3940 3941 mutex_exit(&path->mutex); 3942 3943 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3944 "rsm_proc_sqready_ack done\n")); 3945 } 3946 3947 /* 3948 * process the RSMIPC_MSG_CREDIT message 3949 */ 3950 static void 3951 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3952 rsm_intr_hand_arg_t arg) 3953 { 3954 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3955 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3956 path_t *path; 3957 DBG_DEFINE(category, 3958 RSM_KERNEL_AGENT | RSM_FUNC_ALL | 3959 RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3960 3961 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3962 3963 /* look up the path - incr the path refcnt */ 3964 path = rsm_find_path(hdlr_argp->adapter_name, 3965 hdlr_argp->adapter_instance, src_hwaddr); 3966 3967 if (path == NULL) { 3968 DBG_PRINTF((category, RSM_DEBUG, 3969 "rsm_add_credits enter: path not found\n")); 3970 return; 3971 } 3972 3973 /* the path is not active - discard credits */ 3974 if (path->state !=
RSMKA_PATH_ACTIVE) { 3975 PATH_RELE_NOLOCK(path); 3976 mutex_exit(&path->mutex); 3977 DBG_PRINTF((category, RSM_DEBUG, 3978 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3979 return; 3980 } 3981 3982 /* 3983 * Check if these credits are for current incarnation of the path. 3984 */ 3985 if (path->local_incn != msghdr->rsmipc_incn) { 3986 /* decrement the refcnt */ 3987 PATH_RELE_NOLOCK(path); 3988 mutex_exit(&path->mutex); 3989 DBG_PRINTF((category, RSM_DEBUG, 3990 "rsm_add_credits enter: old incn %lld\n", 3991 msghdr->rsmipc_incn)); 3992 return; 3993 } 3994 3995 DBG_PRINTF((category, RSM_DEBUG, 3996 "rsm_add_credits:path=%lx new-creds=%d " 3997 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 3998 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 3999 src_hwaddr)); 4000 4001 4002 /* add credits to the path's sendq */ 4003 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4004 4005 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4006 4007 /* wake up any send that is waiting for credits */ 4008 cv_broadcast(&path->sendq_token.sendq_cv); 4009 4010 /* decrement the refcnt */ 4011 PATH_RELE_NOLOCK(path); 4012 4013 mutex_exit(&path->mutex); 4014 4015 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4016 } 4017 4018 static void 4019 rsm_intr_event(rsmipc_request_t *msg) 4020 { 4021 rsmseg_t *seg; 4022 rsmresource_t *p; 4023 rsm_node_id_t src_node; 4024 DBG_DEFINE(category, 4025 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4026 4027 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4028 4029 src_node = msg->rsmipc_hdr.rsmipc_src; 4030 4031 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4032 /* This is for an import segment */ 4033 uint_t hashval = rsmhash(msg->rsmipc_key); 4034 4035 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4036 4037 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4038 4039 for (; p; p = p->rsmrc_next) { 4040 if ((p->rsmrc_key == msg->rsmipc_key) && 4041 (p->rsmrc_node == src_node)) { 4042 seg = (rsmseg_t *)p; 4043 rsmseglock_acquire(seg); 4044 4045 atomic_add_32(&seg->s_pollevent, 1); 4046 4047 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4048 pollwakeup(&seg->s_poll, POLLRDNORM); 4049 4050 rsmseglock_release(seg); 4051 } 4052 } 4053 4054 rw_exit(&rsm_import_segs.rsmhash_rw); 4055 } else { 4056 /* This is for an export segment */ 4057 seg = rsmexport_lookup(msg->rsmipc_key); 4058 if (!seg) { 4059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4060 "rsm_intr_event done: exp seg not found\n")); 4061 return; 4062 } 4063 4064 ASSERT(rsmseglock_held(seg)); 4065 4066 atomic_add_32(&seg->s_pollevent, 1); 4067 4068 /* 4069 * We must hold the segment lock here, or else the segment 4070 * can be freed while pollwakeup is using it. This implies 4071 * that we MUST NOT grab the segment lock during rsm_chpoll, 4072 * as outlined in the chpoll(9E) man page. 4073 */ 4074 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4075 pollwakeup(&seg->s_poll, POLLRDNORM); 4076 4077 rsmseglock_release(seg); 4078 } 4079 4080 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4081 } 4082 4083 /* 4084 * The exporter did a republish and changed the ACL - this change is only 4085 * visible to new importers.
4086 */ 4087 static void 4088 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4089 rsm_permission_t perm) 4090 { 4091 4092 rsmresource_t *p; 4093 rsmseg_t *seg; 4094 uint_t hashval = rsmhash(key); 4095 DBG_DEFINE(category, 4096 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4097 4098 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4099 4100 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4101 4102 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4103 4104 for (; p; p = p->rsmrc_next) { 4105 /* 4106 * find the importer and update the permission in the shared 4107 * data structure. Any new importers will use the new perms 4108 */ 4109 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4110 seg = (rsmseg_t *)p; 4111 4112 rsmseglock_acquire(seg); 4113 rsmsharelock_acquire(seg); 4114 seg->s_share->rsmsi_mode = perm; 4115 rsmsharelock_release(seg); 4116 rsmseglock_release(seg); 4117 4118 break; 4119 } 4120 } 4121 4122 rw_exit(&rsm_import_segs.rsmhash_rw); 4123 4124 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4125 } 4126 4127 void 4128 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4129 { 4130 int done = 1; /* indicate all SUSPENDS have been acked */ 4131 list_element_t *elem; 4132 DBG_DEFINE(category, 4133 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4134 4135 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4136 "rsm_suspend_complete enter\n")); 4137 4138 mutex_enter(&rsm_suspend_list.list_lock); 4139 4140 if (rsm_suspend_list.list_head == NULL) { 4141 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4142 "rsm_suspend_complete done: suspend_list is empty\n")); 4143 mutex_exit(&rsm_suspend_list.list_lock); 4144 return; 4145 } 4146 4147 elem = rsm_suspend_list.list_head; 4148 while (elem != NULL) { 4149 if (elem->nodeid == src_node) { 4150 /* clear the pending flag for the node */ 4151 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4152 elem->flags |= flag; 4153 } 4154 4155 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4156 done = 0; /* still some nodes have not yet ACKED */ 4157 4158 elem = elem->next; 4159 } 4160 4161 mutex_exit(&rsm_suspend_list.list_lock); 4162 4163 if (!done) { 4164 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4165 "rsm_suspend_complete done: acks pending\n")); 4166 return; 4167 } 4168 /* 4169 * Now that we are done with suspending all the remote importers, 4170 * it is time to quiesce the local exporters 4171 */ 4172 exporter_quiesce(); 4173 4174 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4175 "rsm_suspend_complete done\n")); 4176 } 4177 4178 static void 4179 exporter_quiesce() 4180 { 4181 int i, e; 4182 rsmresource_t *current; 4183 rsmseg_t *seg; 4184 adapter_t *adapter; 4185 DBG_DEFINE(category, 4186 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4187 4188 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4189 /* 4190 * The importers have sent SUSPEND_COMPLETE to the exporter node. 4191 * Unpublish and unbind the export segments, and 4192 * move them to the EXPORT_QUIESCED state 4193 */ 4194 4195 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4196 4197 for (i = 0; i < rsm_hash_size; i++) { 4198 current = rsm_export_segs.bucket[i]; 4199 while (current != NULL) { 4200 seg = (rsmseg_t *)current; 4201 rsmseglock_acquire(seg); 4202 if (current->rsmrc_state == 4203 RSM_STATE_EXPORT_QUIESCING) { 4204 adapter = seg->s_adapter; 4205 /* 4206 * some local memory handles are not published; 4207 * check whether this one was published 4208 */ 4209 if ((seg->s_acl == NULL) || 4210
(seg->s_acl[0].ae_node != my_nodeid) || 4211 (seg->s_acl[0].ae_permission != 0)) { 4212 4213 e = adapter->rsmpi_ops->rsm_unpublish( 4214 seg->s_handle.out); 4215 DBG_PRINTF((category, RSM_DEBUG, 4216 "exporter_quiesce:unpub %d\n", e)); 4217 4218 e = adapter->rsmpi_ops->rsm_seg_destroy( 4219 seg->s_handle.out); 4220 4221 DBG_PRINTF((category, RSM_DEBUG, 4222 "exporter_quiesce:destroy %d\n", 4223 e)); 4224 } 4225 4226 (void) rsm_unbind_pages(seg); 4227 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4228 cv_broadcast(&seg->s_cv); 4229 } 4230 rsmseglock_release(seg); 4231 current = current->rsmrc_next; 4232 } 4233 } 4234 rw_exit(&rsm_export_segs.rsmhash_rw); 4235 4236 /* 4237 * We are done with the pre-del processing for all the local segments 4238 * - time to move to PREDEL_COMPLETED. 4239 */ 4240 4241 mutex_enter(&rsm_drv_data.drv_lock); 4242 4243 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4244 4245 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4246 4247 cv_broadcast(&rsm_drv_data.drv_cv); 4248 4249 mutex_exit(&rsm_drv_data.drv_lock); 4250 4251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4252 } 4253 4254 static void 4255 importer_suspend(rsm_node_id_t src_node) 4256 { 4257 int i; 4258 int susp_flg; /* true means already suspended */ 4259 int num_importers; 4260 rsmresource_t *p = NULL, *curp; 4261 rsmhash_table_t *rhash = &rsm_import_segs; 4262 rsmseg_t *seg; 4263 rsmipc_request_t request; 4264 DBG_DEFINE(category, 4265 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4266 4267 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4268 4269 rw_enter(&rhash->rsmhash_rw, RW_READER); 4270 for (i = 0; i < rsm_hash_size; i++) { 4271 p = rhash->bucket[i]; 4272 4273 /* 4274 * Suspend all importers with same <node, key> pair. 4275 * After the last one of the shared importers has been 4276 * suspended - suspend the shared mappings/connection. 4277 */ 4278 for (; p; p = p->rsmrc_next) { 4279 rsmseg_t *first = (rsmseg_t *)p; 4280 if ((first->s_node != src_node) || 4281 (first->s_state == RSM_STATE_DISCONNECT)) 4282 continue; /* go to next entry */ 4283 /* 4284 * search the rest of the bucket for 4285 * other siblings (importers with the same key) 4286 * of "first" and suspend them. 4287 * All importers with same key fall in 4288 * the same bucket. 4289 */ 4290 num_importers = 0; 4291 for (curp = p; curp; curp = curp->rsmrc_next) { 4292 seg = (rsmseg_t *)curp; 4293 4294 rsmseglock_acquire(seg); 4295 4296 if ((seg->s_node != first->s_node) || 4297 (seg->s_key != first->s_key) || 4298 (seg->s_state == RSM_STATE_DISCONNECT)) { 4299 /* 4300 * either not a peer segment or it's a 4301 * disconnected segment - skip it 4302 */ 4303 rsmseglock_release(seg); 4304 continue; 4305 } 4306 4307 rsmseg_suspend(seg, &susp_flg); 4308 4309 if (susp_flg) { /* seg already suspended */ 4310 rsmseglock_release(seg); 4311 break; /* the inner for loop */ 4312 } 4313 4314 num_importers++; 4315 rsmsharelock_acquire(seg); 4316 /* 4317 * we've processed all importers that are 4318 * siblings of "first" 4319 */ 4320 if (num_importers == 4321 seg->s_share->rsmsi_refcnt) { 4322 rsmsharelock_release(seg); 4323 rsmseglock_release(seg); 4324 break; 4325 } 4326 rsmsharelock_release(seg); 4327 rsmseglock_release(seg); 4328 } 4329 4330 /* 4331 * All the importers with the same key and 4332 * nodeid as "first" have been suspended. 4333 * Now suspend the shared connect/mapping. 4334 * This is done only once.
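*
* A small worked example of the walk above (node and count values are
* illustrative only): say three local segments s1, s2 and s3 have all
* imported <node N, key K>. They hash to the same bucket and share one
* rsm_import_share_t with rsmsi_refcnt == 3. The inner loop calls
* rsmseg_suspend() on each sibling; once num_importers reaches the
* shared refcnt the loop breaks, and rsmsegshare_suspend() below then
* quiesces the shared connection exactly once.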
4335 */ 4336 if (!susp_flg) { 4337 rsmsegshare_suspend(seg); 4338 } 4339 } 4340 } 4341 4342 rw_exit(&rhash->rsmhash_rw); 4343 4344 /* send an ACK for SUSPEND message */ 4345 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4346 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4347 4348 4349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4350 4351 } 4352 4353 static void 4354 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4355 { 4356 int recheck_state; 4357 rsmcookie_t *hdl; 4358 DBG_DEFINE(category, 4359 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4360 4361 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4362 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4363 4364 *susp_flg = 0; 4365 4366 ASSERT(rsmseglock_held(seg)); 4367 /* wait if putv/getv is in progress */ 4368 while (seg->s_rdmacnt > 0) 4369 cv_wait(&seg->s_cv, &seg->s_lock); 4370 4371 do { 4372 recheck_state = 0; 4373 4374 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4375 "rsmseg_suspend:segment %x state=%d\n", 4376 seg->s_key, seg->s_state)); 4377 4378 switch (seg->s_state) { 4379 case RSM_STATE_NEW: 4380 /* not a valid state */ 4381 break; 4382 case RSM_STATE_CONNECTING: 4383 seg->s_state = RSM_STATE_ABORT_CONNECT; 4384 break; 4385 case RSM_STATE_ABORT_CONNECT: 4386 break; 4387 case RSM_STATE_CONNECT: 4388 seg->s_handle.in = NULL; 4389 seg->s_state = RSM_STATE_CONN_QUIESCE; 4390 break; 4391 case RSM_STATE_MAPPING: 4392 /* wait until segment leaves the mapping state */ 4393 while (seg->s_state == RSM_STATE_MAPPING) 4394 cv_wait(&seg->s_cv, &seg->s_lock); 4395 recheck_state = 1; 4396 break; 4397 case RSM_STATE_ACTIVE: 4398 /* unload the mappings */ 4399 if (seg->s_ckl != NULL) { 4400 hdl = seg->s_ckl; 4401 for (; hdl != NULL; hdl = hdl->c_next) { 4402 (void) devmap_unload(hdl->c_dhp, 4403 hdl->c_off, hdl->c_len); 4404 } 4405 } 4406 seg->s_mapinfo = NULL; 4407 seg->s_state = RSM_STATE_MAP_QUIESCE; 4408 break; 4409 case RSM_STATE_CONN_QUIESCE: 4410 /* FALLTHRU */ 4411 case RSM_STATE_MAP_QUIESCE: 4412 /* rsmseg_suspend already done for seg */ 4413 *susp_flg = 1; 4414 break; 4415 case RSM_STATE_DISCONNECT: 4416 break; 4417 default: 4418 ASSERT(0); /* invalid state */ 4419 } 4420 } while (recheck_state); 4421 4422 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4423 } 4424 4425 static void 4426 rsmsegshare_suspend(rsmseg_t *seg) 4427 { 4428 int e; 4429 adapter_t *adapter; 4430 rsm_import_share_t *sharedp; 4431 DBG_DEFINE(category, 4432 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4433 4434 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4435 "rsmsegshare_suspend enter\n")); 4436 4437 rsmseglock_acquire(seg); 4438 rsmsharelock_acquire(seg); 4439 4440 sharedp = seg->s_share; 4441 adapter = seg->s_adapter; 4442 switch (sharedp->rsmsi_state) { 4443 case RSMSI_STATE_NEW: 4444 break; 4445 case RSMSI_STATE_CONNECTING: 4446 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4447 break; 4448 case RSMSI_STATE_ABORT_CONNECT: 4449 break; 4450 case RSMSI_STATE_CONNECTED: 4451 /* do the rsmpi disconnect */ 4452 if (sharedp->rsmsi_node != my_nodeid) { 4453 e = adapter->rsmpi_ops-> 4454 rsm_disconnect(sharedp->rsmsi_handle); 4455 4456 DBG_PRINTF((category, RSM_DEBUG, 4457 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4458 sharedp->rsmsi_segid, e)); 4459 } 4460 4461 sharedp->rsmsi_handle = NULL; 4462 4463 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4464 break; 4465 case RSMSI_STATE_CONN_QUIESCE: 4466 break; 4467 case RSMSI_STATE_MAPPED: 4468 /* do the rsmpi unmap and disconnect */ 4469 if 
(sharedp->rsmsi_node != my_nodeid) { 4470 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4471 4472 DBG_PRINTF((category, RSM_DEBUG, 4473 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4474 4475 e = adapter->rsmpi_ops-> 4476 rsm_disconnect(sharedp->rsmsi_handle); 4477 DBG_PRINTF((category, RSM_DEBUG, 4478 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4479 sharedp->rsmsi_segid, e)); 4480 } 4481 4482 sharedp->rsmsi_handle = NULL; 4483 4484 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4485 break; 4486 case RSMSI_STATE_MAP_QUIESCE: 4487 break; 4488 case RSMSI_STATE_DISCONNECTED: 4489 break; 4490 default: 4491 ASSERT(0); /* invalid state */ 4492 } 4493 4494 rsmsharelock_release(seg); 4495 rsmseglock_release(seg); 4496 4497 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4498 "rsmsegshare_suspend done\n")); 4499 } 4500 4501 /* 4502 * This should get called on receiving a RESUME message or from 4503 * the pathmanager if the node undergoing DR dies. 4504 */ 4505 static void 4506 importer_resume(rsm_node_id_t src_node) 4507 { 4508 int i; 4509 rsmresource_t *p = NULL; 4510 rsmhash_table_t *rhash = &rsm_import_segs; 4511 void *cookie; 4512 DBG_DEFINE(category, 4513 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4514 4515 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4516 4517 rw_enter(&rhash->rsmhash_rw, RW_READER); 4518 4519 for (i = 0; i < rsm_hash_size; i++) { 4520 p = rhash->bucket[i]; 4521 4522 for (; p; p = p->rsmrc_next) { 4523 rsmseg_t *seg = (rsmseg_t *)p; 4524 4525 rsmseglock_acquire(seg); 4526 4527 /* process only importers of node undergoing DR */ 4528 if (seg->s_node != src_node) { 4529 rsmseglock_release(seg); 4530 continue; 4531 } 4532 4533 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4534 rsmipc_request_t request; 4535 /* 4536 * rsmpi map/connect failed 4537 * inform the exporter so that it can 4538 * remove the importer.
4539 */ 4540 request.rsmipc_hdr.rsmipc_type = 4541 RSMIPC_MSG_NOTIMPORTING; 4542 request.rsmipc_key = seg->s_segid; 4543 request.rsmipc_segment_cookie = cookie; 4544 rsmseglock_release(seg); 4545 (void) rsmipc_send(seg->s_node, &request, 4546 RSM_NO_REPLY); 4547 } else { 4548 rsmseglock_release(seg); 4549 } 4550 } 4551 } 4552 4553 rw_exit(&rhash->rsmhash_rw); 4554 4555 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4556 } 4557 4558 static int 4559 rsmseg_resume(rsmseg_t *seg, void **cookie) 4560 { 4561 int e; 4562 int retc; 4563 off_t dev_offset; 4564 size_t maplen; 4565 uint_t maxprot; 4566 rsm_mapinfo_t *p; 4567 rsmcookie_t *hdl; 4568 rsm_import_share_t *sharedp; 4569 DBG_DEFINE(category, 4570 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4571 4572 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4573 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4574 4575 *cookie = NULL; 4576 4577 ASSERT(rsmseglock_held(seg)); 4578 4579 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4580 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4581 return (RSM_SUCCESS); 4582 } 4583 4584 sharedp = seg->s_share; 4585 4586 rsmsharelock_acquire(seg); 4587 4588 /* resume the shared connection and/or mapping */ 4589 retc = rsmsegshare_resume(seg); 4590 4591 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4592 /* shared state can either be connected or mapped */ 4593 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4594 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4595 ASSERT(retc == RSM_SUCCESS); 4596 seg->s_handle.in = sharedp->rsmsi_handle; 4597 rsmsharelock_release(seg); 4598 seg->s_state = RSM_STATE_CONNECT; 4599 4600 } else { /* error in rsmpi connect during resume */ 4601 seg->s_handle.in = NULL; 4602 seg->s_state = RSM_STATE_DISCONNECT; 4603 4604 sharedp->rsmsi_refcnt--; 4605 *cookie = (void *)sharedp->rsmsi_cookie; 4606 4607 if (sharedp->rsmsi_refcnt == 0) { 4608 ASSERT(sharedp->rsmsi_mapcnt == 0); 4609 rsmsharelock_release(seg); 4610 4611 /* clean up the shared data structure */ 4612 mutex_destroy(&sharedp->rsmsi_lock); 4613 cv_destroy(&sharedp->rsmsi_cv); 4614 kmem_free((void *)(sharedp), 4615 sizeof (rsm_import_share_t)); 4616 4617 } else { 4618 rsmsharelock_release(seg); 4619 } 4620 /* 4621 * The following needs to be done after any 4622 * rsmsharelock calls which use seg->s_share.
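*
* This is the standard last-reference teardown idiom; in outline (a
* sketch of the code above, with the locking ordering made explicit):
*
*	sharedp->rsmsi_refcnt--;
*	if (sharedp->rsmsi_refcnt == 0) {
*		rsmsharelock_release(seg);	lock lives inside sharedp
*		mutex_destroy(&sharedp->rsmsi_lock);
*		cv_destroy(&sharedp->rsmsi_cv);
*		kmem_free(sharedp, sizeof (rsm_import_share_t));
*	} else {
*		rsmsharelock_release(seg);
*	}
*	seg->s_share = NULL;	safe only once the lock is not needed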
4623 */ 4624 seg->s_share = NULL; 4625 } 4626 4627 /* signal any waiting segment */ 4628 cv_broadcast(&seg->s_cv); 4629 4630 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4631 "rsmseg_resume done:state=%d\n", seg->s_state)); 4632 return (retc); 4633 } 4634 4635 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4636 4637 /* Setup protections for remap */ 4638 maxprot = PROT_USER; 4639 if (seg->s_mode & RSM_PERM_READ) { 4640 maxprot |= PROT_READ; 4641 } 4642 if (seg->s_mode & RSM_PERM_WRITE) { 4643 maxprot |= PROT_WRITE; 4644 } 4645 4646 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4647 /* error in rsmpi connect or map during resume */ 4648 4649 /* remap to trash page */ 4650 ASSERT(seg->s_ckl != NULL); 4651 4652 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4653 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4654 remap_cookie, hdl->c_off, hdl->c_len, 4655 maxprot, 0, NULL); 4656 4657 DBG_PRINTF((category, RSM_ERR, 4658 "rsmseg_resume:remap=%d\n", e)); 4659 } 4660 4661 seg->s_handle.in = NULL; 4662 seg->s_state = RSM_STATE_DISCONNECT; 4663 4664 sharedp->rsmsi_refcnt--; 4665 4666 sharedp->rsmsi_mapcnt--; 4667 seg->s_mapinfo = NULL; 4668 4669 if (sharedp->rsmsi_refcnt == 0) { 4670 ASSERT(sharedp->rsmsi_mapcnt == 0); 4671 rsmsharelock_release(seg); 4672 4673 /* clean up the shared data structure */ 4674 mutex_destroy(&sharedp->rsmsi_lock); 4675 cv_destroy(&sharedp->rsmsi_cv); 4676 kmem_free((void *)(sharedp), 4677 sizeof (rsm_import_share_t)); 4678 4679 } else { 4680 rsmsharelock_release(seg); 4681 } 4682 /* 4683 * The following needs to be done after any 4684 * rsmsharelock calls which use seg->s_share. 4685 */ 4686 seg->s_share = NULL; 4687 4688 /* signal any waiting segment */ 4689 cv_broadcast(&seg->s_cv); 4690 4691 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4692 "rsmseg_resume done:seg=%x,err=%d\n", 4693 seg->s_key, retc)); 4694 return (retc); 4695 4696 } 4697 4698 seg->s_handle.in = sharedp->rsmsi_handle; 4699 4700 if (seg->s_node == my_nodeid) { /* loopback */ 4701 ASSERT(seg->s_mapinfo == NULL); 4702 4703 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4704 e = devmap_umem_remap(hdl->c_dhp, 4705 rsm_dip, seg->s_cookie, 4706 hdl->c_off, hdl->c_len, 4707 maxprot, 0, NULL); 4708 4709 DBG_PRINTF((category, RSM_ERR, 4710 "rsmseg_resume:remap=%d\n", e)); 4711 } 4712 } else { /* remote exporter */ 4713 /* remap to the new rsmpi maps */ 4714 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4715 4716 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4717 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4718 &dev_offset, &maplen); 4719 e = devmap_devmem_remap(hdl->c_dhp, 4720 p->dip, p->dev_register, dev_offset, 4721 maplen, maxprot, 0, NULL); 4722 4723 DBG_PRINTF((category, RSM_ERR, 4724 "rsmseg_resume:remap=%d\n", e)); 4725 } 4726 } 4727 4728 rsmsharelock_release(seg); 4729 4730 seg->s_state = RSM_STATE_ACTIVE; 4731 cv_broadcast(&seg->s_cv); 4732 4733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4734 4735 return (retc); 4736 } 4737 4738 static int 4739 rsmsegshare_resume(rsmseg_t *seg) 4740 { 4741 int e = RSM_SUCCESS; 4742 adapter_t *adapter; 4743 rsm_import_share_t *sharedp; 4744 DBG_DEFINE(category, 4745 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4746 4747 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4748 4749 ASSERT(rsmseglock_held(seg)); 4750 ASSERT(rsmsharelock_held(seg)); 4751 4752 sharedp = seg->s_share; 4753 4754 /* 4755 * If we are not in a xxxx_QUIESCE state that means shared 4756 * connect/mapping processing has 
already been done 4757 * so return success. 4758 */ 4759 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4760 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4761 return (RSM_SUCCESS); 4762 } 4763 4764 adapter = seg->s_adapter; 4765 4766 if (sharedp->rsmsi_node != my_nodeid) { 4767 rsm_addr_t hwaddr; 4768 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4769 4770 e = adapter->rsmpi_ops->rsm_connect( 4771 adapter->rsmpi_handle, hwaddr, 4772 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4773 4774 DBG_PRINTF((category, RSM_DEBUG, 4775 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4776 sharedp->rsmsi_segid, e)); 4777 4778 if (e != RSM_SUCCESS) { 4779 /* when do we send the NOT_IMPORTING message */ 4780 sharedp->rsmsi_handle = NULL; 4781 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4782 /* signal any waiting segment */ 4783 cv_broadcast(&sharedp->rsmsi_cv); 4784 return (e); 4785 } 4786 } 4787 4788 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4789 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4790 /* signal any waiting segment */ 4791 cv_broadcast(&sharedp->rsmsi_cv); 4792 return (e); 4793 } 4794 4795 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4796 4797 /* do the rsmpi map of the whole segment here */ 4798 if (sharedp->rsmsi_node != my_nodeid) { 4799 size_t mapped_len; 4800 rsm_mapinfo_t *p; 4801 4802 /* 4803 * We need to do rsmpi maps with <off, lens> identical to 4804 * the old mapinfo list because the segment mapping handles 4805 * (dhp and such) need the fragmentation of rsmpi maps to be 4806 * identical to what it was during the mmap of the segment 4807 */ 4808 p = sharedp->rsmsi_mapinfo; 4809 4810 while (p != NULL) { 4811 mapped_len = 0; 4812 4813 e = adapter->rsmpi_ops->rsm_map( 4814 sharedp->rsmsi_handle, p->start_offset, 4815 p->individual_len, &mapped_len, 4816 &p->dip, &p->dev_register, &p->dev_offset, 4817 NULL, NULL); 4818 4819 if (e != 0) { 4820 DBG_PRINTF((category, RSM_ERR, 4821 "rsmsegshare_resume: rsmpi map err=%d\n", 4822 e)); 4823 break; 4824 } 4825 4826 if (mapped_len != p->individual_len) { 4827 DBG_PRINTF((category, RSM_ERR, 4828 "rsmsegshare_resume: rsmpi maplen" 4829 "< reqlen=%lx\n", mapped_len)); 4830 e = RSMERR_BAD_LENGTH; 4831 break; 4832 } 4833 4834 p = p->next; 4835 4836 } 4837 4838 4839 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4840 int err; 4841 /* Check if this is the first rsm_map */ 4842 if (p != sharedp->rsmsi_mapinfo) { 4843 /* 4844 * A single rsm_unmap undoes multiple rsm_maps.
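*
* For illustration (fragment sizes invented): if the original mmap
* produced mapinfo fragments <0, 64K>, <64K, 64K> and <128K, 32K>, the
* loop above re-issues rsm_map with exactly those offsets and lengths.
* If, say, the second rsm_map fails, p no longer points at the head of
* rsmsi_mapinfo, and the single rsm_unmap below then tears down every
* fragment that had been mapped before the failure.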
4845 */ 4846 (void) seg->s_adapter->rsmpi_ops-> 4847 rsm_unmap(sharedp->rsmsi_handle); 4848 } 4849 4850 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4851 sharedp->rsmsi_mapinfo = NULL; 4852 4853 err = adapter->rsmpi_ops-> 4854 rsm_disconnect(sharedp->rsmsi_handle); 4855 4856 DBG_PRINTF((category, RSM_DEBUG, 4857 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4858 sharedp->rsmsi_segid, err)); 4859 4860 sharedp->rsmsi_handle = NULL; 4861 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4862 4863 /* signal the waiting segments */ 4864 cv_broadcast(&sharedp->rsmsi_cv); 4865 DBG_PRINTF((category, RSM_DEBUG, 4866 "rsmsegshare_resume done: rsmpi map err\n")); 4867 return (e); 4868 } 4869 } 4870 4871 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4872 4873 /* signal any waiting segment */ 4874 cv_broadcast(&sharedp->rsmsi_cv); 4875 4876 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4877 4878 return (e); 4879 } 4880 4881 /* 4882 * this is the routine that gets called by recv_taskq which is the 4883 * thread that processes messages that are flow-controlled. 4884 */ 4885 static void 4886 rsm_intr_proc_deferred(void *arg) 4887 { 4888 path_t *path = (path_t *)arg; 4889 rsmipc_request_t *msg; 4890 rsmipc_msghdr_t *msghdr; 4891 rsm_node_id_t src_node; 4892 msgbuf_elem_t *head; 4893 int e; 4894 DBG_DEFINE(category, 4895 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4896 4897 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4898 "rsm_intr_proc_deferred enter\n")); 4899 4900 mutex_enter(&path->mutex); 4901 4902 /* use the head of the msgbuf_queue */ 4903 head = rsmka_gethead_msgbuf(path); 4904 4905 mutex_exit(&path->mutex); 4906 4907 msg = (rsmipc_request_t *)&(head->msg); 4908 msghdr = (rsmipc_msghdr_t *)msg; 4909 4910 src_node = msghdr->rsmipc_src; 4911 4912 /* 4913 * messages that need to send a reply should check the message version 4914 * before processing the message. And all messages that need to 4915 * send a reply should be processed here by the worker thread. 
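*
* For example (a restatement of the convention below, not new
* protocol): an RSMIPC_MSG_SEGCONNECT whose rsmipc_version differs
* from RSM_VERSION is not processed; instead an RSMIPC_MSG_REPLY with
* rsmipc_status RSMERR_BAD_DRIVER_VERSION and the requester's cookie
* echoed back is sent, so that the sender's slot wait can complete.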
4916 */ 4917 switch (msghdr->rsmipc_type) { 4918 case RSMIPC_MSG_SEGCONNECT: 4919 if (msghdr->rsmipc_version != RSM_VERSION) { 4920 rsmipc_reply_t reply; 4921 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4922 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4923 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4924 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4925 } else { 4926 rsm_intr_segconnect(src_node, msg); 4927 } 4928 break; 4929 case RSMIPC_MSG_DISCONNECT: 4930 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4931 break; 4932 case RSMIPC_MSG_SUSPEND: 4933 importer_suspend(src_node); 4934 break; 4935 case RSMIPC_MSG_SUSPEND_DONE: 4936 rsm_suspend_complete(src_node, 0); 4937 break; 4938 case RSMIPC_MSG_RESUME: 4939 importer_resume(src_node); 4940 break; 4941 default: 4942 ASSERT(0); 4943 } 4944 4945 mutex_enter(&path->mutex); 4946 4947 rsmka_dequeue_msgbuf(path); 4948 4949 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4950 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4951 path->procmsg_cnt++; 4952 4953 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4954 4955 /* No need to send credits if path is going down */ 4956 if ((path->state == RSMKA_PATH_ACTIVE) && 4957 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4958 /* 4959 * send credits and reset procmsg_cnt if success otherwise 4960 * credits will be sent after processing the next message 4961 */ 4962 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4963 if (e == 0) 4964 path->procmsg_cnt = 0; 4965 else 4966 DBG_PRINTF((category, RSM_ERR, 4967 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4968 } 4969 4970 /* 4971 * decrement the path refcnt since we incremented it in 4972 * rsm_intr_callback_dispatch 4973 */ 4974 PATH_RELE_NOLOCK(path); 4975 4976 mutex_exit(&path->mutex); 4977 4978 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4979 "rsm_intr_proc_deferred done\n")); 4980 } 4981 4982 /* 4983 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4984 */ 4985 static void 4986 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4987 rsm_intr_hand_arg_t arg) 4988 { 4989 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4990 path_t *path; 4991 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4992 DBG_DEFINE(category, 4993 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4994 4995 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4996 "rsm_intr_callback_dispatch enter\n")); 4997 ASSERT(data && hdlr_argp); 4998 4999 /* look up the path - incr the path refcnt */ 5000 path = rsm_find_path(hdlr_argp->adapter_name, 5001 hdlr_argp->adapter_instance, src_hwaddr); 5002 5003 /* the path has been removed - drop this message */ 5004 if (path == NULL) { 5005 DBG_PRINTF((category, RSM_DEBUG, 5006 "rsm_intr_callback_dispatch done: msg dropped\n")); 5007 return; 5008 } 5009 /* the path is not active - don't accept new messages */ 5010 if (path->state != RSMKA_PATH_ACTIVE) { 5011 PATH_RELE_NOLOCK(path); 5012 mutex_exit(&path->mutex); 5013 DBG_PRINTF((category, RSM_DEBUG, 5014 "rsm_intr_callback_dispatch done: msg dropped" 5015 " path=%lx !ACTIVE\n", path)); 5016 return; 5017 } 5018 5019 /* 5020 * Check if this message was sent to an older incarnation 5021 * of the path/sendq. 
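*
* A concrete (made-up) scenario: local_incn was 5 when the remote node
* captured it from our SQREADY, the path then bounced and local_incn
* became 6. A message still in flight carries rsmipc_incn == 5 and is
* dropped below, so a stale incarnation of the sendq can never feed
* messages into the new one.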
5022 */ 5023 if (path->local_incn != msghdr->rsmipc_incn) { 5024 /* decrement the refcnt */ 5025 PATH_RELE_NOLOCK(path); 5026 mutex_exit(&path->mutex); 5027 DBG_PRINTF((category, RSM_DEBUG, 5028 "rsm_intr_callback_dispatch done: old incn %lld\n", 5029 msghdr->rsmipc_incn)); 5030 return; 5031 } 5032 5033 /* copy and enqueue msg on the path's msgbuf queue */ 5034 rsmka_enqueue_msgbuf(path, data); 5035 5036 /* 5037 * schedule task to process messages - ignore retval from 5038 * taskq_dispatch because the sender cannot send more than 5039 * what the receiver can handle. 5040 */ 5041 (void) taskq_dispatch(path->recv_taskq, 5042 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5043 5044 mutex_exit(&path->mutex); 5045 5046 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5047 "rsm_intr_callback_dispatch done\n")); 5048 } 5049 5050 /* 5051 * This procedure is called from rsm_srv_func when a remote node creates 5052 * a send queue. This event is used as a hint that an earlier failed 5053 * attempt to create a send queue to that remote node may now succeed and 5054 * should be retried. Indication of an earlier failed attempt is provided 5055 * by the RSMKA_SQCREATE_PENDING flag. 5056 */ 5057 static void 5058 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5059 { 5060 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5061 path_t *path; 5062 DBG_DEFINE(category, 5063 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5064 5065 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5066 "rsm_sqcreateop_callback enter\n")); 5067 5068 /* look up the path - incr the path refcnt */ 5069 path = rsm_find_path(hdlr_argp->adapter_name, 5070 hdlr_argp->adapter_instance, src_hwaddr); 5071 5072 if (path == NULL) { 5073 DBG_PRINTF((category, RSM_DEBUG, 5074 "rsm_sqcreateop_callback done: no path\n")); 5075 return; 5076 } 5077 5078 if ((path->state == RSMKA_PATH_UP) && 5079 (path->flags & RSMKA_SQCREATE_PENDING)) { 5080 /* 5081 * previous attempt to create sendq had failed, retry 5082 * it and move to RSMKA_PATH_ACTIVE state if successful. 5083 * the refcnt will be decremented in the do_deferred_work 5084 */ 5085 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5086 } else { 5087 /* decrement the refcnt */ 5088 PATH_RELE_NOLOCK(path); 5089 } 5090 mutex_exit(&path->mutex); 5091 5092 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5093 "rsm_sqcreateop_callback done\n")); 5094 } 5095 5096 static void 5097 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5098 { 5099 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5100 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5101 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5102 rsm_node_id_t src_node; 5103 DBG_DEFINE(category, 5104 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5105 5106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5107 "src=%d, type=%d\n", msghdr->rsmipc_src, 5108 msghdr->rsmipc_type)); 5109 5110 /* 5111 * Check for the version number in the msg header. If it is not 5112 * RSM_VERSION, drop the message. In the future, we need to manage 5113 * incompatible version numbers in some way 5114 */ 5115 if (msghdr->rsmipc_version != RSM_VERSION) { 5116 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5117 /* 5118 * Drop requests that don't have a reply right here. 5119 * Requests with a reply will send a BAD_VERSION reply 5120 * when they get processed by the worker thread.
5121 */ 5122 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5123 return; 5124 } 5125 5126 } 5127 5128 src_node = msghdr->rsmipc_src; 5129 5130 switch (msghdr->rsmipc_type) { 5131 case RSMIPC_MSG_SEGCONNECT: 5132 case RSMIPC_MSG_DISCONNECT: 5133 case RSMIPC_MSG_SUSPEND: 5134 case RSMIPC_MSG_SUSPEND_DONE: 5135 case RSMIPC_MSG_RESUME: 5136 /* 5137 * These message types are handled by a worker thread using 5138 * the flow-control algorithm. 5139 * Any message processing that does one or more of the 5140 * following should be handled in a worker thread. 5141 * - allocates resources and might sleep 5142 * - makes RSMPI calls down to the interconnect driver 5143 * this by defn include requests with reply. 5144 * - takes a long duration of time 5145 */ 5146 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5147 break; 5148 case RSMIPC_MSG_NOTIMPORTING: 5149 importer_list_rm(src_node, msg->rsmipc_key, 5150 msg->rsmipc_segment_cookie); 5151 break; 5152 case RSMIPC_MSG_SQREADY: 5153 rsm_proc_sqready(data, src_hwaddr, arg); 5154 break; 5155 case RSMIPC_MSG_SQREADY_ACK: 5156 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5157 break; 5158 case RSMIPC_MSG_CREDIT: 5159 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5160 break; 5161 case RSMIPC_MSG_REPLY: 5162 rsm_intr_reply(msghdr); 5163 break; 5164 case RSMIPC_MSG_BELL: 5165 rsm_intr_event(msg); 5166 break; 5167 case RSMIPC_MSG_IMPORTING: 5168 importer_list_add(src_node, msg->rsmipc_key, 5169 msg->rsmipc_adapter_hwaddr, 5170 msg->rsmipc_segment_cookie); 5171 break; 5172 case RSMIPC_MSG_REPUBLISH: 5173 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5174 break; 5175 default: 5176 DBG_PRINTF((category, RSM_DEBUG, 5177 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5178 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5179 } 5180 5181 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5182 5183 } 5184 5185 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5186 rsm_intr_q_op_t opcode, rsm_addr_t src, 5187 void *data, size_t size, rsm_intr_hand_arg_t arg) 5188 { 5189 DBG_DEFINE(category, 5190 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5191 5192 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5193 5194 switch (opcode) { 5195 case RSM_INTR_Q_OP_CREATE: 5196 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5197 rsm_sqcreateop_callback(src, arg); 5198 break; 5199 case RSM_INTR_Q_OP_DESTROY: 5200 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5201 break; 5202 case RSM_INTR_Q_OP_RECEIVE: 5203 rsm_intr_callback(data, src, arg); 5204 break; 5205 default: 5206 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5207 "rsm_srv_func: unknown opcode = %x\n", opcode)); 5208 } 5209 5210 chd = chd; 5211 size = size; 5212 5213 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5214 5215 return (RSM_INTR_HAND_CLAIMED); 5216 } 5217 5218 /* *************************** IPC slots ************************* */ 5219 static rsmipc_slot_t * 5220 rsmipc_alloc() 5221 { 5222 int i; 5223 rsmipc_slot_t *slot; 5224 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5225 5226 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5227 5228 /* try to find a free slot, if not wait */ 5229 mutex_enter(&rsm_ipc.lock); 5230 5231 while (rsm_ipc.count == 0) { 5232 rsm_ipc.wanted = 1; 5233 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5234 } 5235 5236 /* An empty slot is available, find it */ 5237 slot = &rsm_ipc.slots[0]; 5238 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 
5239 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5240 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5241 break; 5242 } 5243 } 5244 5245 ASSERT(i < RSMIPC_SZ); 5246 rsm_ipc.count--; /* one less is available */ 5247 rsm_ipc.sequence++; /* new sequence */ 5248 5249 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5250 slot->rsmipc_cookie.ic.index = (uint_t)i; 5251 5252 mutex_exit(&rsm_ipc.lock); 5253 5254 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5255 5256 return (slot); 5257 } 5258 5259 static void 5260 rsmipc_free(rsmipc_slot_t *slot) 5261 { 5262 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5263 5264 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5265 5266 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5267 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5268 5269 mutex_enter(&rsm_ipc.lock); 5270 5271 RSMIPC_SET(slot, RSMIPC_FREE); 5272 5273 slot->rsmipc_cookie.ic.sequence = 0; 5274 5275 mutex_exit(&slot->rsmipc_lock); 5276 rsm_ipc.count++; 5277 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5278 if (rsm_ipc.wanted) { 5279 rsm_ipc.wanted = 0; 5280 cv_broadcast(&rsm_ipc.cv); 5281 } 5282 5283 mutex_exit(&rsm_ipc.lock); 5284 5285 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n")); 5286 } 5287 5288 static int 5289 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply) 5290 { 5291 int e = 0; 5292 int credit_check = 0; 5293 int retry_cnt = 0; 5294 int min_retry_cnt = 10; 5295 rsm_send_t is; 5296 rsmipc_slot_t *rslot; 5297 adapter_t *adapter; 5298 path_t *path; 5299 sendq_token_t *sendq_token; 5300 sendq_token_t *used_sendq_token = NULL; 5301 rsm_send_q_handle_t ipc_handle; 5302 DBG_DEFINE(category, 5303 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5304 5305 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d", 5306 dest)); 5307 5308 /* 5309 * Check if this is a local case 5310 */ 5311 if (dest == my_nodeid) { 5312 switch (req->rsmipc_hdr.rsmipc_type) { 5313 case RSMIPC_MSG_SEGCONNECT: 5314 reply->rsmipc_status = (short)rsmsegacl_validate( 5315 req, dest, reply); 5316 break; 5317 case RSMIPC_MSG_BELL: 5318 req->rsmipc_hdr.rsmipc_src = dest; 5319 rsm_intr_event(req); 5320 break; 5321 case RSMIPC_MSG_IMPORTING: 5322 importer_list_add(dest, req->rsmipc_key, 5323 req->rsmipc_adapter_hwaddr, 5324 req->rsmipc_segment_cookie); 5325 break; 5326 case RSMIPC_MSG_NOTIMPORTING: 5327 importer_list_rm(dest, req->rsmipc_key, 5328 req->rsmipc_segment_cookie); 5329 break; 5330 case RSMIPC_MSG_REPUBLISH: 5331 importer_update(dest, req->rsmipc_key, 5332 req->rsmipc_perm); 5333 break; 5334 case RSMIPC_MSG_SUSPEND: 5335 importer_suspend(dest); 5336 break; 5337 case RSMIPC_MSG_SUSPEND_DONE: 5338 rsm_suspend_complete(dest, 0); 5339 break; 5340 case RSMIPC_MSG_RESUME: 5341 importer_resume(dest); 5342 break; 5343 default: 5344 ASSERT(0); 5345 } 5346 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5347 "rsmipc_send done\n")); 5348 return (0); 5349 } 5350 5351 if (dest >= MAX_NODES) { 5352 DBG_PRINTF((category, RSM_ERR, 5353 "rsm: rsmipc_send bad node number %x\n", dest)); 5354 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5355 } 5356 5357 /* 5358 * Oh boy! we are going remote. 
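*
* For orientation, the remote request/reply machinery implemented
* below works roughly like this (an informal sketch of this function
* together with rsm_intr_reply, not new logic):
*
*	rslot = rsmipc_alloc();		stamps a fresh <sequence,index>
*					cookie into the slot
*	RSMIPC_SET(rslot, RSMIPC_PENDING);
*	req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
*	rsm_send(...);			remote side echoes the cookie
*	cv_reltimedwait_sig(...);	5 sec timeout, then retry
*
* rsm_intr_reply() matches the echoed cookie against the slot, copies
* the reply in, clears RSMIPC_PENDING and signals the slot's cv, after
* which the sender frees the slot with rsmipc_free().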
5359 */ 5360 5361 /* 5362 * identify if we need to have credits to send this message 5363 * - only selected requests are flow controlled 5364 */ 5365 if (req != NULL) { 5366 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5367 "rsmipc_send:request type=%d\n", 5368 req->rsmipc_hdr.rsmipc_type)); 5369 5370 switch (req->rsmipc_hdr.rsmipc_type) { 5371 case RSMIPC_MSG_SEGCONNECT: 5372 case RSMIPC_MSG_DISCONNECT: 5373 case RSMIPC_MSG_IMPORTING: 5374 case RSMIPC_MSG_SUSPEND: 5375 case RSMIPC_MSG_SUSPEND_DONE: 5376 case RSMIPC_MSG_RESUME: 5377 credit_check = 1; 5378 break; 5379 default: 5380 credit_check = 0; 5381 } 5382 } 5383 5384 again: 5385 if (retry_cnt++ == min_retry_cnt) { 5386 /* back off for 10ms before further retries */ 5387 delay(drv_usectohz(10000)); 5388 retry_cnt = 0; /* reset retry_cnt */ 5389 } 5390 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token); 5391 if (sendq_token == NULL) { 5392 DBG_PRINTF((category, RSM_ERR, 5393 "rsm: rsmipc_send no device to reach node %d\n", dest)); 5394 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5395 } 5396 5397 if ((sendq_token == used_sendq_token) && 5398 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) || 5399 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) { 5400 rele_sendq_token(sendq_token); 5401 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e)); 5402 return (RSMERR_CONN_ABORTED); 5403 } else 5404 used_sendq_token = sendq_token; 5405 5406 /* lint -save -e413 */ 5407 path = SQ_TOKEN_TO_PATH(sendq_token); 5408 adapter = path->local_adapter; 5409 /* lint -restore */ 5410 ipc_handle = sendq_token->rsmpi_sendq_handle; 5411 5412 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5413 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle)); 5414 5415 if (reply == NULL) { 5416 /* Send request without ack */ 5417 /* 5418 * Set the rsmipc_version number in the msghdr for KA 5419 * communication versioning 5420 */ 5421 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5422 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5423 /* 5424 * remote endpoints incn should match the value in our 5425 * path's remote_incn field. No need to grab any lock 5426 * since we have refcnted the path in rsmka_get_sendq_token 5427 */ 5428 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5429 5430 is.is_data = (void *)req; 5431 is.is_size = sizeof (*req); 5432 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5433 is.is_wait = 0; 5434 5435 if (credit_check) { 5436 mutex_enter(&path->mutex); 5437 /* 5438 * wait till we recv credits or path goes down. If path 5439 * goes down rsm_send will fail and we handle the error 5440 * then. 5441 */ 5442 while ((sendq_token->msgbuf_avail == 0) && 5443 (path->state == RSMKA_PATH_ACTIVE)) { 5444 e = cv_wait_sig(&sendq_token->sendq_cv, 5445 &path->mutex); 5446 if (e == 0) { 5447 mutex_exit(&path->mutex); 5448 no_reply_cnt++; 5449 rele_sendq_token(sendq_token); 5450 DBG_PRINTF((category, RSM_DEBUG, 5451 "rsmipc_send done: " 5452 "cv_wait INTERRUPTED")); 5453 return (RSMERR_INTERRUPTED); 5454 } 5455 } 5456 5457 /* 5458 * path is not active, retry on another path.
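*
* Note the retry policy in effect throughout this function: every
* min_retry_cnt (10) consecutive passes through the again: label add a
* 10ms backoff (drv_usectohz(10000)), and a retry that comes back to
* the same sendq token after a connection-level error gives up with
* RSMERR_CONN_ABORTED rather than looping forever.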
5459 */ 5460 if (path->state != RSMKA_PATH_ACTIVE) { 5461 mutex_exit(&path->mutex); 5462 rele_sendq_token(sendq_token); 5463 e = RSMERR_CONN_ABORTED; 5464 DBG_PRINTF((category, RSM_ERR, 5465 "rsm: rsmipc_send: path !ACTIVE")); 5466 goto again; 5467 } 5468 5469 ASSERT(sendq_token->msgbuf_avail > 0); 5470 5471 /* 5472 * reserve a msgbuf 5473 */ 5474 sendq_token->msgbuf_avail--; 5475 5476 mutex_exit(&path->mutex); 5477 5478 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5479 NULL); 5480 5481 if (e != RSM_SUCCESS) { 5482 mutex_enter(&path->mutex); 5483 /* 5484 * release the reserved msgbuf since 5485 * the send failed 5486 */ 5487 sendq_token->msgbuf_avail++; 5488 cv_broadcast(&sendq_token->sendq_cv); 5489 mutex_exit(&path->mutex); 5490 } 5491 } else 5492 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5493 NULL); 5494 5495 no_reply_cnt++; 5496 rele_sendq_token(sendq_token); 5497 if (e != RSM_SUCCESS) { 5498 DBG_PRINTF((category, RSM_ERR, 5499 "rsm: rsmipc_send no reply send" 5500 " err = %d no reply count = %d\n", 5501 e, no_reply_cnt)); 5502 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5503 e != RSMERR_BAD_BARRIER_HNDL); 5504 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5505 goto again; 5506 } else { 5507 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5508 "rsmipc_send done\n")); 5509 return (e); 5510 } 5511 5512 } 5513 5514 if (req == NULL) { 5515 /* Send reply - No flow control is done for reply */ 5516 /* 5517 * Set the version in the msg header for KA communication 5518 * versioning 5519 */ 5520 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5521 reply->rsmipc_hdr.rsmipc_src = my_nodeid; 5522 /* incn number is not used for reply msgs currently */ 5523 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5524 5525 is.is_data = (void *)reply; 5526 is.is_size = sizeof (*reply); 5527 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5528 is.is_wait = 0; 5529 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5530 rele_sendq_token(sendq_token); 5531 if (e != RSM_SUCCESS) { 5532 DBG_PRINTF((category, RSM_ERR, 5533 "rsm: rsmipc_send reply send" 5534 " err = %d\n", e)); 5535 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5536 goto again; 5537 } else { 5538 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5539 "rsmipc_send done\n")); 5540 return (e); 5541 } 5542 } 5543 5544 /* Reply needed */ 5545 rslot = rsmipc_alloc(); /* allocate a new ipc slot */ 5546 5547 mutex_enter(&rslot->rsmipc_lock); 5548 5549 rslot->rsmipc_data = (void *)reply; 5550 RSMIPC_SET(rslot, RSMIPC_PENDING); 5551 5552 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) { 5553 /* 5554 * Set the rsmipc_version number in the msghdr for KA 5555 * communication versioning 5556 */ 5557 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5558 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5559 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie; 5560 /* 5561 * remote endpoints incn should match the value in our 5562 * path's remote_incn field. No need to grab any lock 5563 * since we have refcnted the path in rsmka_get_sendq_token 5564 */ 5565 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5566 5567 is.is_data = (void *)req; 5568 is.is_size = sizeof (*req); 5569 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5570 is.is_wait = 0; 5571 if (credit_check) { 5572 5573 mutex_enter(&path->mutex); 5574 /* 5575 * wait till we recv credits or path goes down. If path 5576 * goes down rsm_send will fail and we handle the error 5577 * then. 
5578 */ 5579 while ((sendq_token->msgbuf_avail == 0) && 5580 (path->state == RSMKA_PATH_ACTIVE)) { 5581 e = cv_wait_sig(&sendq_token->sendq_cv, 5582 &path->mutex); 5583 if (e == 0) { 5584 mutex_exit(&path->mutex); 5585 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5586 rsmipc_free(rslot); 5587 rele_sendq_token(sendq_token); 5588 DBG_PRINTF((category, RSM_DEBUG, 5589 "rsmipc_send done: " 5590 "cv_wait INTERRUPTED")); 5591 return (RSMERR_INTERRUPTED); 5592 } 5593 } 5594 5595 /* 5596 * path is not active, retry on another path. 5597 */ 5598 if (path->state != RSMKA_PATH_ACTIVE) { 5599 mutex_exit(&path->mutex); 5600 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5601 rsmipc_free(rslot); 5602 rele_sendq_token(sendq_token); 5603 e = RSMERR_CONN_ABORTED; 5604 DBG_PRINTF((category, RSM_ERR, 5605 "rsm: rsmipc_send: path !ACTIVE")); 5606 goto again; 5607 } 5608 5609 ASSERT(sendq_token->msgbuf_avail > 0); 5610 5611 /* 5612 * reserve a msgbuf 5613 */ 5614 sendq_token->msgbuf_avail--; 5615 5616 mutex_exit(&path->mutex); 5617 5618 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5619 NULL); 5620 5621 if (e != RSM_SUCCESS) { 5622 mutex_enter(&path->mutex); 5623 /* 5624 * release the reserved msgbuf since 5625 * the send failed 5626 */ 5627 sendq_token->msgbuf_avail++; 5628 cv_broadcast(&sendq_token->sendq_cv); 5629 mutex_exit(&path->mutex); 5630 } 5631 } else 5632 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5633 NULL); 5634 5635 if (e != RSM_SUCCESS) { 5636 DBG_PRINTF((category, RSM_ERR, 5637 "rsm: rsmipc_send rsmpi send err = %d\n", e)); 5638 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5639 rsmipc_free(rslot); 5640 rele_sendq_token(sendq_token); 5641 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5642 goto again; 5643 } 5644 5645 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */ 5646 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock, 5647 drv_usectohz(5000000), TR_CLOCK_TICK); 5648 if (e < 0) { 5649 /* timed out - retry */ 5650 e = RSMERR_TIMEOUT; 5651 } else if (e == 0) { 5652 /* signalled - return error */ 5653 e = RSMERR_INTERRUPTED; 5654 break; 5655 } else { 5656 e = RSM_SUCCESS; 5657 } 5658 } 5659 5660 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5661 rsmipc_free(rslot); 5662 rele_sendq_token(sendq_token); 5663 5664 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e)); 5665 return (e); 5666 } 5667 5668 static int 5669 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie) 5670 { 5671 rsmipc_request_t request; 5672 5673 /* 5674 * inform the exporter to delete this importer 5675 */ 5676 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 5677 request.rsmipc_key = segid; 5678 request.rsmipc_segment_cookie = cookie; 5679 return (rsmipc_send(dest, &request, RSM_NO_REPLY)); 5680 } 5681 5682 static void 5683 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl, 5684 int acl_len, rsm_permission_t default_permission) 5685 { 5686 int i; 5687 importing_token_t *token; 5688 rsmipc_request_t request; 5689 republish_token_t *republish_list = NULL; 5690 republish_token_t *rp; 5691 rsm_permission_t permission; 5692 int index; 5693 5694 /* 5695 * send the new access mode to all the nodes that have imported 5696 * this segment. 5697 * If the new acl does not have a node that was present in 5698 * the old acl, an access permission of 0 is sent.
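*
* A worked example (nodes and modes are hypothetical): old acl
* { node 2: RSM_PERM_RDWR, node 3: RSM_PERM_READ }, new acl
* { node 2: RSM_PERM_READ }. The importer on node 2 receives a
* republish message carrying RSM_PERM_READ, while the importer on
* node 3, no longer in the acl, receives the default permission -
* 0 in the case described above - revoking its access for any
* future mapping.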
5699 */ 5700 5701 index = rsmhash(segid); 5702 5703 /* 5704 * create a list of node/permissions to send the republish message 5705 */ 5706 mutex_enter(&importer_list.lock); 5707 5708 token = importer_list.bucket[index]; 5709 while (token != NULL) { 5710 if (segid == token->key) { 5711 permission = default_permission; 5712 5713 for (i = 0; i < acl_len; i++) { 5714 if (token->importing_node == acl[i].ae_node) { 5715 permission = acl[i].ae_permission; 5716 break; 5717 } 5718 } 5719 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP); 5720 5721 rp->key = segid; 5722 rp->importing_node = token->importing_node; 5723 rp->permission = permission; 5724 rp->next = republish_list; 5725 republish_list = rp; 5726 } 5727 token = token->next; 5728 } 5729 5730 mutex_exit(&importer_list.lock); 5731 5732 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH; 5733 request.rsmipc_key = segid; 5734 5735 while (republish_list != NULL) { 5736 request.rsmipc_perm = republish_list->permission; 5737 (void) rsmipc_send(republish_list->importing_node, 5738 &request, RSM_NO_REPLY); 5739 rp = republish_list; 5740 republish_list = republish_list->next; 5741 kmem_free(rp, sizeof (republish_token_t)); 5742 } 5743 } 5744 5745 static void 5746 rsm_send_suspend() 5747 { 5748 int i, e; 5749 rsmipc_request_t request; 5750 list_element_t *tokp; 5751 list_element_t *head = NULL; 5752 importing_token_t *token; 5753 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5754 "rsm_send_suspend enter\n")); 5755 5756 /* 5757 * create a list of nodes to send the suspend message to 5758 * 5759 * Currently the whole importer list is scanned and we obtain 5760 * all the nodes - this basically gets all nodes that import 5761 * at least one segment from the local node. 5762 * 5763 * no need to grab the rsm_suspend_list lock here since we are 5764 * single threaded when suspend is called. 5765 */ 5766 5767 mutex_enter(&importer_list.lock); 5768 for (i = 0; i < rsm_hash_size; i++) { 5769 5770 token = importer_list.bucket[i]; 5771 5772 while (token != NULL) { 5773 5774 tokp = head; 5775 5776 /* 5777 * make sure that the token's node 5778 * is not already on the suspend list 5779 */ 5780 while (tokp != NULL) { 5781 if (tokp->nodeid == token->importing_node) { 5782 break; 5783 } 5784 tokp = tokp->next; 5785 } 5786 5787 if (tokp == NULL) { /* not in suspend list */ 5788 tokp = kmem_zalloc(sizeof (list_element_t), 5789 KM_SLEEP); 5790 tokp->nodeid = token->importing_node; 5791 tokp->next = head; 5792 head = tokp; 5793 } 5794 5795 token = token->next; 5796 } 5797 } 5798 mutex_exit(&importer_list.lock); 5799 5800 if (head == NULL) { /* no importers so go ahead and quiesce segments */ 5801 exporter_quiesce(); 5802 return; 5803 } 5804 5805 mutex_enter(&rsm_suspend_list.list_lock); 5806 ASSERT(rsm_suspend_list.list_head == NULL); 5807 /* 5808 * update the suspend list right away so that if a node dies the 5809 * pathmanager can set the NODE dead flag 5810 */ 5811 rsm_suspend_list.list_head = head; 5812 mutex_exit(&rsm_suspend_list.list_lock); 5813 5814 tokp = head; 5815 5816 while (tokp != NULL) { 5817 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND; 5818 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY); 5819 /* 5820 * Error in rsmipc_send currently happens due to inaccessibility 5821 * of the remote node.
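*
* The list built above then drives the ack accounting: an element is
* marked RSM_SUSPEND_ACKPENDING only if its SUSPEND message was sent
* successfully, rsm_suspend_complete() clears that flag as
* SUSPEND_DONE messages arrive, and exporter_quiesce() runs once no
* element still has ACKPENDING set. (A summary of the surrounding
* code, for orientation.)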
5822 */ 5823 if (e == RSM_SUCCESS) { /* send succeeded - wait for the ack */ 5824 tokp->flags |= RSM_SUSPEND_ACKPENDING; 5825 } 5826 5827 tokp = tokp->next; 5828 } 5829 5830 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5831 "rsm_send_suspend done\n")); 5832 5833 } 5834 5835 static void 5836 rsm_send_resume() 5837 { 5838 rsmipc_request_t request; 5839 list_element_t *elem, *head; 5840 5841 /* 5842 * save the suspend list so that we know where to send 5843 * the resume messages and make the suspend list head 5844 * NULL. 5845 */ 5846 mutex_enter(&rsm_suspend_list.list_lock); 5847 head = rsm_suspend_list.list_head; 5848 rsm_suspend_list.list_head = NULL; 5849 mutex_exit(&rsm_suspend_list.list_lock); 5850 5851 while (head != NULL) { 5852 elem = head; 5853 head = head->next; 5854 5855 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME; 5856 5857 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY); 5858 5859 kmem_free((void *)elem, sizeof (list_element_t)); 5860 5861 } 5862 5863 } 5864 5865 /* 5866 * This function takes a path and sends a message using the sendq 5867 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK 5868 * and RSMIPC_MSG_CREDIT are sent using this function. 5869 */ 5870 int 5871 rsmipc_send_controlmsg(path_t *path, int msgtype) 5872 { 5873 int e; 5874 int retry_cnt = 0; 5875 int min_retry_cnt = 10; 5876 adapter_t *adapter; 5877 rsm_send_t is; 5878 rsm_send_q_handle_t ipc_handle; 5879 rsmipc_controlmsg_t msg; 5880 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL); 5881 5882 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5883 "rsmipc_send_controlmsg enter\n")); 5884 5885 ASSERT(MUTEX_HELD(&path->mutex)); 5886 5887 adapter = path->local_adapter; 5888 5889 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx " 5890 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype, 5891 my_nodeid, adapter->hwaddr, path->remote_node, 5892 path->remote_hwaddr, path->procmsg_cnt)); 5893 5894 if (path->state != RSMKA_PATH_ACTIVE) { 5895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5896 "rsmipc_send_controlmsg done: ! 
RSMKA_PATH_ACTIVE")); 5897 return (1); 5898 } 5899 5900 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5901 5902 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5903 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5904 msg.rsmipc_hdr.rsmipc_type = msgtype; 5905 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5906 5907 if (msgtype == RSMIPC_MSG_CREDIT) 5908 msg.rsmipc_credits = path->procmsg_cnt; 5909 5910 msg.rsmipc_local_incn = path->local_incn; 5911 5912 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5913 /* incr the sendq, path refcnt */ 5914 PATH_HOLD_NOLOCK(path); 5915 SENDQ_TOKEN_HOLD(path); 5916 5917 do { 5918 /* drop the path lock before doing the rsm_send */ 5919 mutex_exit(&path->mutex); 5920 5921 is.is_data = (void *)&msg; 5922 is.is_size = sizeof (msg); 5923 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5924 is.is_wait = 0; 5925 5926 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5927 5928 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5929 e != RSMERR_BAD_BARRIER_HNDL); 5930 5931 mutex_enter(&path->mutex); 5932 5933 if (e == RSM_SUCCESS) { 5934 break; 5935 } 5936 /* error counter for statistics */ 5937 atomic_add_64(&rsm_ctrlmsg_errcnt, 1); 5938 5939 DBG_PRINTF((category, RSM_ERR, 5940 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5941 5942 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5943 (void) cv_reltimedwait(&path->sendq_token.sendq_cv, 5944 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK); 5945 retry_cnt = 0; 5946 } 5947 } while (path->state == RSMKA_PATH_ACTIVE); 5948 5949 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5950 SENDQ_TOKEN_RELE(path); 5951 PATH_RELE_NOLOCK(path); 5952 5953 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5954 "rsmipc_send_controlmsg done=%d", e)); 5955 return (e); 5956 } 5957 5958 /* 5959 * Called from rsm_force_unload and path_importer_disconnect. The memory 5960 * mapping for the imported segment is removed and the segment is 5961 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5962 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5963 * and FALSE from rsm_rebind. 5964 * 5965 * When subsequent accesses cause page faulting, the dummy page is mapped 5966 * to resolve the fault, and the mapping generation number is incremented 5967 * so that the application can be notified on a close barrier operation. 5968 * 5969 * It is important to note that the caller of rsmseg_unload is responsible for 5970 * acquiring the segment lock before making a call to rsmseg_unload. This is 5971 * required to make the caller and rsmseg_unload thread safe. The segment lock 5972 * will be released by the rsmseg_unload function. 5973 */ 5974 void 5975 rsmseg_unload(rsmseg_t *im_seg) 5976 { 5977 rsmcookie_t *hdl; 5978 void *shared_cookie; 5979 rsmipc_request_t request; 5980 uint_t maxprot; 5981 5982 DBG_DEFINE(category, 5983 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5984 5985 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5986 5987 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5988 5989 /* wait until segment leaves the mapping state */ 5990 while (im_seg->s_state == RSM_STATE_MAPPING) 5991 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 5992 /* 5993 * An unload is only necessary if the segment is connected. However, 5994 * if the segment was on the import list in state RSM_STATE_CONNECTING 5995 * then a connection was in progress. Change to RSM_STATE_ABORT_CONNECT 5996 * here to cause an early exit from the connection process.
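 *
 * The resulting handshake, in outline (descriptive only; the actual
 * code is below and in rsm_connect):
 *
 *	rsmseg_unload()                    rsm_connect()
 *	  s_state = ABORT_CONNECT;   --->    sees the ABORT_CONNECT state,
 *	  rsmsi_state = ABORT_CONNECT;       cleans up, resets the segment
 *	                                     to RSM_STATE_NEW and returns
 *	                                     RSMERR_INTERRUPTED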
5997 */ 5998 if (im_seg->s_state == RSM_STATE_NEW) { 5999 rsmseglock_release(im_seg); 6000 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6001 "rsmseg_unload done: RSM_STATE_NEW\n")); 6002 return; 6003 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6004 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6005 rsmsharelock_acquire(im_seg); 6006 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6007 rsmsharelock_release(im_seg); 6008 rsmseglock_release(im_seg); 6009 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6010 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6011 return; 6012 } 6013 6014 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6015 if (im_seg->s_ckl != NULL) { 6016 int e; 6017 /* Setup protections for remap */ 6018 maxprot = PROT_USER; 6019 if (im_seg->s_mode & RSM_PERM_READ) { 6020 maxprot |= PROT_READ; 6021 } 6022 if (im_seg->s_mode & RSM_PERM_WRITE) { 6023 maxprot |= PROT_WRITE; 6024 } 6025 hdl = im_seg->s_ckl; 6026 for (; hdl != NULL; hdl = hdl->c_next) { 6027 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6028 remap_cookie, 6029 hdl->c_off, hdl->c_len, 6030 maxprot, 0, NULL); 6031 6032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6033 "remap returns %d\n", e)); 6034 } 6035 } 6036 6037 (void) rsm_closeconnection(im_seg, &shared_cookie); 6038 6039 if (shared_cookie != NULL) { 6040 /* 6041 * inform the exporting node so this import 6042 * can be deleted from the list of importers. 6043 */ 6044 request.rsmipc_hdr.rsmipc_type = 6045 RSMIPC_MSG_NOTIMPORTING; 6046 request.rsmipc_key = im_seg->s_segid; 6047 request.rsmipc_segment_cookie = shared_cookie; 6048 rsmseglock_release(im_seg); 6049 (void) rsmipc_send(im_seg->s_node, &request, 6050 RSM_NO_REPLY); 6051 } else { 6052 rsmseglock_release(im_seg); 6053 } 6054 } 6055 else 6056 rsmseglock_release(im_seg); 6057 6058 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6059 6060 } 6061 6062 /* ****************************** Importer Calls ************************ */ 6063 6064 static int 6065 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6066 { 6067 int shifts = 0; 6068 6069 if (crgetuid(cr) != owner) { 6070 shifts += 3; 6071 if (!groupmember(group, cr)) 6072 shifts += 3; 6073 } 6074 6075 mode &= ~(perm << shifts); 6076 6077 if (mode == 0) 6078 return (0); 6079 6080 return (secpolicy_rsm_access(cr, owner, mode)); 6081 } 6082 6083 6084 static int 6085 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6086 intptr_t dataptr, int mode) 6087 { 6088 int e; 6089 int recheck_state = 0; 6090 void *shared_cookie; 6091 rsmipc_request_t request; 6092 rsmipc_reply_t reply; 6093 rsm_permission_t access; 6094 adapter_t *adapter; 6095 rsm_addr_t addr = 0; 6096 rsm_import_share_t *sharedp; 6097 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6098 6099 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6100 6101 adapter = rsm_getadapter(msg, mode); 6102 if (adapter == NULL) { 6103 DBG_PRINTF((category, RSM_ERR, 6104 "rsm_connect done:ENODEV adapter=NULL\n")); 6105 return (RSMERR_CTLR_NOT_PRESENT); 6106 } 6107 6108 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6109 rsmka_release_adapter(adapter); 6110 DBG_PRINTF((category, RSM_ERR, 6111 "rsm_connect done:ENODEV loopback\n")); 6112 return (RSMERR_CTLR_NOT_PRESENT); 6113 } 6114 6115 6116 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6117 ASSERT(seg->s_state == RSM_STATE_NEW); 6118 6119 /* 6120 * Translate perm to access 6121 */ 6122 if (msg->perm & ~RSM_PERM_RDWR) { 6123 rsmka_release_adapter(adapter); 
6124 DBG_PRINTF((category, RSM_ERR, 6125 "rsm_connect done:EINVAL invalid perms\n")); 6126 return (RSMERR_BAD_PERMS); 6127 } 6128 access = 0; 6129 if (msg->perm & RSM_PERM_READ) 6130 access |= RSM_ACCESS_READ; 6131 if (msg->perm & RSM_PERM_WRITE) 6132 access |= RSM_ACCESS_WRITE; 6133 6134 seg->s_node = msg->nodeid; 6135 6136 /* 6137 * Adding to the import list locks the segment; release the segment 6138 * lock so we can get the reply for the send. 6139 */ 6140 e = rsmimport_add(seg, msg->key); 6141 if (e) { 6142 rsmka_release_adapter(adapter); 6143 DBG_PRINTF((category, RSM_ERR, 6144 "rsm_connect done:rsmimport_add failed %d\n", e)); 6145 return (e); 6146 } 6147 seg->s_state = RSM_STATE_CONNECTING; 6148 6149 /* 6150 * Set the s_adapter field here so as to have a valid comparison of 6151 * the adapter and the s_adapter value during rsmshare_get. For 6152 * any error, set s_adapter to NULL before doing a release_adapter 6153 */ 6154 seg->s_adapter = adapter; 6155 6156 rsmseglock_release(seg); 6157 6158 /* 6159 * get the pointer to the shared data structure; the 6160 * shared data is locked and refcount has been incremented 6161 */ 6162 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6163 6164 ASSERT(rsmsharelock_held(seg)); 6165 6166 do { 6167 /* flag indicates whether we need to recheck the state */ 6168 recheck_state = 0; 6169 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6170 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6171 switch (sharedp->rsmsi_state) { 6172 case RSMSI_STATE_NEW: 6173 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6174 break; 6175 case RSMSI_STATE_CONNECTING: 6176 /* FALLTHRU */ 6177 case RSMSI_STATE_CONN_QUIESCE: 6178 /* FALLTHRU */ 6179 case RSMSI_STATE_MAP_QUIESCE: 6180 /* wait for the state to change */ 6181 while ((sharedp->rsmsi_state == 6182 RSMSI_STATE_CONNECTING) || 6183 (sharedp->rsmsi_state == 6184 RSMSI_STATE_CONN_QUIESCE) || 6185 (sharedp->rsmsi_state == 6186 RSMSI_STATE_MAP_QUIESCE)) { 6187 if (cv_wait_sig(&sharedp->rsmsi_cv, 6188 &sharedp->rsmsi_lock) == 0) { 6189 /* signalled - clean up and return */ 6190 rsmsharelock_release(seg); 6191 rsmimport_rm(seg); 6192 seg->s_adapter = NULL; 6193 rsmka_release_adapter(adapter); 6194 seg->s_state = RSM_STATE_NEW; 6195 DBG_PRINTF((category, RSM_ERR, 6196 "rsm_connect done: INTERRUPTED\n")); 6197 return (RSMERR_INTERRUPTED); 6198 } 6199 } 6200 /* 6201 * the state changed, loop back and check what it is 6202 */ 6203 recheck_state = 1; 6204 break; 6205 case RSMSI_STATE_ABORT_CONNECT: 6206 /* exit the loop and clean up further down */ 6207 break; 6208 case RSMSI_STATE_CONNECTED: 6209 /* already connected, good - fall through */ 6210 case RSMSI_STATE_MAPPED: 6211 /* already mapped, wow - fall through */ 6212 /* access validation etc is done further down */ 6213 break; 6214 case RSMSI_STATE_DISCONNECTED: 6215 /* disconnected - so reconnect now */ 6216 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6217 break; 6218 default: 6219 ASSERT(0); /* Invalid State */ 6220 } 6221 } while (recheck_state); 6222 6223 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6224 /* we are the first to connect */ 6225 rsmsharelock_release(seg); 6226 6227 if (msg->nodeid != my_nodeid) { 6228 addr = get_remote_hwaddr(adapter, msg->nodeid); 6229 6230 if ((int64_t)addr < 0) { 6231 rsmsharelock_acquire(seg); 6232 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6233 RSMSI_STATE_NEW); 6234 rsmsharelock_release(seg); 6235 rsmimport_rm(seg); 6236 seg->s_adapter = NULL; 6237 rsmka_release_adapter(adapter); 6238 seg->s_state = 
RSM_STATE_NEW; 6239 DBG_PRINTF((category, RSM_ERR, 6240 "rsm_connect done: hwaddr<0\n")); 6241 return (RSMERR_INTERNAL_ERROR); 6242 } 6243 } else { 6244 addr = adapter->hwaddr; 6245 } 6246 6247 /* 6248 * send request to node [src, dest, key, msgid] and get back 6249 * [status, msgid, cookie] 6250 */ 6251 request.rsmipc_key = msg->key; 6252 /* 6253 * we need the s_mode of the exporter so pass 6254 * RSM_ACCESS_TRUSTED 6255 */ 6256 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6257 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6258 request.rsmipc_adapter_hwaddr = addr; 6259 request.rsmipc_segment_cookie = sharedp; 6260 6261 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6262 if (e) { 6263 rsmsharelock_acquire(seg); 6264 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6265 RSMSI_STATE_NEW); 6266 rsmsharelock_release(seg); 6267 rsmimport_rm(seg); 6268 seg->s_adapter = NULL; 6269 rsmka_release_adapter(adapter); 6270 seg->s_state = RSM_STATE_NEW; 6271 DBG_PRINTF((category, RSM_ERR, 6272 "rsm_connect done:rsmipc_send failed %d\n", e)); 6273 return (e); 6274 } 6275 6276 if (reply.rsmipc_status != RSM_SUCCESS) { 6277 rsmsharelock_acquire(seg); 6278 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6279 RSMSI_STATE_NEW); 6280 rsmsharelock_release(seg); 6281 rsmimport_rm(seg); 6282 seg->s_adapter = NULL; 6283 rsmka_release_adapter(adapter); 6284 seg->s_state = RSM_STATE_NEW; 6285 DBG_PRINTF((category, RSM_ERR, 6286 "rsm_connect done:rsmipc_send reply err %d\n", 6287 reply.rsmipc_status)); 6288 return (reply.rsmipc_status); 6289 } 6290 6291 rsmsharelock_acquire(seg); 6292 /* store the information recvd into the shared data struct */ 6293 sharedp->rsmsi_mode = reply.rsmipc_mode; 6294 sharedp->rsmsi_uid = reply.rsmipc_uid; 6295 sharedp->rsmsi_gid = reply.rsmipc_gid; 6296 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6297 sharedp->rsmsi_cookie = sharedp; 6298 } 6299 6300 rsmsharelock_release(seg); 6301 6302 /* 6303 * Get the segment lock and check for a force disconnect 6304 * from the export side which would have changed the state 6305 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6306 * force disconnect will be held off until the connection 6307 * has completed. 
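 *
 * A note on lock ordering (descriptive only): here, as in the abort
 * and access-check paths that follow, the segment lock is taken
 * before the shared data lock:
 *
 *	rsmseglock_acquire(seg);	holds off a force disconnect
 *	rsmsharelock_acquire(seg);	protects sharedp->rsmsi_* fields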
6308 */ 6309 rsmseglock_acquire(seg); 6310 rsmsharelock_acquire(seg); 6311 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6312 seg->s_state == RSM_STATE_ABORT_CONNECT); 6313 6314 shared_cookie = sharedp->rsmsi_cookie; 6315 6316 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6317 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6318 seg->s_state = RSM_STATE_NEW; 6319 seg->s_adapter = NULL; 6320 rsmsharelock_release(seg); 6321 rsmseglock_release(seg); 6322 rsmimport_rm(seg); 6323 rsmka_release_adapter(adapter); 6324 6325 rsmsharelock_acquire(seg); 6326 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6327 /* 6328 * set a flag indicating abort handling has been 6329 * done 6330 */ 6331 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6332 rsmsharelock_release(seg); 6333 /* send a message to exporter - only once */ 6334 (void) rsm_send_notimporting(msg->nodeid, 6335 msg->key, shared_cookie); 6336 rsmsharelock_acquire(seg); 6337 /* 6338 * wake up any waiting importers and inform that 6339 * connection has been aborted 6340 */ 6341 cv_broadcast(&sharedp->rsmsi_cv); 6342 } 6343 rsmsharelock_release(seg); 6344 6345 DBG_PRINTF((category, RSM_ERR, 6346 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6347 return (RSMERR_INTERRUPTED); 6348 } 6349 6350 6351 /* 6352 * We need to verify that this process has access 6353 */ 6354 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6355 access & sharedp->rsmsi_mode, 6356 (int)(msg->perm & RSM_PERM_RDWR), cred); 6357 if (e) { 6358 rsmsharelock_release(seg); 6359 seg->s_state = RSM_STATE_NEW; 6360 seg->s_adapter = NULL; 6361 rsmseglock_release(seg); 6362 rsmimport_rm(seg); 6363 rsmka_release_adapter(adapter); 6364 /* 6365 * No need to lock segment it has been removed 6366 * from the hash table 6367 */ 6368 rsmsharelock_acquire(seg); 6369 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6370 rsmsharelock_release(seg); 6371 /* this is the first importer */ 6372 6373 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6374 shared_cookie); 6375 rsmsharelock_acquire(seg); 6376 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6377 cv_broadcast(&sharedp->rsmsi_cv); 6378 } 6379 rsmsharelock_release(seg); 6380 6381 DBG_PRINTF((category, RSM_ERR, 6382 "rsm_connect done: ipcaccess failed\n")); 6383 return (RSMERR_PERM_DENIED); 6384 } 6385 6386 /* update state and cookie */ 6387 seg->s_segid = sharedp->rsmsi_segid; 6388 seg->s_len = sharedp->rsmsi_seglen; 6389 seg->s_mode = access & sharedp->rsmsi_mode; 6390 seg->s_pid = ddi_get_pid(); 6391 seg->s_mapinfo = NULL; 6392 6393 if (seg->s_node != my_nodeid) { 6394 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6395 e = adapter->rsmpi_ops->rsm_connect( 6396 adapter->rsmpi_handle, 6397 addr, seg->s_segid, &sharedp->rsmsi_handle); 6398 6399 if (e != RSM_SUCCESS) { 6400 seg->s_state = RSM_STATE_NEW; 6401 seg->s_adapter = NULL; 6402 rsmsharelock_release(seg); 6403 rsmseglock_release(seg); 6404 rsmimport_rm(seg); 6405 rsmka_release_adapter(adapter); 6406 /* 6407 * inform the exporter to delete this importer 6408 */ 6409 (void) rsm_send_notimporting(msg->nodeid, 6410 msg->key, shared_cookie); 6411 6412 /* 6413 * Now inform any waiting importers to 6414 * retry connect. 
This needs to be done 6415 * after sending notimporting so that 6416 * the notimporting is sent before a waiting 6417 * importer sends a segconnect while retrying 6418 * 6419 * No need to lock segment it has been removed 6420 * from the hash table 6421 */ 6422 6423 rsmsharelock_acquire(seg); 6424 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6425 cv_broadcast(&sharedp->rsmsi_cv); 6426 rsmsharelock_release(seg); 6427 6428 DBG_PRINTF((category, RSM_ERR, 6429 "rsm_connect error %d\n", e)); 6430 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6431 return ( 6432 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6433 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6434 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6435 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6436 else 6437 return (e); 6438 } 6439 6440 } 6441 seg->s_handle.in = sharedp->rsmsi_handle; 6442 6443 } 6444 6445 seg->s_state = RSM_STATE_CONNECT; 6446 6447 6448 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6449 if (bar_va) { 6450 /* increment generation number on barrier page */ 6451 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6452 /* return user off into barrier page where status will be */ 6453 msg->off = (int)seg->s_hdr.rsmrc_num; 6454 msg->gnum = bar_va[msg->off]; /* gnum race */ 6455 } else { 6456 msg->off = 0; 6457 msg->gnum = 0; /* gnum race */ 6458 } 6459 6460 msg->len = (int)sharedp->rsmsi_seglen; 6461 msg->rnum = seg->s_minor; 6462 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6463 rsmsharelock_release(seg); 6464 rsmseglock_release(seg); 6465 6466 /* Return back to user the segment size & perm in case it's needed */ 6467 6468 #ifdef _MULTI_DATAMODEL 6469 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6470 rsm_ioctlmsg32_t msg32; 6471 6472 if (msg->len > UINT_MAX) 6473 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6474 else 6475 msg32.len = msg->len; 6476 msg32.off = msg->off; 6477 msg32.perm = msg->perm; 6478 msg32.gnum = msg->gnum; 6479 msg32.rnum = msg->rnum; 6480 6481 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6482 "rsm_connect done\n")); 6483 6484 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6485 sizeof (msg32), mode)) 6486 return (RSMERR_BAD_ADDR); 6487 else 6488 return (RSM_SUCCESS); 6489 } 6490 #endif 6491 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6492 6493 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6494 mode)) 6495 return (RSMERR_BAD_ADDR); 6496 else 6497 return (RSM_SUCCESS); 6498 } 6499 6500 static int 6501 rsm_unmap(rsmseg_t *seg) 6502 { 6503 int err; 6504 adapter_t *adapter; 6505 rsm_import_share_t *sharedp; 6506 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6507 6508 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6509 "rsm_unmap enter %u\n", seg->s_segid)); 6510 6511 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6512 6513 /* assert seg is locked */ 6514 ASSERT(rsmseglock_held(seg)); 6515 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6516 6517 if ((seg->s_state != RSM_STATE_ACTIVE) && 6518 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6519 /* segment unmap has already been done */ 6520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6521 return (RSM_SUCCESS); 6522 } 6523 6524 sharedp = seg->s_share; 6525 6526 rsmsharelock_acquire(seg); 6527 6528 /* 6529 * - shared data struct is in MAPPED or MAP_QUIESCE state 6530 */ 6531 6532 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6533 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6534 6535 /* 6536 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6537 * the segment cookie 
list was NULL; but it is always NULL when 6538 * called from rsmmap_unmap and won't be NULL when called for 6539 * a force disconnect - so the check for NULL cookie list was removed 6540 */ 6541 6542 ASSERT(sharedp->rsmsi_mapcnt > 0); 6543 6544 sharedp->rsmsi_mapcnt--; 6545 6546 if (sharedp->rsmsi_mapcnt == 0) { 6547 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6548 /* unmap the shared RSMPI mapping */ 6549 adapter = seg->s_adapter; 6550 if (seg->s_node != my_nodeid) { 6551 ASSERT(sharedp->rsmsi_handle != NULL); 6552 err = adapter->rsmpi_ops-> 6553 rsm_unmap(sharedp->rsmsi_handle); 6554 DBG_PRINTF((category, RSM_DEBUG, 6555 "rsm_unmap: rsmpi unmap %d\n", err)); 6556 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6557 sharedp->rsmsi_mapinfo = NULL; 6558 } 6559 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6560 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6561 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6562 } 6563 } 6564 6565 rsmsharelock_release(seg); 6566 6567 /* 6568 * The s_cookie field is used to store the cookie returned from the 6569 * ddi_umem_lock when binding the pages for an export segment. This 6570 * is the primary use of the s_cookie field and does not normally 6571 * pertain to any importing segment except in the loopback case. 6572 * For the loopback case, the import segment and export segment are 6573 * on the same node, the s_cookie field of the segment structure for 6574 * the importer is initialized to the s_cookie field in the exported 6575 * segment during the map operation and is used during the call to 6576 * devmap_umem_setup for the import mapping. 6577 * Thus, during unmap, we simply need to set s_cookie to NULL to 6578 * indicate that the mapping no longer exists. 6579 */ 6580 seg->s_cookie = NULL; 6581 6582 seg->s_mapinfo = NULL; 6583 6584 if (seg->s_state == RSM_STATE_ACTIVE) 6585 seg->s_state = RSM_STATE_CONNECT; 6586 else 6587 seg->s_state = RSM_STATE_CONN_QUIESCE; 6588 6589 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6590 6591 return (RSM_SUCCESS); 6592 } 6593 6594 /* 6595 * cookie returned here if not null indicates that it is 6596 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6597 * message. 6598 */ 6599 static int 6600 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6601 { 6602 int e; 6603 adapter_t *adapter; 6604 rsm_import_share_t *sharedp; 6605 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6606 6607 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6608 "rsm_closeconnection enter\n")); 6609 6610 *cookie = (void *)NULL; 6611 6612 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6613 6614 /* assert seg is locked */ 6615 ASSERT(rsmseglock_held(seg)); 6616 6617 if (seg->s_state == RSM_STATE_DISCONNECT) { 6618 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6619 "rsm_closeconnection done: already disconnected\n")); 6620 return (RSM_SUCCESS); 6621 } 6622 6623 /* wait for all putv/getv ops to get done */ 6624 while (seg->s_rdmacnt > 0) { 6625 cv_wait(&seg->s_cv, &seg->s_lock); 6626 } 6627 6628 (void) rsm_unmap(seg); 6629 6630 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6631 seg->s_state == RSM_STATE_CONN_QUIESCE); 6632 6633 adapter = seg->s_adapter; 6634 sharedp = seg->s_share; 6635 6636 ASSERT(sharedp != NULL); 6637 6638 rsmsharelock_acquire(seg); 6639 6640 /* 6641 * Disconnect on adapter 6642 * 6643 * The current algorithm is stateless, I don't have to contact 6644 * server when I go away. He only gives me permissions. Of course, 6645 * the adapters will talk to terminate the connect. 
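 *
 * In outline, the reference counting below works like this (a sketch
 * of the code that follows, not additional logic):
 *
 *	if (rsmsi_refcnt == 1)		last importer on this node:
 *		rsm_disconnect();	RSMPI tears down the connection
 *	rsmsi_refcnt--;
 *	if (rsmsi_refcnt == 0)
 *		*cookie = rsmsi_cookie;	caller sends NOT_IMPORTING msg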
6646 * 6647 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6648 */ 6649 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6650 (sharedp->rsmsi_node != my_nodeid)) { 6651 6652 if (sharedp->rsmsi_refcnt == 1) { 6653 /* this is the last importer */ 6654 ASSERT(sharedp->rsmsi_mapcnt == 0); 6655 6656 e = adapter->rsmpi_ops-> 6657 rsm_disconnect(sharedp->rsmsi_handle); 6658 if (e != RSM_SUCCESS) { 6659 DBG_PRINTF((category, RSM_DEBUG, 6660 "rsm:disconnect failed seg=%x:err=%d\n", 6661 seg->s_key, e)); 6662 } 6663 } 6664 } 6665 6666 seg->s_handle.in = NULL; 6667 6668 sharedp->rsmsi_refcnt--; 6669 6670 if (sharedp->rsmsi_refcnt == 0) { 6671 *cookie = (void *)sharedp->rsmsi_cookie; 6672 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6673 sharedp->rsmsi_handle = NULL; 6674 rsmsharelock_release(seg); 6675 6676 /* clean up the shared data structure */ 6677 mutex_destroy(&sharedp->rsmsi_lock); 6678 cv_destroy(&sharedp->rsmsi_cv); 6679 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6680 6681 } else { 6682 rsmsharelock_release(seg); 6683 } 6684 6685 /* increment generation number on barrier page */ 6686 if (bar_va) { 6687 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6688 } 6689 6690 /* 6691 * The following needs to be done after any 6692 * rsmsharelock calls which use seg->s_share. 6693 */ 6694 seg->s_share = NULL; 6695 6696 seg->s_state = RSM_STATE_DISCONNECT; 6697 /* signal anyone waiting in the CONN_QUIESCE state */ 6698 cv_broadcast(&seg->s_cv); 6699 6700 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6701 "rsm_closeconnection done\n")); 6702 6703 return (RSM_SUCCESS); 6704 } 6705 6706 int 6707 rsm_disconnect(rsmseg_t *seg) 6708 { 6709 rsmipc_request_t request; 6710 void *shared_cookie; 6711 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6712 6713 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6714 6715 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6716 6717 /* assert seg isn't locked */ 6718 ASSERT(!rsmseglock_held(seg)); 6719 6720 6721 /* Remove segment from imported list */ 6722 rsmimport_rm(seg); 6723 6724 /* acquire the segment */ 6725 rsmseglock_acquire(seg); 6726 6727 /* wait until segment leaves the mapping state */ 6728 while (seg->s_state == RSM_STATE_MAPPING) 6729 cv_wait(&seg->s_cv, &seg->s_lock); 6730 6731 if (seg->s_state == RSM_STATE_DISCONNECT) { 6732 seg->s_state = RSM_STATE_NEW; 6733 rsmseglock_release(seg); 6734 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6735 "rsm_disconnect done: already disconnected\n")); 6736 return (RSM_SUCCESS); 6737 } 6738 6739 (void) rsm_closeconnection(seg, &shared_cookie); 6740 6741 /* update state */ 6742 seg->s_state = RSM_STATE_NEW; 6743 6744 if (shared_cookie != NULL) { 6745 /* 6746 * This is the last importer so inform the exporting node 6747 * so this import can be deleted from the list of importers. 
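 *
 * What goes on the wire is small (fields as set by the code below):
 *
 *	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
 *	request.rsmipc_key = seg->s_segid;
 *	request.rsmipc_segment_cookie = shared_cookie;
 *	(void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
 *
 * Note that the segment lock is released before the send, so the
 * segment is never held across a potentially blocking IPC call.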
6748 */ 6749 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6750 request.rsmipc_key = seg->s_segid; 6751 request.rsmipc_segment_cookie = shared_cookie; 6752 rsmseglock_release(seg); 6753 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6754 } else { 6755 rsmseglock_release(seg); 6756 } 6757 6758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6759 6760 return (DDI_SUCCESS); 6761 } 6762 6763 /*ARGSUSED*/ 6764 static int 6765 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6766 struct pollhead **phpp) 6767 { 6768 minor_t rnum; 6769 rsmresource_t *res; 6770 rsmseg_t *seg; 6771 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6772 6773 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6774 6775 /* find minor, no lock */ 6776 rnum = getminor(dev); 6777 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6778 6779 /* poll is supported only for export/import segments */ 6780 if ((res == NULL) || (res == RSMRC_RESERVED) || 6781 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6782 return (ENXIO); 6783 } 6784 6785 *reventsp = 0; 6786 6787 /* 6788 * An exported segment must be in state RSM_STATE_EXPORT; an 6789 * imported segment must be in state RSM_STATE_ACTIVE. 6790 */ 6791 seg = (rsmseg_t *)res; 6792 6793 if (seg->s_pollevent) { 6794 *reventsp = POLLRDNORM; 6795 } else if (!anyyet) { 6796 /* cannot take segment lock here */ 6797 *phpp = &seg->s_poll; 6798 seg->s_pollflag |= RSM_SEGMENT_POLL; 6799 } 6800 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6801 return (0); 6802 } 6803 6804 6805 6806 /* ************************* IOCTL Commands ********************* */ 6807 6808 static rsmseg_t * 6809 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6810 rsm_resource_type_t type) 6811 { 6812 /* get segment from resource handle */ 6813 rsmseg_t *seg; 6814 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6815 6816 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6817 6818 6819 if (res != RSMRC_RESERVED) { 6820 seg = (rsmseg_t *)res; 6821 } else { 6822 /* Allocate segment now and bind it */ 6823 seg = rsmseg_alloc(rnum, credp); 6824 6825 /* 6826 * if DR pre-processing is going on or DR is in progress 6827 * then the new export segments should be in the NEW_QSCD state 6828 */ 6829 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6830 mutex_enter(&rsm_drv_data.drv_lock); 6831 if ((rsm_drv_data.drv_state == 6832 RSM_DRV_PREDEL_STARTED) || 6833 (rsm_drv_data.drv_state == 6834 RSM_DRV_PREDEL_COMPLETED) || 6835 (rsm_drv_data.drv_state == 6836 RSM_DRV_DR_IN_PROGRESS)) { 6837 seg->s_state = RSM_STATE_NEW_QUIESCED; 6838 } 6839 mutex_exit(&rsm_drv_data.drv_lock); 6840 } 6841 6842 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6843 } 6844 6845 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6846 6847 return (seg); 6848 } 6849 6850 static int 6851 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6852 int mode, cred_t *credp) 6853 { 6854 int error; 6855 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6856 6857 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6858 6859 arg = arg; 6860 credp = credp; 6861 6862 ASSERT(seg != NULL); 6863 6864 switch (cmd) { 6865 case RSM_IOCTL_BIND: 6866 error = rsm_bind(seg, msg, arg, mode); 6867 break; 6868 case RSM_IOCTL_REBIND: 6869 error = rsm_rebind(seg, msg); 6870 break; 6871 case RSM_IOCTL_UNBIND: 6872 error = ENOTSUP; 6873 break; 6874 case 
RSM_IOCTL_PUBLISH: 6875 error = rsm_publish(seg, msg, arg, mode); 6876 break; 6877 case RSM_IOCTL_REPUBLISH: 6878 error = rsm_republish(seg, msg, mode); 6879 break; 6880 case RSM_IOCTL_UNPUBLISH: 6881 error = rsm_unpublish(seg, 1); 6882 break; 6883 default: 6884 error = EINVAL; 6885 break; 6886 } 6887 6888 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6889 error)); 6890 6891 return (error); 6892 } 6893 static int 6894 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6895 int mode, cred_t *credp) 6896 { 6897 int error; 6898 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6899 6900 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6901 6902 ASSERT(seg); 6903 6904 switch (cmd) { 6905 case RSM_IOCTL_CONNECT: 6906 error = rsm_connect(seg, msg, credp, arg, mode); 6907 break; 6908 default: 6909 error = EINVAL; 6910 break; 6911 } 6912 6913 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6914 error)); 6915 return (error); 6916 } 6917 6918 static int 6919 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6920 int mode) 6921 { 6922 int e; 6923 adapter_t *adapter; 6924 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6925 6926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6927 6928 6929 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6930 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6931 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6932 return (RSMERR_CONN_ABORTED); 6933 } else if (seg->s_node == my_nodeid) { 6934 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6935 "rsmbar_ioctl done: loopback\n")); 6936 return (RSM_SUCCESS); 6937 } 6938 6939 adapter = seg->s_adapter; 6940 6941 switch (cmd) { 6942 case RSM_IOCTL_BAR_CHECK: 6943 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6944 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6945 return (bar_va ? RSM_SUCCESS : EINVAL); 6946 case RSM_IOCTL_BAR_OPEN: 6947 e = adapter->rsmpi_ops-> 6948 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6949 break; 6950 case RSM_IOCTL_BAR_ORDER: 6951 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6952 break; 6953 case RSM_IOCTL_BAR_CLOSE: 6954 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6955 break; 6956 default: 6957 e = EINVAL; 6958 break; 6959 } 6960 6961 if (e == RSM_SUCCESS) { 6962 #ifdef _MULTI_DATAMODEL 6963 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6964 rsm_ioctlmsg32_t msg32; 6965 int i; 6966 6967 for (i = 0; i < 4; i++) { 6968 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6969 } 6970 6971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6972 "rsmbar_ioctl done\n")); 6973 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6974 sizeof (msg32), mode)) 6975 return (RSMERR_BAD_ADDR); 6976 else 6977 return (RSM_SUCCESS); 6978 } 6979 #endif 6980 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6981 "rsmbar_ioctl done\n")); 6982 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6983 sizeof (*msg), mode)) 6984 return (RSMERR_BAD_ADDR); 6985 else 6986 return (RSM_SUCCESS); 6987 } 6988 6989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6990 "rsmbar_ioctl done: error=%d\n", e)); 6991 6992 return (e); 6993 } 6994 6995 /* 6996 * Ring the doorbell of the export segment to which this segment is 6997 * connected. 
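 *
 * Concretely, the doorbell is just an IPC message with no payload and
 * no reply (fields as set by the code below):
 *
 *	request.rsmipc_key = seg->s_segid;
 *	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
 *	request.rsmipc_segment_cookie = NULL;
 *	e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);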
6998 */ 6999 static int 7000 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7001 { 7002 int e = 0; 7003 rsmipc_request_t request; 7004 7005 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7006 7007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7008 7009 request.rsmipc_key = seg->s_segid; 7010 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7011 request.rsmipc_segment_cookie = NULL; 7012 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7013 7014 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7015 "exportbell_ioctl done: %d\n", e)); 7016 7017 return (e); 7018 } 7019 7020 /* 7021 * Ring the doorbells of all segments importing this segment 7022 */ 7023 static int 7024 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7025 { 7026 importing_token_t *token = NULL; 7027 rsmipc_request_t request; 7028 int index; 7029 7030 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7031 7032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7033 7034 ASSERT(seg->s_state != RSM_STATE_NEW && 7035 seg->s_state != RSM_STATE_NEW_QUIESCED); 7036 7037 request.rsmipc_key = seg->s_segid; 7038 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7039 7040 index = rsmhash(seg->s_segid); 7041 7042 token = importer_list.bucket[index]; 7043 7044 while (token != NULL) { 7045 if (seg->s_key == token->key) { 7046 request.rsmipc_segment_cookie = 7047 token->import_segment_cookie; 7048 (void) rsmipc_send(token->importing_node, 7049 &request, RSM_NO_REPLY); 7050 } 7051 token = token->next; 7052 } 7053 7054 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7055 "importbell_ioctl done\n")); 7056 return (RSM_SUCCESS); 7057 } 7058 7059 static int 7060 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7061 rsm_poll_event_t **eventspp, int mode) 7062 { 7063 rsm_poll_event_t *evlist = NULL; 7064 size_t evlistsz; 7065 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7066 7067 #ifdef _MULTI_DATAMODEL 7068 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7069 int i; 7070 rsm_consume_event_msg32_t cemsg32 = {0}; 7071 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7072 rsm_poll_event32_t *evlist32; 7073 size_t evlistsz32; 7074 7075 /* copyin the ioctl message */ 7076 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7077 sizeof (rsm_consume_event_msg32_t), mode)) { 7078 DBG_PRINTF((category, RSM_ERR, 7079 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7080 return (RSMERR_BAD_ADDR); 7081 } 7082 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7083 msgp->numents = (int)cemsg32.numents; 7084 7085 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7086 /* 7087 * If numents is large alloc events list on heap otherwise 7088 * use the address of array that was passed in. 
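 *
 * That is (a sketch of the allocation policy used throughout the
 * consumeevent routines):
 *
 *	if (msgp->numents > RSM_MAX_POLLFDS) {
 *		validate against max_segs, then kmem_zalloc() the list
 *	} else {
 *		use the caller-supplied array of RSM_MAX_POLLFDS entries
 *	}
 *
 * so the common, small request involves no kernel allocation.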
7089 */ 7090 if (msgp->numents > RSM_MAX_POLLFDS) { 7091 if (msgp->numents > max_segs) { /* validate numents */ 7092 DBG_PRINTF((category, RSM_ERR, 7093 "consumeevent_copyin: " 7094 "RSMERR_BAD_ARGS_ERRORS\n")); 7095 return (RSMERR_BAD_ARGS_ERRORS); 7096 } 7097 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7098 } else { 7099 evlist32 = event32; 7100 } 7101 7102 /* copyin the seglist into the rsm_poll_event32_t array */ 7103 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7104 evlistsz32, mode)) { 7105 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7106 kmem_free(evlist32, evlistsz32); 7107 } 7108 DBG_PRINTF((category, RSM_ERR, 7109 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7110 return (RSMERR_BAD_ADDR); 7111 } 7112 7113 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7114 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7115 7116 if (msgp->numents > RSM_MAX_POLLFDS) { 7117 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7118 *eventspp = evlist; 7119 } else { 7120 evlist = *eventspp; 7121 } 7122 /* 7123 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7124 * array 7125 */ 7126 for (i = 0; i < msgp->numents; i++) { 7127 evlist[i].rnum = evlist32[i].rnum; 7128 evlist[i].fdsidx = evlist32[i].fdsidx; 7129 evlist[i].revent = evlist32[i].revent; 7130 } 7131 /* free the temp 32-bit event list */ 7132 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7133 kmem_free(evlist32, evlistsz32); 7134 } 7135 7136 return (RSM_SUCCESS); 7137 } 7138 #endif 7139 /* copyin the ioctl message */ 7140 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7141 mode)) { 7142 DBG_PRINTF((category, RSM_ERR, 7143 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7144 return (RSMERR_BAD_ADDR); 7145 } 7146 /* 7147 * If numents is large alloc events list on heap otherwise 7148 * use the address of array that was passed in. 
7149 */ 7150 if (msgp->numents > RSM_MAX_POLLFDS) { 7151 if (msgp->numents > max_segs) { /* validate numents */ 7152 DBG_PRINTF((category, RSM_ERR, 7153 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7154 return (RSMERR_BAD_ARGS_ERRORS); 7155 } 7156 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7157 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7158 *eventspp = evlist; 7159 } 7160 7161 /* copyin the seglist */ 7162 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7163 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7164 if (evlist) { 7165 kmem_free(evlist, evlistsz); 7166 *eventspp = NULL; 7167 } 7168 DBG_PRINTF((category, RSM_ERR, 7169 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7170 return (RSMERR_BAD_ADDR); 7171 } 7172 7173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7174 "consumeevent_copyin done\n")); 7175 return (RSM_SUCCESS); 7176 } 7177 7178 static int 7179 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7180 rsm_poll_event_t *eventsp, int mode) 7181 { 7182 size_t evlistsz; 7183 int err = RSM_SUCCESS; 7184 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7185 7186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7187 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7188 msgp->numents, eventsp)); 7189 7190 #ifdef _MULTI_DATAMODEL 7191 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7192 int i; 7193 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7194 rsm_poll_event32_t *evlist32; 7195 size_t evlistsz32; 7196 7197 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7198 if (msgp->numents > RSM_MAX_POLLFDS) { 7199 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7200 } else { 7201 evlist32 = event32; 7202 } 7203 7204 /* 7205 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7206 * array 7207 */ 7208 for (i = 0; i < msgp->numents; i++) { 7209 evlist32[i].rnum = eventsp[i].rnum; 7210 evlist32[i].fdsidx = eventsp[i].fdsidx; 7211 evlist32[i].revent = eventsp[i].revent; 7212 } 7213 7214 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7215 evlistsz32, mode)) { 7216 err = RSMERR_BAD_ADDR; 7217 } 7218 7219 if (msgp->numents > RSM_MAX_POLLFDS) { 7220 if (evlist32) { /* free the temp 32-bit event list */ 7221 kmem_free(evlist32, evlistsz32); 7222 } 7223 /* 7224 * eventsp and evlistsz are based on rsm_poll_event_t 7225 * type 7226 */ 7227 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7228 /* event list on the heap and needs to be freed here */ 7229 if (eventsp) { 7230 kmem_free(eventsp, evlistsz); 7231 } 7232 } 7233 7234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7235 "consumeevent_copyout done: err=%d\n", err)); 7236 return (err); 7237 } 7238 #endif 7239 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7240 7241 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7242 mode)) { 7243 err = RSMERR_BAD_ADDR; 7244 } 7245 7246 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7247 /* event list on the heap and needs to be freed here */ 7248 kmem_free(eventsp, evlistsz); 7249 } 7250 7251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7252 "consumeevent_copyout done: err=%d\n", err)); 7253 return (err); 7254 } 7255 7256 static int 7257 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7258 { 7259 int rc; 7260 int i; 7261 minor_t rnum; 7262 rsm_consume_event_msg_t msg = {0}; 7263 rsmseg_t *seg; 7264 rsm_poll_event_t *event_list; 7265 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7266 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7267 7268 event_list = events; 7269 7270 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, 
mode)) != 7271 RSM_SUCCESS) { 7272 return (rc); 7273 } 7274 7275 for (i = 0; i < msg.numents; i++) { 7276 rnum = event_list[i].rnum; 7277 event_list[i].revent = 0; 7278 /* get the segment structure */ 7279 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7280 if (seg) { 7281 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7282 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7283 seg)); 7284 if (seg->s_pollevent) { 7285 /* consume the event */ 7286 atomic_add_32(&seg->s_pollevent, -1); 7287 event_list[i].revent = POLLRDNORM; 7288 } 7289 rsmseglock_release(seg); 7290 } 7291 } 7292 7293 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7294 RSM_SUCCESS) { 7295 return (rc); 7296 } 7297 7298 return (RSM_SUCCESS); 7299 } 7300 7301 static int 7302 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7303 { 7304 int size; 7305 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7306 7307 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7308 7309 #ifdef _MULTI_DATAMODEL 7310 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7311 rsmka_iovec32_t *iovec32, *iovec32_base; 7312 int i; 7313 7314 size = count * sizeof (rsmka_iovec32_t); 7315 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7316 if (ddi_copyin((caddr_t)user_vec, 7317 (caddr_t)iovec32, size, mode)) { 7318 kmem_free(iovec32, size); 7319 DBG_PRINTF((category, RSM_DEBUG, 7320 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7321 return (RSMERR_BAD_ADDR); 7322 } 7323 7324 for (i = 0; i < count; i++, iovec++, iovec32++) { 7325 iovec->io_type = (int)iovec32->io_type; 7326 if (iovec->io_type == RSM_HANDLE_TYPE) 7327 iovec->local.segid = (rsm_memseg_id_t) 7328 iovec32->local; 7329 else 7330 iovec->local.vaddr = 7331 (caddr_t)(uintptr_t)iovec32->local; 7332 iovec->local_offset = (size_t)iovec32->local_offset; 7333 iovec->remote_offset = (size_t)iovec32->remote_offset; 7334 iovec->transfer_len = (size_t)iovec32->transfer_len; 7335 7336 } 7337 kmem_free(iovec32_base, size); 7338 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7339 "iovec_copyin done\n")); 7340 return (DDI_SUCCESS); 7341 } 7342 #endif 7343 7344 size = count * sizeof (rsmka_iovec_t); 7345 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7346 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7347 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7348 return (RSMERR_BAD_ADDR); 7349 } 7350 7351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7352 7353 return (DDI_SUCCESS); 7354 } 7355 7356 7357 static int 7358 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7359 { 7360 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7361 7362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7363 7364 #ifdef _MULTI_DATAMODEL 7365 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7366 rsmka_scat_gath32_t sg_io32; 7367 7368 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7369 mode)) { 7370 DBG_PRINTF((category, RSM_DEBUG, 7371 "sgio_copyin done: returning EFAULT\n")); 7372 return (RSMERR_BAD_ADDR); 7373 } 7374 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7375 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7376 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7377 sg_io->flags = (size_t)sg_io32.flags; 7378 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7379 (uintptr_t)sg_io32.remote_handle; 7380 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7381 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7382 "sgio_copyin done\n")); 7383 
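		/*
		 * Note (descriptive only): of the fields widened above, only
		 * io_residual_count and flags are copied back out after the
		 * transfer; see sgio_resid_copyout() below.
		 */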
return (DDI_SUCCESS); 7384 } 7385 #endif 7386 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7387 mode)) { 7388 DBG_PRINTF((category, RSM_DEBUG, 7389 "sgio_copyin done: returning EFAULT\n")); 7390 return (RSMERR_BAD_ADDR); 7391 } 7392 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7393 return (DDI_SUCCESS); 7394 } 7395 7396 static int 7397 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7398 { 7399 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7400 7401 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7402 "sgio_resid_copyout enter\n")); 7403 7404 #ifdef _MULTI_DATAMODEL 7405 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7406 rsmka_scat_gath32_t sg_io32; 7407 7408 sg_io32.io_residual_count = sg_io->io_residual_count; 7409 sg_io32.flags = sg_io->flags; 7410 7411 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7412 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7413 sizeof (uint32_t), mode)) { 7414 7415 DBG_PRINTF((category, RSM_ERR, 7416 "sgio_resid_copyout error: rescnt\n")); 7417 return (RSMERR_BAD_ADDR); 7418 } 7419 7420 if (ddi_copyout((caddr_t)&sg_io32.flags, 7421 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7422 sizeof (uint32_t), mode)) { 7423 7424 DBG_PRINTF((category, RSM_ERR, 7425 "sgio_resid_copyout error: flags\n")); 7426 return (RSMERR_BAD_ADDR); 7427 } 7428 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7429 "sgio_resid_copyout done\n")); 7430 return (DDI_SUCCESS); 7431 } 7432 #endif 7433 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7434 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7435 sizeof (ulong_t), mode)) { 7436 7437 DBG_PRINTF((category, RSM_ERR, 7438 "sgio_resid_copyout error:rescnt\n")); 7439 return (RSMERR_BAD_ADDR); 7440 } 7441 7442 if (ddi_copyout((caddr_t)&sg_io->flags, 7443 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7444 sizeof (uint_t), mode)) { 7445 7446 DBG_PRINTF((category, RSM_ERR, 7447 "sgio_resid_copyout error:flags\n")); 7448 return (RSMERR_BAD_ADDR); 7449 } 7450 7451 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7452 return (DDI_SUCCESS); 7453 } 7454 7455 7456 static int 7457 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7458 { 7459 rsmka_scat_gath_t sg_io; 7460 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7461 rsmka_iovec_t *ka_iovec; 7462 rsmka_iovec_t *ka_iovec_start; 7463 rsmpi_scat_gath_t rsmpi_sg_io; 7464 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7465 rsmpi_iovec_t *iovec; 7466 rsmpi_iovec_t *iovec_start = NULL; 7467 rsmapi_access_entry_t *acl; 7468 rsmresource_t *res; 7469 minor_t rnum; 7470 rsmseg_t *im_seg, *ex_seg; 7471 int e; 7472 int error = 0; 7473 uint_t i; 7474 uint_t iov_proc = 0; /* num of iovecs processed */ 7475 size_t size = 0; 7476 size_t ka_size; 7477 7478 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7479 7480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7481 7482 credp = credp; 7483 7484 /* 7485 * Copyin the scatter/gather structure and build new structure 7486 * for rsmpi. 
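 *
 * The overall flow of a putv/getv request, in outline (a summary of
 * the code below, not a specification):
 *
 *	sgio_copyin()			fetch the rsmka_scat_gath_t
 *	check io_request_count		bounded by RSM_MAX_SGIOREQS
 *	iovec_copyin()			fetch the rsmka_iovec_t array
 *	translate each iovec		segid/vaddr -> rsmpi_iovec_t
 *	rsm_memseg_import_putv/getv()	the actual RSMPI transfer
 *	sgio_resid_copyout()		residual count back to the user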
7487 */ 7488 e = sgio_copyin(arg, &sg_io, mode); 7489 if (e != DDI_SUCCESS) { 7490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7491 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7492 return (e); 7493 } 7494 7495 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7496 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7497 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7498 sg_io.io_request_count)); 7499 return (RSMERR_BAD_SGIO); 7500 } 7501 7502 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7503 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7504 rsmpi_sg_io.io_segflg = 0; 7505 7506 /* Allocate memory and copyin io vector array */ 7507 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7508 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7509 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7510 } else { 7511 ka_iovec_start = ka_iovec = ka_iovec_arr; 7512 } 7513 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7514 sg_io.io_request_count, mode); 7515 if (e != DDI_SUCCESS) { 7516 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7517 kmem_free(ka_iovec, ka_size); 7518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7519 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7520 return (e); 7521 } 7522 7523 /* get the import segment descriptor */ 7524 rnum = getminor(dev); 7525 res = rsmresource_lookup(rnum, RSM_LOCK); 7526 7527 /* 7528 * The following sequence of locking may (or MAY NOT) cause a 7529 * deadlock but this is currently not addressed here since the 7530 * implementation will be changed to incorporate the use of 7531 * reference counting for both the import and the export segments. 7532 */ 7533 7534 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7535 7536 im_seg = (rsmseg_t *)res; 7537 7538 if (im_seg == NULL) { 7539 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7540 kmem_free(ka_iovec, ka_size); 7541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7542 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7543 return (EINVAL); 7544 } 7545 /* putv/getv is supported only on import segments */ 7546 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7547 rsmseglock_release(im_seg); 7548 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7549 kmem_free(ka_iovec, ka_size); 7550 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7551 "rsm_iovec_ioctl done: not an import segment\n")); 7552 return (EINVAL); 7553 } 7554 7555 /* 7556 * wait for a remote DR to complete, i.e. for segments to get UNQUIESCED, 7557 * as well as wait for a local DR to complete.
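 *
 * The wait is interruptible; a signal delivered while blocked here
 * aborts the ioctl (a sketch of the loop below):
 *
 *	while (quiesced || (s_flags & RSM_DR_INPROGRESS))
 *		if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0)
 *			return (RSMERR_INTERRUPTED);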
7558 */ 7559 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7560 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7561 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7562 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7563 DBG_PRINTF((category, RSM_DEBUG, 7564 "rsm_iovec_ioctl done: cv_wait INTR")); 7565 rsmseglock_release(im_seg); 7566 return (RSMERR_INTERRUPTED); 7567 } 7568 } 7569 7570 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7571 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7572 7573 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7574 im_seg->s_state == RSM_STATE_NEW); 7575 7576 DBG_PRINTF((category, RSM_DEBUG, 7577 "rsm_iovec_ioctl done: im_seg not conn/map")); 7578 rsmseglock_release(im_seg); 7579 e = RSMERR_BAD_SGIO; 7580 goto out; 7581 } 7582 7583 im_seg->s_rdmacnt++; 7584 rsmseglock_release(im_seg); 7585 7586 /* 7587 * Allocate and set up the io vector for rsmpi 7588 */ 7589 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7590 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7591 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7592 } else { 7593 iovec_start = iovec = iovec_arr; 7594 } 7595 7596 rsmpi_sg_io.iovec = iovec; 7597 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7598 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7599 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7600 7601 if (ex_seg == NULL) { 7602 e = RSMERR_BAD_SGIO; 7603 break; 7604 } 7605 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7606 7607 acl = ex_seg->s_acl; 7608 if (acl[0].ae_permission == 0) { 7609 struct buf *xbuf; 7610 dev_t sdev = 0; 7611 7612 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7613 0, ex_seg->s_len, B_WRITE, 7614 sdev, 0, NULL, DDI_UMEM_SLEEP); 7615 7616 ASSERT(xbuf != NULL); 7617 7618 iovec->local_mem.ms_type = RSM_MEM_BUF; 7619 iovec->local_mem.ms_memory.bp = xbuf; 7620 } else { 7621 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7622 iovec->local_mem.ms_memory.handle = 7623 ex_seg->s_handle.out; 7624 } 7625 ex_seg->s_rdmacnt++; /* refcnt the handle */ 7626 rsmseglock_release(ex_seg); 7627 } else { 7628 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7629 iovec->local_mem.ms_memory.vr.vaddr = 7630 ka_iovec->local.vaddr; 7631 } 7632 7633 iovec->local_offset = ka_iovec->local_offset; 7634 iovec->remote_handle = im_seg->s_handle.in; 7635 iovec->remote_offset = ka_iovec->remote_offset; 7636 iovec->transfer_length = ka_iovec->transfer_len; 7637 iovec++; 7638 ka_iovec++; 7639 } 7640 7641 if (iov_proc < sg_io.io_request_count) { 7642 /* error while processing handle */ 7643 rsmseglock_acquire(im_seg); 7644 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7645 if (im_seg->s_rdmacnt == 0) { 7646 cv_broadcast(&im_seg->s_cv); 7647 } 7648 rsmseglock_release(im_seg); 7649 goto out; 7650 } 7651 7652 /* call rsmpi */ 7653 if (cmd == RSM_IOCTL_PUTV) 7654 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7655 im_seg->s_adapter->rsmpi_handle, 7656 &rsmpi_sg_io); 7657 else if (cmd == RSM_IOCTL_GETV) 7658 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7659 im_seg->s_adapter->rsmpi_handle, 7660 &rsmpi_sg_io); 7661 else { 7662 e = EINVAL; 7663 DBG_PRINTF((category, RSM_DEBUG, 7664 "iovec_ioctl: bad command = %x\n", cmd)); 7665 } 7666 7667 7668 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7669 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7670 7671 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7672 7673 /* 7674 * Check for implicit signal post flag and do the signal 7675 * post if needed 7676 */ 7677 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7678 e 
== RSM_SUCCESS) { 7679 rsmipc_request_t request; 7680 7681 request.rsmipc_key = im_seg->s_segid; 7682 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7683 request.rsmipc_segment_cookie = NULL; 7684 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7685 /* 7686 * Reset the implicit signal post flag to 0 to indicate 7687 * that the signal post has been done and need not be 7688 * done in the RSMAPI library 7689 */ 7690 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST; 7691 } 7692 7693 rsmseglock_acquire(im_seg); 7694 im_seg->s_rdmacnt--; 7695 if (im_seg->s_rdmacnt == 0) { 7696 cv_broadcast(&im_seg->s_cv); 7697 } 7698 rsmseglock_release(im_seg); 7699 error = sgio_resid_copyout(arg, &sg_io, mode); 7700 out: 7701 iovec = iovec_start; 7702 ka_iovec = ka_iovec_start; 7703 for (i = 0; i < iov_proc; i++) { 7704 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7705 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7706 7707 ASSERT(ex_seg != NULL); 7708 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7709 7710 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7711 if (ex_seg->s_rdmacnt == 0) { 7712 cv_broadcast(&ex_seg->s_cv); 7713 } 7714 rsmseglock_release(ex_seg); 7715 } 7716 7717 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7718 7719 /* 7720 * At present there is no dependency on the existence of xbufs 7721 * created by ddi_umem_iosetup for each of the iovecs. So we 7722 * can free these xbufs here. 7723 */ 7724 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7725 freerbuf(iovec->local_mem.ms_memory.bp); 7726 } 7727 7728 iovec++; 7729 ka_iovec++; 7730 } 7731 7732 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7733 if (iovec_start) 7734 kmem_free(iovec_start, size); 7735 kmem_free(ka_iovec_start, ka_size); 7736 } 7737 7738 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7739 "rsm_iovec_ioctl done %d\n", e)); 7740 /* if RSMPI call fails return that else return copyout's retval */ 7741 return ((e != RSM_SUCCESS) ? 
e : error); 7742 7743 } 7744 7745 7746 static int 7747 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7748 { 7749 adapter_t *adapter; 7750 rsm_addr_t addr; 7751 rsm_node_id_t node; 7752 int rval = DDI_SUCCESS; 7753 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7754 7755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7756 7757 adapter = rsm_getadapter(msg, mode); 7758 if (adapter == NULL) { 7759 DBG_PRINTF((category, RSM_DEBUG, 7760 "rsmaddr_ioctl done: adapter not found\n")); 7761 return (RSMERR_CTLR_NOT_PRESENT); 7762 } 7763 7764 switch (cmd) { 7765 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7766 /* returns the hwaddr in msg->hwaddr */ 7767 if (msg->nodeid == my_nodeid) { 7768 msg->hwaddr = adapter->hwaddr; 7769 } else { 7770 addr = get_remote_hwaddr(adapter, msg->nodeid); 7771 if ((int64_t)addr < 0) { 7772 rval = RSMERR_INTERNAL_ERROR; 7773 } else { 7774 msg->hwaddr = addr; 7775 } 7776 } 7777 break; 7778 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7779 /* returns the nodeid in msg->nodeid */ 7780 if (msg->hwaddr == adapter->hwaddr) { 7781 msg->nodeid = my_nodeid; 7782 } else { 7783 node = get_remote_nodeid(adapter, msg->hwaddr); 7784 if ((int)node < 0) { 7785 rval = RSMERR_INTERNAL_ERROR; 7786 } else { 7787 msg->nodeid = (rsm_node_id_t)node; 7788 } 7789 } 7790 break; 7791 default: 7792 rval = EINVAL; 7793 break; 7794 } 7795 7796 rsmka_release_adapter(adapter); 7797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7798 "rsmaddr_ioctl done: %d\n", rval)); 7799 return (rval); 7800 } 7801 7802 static int 7803 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7804 { 7805 DBG_DEFINE(category, 7806 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7807 7808 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7809 7810 #ifdef _MULTI_DATAMODEL 7811 7812 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7813 rsm_ioctlmsg32_t msg32; 7814 int i; 7815 7816 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7818 "rsm_ddi_copyin done: EFAULT\n")); 7819 return (RSMERR_BAD_ADDR); 7820 } 7821 msg->len = msg32.len; 7822 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7823 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7824 msg->key = msg32.key; 7825 msg->acl_len = msg32.acl_len; 7826 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7827 msg->cnum = msg32.cnum; 7828 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7829 msg->cname_len = msg32.cname_len; 7830 msg->nodeid = msg32.nodeid; 7831 msg->hwaddr = msg32.hwaddr; 7832 msg->perm = msg32.perm; 7833 for (i = 0; i < 4; i++) { 7834 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7835 } 7836 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7837 "rsm_ddi_copyin done\n")); 7838 return (RSM_SUCCESS); 7839 } 7840 #endif 7841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7842 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7843 return (RSMERR_BAD_ADDR); 7844 else 7845 return (RSM_SUCCESS); 7846 } 7847 7848 static int 7849 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7850 { 7851 rsmka_int_controller_attr_t rsm_cattr; 7852 DBG_DEFINE(category, 7853 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7854 7855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7856 "rsmattr_ddi_copyout enter\n")); 7857 /* 7858 * need to copy appropriate data from rsm_controller_attr_t 7859 * to rsmka_int_controller_attr_t 7860 */ 7861 #ifdef _MULTI_DATAMODEL 7862 if ((mode & DATAMODEL_MASK) == 
static int
rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
{
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));

#ifdef _MULTI_DATAMODEL

	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsm_ioctlmsg32_t msg32;
		int i;

		if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_ddi_copyin done: EFAULT\n"));
			return (RSMERR_BAD_ADDR);
		}
		msg->len = msg32.len;
		msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
		msg->arg = (caddr_t)(uintptr_t)msg32.arg;
		msg->key = msg32.key;
		msg->acl_len = msg32.acl_len;
		msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
		msg->cnum = msg32.cnum;
		msg->cname = (caddr_t)(uintptr_t)msg32.cname;
		msg->cname_len = msg32.cname_len;
		msg->nodeid = msg32.nodeid;
		msg->hwaddr = msg32.hwaddr;
		msg->perm = msg32.perm;
		for (i = 0; i < 4; i++) {
			msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ddi_copyin done\n"));
		return (RSM_SUCCESS);
	}
#endif
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
	if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
		return (RSMERR_BAD_ADDR);
	else
		return (RSM_SUCCESS);
}
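/*
 * Editorial note on the conversion above: in a 32-bit caller's message
 * the pointer fields are 32-bit values, so they are widened through
 * uintptr_t before being stored into the kernel's native message, e.g.
 *
 *	msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
 *
 * Casting a 32-bit integer directly to a 64-bit pointer type would draw
 * "cast to pointer from integer of different size" warnings; the
 * two-step cast through uintptr_t is the conventional DDI idiom.
 */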
static int
rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
{
	rsmka_int_controller_attr_t rsm_cattr;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmattr_ddi_copyout enter\n"));
	/*
	 * need to copy appropriate data from rsm_controller_attr_t
	 * to rsmka_int_controller_attr_t
	 */
#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsmka_int_controller_attr32_t rsm_cattr32;

		rsm_cattr32.attr_direct_access_sizes =
		    adapter->rsm_attr.attr_direct_access_sizes;
		rsm_cattr32.attr_atomic_sizes =
		    adapter->rsm_attr.attr_atomic_sizes;
		rsm_cattr32.attr_page_size =
		    adapter->rsm_attr.attr_page_size;
		if (adapter->rsm_attr.attr_max_export_segment_size >
		    UINT_MAX)
			rsm_cattr32.attr_max_export_segment_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_max_export_segment_size =
			    adapter->rsm_attr.attr_max_export_segment_size;
		if (adapter->rsm_attr.attr_tot_export_segment_size >
		    UINT_MAX)
			rsm_cattr32.attr_tot_export_segment_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_tot_export_segment_size =
			    adapter->rsm_attr.attr_tot_export_segment_size;
		if (adapter->rsm_attr.attr_max_export_segments >
		    UINT_MAX)
			rsm_cattr32.attr_max_export_segments =
			    UINT_MAX;
		else
			rsm_cattr32.attr_max_export_segments =
			    adapter->rsm_attr.attr_max_export_segments;
		if (adapter->rsm_attr.attr_max_import_map_size >
		    UINT_MAX)
			rsm_cattr32.attr_max_import_map_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_max_import_map_size =
			    adapter->rsm_attr.attr_max_import_map_size;
		if (adapter->rsm_attr.attr_tot_import_map_size >
		    UINT_MAX)
			rsm_cattr32.attr_tot_import_map_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_tot_import_map_size =
			    adapter->rsm_attr.attr_tot_import_map_size;
		if (adapter->rsm_attr.attr_max_import_segments >
		    UINT_MAX)
			rsm_cattr32.attr_max_import_segments =
			    UINT_MAX;
		else
			rsm_cattr32.attr_max_import_segments =
			    adapter->rsm_attr.attr_max_import_segments;
		rsm_cattr32.attr_controller_addr =
		    adapter->rsm_attr.attr_controller_addr;

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmattr_ddi_copyout done\n"));
		if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
		    sizeof (rsmka_int_controller_attr32_t), mode)) {
			return (RSMERR_BAD_ADDR);
		} else {
			return (RSM_SUCCESS);
		}
	}
#endif
	rsm_cattr.attr_direct_access_sizes =
	    adapter->rsm_attr.attr_direct_access_sizes;
	rsm_cattr.attr_atomic_sizes =
	    adapter->rsm_attr.attr_atomic_sizes;
	rsm_cattr.attr_page_size =
	    adapter->rsm_attr.attr_page_size;
	rsm_cattr.attr_max_export_segment_size =
	    adapter->rsm_attr.attr_max_export_segment_size;
	rsm_cattr.attr_tot_export_segment_size =
	    adapter->rsm_attr.attr_tot_export_segment_size;
	rsm_cattr.attr_max_export_segments =
	    adapter->rsm_attr.attr_max_export_segments;
	rsm_cattr.attr_max_import_map_size =
	    adapter->rsm_attr.attr_max_import_map_size;
	rsm_cattr.attr_tot_import_map_size =
	    adapter->rsm_attr.attr_tot_import_map_size;
	rsm_cattr.attr_max_import_segments =
	    adapter->rsm_attr.attr_max_import_segments;
	rsm_cattr.attr_controller_addr =
	    adapter->rsm_attr.attr_controller_addr;
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmattr_ddi_copyout done\n"));
	if (ddi_copyout((caddr_t)&rsm_cattr, arg,
	    sizeof (rsmka_int_controller_attr_t), mode)) {
		return (RSMERR_BAD_ADDR);
	} else {
		return (RSM_SUCCESS);
	}
}
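/*
 * Editorial note: the ILP32 path above applies one clamping rule
 * throughout.  A 64-bit size attribute that does not fit in 32 bits is
 * reported as the largest page-aligned value, while plain counters
 * saturate at UINT_MAX.  Schematically:
 *
 *	attr32 = (attr64 > UINT_MAX) ? clamp : (uint32_t)attr64;
 *
 * This keeps a 32-bit RSMAPI library usable on a 64-bit kernel at the
 * cost of under-reporting very large limits.
 */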
/*ARGSUSED*/
static int
rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	rsmseg_t *seg;
	rsmresource_t *res;
	minor_t rnum;
	rsm_ioctlmsg_t msg = {0};
	int error;
	adapter_t *adapter;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));

	if (cmd == RSM_IOCTL_CONSUMEEVENT) {
		error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
		return (error);
	}

	/* topology cmd does not use the arg common to other cmds */
	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
		error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: %d\n", error));
		return (error);
	}

	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
		error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: %d\n", error));
		return (error);
	}

	/*
	 * try to load arguments
	 */
	if (cmd != RSM_IOCTL_RING_BELL &&
	    rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: EFAULT\n"));
		return (RSMERR_BAD_ADDR);
	}

	if (cmd == RSM_IOCTL_ATTR) {
		adapter = rsm_getadapter(&msg, mode);
		if (adapter == NULL) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_ioctl done: ENODEV\n"));
			return (RSMERR_CTLR_NOT_PRESENT);
		}
		error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_ioctl: after copyout %d\n", error));
		return (error);
	}

	if (cmd == RSM_IOCTL_BAR_INFO) {
		/* Return library off,len of barrier page */
		msg.off = barrier_offset;
		msg.len = (int)barrier_size;
#ifdef _MULTI_DATAMODEL
		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			rsm_ioctlmsg32_t msg32;

			if (msg.len > UINT_MAX)
				msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
			else
				msg32.len = (int32_t)msg.len;
			msg32.off = (int32_t)msg.off;
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_ioctl done\n"));
			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
			    sizeof (msg32), mode))
				return (RSMERR_BAD_ADDR);
			else
				return (RSM_SUCCESS);
		}
#endif
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done\n"));
		if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
		    sizeof (msg), mode))
			return (RSMERR_BAD_ADDR);
		else
			return (RSM_SUCCESS);
	}

	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
		/* map the nodeid or hwaddr */
		error = rsmaddr_ioctl(cmd, &msg, mode);
		if (error == RSM_SUCCESS) {
#ifdef _MULTI_DATAMODEL
			if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
				rsm_ioctlmsg32_t msg32;

				msg32.hwaddr = (uint64_t)msg.hwaddr;
				msg32.nodeid = (uint32_t)msg.nodeid;

				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_ioctl done\n"));
				if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
				    sizeof (msg32), mode))
					return (RSMERR_BAD_ADDR);
				else
					return (RSM_SUCCESS);
			}
#endif
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_ioctl done\n"));
			if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
			    sizeof (msg), mode))
				return (RSMERR_BAD_ADDR);
			else
				return (RSM_SUCCESS);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: %d\n", error));
		return (error);
	}

	/* Find resource and look it up in read mode */
	rnum = getminor(dev);
	res = rsmresource_lookup(rnum, RSM_NOLOCK);
	ASSERT(res != NULL);

	/*
	 * Find command group
	 */
	switch (RSM_IOCTL_CMDGRP(cmd)) {
	case RSM_IOCTL_EXPORT_SEG:
		/*
		 * Export list is searched during publish, loopback and
		 * remote lookup call.
		 */
		seg = rsmresource_seg(res, rnum, credp,
		    RSM_RESOURCE_EXPORT_SEGMENT);
		if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
			error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
			    credp);
		} else { /* export ioctl on an import/barrier resource */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	case RSM_IOCTL_IMPORT_SEG:
		/* Import list is searched during remote unmap call. */
		seg = rsmresource_seg(res, rnum, credp,
		    RSM_RESOURCE_IMPORT_SEGMENT);
		if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
			error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
			    credp);
		} else { /* import ioctl on an export/barrier resource */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	case RSM_IOCTL_BAR:
		if (res != RSMRC_RESERVED &&
		    res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
			error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
			    mode);
		} else { /* invalid res value */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	case RSM_IOCTL_BELL:
		if (res != RSMRC_RESERVED) {
			if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
				error = exportbell_ioctl((rsmseg_t *)res, cmd);
			else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
				error = importbell_ioctl((rsmseg_t *)res, cmd);
			else /* RSM_RESOURCE_BAR */
				error = RSMERR_BAD_SEG_HNDL;
		} else { /* invalid res value */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	default:
		error = EINVAL;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
	    error));
	return (error);
}
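/*
 * Editorial summary of the dispatch order in rsm_ioctl() above:
 * commands with their own argument formats (CONSUMEEVENT, TOPOLOGY,
 * IOVEC) are handled before the common rsm_ioctlmsg_t copyin, which is
 * itself skipped for RSM_IOCTL_RING_BELL; adapter-scoped commands
 * (ATTR, BAR_INFO, MAP_ADDR) follow; everything else is resolved
 * against the per-open minor's resource and routed by command group to
 * the export, import, barrier or bell handlers.
 */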
/* **************************** Segment Mapping Operations ********* */

static rsm_mapinfo_t *
rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
    size_t *map_len)
{
	rsm_mapinfo_t *p;
	/*
	 * Find the correct mapinfo structure to use during the mapping
	 * from the seg->s_mapinfo list.
	 * The seg->s_mapinfo list contains in reverse order the mappings
	 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
	 * access the correct entry within this list for the mapping
	 * requested.
	 *
	 * The algorithm for selecting a list entry is as follows:
	 *
	 * When start_offset of an entry <= off we have found the entry
	 * we were looking for. Adjust the dev_offset and map_len (needs
	 * to be PAGESIZE aligned).
	 */
	p = seg->s_mapinfo;
	for (; p; p = p->next) {
		if (p->start_offset <= off) {
			*dev_offset = p->dev_offset + off - p->start_offset;
			*map_len = (len > p->individual_len) ?
			    p->individual_len : ptob(btopr(len));
			return (p);
		}
	}

	return (NULL);
}

static void
rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
{
	rsm_mapinfo_t *p;

	while (mapinfo != NULL) {
		p = mapinfo;
		mapinfo = mapinfo->next;
		kmem_free(p, sizeof (*p));
	}
}
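/*
 * Worked example for rsm_get_mapinfo() (editorial note, hypothetical
 * values): suppose rsm_map returned two pieces, held in reverse order:
 *
 *	{ start_offset = 0x2000, individual_len = 0x2000 } ->
 *	{ start_offset = 0x0000, individual_len = 0x2000 }
 *
 * A request for off = 0x3000 matches the first entry (0x2000 <= 0x3000),
 * giving *dev_offset = dev_offset + 0x1000; a request for off = 0x1000
 * skips the first entry and matches the second.  Because the list is in
 * descending start_offset order, the first matching entry is always the
 * one that actually covers the requested offset.
 */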
static int
rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
	rsmcookie_t *p;
	rsmresource_t *res;
	rsmseg_t *seg;
	minor_t rnum;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmmap_map: dhp = %x\n", dhp));

	flags = flags;

	rnum = getminor(dev);
	res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
	ASSERT(res != NULL);

	seg = (rsmseg_t *)res;

	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/*
	 * Allocate structure and add cookie to segment list
	 */
	p = kmem_alloc(sizeof (*p), KM_SLEEP);

	p->c_dhp = dhp;
	p->c_off = off;
	p->c_len = len;
	p->c_next = seg->s_ckl;
	seg->s_ckl = p;

	*pvtp = (void *)seg;

	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
	return (DDI_SUCCESS);
}

/*
 * Page fault handling is done here. The prerequisite mapping setup
 * has been done in rsm_devmap with calls to devmap_devmem_setup or
 * devmap_umem_setup.
 */
static int
rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
    uint_t type, uint_t rw)
{
	int e;
	rsmseg_t *seg = (rsmseg_t *)pvt;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));

	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsmmap_access done: cv_wait INTR"));
			rsmseglock_release(seg);
			return (RSMERR_INTERRUPTED);
		}
	}

	ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
	    seg->s_state == RSM_STATE_ACTIVE);

	if (seg->s_state == RSM_STATE_DISCONNECT)
		seg->s_flags |= RSM_IMPORT_DUMMY;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmmap_access: dhp = %x\n", dhp));

	rsmseglock_release(seg);

	if ((e = devmap_load(dhp, offset, len, type, rw)) != 0) {
		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));

	return (e);
}
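/*
 * Callback lifecycle (editorial note): for every devmap_*_setup done in
 * rsm_devmap, the framework invokes rsmmap_map once when the mapping is
 * created, rsmmap_access on each fault-in, rsmmap_dup when the address
 * space is duplicated (e.g. on fork), and rsmmap_unmap when a piece is
 * torn down.  The cookie list (seg->s_ckl) built in rsmmap_map and
 * rsmmap_dup is what the forced-disconnect and unmap paths use to find
 * every dhp that still refers to the segment.
 */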
static int
rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
    void **newpvt)
{
	rsmseg_t *seg = (rsmseg_t *)oldpvt;
	rsmcookie_t *p, *old;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));

	/*
	 * Same as map, create an entry to hold cookie and add it to
	 * connect segment list. The oldpvt is a pointer to segment.
	 * Return segment pointer in newpvt.
	 */
	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/*
	 * Find old cookie
	 */
	for (old = seg->s_ckl; old != NULL; old = old->c_next) {
		if (old->c_dhp == dhp) {
			break;
		}
	}
	if (old == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmmap_dup done: EINVAL\n"));
		rsmseglock_release(seg);
		return (EINVAL);
	}

	p = kmem_alloc(sizeof (*p), KM_SLEEP);

	p->c_dhp = new_dhp;
	p->c_off = old->c_off;
	p->c_len = old->c_len;
	p->c_next = seg->s_ckl;
	seg->s_ckl = p;

	*newpvt = (void *)seg;

	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));

	return (DDI_SUCCESS);
}
static void
rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2)
{
	/*
	 * Remove pvtp structure from segment list.
	 */
	rsmseg_t *seg = (rsmseg_t *)pvtp;
	int freeflag;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));

	off = off; len = len;
	pvtp1 = pvtp1; pvtp2 = pvtp2;

	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmmap_unmap: dhp = %x\n", dhp));
	/*
	 * We can go ahead and remove the dhps even if we are in
	 * the MAPPING state because the dhps being removed here
	 * belong to a different mmap and we are holding the segment
	 * lock.
	 */
	if (new_dhp1 == NULL && new_dhp2 == NULL) {
		/* find and remove dhp handle */
		rsmcookie_t *tmp, **back = &seg->s_ckl;

		while (*back != NULL) {
			tmp = *back;
			if (tmp->c_dhp == dhp) {
				*back = tmp->c_next;
				kmem_free(tmp, sizeof (*tmp));
				break;
			}
			back = &tmp->c_next;
		}
	} else {
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsmmap_unmap: partial unmap "
		    "new_dhp1 %lx, new_dhp2 %lx\n",
		    (size_t)new_dhp1, (size_t)new_dhp2));
	}

	/*
	 * rsmmap_unmap is called for each mapping cookie on the list.
	 * When the list becomes empty and we are not in the MAPPING
	 * state then unmap in the rsmpi driver.
	 */
	if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
		(void) rsm_unmap(seg);

	if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
		freeflag = 1;
	} else {
		freeflag = 0;
	}

	rsmseglock_release(seg);

	if (freeflag) {
		/* Free the segment structure */
		rsmseg_free(seg);
	}
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));

}

static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* devmap_ops map routine */
	rsmmap_access,	/* devmap_ops access routine */
	rsmmap_dup,	/* devmap_ops dup routine */
	rsmmap_unmap,	/* devmap_ops unmap routine */
};
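/*
 * Editorial note: the rsmmap_ops vector above is handed to the devmap
 * framework in rsm_devmap below, e.g. for the remote (device memory)
 * case:
 *
 *	err = devmap_devmem_setup(dhc, p->dip, &rsmmap_ops,
 *	    p->dev_register, dev_offset, cur_len, maxprot,
 *	    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
 *
 * Passing NULL instead of a callback vector, as is done for the barrier
 * page, opts a mapping out of this per-mapping state tracking entirely.
 */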
static int
rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
    size_t *maplen, uint_t model /*ARGSUSED*/)
{
	struct devmap_callback_ctl *callbackops = &rsmmap_ops;
	int err;
	uint_t maxprot;
	minor_t rnum;
	rsmseg_t *seg;
	off_t dev_offset;
	size_t cur_len;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_devmap: off = %lx, len = %lx\n", off, len));
	rnum = getminor(dev);
	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
	ASSERT(seg != NULL);

	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
		if ((off == barrier_offset) &&
		    (len == barrier_size)) {

			ASSERT(bar_va != NULL && bar_cookie != NULL);

			/*
			 * The offset argument in devmap_umem_setup represents
			 * the offset within the kernel memory defined by the
			 * cookie. We use this offset as barrier_offset.
			 */
			err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
			    barrier_offset, len, PROT_USER|PROT_READ,
			    DEVMAP_DEFAULTS, 0);

			if (err != 0) {
				DBG_PRINTF((category, RSM_ERR,
				    "rsm_devmap done: %d\n", err));
				return (RSMERR_MAP_FAILED);
			}
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap done: %d\n", err));

			*maplen = barrier_size;

			return (err);
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap done: %d\n", RSMERR_MAP_FAILED));
			return (RSMERR_MAP_FAILED);
		}
	}

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
	ASSERT(seg->s_state == RSM_STATE_MAPPING);

	/*
	 * Make sure we still have permission for the map operation.
	 */
	maxprot = PROT_USER;
	if (seg->s_mode & RSM_PERM_READ) {
		maxprot |= PROT_READ;
	}

	if (seg->s_mode & RSM_PERM_WRITE) {
		maxprot |= PROT_WRITE;
	}

	/*
	 * For each devmap call, rsmmap_map is called. This maintains driver
	 * private information for the mapping. Thus, if there are multiple
	 * devmap calls there will be multiple rsmmap_map calls and for each
	 * call, the mapping information will be stored.
	 * In case of an error during the processing of the devmap call, error
	 * will be returned. This error return causes the caller of rsm_devmap
	 * to undo all the mappings by calling rsmmap_unmap for each one.
	 * rsmmap_unmap will free up the private information for the requested
	 * mapping.
	 */
	if (seg->s_node != my_nodeid) {
		rsm_mapinfo_t *p;

		p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
		if (p == NULL) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap: incorrect mapping info\n"));
			return (RSMERR_MAP_FAILED);
		}
		err = devmap_devmem_setup(dhc, p->dip,
		    callbackops, p->dev_register,
		    dev_offset, cur_len, maxprot,
		    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
		    "off=%lx,len=%lx\n",
		    p->dip, p->dev_register, dev_offset, off, cur_len));

		if (err != 0) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap: devmap_devmem_setup failed %d\n",
			    err));
			return (RSMERR_MAP_FAILED);
		}
		/* cur_len is always an integral multiple of PAGESIZE */
		ASSERT((cur_len & (PAGESIZE-1)) == 0);
		*maplen = cur_len;
		return (err);

	} else {
		err = devmap_umem_setup(dhc, rsm_dip, callbackops,
		    seg->s_cookie, off, len, maxprot,
		    DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
		if (err != 0) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_devmap: devmap_umem_setup failed %d\n",
			    err));
			return (RSMERR_MAP_FAILED);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_devmap: loopback done\n"));

		*maplen = ptob(btopr(len));

		return (err);
	}
}
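/*
 * Protection derivation used in both rsm_devmap above and rsm_segmap
 * below (editorial note): segment mode bits translate to page
 * protections as
 *
 *	maxprot = PROT_USER
 *	    | ((s_mode & RSM_PERM_READ) ? PROT_READ : 0)
 *	    | ((s_mode & RSM_PERM_WRITE) ? PROT_WRITE : 0);
 *
 * so a connection that has lost write permission can never be mapped
 * writable, regardless of what the mmap caller requested.
 */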
/*
 * We can use the devmap framework for mapping device memory to user space by
 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
 * processing calls this entry point and devmap_setup is called within this
 * function, which eventually calls rsm_devmap
 */
static int
rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	int error = 0;
	int old_state;
	minor_t rnum;
	rsmseg_t *seg, *eseg;
	adapter_t *adapter;
	rsm_import_share_t *sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));

	/*
	 * find segment
	 */
	rnum = getminor(dev);
	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);

	if (seg == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_segmap done: invalid segment\n"));
		return (EINVAL);
	}

	/*
	 * the user is trying to map a resource that has not been
	 * defined yet. The library uses this to map in the
	 * barrier page.
	 */
	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
		rsmseglock_release(seg);

		/*
		 * The mapping for the barrier page is identified
		 * by the special offset barrier_offset
		 */

		if (off == (off_t)barrier_offset ||
		    len == (off_t)barrier_size) {
			if (bar_cookie == NULL || bar_va == NULL) {
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_segmap: bar cookie/va is NULL\n"));
				return (EINVAL);
			}

			error = devmap_setup(dev, (offset_t)off, as, addrp,
			    (size_t)len, prot, maxprot, flags, cred);

			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_segmap done: %d\n", error));
			return (error);
		} else {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap: bad offset/length\n"));
			return (EINVAL);
		}
	}

	/* Make sure you can only map imported segments */
	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_segmap done: not an import segment\n"));
		return (EINVAL);
	}
	/* check means library is broken */
	ASSERT(seg->s_hdr.rsmrc_num == rnum);

	/* wait for the segment to become unquiesced */
	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap done: cv_wait INTR"));
			return (ENODEV);
		}
	}

	/* wait until segment leaves the mapping state */
	while (seg->s_state == RSM_STATE_MAPPING)
		cv_wait(&seg->s_cv, &seg->s_lock);

	/*
	 * we allow multiple maps of the same segment in the KA
	 * and it works because we do an rsmpi map of the whole
	 * segment during the first map and all the device mapping
	 * information needed in rsm_devmap is in the mapinfo list.
	 */
	if ((seg->s_state != RSM_STATE_CONNECT) &&
	    (seg->s_state != RSM_STATE_ACTIVE)) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: segment not connected\n"));
		return (ENODEV);
	}

	/*
	 * Make sure we are not mapping a larger segment than what's
	 * exported
	 */
	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: off+len>seg size\n"));
		return (ENXIO);
	}
	/*
	 * Make sure we still have permission for the map operation.
	 */
	maxprot = PROT_USER;
	if (seg->s_mode & RSM_PERM_READ) {
		maxprot |= PROT_READ;
	}

	if (seg->s_mode & RSM_PERM_WRITE) {
		maxprot |= PROT_WRITE;
	}

	if ((prot & maxprot) != prot) {
		/* No permission */
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: no permission\n"));
		return (EACCES);
	}

	old_state = seg->s_state;

	ASSERT(seg->s_share != NULL);

	rsmsharelock_acquire(seg);

	sharedp = seg->s_share;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));

	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
		rsmsharelock_release(seg);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done:RSMSI_STATE %d invalid\n",
		    sharedp->rsmsi_state));
		return (ENODEV);
	}

	/*
	 * Do the map - since we want importers to share mappings
	 * we do the rsmpi map for the whole segment
	 */
	if (seg->s_node != my_nodeid) {
		uint_t dev_register;
		off_t dev_offset;
		dev_info_t *dip;
		size_t tmp_len;
		size_t total_length_mapped = 0;
		size_t length_to_map = seg->s_len;
		off_t tmp_off = 0;
		rsm_mapinfo_t *p;

		/*
		 * length_to_map = seg->s_len is always an integral
		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
		 * list is a multiple of PAGESIZE - RSMPI map ensures this
		 */

		adapter = seg->s_adapter;
		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
			error = 0;
			/* map the whole segment */
			while (total_length_mapped < seg->s_len) {
				tmp_len = 0;

				error = adapter->rsmpi_ops->rsm_map(
				    seg->s_handle.in, tmp_off,
				    length_to_map, &tmp_len,
				    &dip, &dev_register, &dev_offset,
				    NULL, NULL);

				if (error != 0)
					break;

				/*
				 * Store the mapping info obtained from rsm_map
				 */
				p = kmem_alloc(sizeof (*p), KM_SLEEP);
				p->dev_register = dev_register;
				p->dev_offset = dev_offset;
				p->dip = dip;
				p->individual_len = tmp_len;
				p->start_offset = tmp_off;
				p->next = sharedp->rsmsi_mapinfo;
				sharedp->rsmsi_mapinfo = p;

				total_length_mapped += tmp_len;
				length_to_map -= tmp_len;
				tmp_off += tmp_len;
			}
			seg->s_mapinfo = sharedp->rsmsi_mapinfo;

			if (error != RSM_SUCCESS) {
				/* Check if this is the first rsm_map */
				if (sharedp->rsmsi_mapinfo != NULL) {
					/*
					 * A single rsm_unmap undoes
					 * multiple rsm_maps.
					 */
					(void) seg->s_adapter->rsmpi_ops->
					    rsm_unmap(sharedp->rsmsi_handle);
					rsm_free_mapinfo(sharedp->
					    rsmsi_mapinfo);
				}
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
				rsmsharelock_release(seg);
				rsmseglock_release(seg);
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_segmap done: rsmpi map err %d\n",
				    error));
				ASSERT(error != RSMERR_BAD_LENGTH &&
				    error != RSMERR_BAD_MEM_ALIGNMENT &&
				    error != RSMERR_BAD_SEG_HNDL);
				if (error == RSMERR_UNSUPPORTED_OPERATION)
					return (ENOTSUP);
				else if (error == RSMERR_INSUFFICIENT_RESOURCES)
					return (EAGAIN);
				else if (error == RSMERR_CONN_ABORTED)
					return (ENODEV);
				else
					return (error);
			} else {
				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
			}
		} else {
			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
		}

		sharedp->rsmsi_mapcnt++;

		rsmsharelock_release(seg);

		/* move to an intermediate mapping state */
		seg->s_state = RSM_STATE_MAPPING;
		rsmseglock_release(seg);

		error = devmap_setup(dev, (offset_t)off, as, addrp,
		    len, prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);

		if (error == DDI_SUCCESS) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				/* unmap the shared RSMPI mapping */
				ASSERT(sharedp->rsmsi_handle != NULL);
				(void) adapter->rsmpi_ops->
				    rsm_unmap(sharedp->rsmsi_handle);
				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}

			rsmsharelock_release(seg);
			seg->s_state = old_state;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: devmap_setup failed %d\n", error));
		}
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
		    error));
		return (error);
	} else {
		/*
		 * For loopback, the export segment mapping cookie (s_cookie)
		 * is also used as the s_cookie value for its import segments
		 * during mapping.
		 * Note that reference counting for s_cookie of the export
		 * segment is not required due to the following:
		 * We never have a case of the export segment being destroyed,
		 * leaving the import segments with a stale value for the
		 * s_cookie field, since a force disconnect is done prior to a
		 * destroy of an export segment. The force disconnect causes
		 * the s_cookie value to be reset to NULL. Also for the
		 * rsm_rebind operation, we change the s_cookie value of the
		 * export segment as well as of all its local (loopback)
		 * importers.
		 */
		DBG_ADDCATEGORY(category, RSM_LOOPBACK);

		rsmsharelock_release(seg);
		/*
		 * In order to maintain the lock ordering between the export
		 * and import segment locks, we need to acquire the export
		 * segment lock first and only then acquire the import
		 * segment lock.
		 * The above is necessary to avoid any deadlock scenarios
		 * with rsm_rebind which also acquires both the export
		 * and import segment locks in the above mentioned order.
		 * Based on code inspection, there seem to be no other
		 * situations in which both the export and import segment
		 * locks are acquired either in the same or opposite order
		 * as mentioned above.
		 * Thus in order to conform to the above lock order, we
		 * need to change the state of the import segment to
		 * RSM_STATE_MAPPING, release the lock. Once this is done we
		 * can now safely acquire the export segment lock first
		 * followed by the import segment lock which is as per
		 * the lock order mentioned above.
		 */
		/* move to an intermediate mapping state */
		seg->s_state = RSM_STATE_MAPPING;
		rsmseglock_release(seg);

		eseg = rsmexport_lookup(seg->s_key);

		if (eseg == NULL) {
			rsmseglock_acquire(seg);
			/*
			 * Revert to old_state and signal any waiters
			 * The shared state is not changed
			 */

			seg->s_state = old_state;
			cv_broadcast(&seg->s_cv);
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap done: key %d not found\n", seg->s_key));
			return (ENODEV);
		}

		rsmsharelock_acquire(seg);
		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

		sharedp->rsmsi_mapcnt++;
		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
		rsmsharelock_release(seg);

		ASSERT(eseg->s_cookie != NULL);

		/*
		 * It is not required or necessary to acquire the import
		 * segment lock here to change the value of s_cookie since
		 * no one will touch the import segment as long as it is
		 * in the RSM_STATE_MAPPING state.
		 */
		seg->s_cookie = eseg->s_cookie;

		rsmseglock_release(eseg);

		error = devmap_setup(dev, (offset_t)off, as, addrp,
		    (size_t)len, prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);
		if (error == 0) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}
			rsmsharelock_release(seg);
			seg->s_state = old_state;
			seg->s_cookie = NULL;
		}
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsm_segmap done: %d\n", error));
		return (error);
	}
}
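/*
 * Lock-ordering sketch for the loopback path above (editorial note):
 *
 *	import seg lock -> (release) -> export seg lock -> import seg lock
 *
 * The import segment is parked in RSM_STATE_MAPPING before its lock is
 * dropped, so no other thread mutates it while rsmexport_lookup takes
 * the export segment lock; this conforms to the export-then-import
 * order used by rsm_rebind and avoids an ABBA deadlock between the two
 * paths.
 */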
int
rsmka_null_seg_create(
    rsm_controller_handle_t argcp,
    rsm_memseg_export_handle_t *handle,
    size_t size,
    uint_t flags,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_bind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *argmemory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_unbind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    size_t length /*ARGSUSED*/)
{
	return (DDI_SUCCESS);
}

int
rsmka_null_rebind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}

int
rsmka_null_publish(
    rsm_memseg_export_handle_t argmemseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_memseg_id_t segment_id,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_republish(
    rsm_memseg_export_handle_t memseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}

int
rsmka_null_unpublish(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}
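/*
 * Editorial note: the rsmka_null_* vector above turns every RSMPI
 * export-side operation into an immediate success.  For loopback there
 * is no interconnect to program: binding, publishing and rebinding only
 * need the kernel-agent bookkeeping that the generic code paths already
 * perform, so the adapter ops can be pure stubs.
 */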
void
rsmka_init_loopback()
{
	rsm_ops_t *ops = &null_rsmpi_ops;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmka_init_loopback enter\n"));

	/* initialize null ops vector */
	ops->rsm_seg_create = rsmka_null_seg_create;
	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
	ops->rsm_bind = rsmka_null_bind;
	ops->rsm_unbind = rsmka_null_unbind;
	ops->rsm_rebind = rsmka_null_rebind;
	ops->rsm_publish = rsmka_null_publish;
	ops->rsm_unpublish = rsmka_null_unpublish;
	ops->rsm_republish = rsmka_null_republish;

	/* initialize attributes for loopback adapter */
	loopback_attr.attr_name = loopback_str;
	loopback_attr.attr_page_size = 0x8; /* 8K */

	/* initialize loopback adapter */
	loopback_adapter.rsm_attr = loopback_attr;
	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmka_init_loopback done\n"));
}
/* ************** DR functions ********************************** */
static void
rsm_quiesce_exp_seg(rsmresource_t *resp)
{
	int recheck_state;
	rsmseg_t *segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_exp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);
	do {
		recheck_state = 0;
		if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
		    (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
		    (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
		    (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done:state =%d\n", function,
			    segp->s_state));
			return;
		}

		if (segp->s_state == RSM_STATE_NEW) {
			segp->s_state = RSM_STATE_NEW_QUIESCED;
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done:state =%d\n", function,
			    segp->s_state));
			return;
		}

		if (segp->s_state == RSM_STATE_BIND) {
			/* unbind */
			(void) rsm_unbind_pages(segp);
			segp->s_state = RSM_STATE_BIND_QUIESCED;
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done:state =%d\n", function,
			    segp->s_state));
			return;
		}

		if (segp->s_state == RSM_STATE_EXPORT) {
			/*
			 * wait for putv/getv to complete if the segp is
			 * a local memory handle
			 */
			while ((segp->s_state == RSM_STATE_EXPORT) &&
			    (segp->s_rdmacnt != 0)) {
				cv_wait(&segp->s_cv, &segp->s_lock);
			}

			if (segp->s_state != RSM_STATE_EXPORT) {
				/*
				 * state changed need to see what it
				 * should be changed to.
				 */
				recheck_state = 1;
				continue;
			}

			segp->s_state = RSM_STATE_EXPORT_QUIESCING;
			rsmseglock_release(segp);
			/*
			 * send SUSPEND messages - currently it will be
			 * done at the end
			 */
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done:state =%d\n", function,
			    segp->s_state));
			return;
		}
	} while (recheck_state);

	rsmseglock_release(segp);
}
static void
rsm_unquiesce_exp_seg(rsmresource_t *resp)
{
	int ret;
	rsmseg_t *segp = (rsmseg_t *)resp;
	rsmapi_access_entry_t *acl;
	rsm_access_entry_t *rsmpi_acl;
	int acl_len;
	int create_flags = 0;
	struct buf *xbuf;
	rsm_memory_local_t mem;
	adapter_t *adapter;
	dev_t sdev = 0;
	rsm_resource_callback_t callback_flag;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");

	rsmseglock_acquire(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
	    segp->s_state));

	if ((segp->s_state == RSM_STATE_NEW) ||
	    (segp->s_state == RSM_STATE_BIND) ||
	    (segp->s_state == RSM_STATE_EXPORT)) {
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
		    function, segp->s_state));
		return;
	}

	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
		segp->s_state = RSM_STATE_NEW;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
		    function, segp->s_state));
		return;
	}

	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
		/* bind the segment */
		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
		    segp->s_len, segp->s_proc);
		if (ret == RSM_SUCCESS) { /* bind successful */
			segp->s_state = RSM_STATE_BIND;
		} else { /* bind failed - resource unavailable */
			segp->s_state = RSM_STATE_NEW;
		}
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "%s done: bind_qscd bind = %d\n", function, ret));
		return;
	}

	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
		/* wait for the segment to move to EXPORT_QUIESCED state */
		cv_wait(&segp->s_cv, &segp->s_lock);
	}

	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
		/* bind the segment */
		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
		    segp->s_len, segp->s_proc);

		if (ret != RSM_SUCCESS) {
			/* bind failed - resource unavailable */
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_NEW;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: exp_qscd bind failed = %d\n",
			    function, ret));
			return;
		}
		/*
		 * publish the segment
		 * if successful
		 * segp->s_state = RSM_STATE_EXPORT;
		 * else failed
		 * segp->s_state = RSM_STATE_BIND;
		 */

		/* check whether it is a local_memory_handle */
		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
			if ((segp->s_acl[0].ae_node == my_nodeid) &&
			    (segp->s_acl[0].ae_permission == 0)) {
				segp->s_state = RSM_STATE_EXPORT;
				cv_broadcast(&segp->s_cv);
				rsmseglock_release(segp);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "%s done:exp_qscd\n", function));
				return;
			}
		}
		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len,
		    B_WRITE, sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = segp->s_adapter;

		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
			create_flags = RSM_ALLOW_UNBIND_REBIND;
		}

		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
			callback_flag = RSM_RESOURCE_DONTWAIT;
		} else {
			callback_flag = RSM_RESOURCE_SLEEP;
		}

		ret = adapter->rsmpi_ops->rsm_seg_create(
		    adapter->rsmpi_handle, &segp->s_handle.out,
		    segp->s_len, create_flags, &mem,
		    callback_flag, NULL);

		if (ret != RSM_SUCCESS) {
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd create failed = %d\n",
			    function, ret));
			return;
		}

		ret = adapter->rsmpi_ops->rsm_publish(
		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);

		if (ret != RSM_SUCCESS) {
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd publish failed = %d\n",
			    function, ret));
			return;
		}

		segp->s_state = RSM_STATE_EXPORT;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
		    function));
		return;
	}

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}
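/*
 * Recovery summary for rsm_unquiesce_exp_seg above (editorial note): a
 * segment found in a *_QUIESCED state is rolled forward as far as
 * resources allow.  BIND_QUIESCED segments are rebound to reach
 * RSM_STATE_BIND; EXPORT_QUIESCED segments are rebound, re-created via
 * rsm_seg_create and republished.  On any failure the segment is left
 * in the last state it legitimately reached (NEW or BIND), its ACLs are
 * freed, and any waiters are woken via cv_broadcast.
 */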
static void
rsm_quiesce_imp_seg(rsmresource_t *resp)
{
	rsmseg_t *segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);
	segp->s_flags |= RSM_DR_INPROGRESS;

	while (segp->s_rdmacnt != 0) {
		/* wait for the RDMA to complete */
		cv_wait(&segp->s_cv, &segp->s_lock);
	}

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_unquiesce_imp_seg(rsmresource_t *resp)
{
	rsmseg_t *segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);

	segp->s_flags &= ~RSM_DR_INPROGRESS;
	/* wake up any waiting putv/getv ops */
	cv_broadcast(&segp->s_cv);

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_process_exp_seg(rsmresource_t *resp, int event)
{
	if (event == RSM_DR_QUIESCE)
		rsm_quiesce_exp_seg(resp);
	else /* UNQUIESCE */
		rsm_unquiesce_exp_seg(resp);
}

static void
rsm_process_imp_seg(rsmresource_t *resp, int event)
{
	if (event == RSM_DR_QUIESCE)
		rsm_quiesce_imp_seg(resp);
	else /* UNQUIESCE */
		rsm_unquiesce_imp_seg(resp);
}

static void
rsm_dr_process_local_segments(int event)
{

	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_process_local_segments enter\n"));

	/* iterate through the resource structure */

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		blk = rsm_resource.rsmrc_root[i];
		if (blk != NULL) {
			for (j = 0; j < RSMRC_BLKSZ; j++) {
				p = blk->rsmrcblk_blks[j];
				if ((p != NULL) && (p != RSMRC_RESERVED)) {
					/* valid resource */
					if (p->rsmrc_type ==
					    RSM_RESOURCE_EXPORT_SEGMENT)
						rsm_process_exp_seg(p, event);
					else if (p->rsmrc_type ==
					    RSM_RESOURCE_IMPORT_SEGMENT)
						rsm_process_imp_seg(p, event);
				}
			}
		}
	}

	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_process_local_segments done\n"));
}

/* *************** DR callback functions ************ */
static void
rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_add is a no-op\n"));
	/* Noop */
}
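/*
 * DR state machine sketch (editorial note): the pre-del/post-del
 * callbacks below drive rsm_drv_data.drv_state through
 *
 *	RSM_DRV_OK -> RSM_DRV_PREDEL_STARTED -> RSM_DRV_PREDEL_COMPLETED
 *	    -> RSM_DRV_DR_IN_PROGRESS -> RSM_DRV_POSTDEL_IN_PROGRESS
 *	    -> RSM_DRV_OK
 *
 * with drv_memdel_cnt counting overlapping memory-delete operations so
 * that only the final post-del callback performs the unquiesce.
 */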
static int
rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
{
	int recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_pre_del:state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The state should usually never be RSM_DRV_NEW
			 * since in this state the callbacks have not yet
			 * been registered. So, ASSERT.
			 */
			ASSERT(0);
			return (0);
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering
			 * with the DR framework. So, wait till the
			 * registration process is complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * If the state is RSM_DRV_UNREG_PROCESSING, the
			 * module is in the process of detaching and
			 * unregistering the callbacks from the DR
			 * framework. So, simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_pre_del: "
			    "pre-del on NEW/UNREG\n"));
			return (0);
		case RSM_DRV_OK:
			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
			break;
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt++;
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_dr_callback_pre_del done\n"));
			return (0);
			/* break; */
		default:
			ASSERT(0);
			break;
		}

	} while (recheck_state);

	rsm_drv_data.drv_memdel_cnt++;

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Do all the quiescing stuff here */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_pre_del: quiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_QUIESCE);

	/*
	 * now that all local segments have been quiesced let's inform
	 * the importers
	 */
	rsm_send_suspend();

	/*
	 * In response to the suspend message the remote node(s) will process
	 * the segments and send a suspend_complete message. Till all
	 * the nodes send the suspend_complete message we wait in the
	 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
	 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
	 */
	mutex_enter(&rsm_drv_data.drv_lock);

	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	}

	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);

	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del done\n"));

	return (0);
}

static void
rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
{
	int recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_post_del:state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The driver state cannot be RSM_DRV_NEW
			 * since in this state the callbacks have not
			 * yet been registered.
			 */
			ASSERT(0);
			return;
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering with
			 * the DR framework. Wait till the registration is
			 * complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * RSM_DRV_UNREG_PROCESSING state means the module
			 * is detaching and unregistering the callbacks
			 * from the DR framework. So simply return.
			 */
			/* FALLTHRU */
		case RSM_DRV_OK:
			/*
			 * RSM_DRV_OK means we missed the pre-del
			 * corresponding to this post-del because we had not
			 * registered yet, so simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_post_del: "
			    "post-del on OK/UNREG\n"));
			return;
			/* break; */
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt--;
			if (rsm_drv_data.drv_memdel_cnt > 0) {
				mutex_exit(&rsm_drv_data.drv_lock);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_dr_callback_post_del done:\n"));
				return;
			}
			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
			break;
		default:
			ASSERT(0);
			return;
			/* break; */
		}
	} while (recheck_state);

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Do all the unquiescing stuff here */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_post_del: unquiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);

	/*
	 * now that all local segments have been unquiesced let's inform
	 * the importers
	 */
	rsm_send_resume();

	mutex_enter(&rsm_drv_data.drv_lock);

	rsm_drv_data.drv_state = RSM_DRV_OK;

	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del done\n"));

	return;
}