1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Overview of the RSM Kernel Agent: 31 * --------------------------------- 32 * 33 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM 34 * kernel agent is a pseudo device driver which makes use of the RSMPI 35 * interface on behalf of the RSMAPI user library. 36 * 37 * The kernel agent functionality can be categorized into the following 38 * components: 39 * 1. Driver Infrastructure 40 * 2. Export/Import Segment Management 41 * 3. Internal resource allocation/deallocation 42 * 43 * The driver infrastructure includes the basic module loading entry points 44 * like _init, _info, _fini to load, unload and report information about 45 * the driver module. The driver infrastructure also includes the 46 * autoconfiguration entry points namely, attach, detach and getinfo for 47 * the device autoconfiguration. 
48 * 49 * The kernel agent is a pseudo character device driver and exports 50 * a cb_ops structure which defines the driver entry points for character 51 * device access. This includes the open and close entry points. The 52 * other entry points provided include ioctl, devmap and segmap and chpoll. 53 * read and write entry points are not used since the device is memory 54 * mapped. Also ddi_prop_op is used for the prop_op entry point. 55 * 56 * The ioctl entry point supports a number of commands, which are used by 57 * the RSMAPI library in order to export and import segments. These 58 * commands include commands for binding and rebinding the physical pages 59 * allocated to the virtual address range, publishing the export segment, 60 * unpublishing and republishing an export segment, creating an 61 * import segment and a virtual connection from this import segment to 62 * an export segment, performing scatter-gather data transfer, barrier 63 * operations. 64 * 65 * 66 * Export and Import segments: 67 * --------------------------- 68 * 69 * In order to create an RSM export segment a process allocates a range in its 70 * virtual address space for the segment using standard Solaris interfaces. 71 * The process then calls RSMAPI, which in turn makes an ioctl call to the 72 * RSM kernel agent for an allocation of physical memory pages and for 73 * creation of the export segment by binding these pages to the virtual 74 * address range. These pages are locked in memory so that remote accesses 75 * are always applied to the correct page. Then the RSM segment is published, 76 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id 77 * is assigned to it. 78 * 79 * In order to import a published RSM segment, RSMAPI creates an import 80 * segment and forms a virtual connection across the interconnect to the 81 * export segment, via an ioctl into the kernel agent with the connect 82 * command. 
The import segment setup is completed by mapping the 83 * local device memory into the importer's virtual address space. The 84 * mapping of the import segment is handled by the segmap/devmap 85 * infrastructure described as follows. 86 * 87 * Segmap and Devmap interfaces: 88 * 89 * The RSM kernel agent allows device memory to be directly accessed by user 90 * threads via memory mapping. In order to do so, the RSM kernel agent 91 * supports the devmap and segmap entry points. 92 * 93 * The segmap entry point(rsm_segmap) is responsible for setting up a memory 94 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is 95 * responsible for exporting the device memory to the user applications. 96 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the 97 * control is transferred to the devmap_setup call which calls rsm_devmap. 98 * 99 * rsm_devmap validates the user mapping to the device or kernel memory 100 * and passes the information to the system for setting up the mapping. The 101 * actual setting up of the mapping is done by devmap_devmem_setup(for 102 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are 103 * registered for device context management via the devmap_devmem_setup 104 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap, 105 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping 106 * is created, a mapping is freed, a mapping is accessed or an existing 107 * mapping is duplicated respectively. These callbacks allow the RSM kernel 108 * agent to maintain state information associated with the mappings. 109 * The state information is mainly in the form of a cookie list for the import 110 * segment for which mapping has been done. 111 * 112 * Forced disconnect of import segments: 113 * 114 * When an exported segment is unpublished, the exporter sends a forced 115 * disconnect message to all its importers. The importer segments are 116 * unloaded and disconnected. 
This involves unloading the original 117 * mappings and remapping to a preallocated kernel trash page. This is 118 * done by devmap_umem_remap. The trash/dummy page is a kernel page, 119 * preallocated by the kernel agent during attach using ddi_umem_alloc with 120 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application 121 * due to unloading of the original mappings. 122 * 123 * Additionally every segment has a mapping generation number associated 124 * with it. This is an entry in the barrier generation page, created 125 * during attach time. This mapping generation number for the import 126 * segments is incremented on a force disconnect to notify the application 127 * of the force disconnect. On this notification, the application needs 128 * to reconnect the segment to establish a new legitimate mapping. 129 * 130 * 131 * Locks used in the kernel agent: 132 * ------------------------------- 133 * 134 * The kernel agent uses a variety of mutexes and condition variables for 135 * mutual exclusion of the shared data structures and for synchronization 136 * between the various threads. Some of the locks are described as follows. 137 * 138 * Each resource structure, which represents either an export/import segment 139 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock. 140 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the 141 * rsmseglock_acquire and rsmseglock_release macros. An additional 142 * lock called the rsmsi_lock is used for the shared import data structure 143 * that is relevant for resources representing import segments. There is 144 * also a condition variable associated with the resource called s_cv. This 145 * is used to wait for events like the segment state change etc. 146 * 147 * The resource structures are allocated from a pool of resource structures, 148 * called rsm_resource. This pool is protected via a reader-writer lock, 149 * called rsmrc_lock. 
150 * 151 * There are two separate hash tables, one for the export segments and 152 * one for the import segments. The export segments are inserted into the 153 * export segment hash table only after they have been published and the 154 * import segments are inserted in the import segments list only after they 155 * have successfully connected to an exported segment. These tables are 156 * protected via reader-writer locks. 157 * 158 * Debug Support in the kernel agent: 159 * ---------------------------------- 160 * 161 * Debugging support in the kernel agent is provided by the following 162 * macros. 163 * 164 * DBG_PRINTF((category, level, message)) is a macro which logs a debug 165 * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer 166 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based 167 * on the definition of the category and level. All messages that belong to 168 * the specified category(rsmdbg_category) and are of an equal or greater 169 * severity than the specified level(rsmdbg_level) are logged. The message 170 * is a string which uses the same formatting rules as the strings used in 171 * printf. 172 * 173 * The category defines which component of the kernel agent has logged this 174 * message. There are a number of categories that have been defined such as 175 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro, 176 * DBG_ADDCATEGORY is used to add in another category to the currently 177 * specified category value so that the component using this new category 178 * can also effectively log debug messages. Thus, the category of a specific 179 * message is some combination of the available categories and we can define 180 * sub-categories if we want a finer level of granularity. 181 * 182 * The level defines the severity of the message. Different level values are 183 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being 184 * the least severe(debug level is 0). 
185 * 186 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug 187 * variable or a string respectively. 188 * 189 * 190 * NOTES: 191 * 192 * Special Fork and Exec Handling: 193 * ------------------------------- 194 * 195 * The backing physical pages of an exported segment are always locked down. 196 * Thus, there are two cases in which a process having exported segments 197 * will cause a cpu to hang: (1) the process invokes exec; (2) a process 198 * forks and invokes exit before the duped file descriptors for the export 199 * segments are closed in the child process. The hang is caused because the 200 * address space release algorithm in Solaris VM subsystem is based on a 201 * non-blocking loop which does not terminate while segments are locked 202 * down. In addition to this, Solaris VM subsystem lacks a callback 203 * mechanism to the rsm kernel agent to allow unlocking these export 204 * segment pages. 205 * 206 * In order to circumvent this problem, the kernel agent does the following. 207 * The Solaris VM subsystem keeps memory segments in increasing order of 208 * virtual addresses. Thus a special page(special_exit_offset) is allocated 209 * by the kernel agent and is mmapped into the heap area of the process address 210 * space(the mmap is done by the RSMAPI library). During the mmap processing 211 * of this special page by the devmap infrastructure, a callback(the same 212 * devmap context management callbacks discussed above) is registered for an 213 * unmap. 214 * 215 * As discussed above, this page is processed by the Solaris address space 216 * release code before any of the exported segments pages(which are allocated 217 * from high memory). It is during this processing that the unmap callback gets 218 * called and this callback is responsible for force destroying the exported 219 * segments and thus eliminating the problem of locked pages. 
220 * 221 * Flow-control: 222 * ------------ 223 * 224 * A credit based flow control algorithm is used for messages whose 225 * processing cannot be done in the interrupt context because it might 226 * involve invoking rsmpi calls, or might take a long time to complete 227 * or might need to allocate resources. The algorithm operates on a per 228 * path basis. To send a message the pathend needs to have a credit and 229 * it consumes one for every message that is flow controlled. On the 230 * receiving pathend the message is put on a msgbuf_queue and a task is 231 * dispatched on the worker thread - recv_taskq where it is processed. 232 * After processing the message, the receiving pathend dequeues the message, 233 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends 234 * credits to the sender pathend. 235 * 236 * RSM_DRTEST: 237 * ----------- 238 * 239 * This is used to enable the DR testing using a test driver on test 240 * platforms which do not supported DR. 241 * 242 */ 243 244 #include <sys/types.h> 245 #include <sys/param.h> 246 #include <sys/user.h> 247 #include <sys/buf.h> 248 #include <sys/systm.h> 249 #include <sys/cred.h> 250 #include <sys/vm.h> 251 #include <sys/uio.h> 252 #include <vm/seg.h> 253 #include <vm/page.h> 254 #include <sys/stat.h> 255 256 #include <sys/time.h> 257 #include <sys/errno.h> 258 259 #include <sys/file.h> 260 #include <sys/uio.h> 261 #include <sys/proc.h> 262 #include <sys/mman.h> 263 #include <sys/open.h> 264 #include <sys/atomic.h> 265 #include <sys/mem_config.h> 266 267 268 #include <sys/ddi.h> 269 #include <sys/devops.h> 270 #include <sys/ddidevmap.h> 271 #include <sys/sunddi.h> 272 #include <sys/esunddi.h> 273 #include <sys/ddi_impldefs.h> 274 275 #include <sys/kmem.h> 276 #include <sys/conf.h> 277 #include <sys/devops.h> 278 #include <sys/ddi_impldefs.h> 279 280 #include <sys/modctl.h> 281 282 #include <sys/policy.h> 283 #include <sys/types.h> 284 #include <sys/conf.h> 285 #include <sys/param.h> 286 287 
#include <sys/taskq.h> 288 289 #include <sys/rsm/rsm_common.h> 290 #include <sys/rsm/rsmapi_common.h> 291 #include <sys/rsm/rsm.h> 292 #include <rsm_in.h> 293 #include <sys/rsm/rsmka_path_int.h> 294 #include <sys/rsm/rsmpi.h> 295 296 #include <sys/modctl.h> 297 #include <sys/debug.h> 298 299 #include <sys/tuneable.h> 300 301 #ifdef RSM_DRTEST 302 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec, 303 void *arg); 304 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, 305 void *arg); 306 #endif 307 308 extern void dbg_printf(int category, int level, char *fmt, ...); 309 extern void rsmka_pathmanager_init(); 310 extern void rsmka_pathmanager_cleanup(); 311 extern void rele_sendq_token(); 312 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t); 313 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t); 314 extern int rsmka_topology_ioctl(caddr_t, int, int); 315 316 extern pri_t maxclsyspri; 317 extern work_queue_t work_queue; 318 extern kmutex_t ipc_info_lock; 319 extern kmutex_t ipc_info_cvlock; 320 extern kcondvar_t ipc_info_cv; 321 extern kmutex_t path_hold_cvlock; 322 extern kcondvar_t path_hold_cv; 323 324 extern kmutex_t rsmka_buf_lock; 325 326 extern path_t *rsm_find_path(char *, int, rsm_addr_t); 327 extern adapter_t *rsmka_lookup_adapter(char *, int); 328 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *); 329 extern boolean_t rsmka_do_path_active(path_t *, int); 330 extern boolean_t rsmka_check_node_alive(rsm_node_id_t); 331 extern void rsmka_release_adapter(adapter_t *); 332 extern void rsmka_enqueue_msgbuf(path_t *path, void *data); 333 extern void rsmka_dequeue_msgbuf(path_t *path); 334 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path); 335 /* lint -w2 */ 336 337 static int rsm_open(dev_t *, int, int, cred_t *); 338 static int rsm_close(dev_t, int, int, cred_t *); 339 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 340 cred_t *credp, int *rvalp); 
341 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *, 342 uint_t); 343 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t, 344 uint_t, uint_t, cred_t *); 345 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 346 struct pollhead **phpp); 347 348 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 349 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t); 350 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t); 351 352 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *); 353 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t); 354 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t); 355 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int, 356 rsm_permission_t); 357 static void rsm_export_force_destroy(ddi_umem_cookie_t *); 358 static void rsmacl_free(rsmapi_access_entry_t *, int); 359 static void rsmpiacl_free(rsm_access_entry_t *, int); 360 361 static int rsm_inc_pgcnt(pgcnt_t); 362 static void rsm_dec_pgcnt(pgcnt_t); 363 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop); 364 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *, 365 size_t *); 366 static void exporter_quiesce(); 367 static void rsmseg_suspend(rsmseg_t *, int *); 368 static void rsmsegshare_suspend(rsmseg_t *); 369 static int rsmseg_resume(rsmseg_t *, void **); 370 static int rsmsegshare_resume(rsmseg_t *); 371 372 static struct cb_ops rsm_cb_ops = { 373 rsm_open, /* open */ 374 rsm_close, /* close */ 375 nodev, /* strategy */ 376 nodev, /* print */ 377 nodev, /* dump */ 378 nodev, /* read */ 379 nodev, /* write */ 380 rsm_ioctl, /* ioctl */ 381 rsm_devmap, /* devmap */ 382 NULL, /* mmap */ 383 rsm_segmap, /* segmap */ 384 rsm_chpoll, /* poll */ 385 ddi_prop_op, /* cb_prop_op */ 386 0, /* streamtab */ 387 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */ 388 0, 389 0, 390 0 391 }; 392 393 static 
struct dev_ops rsm_ops = { 394 DEVO_REV, /* devo_rev, */ 395 0, /* refcnt */ 396 rsm_info, /* get_dev_info */ 397 nulldev, /* identify */ 398 nulldev, /* probe */ 399 rsm_attach, /* attach */ 400 rsm_detach, /* detach */ 401 nodev, /* reset */ 402 &rsm_cb_ops, /* driver operations */ 403 (struct bus_ops *)0, /* bus operations */ 404 0 405 }; 406 407 /* 408 * Module linkage information for the kernel. 409 */ 410 411 static struct modldrv modldrv = { 412 &mod_driverops, /* Type of module. This one is a pseudo driver */ 413 "Remote Shared Memory Driver %I%", 414 &rsm_ops, /* driver ops */ 415 }; 416 417 static struct modlinkage modlinkage = { 418 MODREV_1, 419 (void *)&modldrv, 420 0, 421 0, 422 0 423 }; 424 425 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta); 426 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta); 427 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled); 428 429 static kphysm_setup_vector_t rsm_dr_callback_vec = { 430 KPHYSM_SETUP_VECTOR_VERSION, 431 rsm_dr_callback_post_add, 432 rsm_dr_callback_pre_del, 433 rsm_dr_callback_post_del 434 }; 435 436 /* This flag can be changed to 0 to help with PIT testing */ 437 int rsmka_modunloadok = 1; 438 int no_reply_cnt = 0; 439 440 uint64_t rsm_ctrlmsg_errcnt = 0; 441 uint64_t rsm_ipcsend_errcnt = 0; 442 443 #define MAX_NODES 64 444 445 static struct rsm_driver_data rsm_drv_data; 446 static struct rsmresource_table rsm_resource; 447 448 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t); 449 static void rsmresource_destroy(void); 450 static int rsmresource_alloc(minor_t *); 451 static rsmresource_t *rsmresource_free(minor_t rnum); 452 static int rsm_closeconnection(rsmseg_t *seg, void **cookie); 453 static int rsm_unpublish(rsmseg_t *seg, int mode); 454 static int rsm_unbind(rsmseg_t *seg); 455 static uint_t rsmhash(rsm_memseg_id_t key); 456 static void rsmhash_alloc(rsmhash_table_t *rhash, int size); 457 static void 
rsmhash_free(rsmhash_table_t *rhash, int size); 458 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval); 459 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval); 460 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, 461 void *cookie); 462 int rsm_disconnect(rsmseg_t *seg); 463 void rsmseg_unload(rsmseg_t *); 464 void rsm_suspend_complete(rsm_node_id_t src_node, int flag); 465 466 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 467 rsm_intr_q_op_t opcode, rsm_addr_t src, 468 void *data, size_t size, rsm_intr_hand_arg_t arg); 469 470 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t); 471 472 rsm_node_id_t my_nodeid; 473 474 /* cookie, va, offsets and length for the barrier */ 475 static rsm_gnum_t *bar_va; 476 static ddi_umem_cookie_t bar_cookie; 477 static off_t barrier_offset; 478 static size_t barrier_size; 479 static int max_segs; 480 481 /* cookie for the trash memory */ 482 static ddi_umem_cookie_t remap_cookie; 483 484 static rsm_memseg_id_t rsm_nextavail_segmentid; 485 486 extern taskq_t *work_taskq; 487 extern char *taskq_name; 488 489 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */ 490 491 static rsmhash_table_t rsm_export_segs; /* list of exported segs */ 492 rsmhash_table_t rsm_import_segs; /* list of imported segs */ 493 static rsmhash_table_t rsm_event_queues; /* list of event queues */ 494 495 static rsm_ipc_t rsm_ipc; /* ipc info */ 496 497 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */ 498 static list_head_t rsm_suspend_list; 499 500 /* list of descriptors for remote importers */ 501 static importers_table_t importer_list; 502 503 kmutex_t rsm_suspend_cvlock; 504 kcondvar_t rsm_suspend_cv; 505 506 static kmutex_t rsm_lock; 507 508 adapter_t loopback_adapter; 509 rsm_controller_attr_t loopback_attr; 510 511 int rsmipc_send_controlmsg(path_t *path, int msgtype); 512 513 void rsmka_init_loopback(); 514 515 int 
rsmka_null_seg_create( 516 rsm_controller_handle_t, 517 rsm_memseg_export_handle_t *, 518 size_t, 519 uint_t, 520 rsm_memory_local_t *, 521 rsm_resource_callback_t, 522 rsm_resource_callback_arg_t); 523 524 int rsmka_null_seg_destroy( 525 rsm_memseg_export_handle_t); 526 527 int rsmka_null_bind( 528 rsm_memseg_export_handle_t, 529 off_t, 530 rsm_memory_local_t *, 531 rsm_resource_callback_t, 532 rsm_resource_callback_arg_t); 533 534 int rsmka_null_unbind( 535 rsm_memseg_export_handle_t, 536 off_t, 537 size_t); 538 539 int rsmka_null_rebind( 540 rsm_memseg_export_handle_t, 541 off_t, 542 rsm_memory_local_t *, 543 rsm_resource_callback_t, 544 rsm_resource_callback_arg_t); 545 546 int rsmka_null_publish( 547 rsm_memseg_export_handle_t, 548 rsm_access_entry_t [], 549 uint_t, 550 rsm_memseg_id_t, 551 rsm_resource_callback_t, 552 rsm_resource_callback_arg_t); 553 554 555 int rsmka_null_republish( 556 rsm_memseg_export_handle_t, 557 rsm_access_entry_t [], 558 uint_t, 559 rsm_resource_callback_t, 560 rsm_resource_callback_arg_t); 561 562 int rsmka_null_unpublish( 563 rsm_memseg_export_handle_t); 564 565 rsm_ops_t null_rsmpi_ops; 566 567 /* 568 * data and locks to keep track of total amount of exported memory 569 */ 570 static pgcnt_t rsm_pgcnt; 571 static pgcnt_t rsm_pgcnt_max; /* max allowed */ 572 static kmutex_t rsm_pgcnt_lock; 573 574 static int rsm_enable_dr; 575 576 static char loopback_str[] = "loopback"; 577 578 int rsm_hash_size; 579 580 /* 581 * The locking model is as follows: 582 * 583 * Local operations: 584 * find resource - grab reader lock on resouce list 585 * insert rc - grab writer lock 586 * delete rc - grab writer lock and resource mutex 587 * read/write - no lock 588 * 589 * Remote invocations: 590 * find resource - grab read lock and resource mutex 591 * 592 * State: 593 * resource state - grab resource mutex 594 */ 595 596 int 597 _init(void) 598 { 599 int e; 600 601 e = mod_install(&modlinkage); 602 if (e != 0) { 603 return (e); 604 } 605 606 
mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL); 607 608 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL); 609 610 611 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL); 612 613 rsm_hash_size = RSM_HASHSZ; 614 615 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 616 617 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 618 619 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL); 620 621 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL); 622 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0); 623 624 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL); 625 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0); 626 627 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL); 628 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0); 629 630 rsm_ipc.count = RSMIPC_SZ; 631 rsm_ipc.wanted = 0; 632 rsm_ipc.sequence = 0; 633 634 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL); 635 636 for (e = 0; e < RSMIPC_SZ; e++) { 637 rsmipc_slot_t *slot = &rsm_ipc.slots[e]; 638 639 RSMIPC_SET(slot, RSMIPC_FREE); 640 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL); 641 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0); 642 } 643 644 /* 645 * Initialize the suspend message list 646 */ 647 rsm_suspend_list.list_head = NULL; 648 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL); 649 650 /* 651 * It is assumed here that configuration data is available 652 * during system boot since _init may be called at that time. 653 */ 654 655 rsmka_pathmanager_init(); 656 657 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 658 "rsm: _init done\n")); 659 660 return (DDI_SUCCESS); 661 662 } 663 664 int 665 _info(struct modinfo *modinfop) 666 { 667 668 return (mod_info(&modlinkage, modinfop)); 669 } 670 671 int 672 _fini(void) 673 { 674 int e; 675 676 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 677 "rsm: _fini enter\n")); 678 679 /* 680 * The rsmka_modunloadok flag is simply used to help with 681 * the PIT testing. 
Make this flag 0 to disallow modunload. 682 */ 683 if (rsmka_modunloadok == 0) 684 return (EBUSY); 685 686 /* rsm_detach will be called as a result of mod_remove */ 687 e = mod_remove(&modlinkage); 688 if (e) { 689 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR, 690 "Unable to fini RSM %x\n", e)); 691 return (e); 692 } 693 694 rsmka_pathmanager_cleanup(); 695 696 rw_destroy(&rsm_resource.rsmrc_lock); 697 698 rw_destroy(&rsm_export_segs.rsmhash_rw); 699 rw_destroy(&rsm_import_segs.rsmhash_rw); 700 rw_destroy(&rsm_event_queues.rsmhash_rw); 701 702 mutex_destroy(&importer_list.lock); 703 704 mutex_destroy(&rsm_ipc.lock); 705 cv_destroy(&rsm_ipc.cv); 706 707 (void) mutex_destroy(&rsm_suspend_list.list_lock); 708 709 (void) mutex_destroy(&rsm_pgcnt_lock); 710 711 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n")); 712 713 return (DDI_SUCCESS); 714 715 } 716 717 /*ARGSUSED1*/ 718 static int 719 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 720 { 721 minor_t rnum; 722 int percent; 723 int ret; 724 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 725 726 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n")); 727 728 switch (cmd) { 729 case DDI_ATTACH: 730 break; 731 case DDI_RESUME: 732 default: 733 DBG_PRINTF((category, RSM_ERR, 734 "rsm:rsm_attach - cmd not supported\n")); 735 return (DDI_FAILURE); 736 } 737 738 if (rsm_dip != NULL) { 739 DBG_PRINTF((category, RSM_ERR, 740 "rsm:rsm_attach - supports only " 741 "one instance\n")); 742 return (DDI_FAILURE); 743 } 744 745 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 746 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 747 "enable-dynamic-reconfiguration", 1); 748 749 mutex_enter(&rsm_drv_data.drv_lock); 750 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING; 751 mutex_exit(&rsm_drv_data.drv_lock); 752 753 if (rsm_enable_dr) { 754 #ifdef RSM_DRTEST 755 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec, 756 (void *)NULL); 757 #else 758 ret = kphysm_setup_func_register(&rsm_dr_callback_vec, 759 (void 
*)NULL); 760 #endif 761 if (ret != 0) { 762 mutex_exit(&rsm_drv_data.drv_lock); 763 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic " 764 "reconfiguration setup failed\n"); 765 return (DDI_FAILURE); 766 } 767 } 768 769 mutex_enter(&rsm_drv_data.drv_lock); 770 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING); 771 rsm_drv_data.drv_state = RSM_DRV_OK; 772 cv_broadcast(&rsm_drv_data.drv_cv); 773 mutex_exit(&rsm_drv_data.drv_lock); 774 775 /* 776 * page_list_read_lock(); 777 * xx_setup(); 778 * page_list_read_unlock(); 779 */ 780 781 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 782 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 783 "segment-hashtable-size", RSM_HASHSZ); 784 if (rsm_hash_size == 0) { 785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 786 "rsm: segment-hashtable-size in rsm.conf " 787 "must be greater than 0, defaulting to 128\n")); 788 rsm_hash_size = RSM_HASHSZ; 789 } 790 791 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n", 792 rsm_hash_size)); 793 794 rsm_pgcnt = 0; 795 796 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 797 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 798 "max-exported-memory", 0); 799 if (percent < 0) { 800 DBG_PRINTF((category, RSM_ERR, 801 "rsm:rsm_attach not enough memory available to " 802 "export, or max-exported-memory set incorrectly.\n")); 803 return (DDI_FAILURE); 804 } 805 /* 0 indicates no fixed upper limit. 
maxmem is the max */ 806 /* available pageable physical mem */ 807 rsm_pgcnt_max = (percent*maxmem)/100; 808 809 if (rsm_pgcnt_max > 0) { 810 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 811 "rsm: Available physical memory = %lu pages, " 812 "Max exportable memory = %lu pages", 813 maxmem, rsm_pgcnt_max)); 814 } 815 816 /* 817 * Create minor number 818 */ 819 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) { 820 DBG_PRINTF((category, RSM_ERR, 821 "rsm: rsm_attach - Unable to get " 822 "minor number\n")); 823 return (DDI_FAILURE); 824 } 825 826 ASSERT(rnum == RSM_DRIVER_MINOR); 827 828 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR, 829 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) { 830 DBG_PRINTF((category, RSM_ERR, 831 "rsm: rsm_attach - unable to allocate " 832 "minor #\n")); 833 return (DDI_FAILURE); 834 } 835 836 rsm_dip = devi; 837 /* 838 * Allocate the hashtables 839 */ 840 rsmhash_alloc(&rsm_export_segs, rsm_hash_size); 841 rsmhash_alloc(&rsm_import_segs, rsm_hash_size); 842 843 importer_list.bucket = (importing_token_t **) 844 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), 845 KM_SLEEP); 846 847 /* 848 * Allocate a resource struct 849 */ 850 { 851 rsmresource_t *p; 852 853 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP); 854 855 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL); 856 857 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR); 858 } 859 860 /* 861 * Based on the rsm.conf property max-segments, determine the maximum 862 * number of segments that can be exported/imported. This is then used 863 * to determine the size for barrier failure pages. 
864 */ 865 866 /* First get the max number of segments from the rsm.conf file */ 867 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 868 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 869 "max-segments", 0); 870 if (max_segs == 0) { 871 /* Use default number of segments */ 872 max_segs = RSM_MAX_NUM_SEG; 873 } 874 875 /* 876 * Based on the max number of segments allowed, determine the barrier 877 * page size. add 1 to max_segs since the barrier page itself uses 878 * a slot 879 */ 880 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t), 881 PAGESIZE); 882 883 /* 884 * allocation of the barrier failure page 885 */ 886 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size, 887 DDI_UMEM_SLEEP, &bar_cookie); 888 889 /* 890 * Set the barrier_offset 891 */ 892 barrier_offset = 0; 893 894 /* 895 * Allocate a trash memory and get a cookie for it. This will be used 896 * when remapping segments during force disconnects. Allocate the 897 * trash memory with a large size which is page aligned. 898 */ 899 (void) ddi_umem_alloc((size_t)TRASHSIZE, 900 DDI_UMEM_TRASH, &remap_cookie); 901 902 /* initialize user segment id allocation variable */ 903 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE; 904 905 /* 906 * initialize the null_rsmpi_ops vector and the loopback adapter 907 */ 908 rsmka_init_loopback(); 909 910 911 ddi_report_dev(devi); 912 913 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n")); 914 915 return (DDI_SUCCESS); 916 } 917 918 /* 919 * The call to mod_remove in the _fine routine will cause the system 920 * to call rsm_detach 921 */ 922 /*ARGSUSED*/ 923 static int 924 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 925 { 926 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 927 928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n")); 929 930 switch (cmd) { 931 case DDI_DETACH: 932 break; 933 default: 934 DBG_PRINTF((category, RSM_ERR, 935 "rsm:rsm_detach - cmd %x not supported\n", 936 cmd)); 937 return (DDI_FAILURE); 938 } 939 
940 mutex_enter(&rsm_drv_data.drv_lock); 941 while (rsm_drv_data.drv_state != RSM_DRV_OK) 942 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 943 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING; 944 mutex_exit(&rsm_drv_data.drv_lock); 945 946 /* 947 * Unregister the DR callback functions 948 */ 949 if (rsm_enable_dr) { 950 #ifdef RSM_DRTEST 951 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec, 952 (void *)NULL); 953 #else 954 kphysm_setup_func_unregister(&rsm_dr_callback_vec, 955 (void *)NULL); 956 #endif 957 } 958 959 mutex_enter(&rsm_drv_data.drv_lock); 960 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING); 961 rsm_drv_data.drv_state = RSM_DRV_NEW; 962 mutex_exit(&rsm_drv_data.drv_lock); 963 964 ASSERT(rsm_suspend_list.list_head == NULL); 965 966 /* 967 * Release all resources, seglist, controller, ... 968 */ 969 970 /* remove intersend queues */ 971 /* remove registered services */ 972 973 974 ddi_remove_minor_node(dip, DRIVER_NAME); 975 rsm_dip = NULL; 976 977 /* 978 * Free minor zero resource 979 */ 980 { 981 rsmresource_t *p; 982 983 p = rsmresource_free(RSM_DRIVER_MINOR); 984 if (p) { 985 mutex_destroy(&p->rsmrc_lock); 986 kmem_free((void *)p, sizeof (*p)); 987 } 988 } 989 990 /* 991 * Free resource table 992 */ 993 994 rsmresource_destroy(); 995 996 /* 997 * Free the hash tables 998 */ 999 rsmhash_free(&rsm_export_segs, rsm_hash_size); 1000 rsmhash_free(&rsm_import_segs, rsm_hash_size); 1001 1002 kmem_free((void *)importer_list.bucket, 1003 rsm_hash_size * sizeof (importing_token_t *)); 1004 importer_list.bucket = NULL; 1005 1006 1007 /* free barrier page */ 1008 if (bar_cookie != NULL) { 1009 ddi_umem_free(bar_cookie); 1010 } 1011 bar_va = NULL; 1012 bar_cookie = NULL; 1013 1014 /* 1015 * Free the memory allocated for the trash 1016 */ 1017 if (remap_cookie != NULL) { 1018 ddi_umem_free(remap_cookie); 1019 } 1020 remap_cookie = NULL; 1021 1022 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n")); 1023 1024 return 
(DDI_SUCCESS); 1025 } 1026 1027 /*ARGSUSED*/ 1028 static int 1029 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1030 { 1031 register int error; 1032 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 1033 1034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n")); 1035 1036 switch (infocmd) { 1037 case DDI_INFO_DEVT2DEVINFO: 1038 if (rsm_dip == NULL) 1039 error = DDI_FAILURE; 1040 else { 1041 *result = (void *)rsm_dip; 1042 error = DDI_SUCCESS; 1043 } 1044 break; 1045 case DDI_INFO_DEVT2INSTANCE: 1046 *result = (void *)0; 1047 error = DDI_SUCCESS; 1048 break; 1049 default: 1050 error = DDI_FAILURE; 1051 } 1052 1053 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n")); 1054 return (error); 1055 } 1056 1057 adapter_t * 1058 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode) 1059 { 1060 adapter_t *adapter; 1061 char adapter_devname[MAXNAMELEN]; 1062 int instance; 1063 DBG_DEFINE(category, 1064 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL); 1065 1066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n")); 1067 1068 instance = msg->cnum; 1069 1070 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) { 1071 return (NULL); 1072 } 1073 1074 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode)) 1075 return (NULL); 1076 1077 if (strcmp(adapter_devname, "loopback") == 0) 1078 return (&loopback_adapter); 1079 1080 adapter = rsmka_lookup_adapter(adapter_devname, instance); 1081 1082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n")); 1083 1084 return (adapter); 1085 } 1086 1087 1088 /* 1089 * *********************** Resource Number Management ******************** 1090 * All resources are stored in a simple hash table. The table is an array 1091 * of pointers to resource blks. Each blk contains: 1092 * base - base number of this blk 1093 * used - number of used slots in this blk. 1094 * blks - array of pointers to resource items. 
1095 * An entry in a resource blk is empty if it's NULL. 1096 * 1097 * We start with no resource array. Each time we run out of slots, we 1098 * reallocate a new larger array and copy the pointer to the new array and 1099 * a new resource blk is allocated and added to the hash table. 1100 * 1101 * The resource control block contains: 1102 * root - array of pointer of resource blks 1103 * sz - current size of array. 1104 * len - last valid entry in array. 1105 * 1106 * A search operation based on a resource number is as follows: 1107 * index = rnum / RESOURCE_BLKSZ; 1108 * ASSERT(index < resource_block.len); 1109 * ASSERT(index < resource_block.sz); 1110 * offset = rnum % RESOURCE_BLKSZ; 1111 * ASSERT(offset >= resource_block.root[index]->base); 1112 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 1113 * return resource_block.root[index]->blks[offset]; 1114 * 1115 * A resource blk is freed with its used count reachs zero. 1116 */ 1117 static int 1118 rsmresource_alloc(minor_t *rnum) 1119 { 1120 1121 /* search for available resource slot */ 1122 int i, j, empty = -1; 1123 rsmresource_blk_t *blk; 1124 1125 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1126 "rsmresource_alloc enter\n")); 1127 1128 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1129 1130 /* Try to find an empty slot */ 1131 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1132 blk = rsm_resource.rsmrc_root[i]; 1133 if (blk != NULL && blk->rsmrcblk_avail > 0) { 1134 /* found an empty slot in this blk */ 1135 for (j = 0; j < RSMRC_BLKSZ; j++) { 1136 if (blk->rsmrcblk_blks[j] == NULL) { 1137 *rnum = (minor_t) 1138 (j + (i * RSMRC_BLKSZ)); 1139 /* 1140 * obey gen page limits 1141 */ 1142 if (*rnum >= max_segs + 1) { 1143 if (empty < 0) { 1144 rw_exit(&rsm_resource. 
1145 rsmrc_lock); 1146 DBG_PRINTF(( 1147 RSM_KERNEL_ALL, 1148 RSM_ERR, 1149 "rsmresource" 1150 "_alloc failed:" 1151 "not enough res" 1152 "%d\n", *rnum)); 1153 return ( 1154 RSMERR_INSUFFICIENT_RESOURCES); 1155 } else { 1156 /* use empty slot */ 1157 break; 1158 } 1159 1160 } 1161 1162 blk->rsmrcblk_blks[j] = RSMRC_RESERVED; 1163 blk->rsmrcblk_avail--; 1164 rw_exit(&rsm_resource.rsmrc_lock); 1165 DBG_PRINTF((RSM_KERNEL_ALL, 1166 RSM_DEBUG_VERBOSE, 1167 "rsmresource_alloc done\n")); 1168 return (RSM_SUCCESS); 1169 } 1170 } 1171 } else if (blk == NULL && empty < 0) { 1172 /* remember first empty slot */ 1173 empty = i; 1174 } 1175 } 1176 1177 /* Couldn't find anything, allocate a new blk */ 1178 /* 1179 * Do we need to reallocate the root array 1180 */ 1181 if (empty < 0) { 1182 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) { 1183 /* 1184 * Allocate new array and copy current stuff into it 1185 */ 1186 rsmresource_blk_t **p; 1187 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz + 1188 RSMRC_BLKSZ; 1189 /* 1190 * Don't allocate more that max valid rnum 1191 */ 1192 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >= 1193 max_segs + 1) { 1194 rw_exit(&rsm_resource.rsmrc_lock); 1195 return (RSMERR_INSUFFICIENT_RESOURCES); 1196 } 1197 1198 p = (rsmresource_blk_t **)kmem_zalloc( 1199 newsz * sizeof (*p), 1200 KM_SLEEP); 1201 1202 if (rsm_resource.rsmrc_root) { 1203 uint_t oldsz; 1204 1205 oldsz = (uint_t)(rsm_resource.rsmrc_sz * 1206 (int)sizeof (*p)); 1207 1208 /* 1209 * Copy old data into new space and 1210 * free old stuff 1211 */ 1212 bcopy(rsm_resource.rsmrc_root, p, oldsz); 1213 kmem_free(rsm_resource.rsmrc_root, oldsz); 1214 } 1215 1216 rsm_resource.rsmrc_root = p; 1217 rsm_resource.rsmrc_sz = (int)newsz; 1218 } 1219 1220 empty = rsm_resource.rsmrc_len; 1221 rsm_resource.rsmrc_len++; 1222 } 1223 1224 /* 1225 * Allocate a new blk 1226 */ 1227 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP); 1228 ASSERT(rsm_resource.rsmrc_root[empty] == NULL); 1229 
rsm_resource.rsmrc_root[empty] = blk; 1230 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1; 1231 1232 /* 1233 * Allocate slot 1234 */ 1235 1236 *rnum = (minor_t)(empty * RSMRC_BLKSZ); 1237 1238 /* 1239 * watch out not to exceed bounds of barrier page 1240 */ 1241 if (*rnum >= max_segs + 1) { 1242 rw_exit(&rsm_resource.rsmrc_lock); 1243 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR, 1244 "rsmresource_alloc failed %d\n", *rnum)); 1245 1246 return (RSMERR_INSUFFICIENT_RESOURCES); 1247 } 1248 blk->rsmrcblk_blks[0] = RSMRC_RESERVED; 1249 1250 1251 rw_exit(&rsm_resource.rsmrc_lock); 1252 1253 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1254 "rsmresource_alloc done\n")); 1255 1256 return (RSM_SUCCESS); 1257 } 1258 1259 static rsmresource_t * 1260 rsmresource_free(minor_t rnum) 1261 { 1262 1263 /* search for available resource slot */ 1264 int i, j; 1265 rsmresource_blk_t *blk; 1266 rsmresource_t *p; 1267 1268 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1269 "rsmresource_free enter\n")); 1270 1271 i = (int)(rnum / RSMRC_BLKSZ); 1272 j = (int)(rnum % RSMRC_BLKSZ); 1273 1274 if (i >= rsm_resource.rsmrc_len) { 1275 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1276 "rsmresource_free done\n")); 1277 return (NULL); 1278 } 1279 1280 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1281 1282 ASSERT(rsm_resource.rsmrc_root); 1283 ASSERT(i < rsm_resource.rsmrc_len); 1284 ASSERT(i < rsm_resource.rsmrc_sz); 1285 blk = rsm_resource.rsmrc_root[i]; 1286 if (blk == NULL) { 1287 rw_exit(&rsm_resource.rsmrc_lock); 1288 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1289 "rsmresource_free done\n")); 1290 return (NULL); 1291 } 1292 1293 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */ 1294 1295 p = blk->rsmrcblk_blks[j]; 1296 if (p == RSMRC_RESERVED) { 1297 p = NULL; 1298 } 1299 1300 blk->rsmrcblk_blks[j] = NULL; 1301 blk->rsmrcblk_avail++; 1302 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) { 1303 /* free this blk */ 1304 kmem_free(blk, sizeof (*blk)); 1305 rsm_resource.rsmrc_root[i] = NULL; 1306 } 1307 
1308 rw_exit(&rsm_resource.rsmrc_lock); 1309 1310 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1311 "rsmresource_free done\n")); 1312 1313 return (p); 1314 } 1315 1316 static rsmresource_t * 1317 rsmresource_lookup(minor_t rnum, int lock) 1318 { 1319 int i, j; 1320 rsmresource_blk_t *blk; 1321 rsmresource_t *p; 1322 1323 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1324 "rsmresource_lookup enter\n")); 1325 1326 /* Find resource and lock it in READER mode */ 1327 /* search for available resource slot */ 1328 1329 i = (int)(rnum / RSMRC_BLKSZ); 1330 j = (int)(rnum % RSMRC_BLKSZ); 1331 1332 if (i >= rsm_resource.rsmrc_len) { 1333 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1334 "rsmresource_lookup done\n")); 1335 return (NULL); 1336 } 1337 1338 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1339 1340 blk = rsm_resource.rsmrc_root[i]; 1341 if (blk != NULL) { 1342 ASSERT(i < rsm_resource.rsmrc_len); 1343 ASSERT(i < rsm_resource.rsmrc_sz); 1344 1345 p = blk->rsmrcblk_blks[j]; 1346 if (lock == RSM_LOCK) { 1347 if (p != RSMRC_RESERVED) { 1348 mutex_enter(&p->rsmrc_lock); 1349 } else { 1350 p = NULL; 1351 } 1352 } 1353 } else { 1354 p = NULL; 1355 } 1356 rw_exit(&rsm_resource.rsmrc_lock); 1357 1358 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1359 "rsmresource_lookup done\n")); 1360 1361 return (p); 1362 } 1363 1364 static void 1365 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type) 1366 { 1367 /* Find resource and lock it in READER mode */ 1368 /* Caller can upgrade if need be */ 1369 /* search for available resource slot */ 1370 int i, j; 1371 rsmresource_blk_t *blk; 1372 1373 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1374 "rsmresource_insert enter\n")); 1375 1376 i = (int)(rnum / RSMRC_BLKSZ); 1377 j = (int)(rnum % RSMRC_BLKSZ); 1378 1379 p->rsmrc_type = type; 1380 p->rsmrc_num = rnum; 1381 1382 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1383 1384 ASSERT(rsm_resource.rsmrc_root); 1385 ASSERT(i < rsm_resource.rsmrc_len); 1386 
ASSERT(i < rsm_resource.rsmrc_sz); 1387 1388 blk = rsm_resource.rsmrc_root[i]; 1389 ASSERT(blk); 1390 1391 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED); 1392 1393 blk->rsmrcblk_blks[j] = p; 1394 1395 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1396 "rsmresource_insert done\n")); 1397 1398 rw_exit(&rsm_resource.rsmrc_lock); 1399 } 1400 1401 static void 1402 rsmresource_destroy() 1403 { 1404 int i, j; 1405 1406 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1407 "rsmresource_destroy enter\n")); 1408 1409 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1410 1411 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1412 rsmresource_blk_t *blk; 1413 1414 blk = rsm_resource.rsmrc_root[i]; 1415 if (blk == NULL) { 1416 continue; 1417 } 1418 for (j = 0; j < RSMRC_BLKSZ; j++) { 1419 if (blk->rsmrcblk_blks[j] != NULL) { 1420 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1421 "Not null slot %d, %lx\n", j, 1422 (size_t)blk->rsmrcblk_blks[j])); 1423 } 1424 } 1425 kmem_free(blk, sizeof (*blk)); 1426 rsm_resource.rsmrc_root[i] = NULL; 1427 } 1428 if (rsm_resource.rsmrc_root) { 1429 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *); 1430 kmem_free(rsm_resource.rsmrc_root, (uint_t)i); 1431 rsm_resource.rsmrc_root = NULL; 1432 rsm_resource.rsmrc_len = 0; 1433 rsm_resource.rsmrc_sz = 0; 1434 } 1435 1436 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1437 "rsmresource_destroy done\n")); 1438 1439 rw_exit(&rsm_resource.rsmrc_lock); 1440 } 1441 1442 1443 /* ******************** Generic Key Hash Table Management ********* */ 1444 static rsmresource_t * 1445 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key, 1446 rsm_resource_state_t state) 1447 { 1448 rsmresource_t *p; 1449 uint_t hashval; 1450 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1451 1452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n")); 1453 1454 hashval = rsmhash(key); 1455 1456 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n", 1457 key, hashval)); 1458 1459 
rw_enter(&rhash->rsmhash_rw, RW_READER); 1460 1461 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1462 1463 for (; p; p = p->rsmrc_next) { 1464 if (p->rsmrc_key == key) { 1465 /* acquire resource lock */ 1466 RSMRC_LOCK(p); 1467 break; 1468 } 1469 } 1470 1471 rw_exit(&rhash->rsmhash_rw); 1472 1473 if (p != NULL && p->rsmrc_state != state) { 1474 /* state changed, release lock and return null */ 1475 RSMRC_UNLOCK(p); 1476 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1477 "rsmhash_lookup done: state changed\n")); 1478 return (NULL); 1479 } 1480 1481 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n")); 1482 1483 return (p); 1484 } 1485 1486 static void 1487 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm) 1488 { 1489 rsmresource_t *p, **back; 1490 uint_t hashval; 1491 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1492 1493 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n")); 1494 1495 hashval = rsmhash(rcelm->rsmrc_key); 1496 1497 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n", 1498 rcelm->rsmrc_key, hashval)); 1499 1500 /* 1501 * It's ok not to find the segment. 
1502 */ 1503 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1504 1505 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1506 1507 for (; (p = *back) != NULL; back = &p->rsmrc_next) { 1508 if (p == rcelm) { 1509 *back = rcelm->rsmrc_next; 1510 break; 1511 } 1512 } 1513 1514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n")); 1515 1516 rw_exit(&rhash->rsmhash_rw); 1517 } 1518 1519 static int 1520 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key, 1521 int dup_check, rsm_resource_state_t state) 1522 { 1523 rsmresource_t *p = NULL, **bktp; 1524 uint_t hashval; 1525 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1526 1527 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n")); 1528 1529 /* lock table */ 1530 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1531 1532 /* 1533 * If the current resource state is other than the state passed in 1534 * then the resource is (probably) already on the list. eg. for an 1535 * import segment if the state is not RSM_STATE_NEW then it's on the 1536 * list already. 1537 */ 1538 RSMRC_LOCK(new); 1539 if (new->rsmrc_state != state) { 1540 RSMRC_UNLOCK(new); 1541 rw_exit(&rhash->rsmhash_rw); 1542 return (RSMERR_BAD_SEG_HNDL); 1543 } 1544 1545 hashval = rsmhash(key); 1546 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval)); 1547 1548 if (dup_check) { 1549 /* 1550 * Used for checking export segments; don't want to have 1551 * the same key used for multiple segments. 
1552 */ 1553 1554 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1555 1556 for (; p; p = p->rsmrc_next) { 1557 if (p->rsmrc_key == key) { 1558 RSMRC_UNLOCK(new); 1559 break; 1560 } 1561 } 1562 } 1563 1564 if (p == NULL) { 1565 /* Key doesn't exist, add it */ 1566 1567 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1568 1569 new->rsmrc_key = key; 1570 new->rsmrc_next = *bktp; 1571 *bktp = new; 1572 } 1573 1574 rw_exit(&rhash->rsmhash_rw); 1575 1576 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n")); 1577 1578 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE); 1579 } 1580 1581 /* 1582 * XOR each byte of the key. 1583 */ 1584 static uint_t 1585 rsmhash(rsm_memseg_id_t key) 1586 { 1587 uint_t hash = key; 1588 1589 hash ^= (key >> 8); 1590 hash ^= (key >> 16); 1591 hash ^= (key >> 24); 1592 1593 return (hash % rsm_hash_size); 1594 1595 } 1596 1597 /* 1598 * generic function to get a specific bucket 1599 */ 1600 static void * 1601 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval) 1602 { 1603 1604 if (rhash->bucket == NULL) 1605 return (NULL); 1606 else 1607 return ((void *)rhash->bucket[hashval]); 1608 } 1609 1610 /* 1611 * generic function to get a specific bucket's address 1612 */ 1613 static void ** 1614 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval) 1615 { 1616 if (rhash->bucket == NULL) 1617 return (NULL); 1618 else 1619 return ((void **)&(rhash->bucket[hashval])); 1620 } 1621 1622 /* 1623 * generic function to alloc a hash table 1624 */ 1625 static void 1626 rsmhash_alloc(rsmhash_table_t *rhash, int size) 1627 { 1628 rhash->bucket = (rsmresource_t **) 1629 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP); 1630 } 1631 1632 /* 1633 * generic function to free a hash table 1634 */ 1635 static void 1636 rsmhash_free(rsmhash_table_t *rhash, int size) 1637 { 1638 1639 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t)); 1640 rhash->bucket = NULL; 1641 1642 } 1643 /* *********************** Exported Segment Key 
Management ************ */ 1644 1645 #define rsmexport_add(new, key) \ 1646 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \ 1647 RSM_STATE_BIND) 1648 1649 #define rsmexport_rm(arg) \ 1650 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)) 1651 1652 #define rsmexport_lookup(key) \ 1653 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT) 1654 1655 /* ************************** Import Segment List Management ********** */ 1656 1657 /* 1658 * Add segment to import list. This will be useful for paging and loopback 1659 * segment unloading. 1660 */ 1661 #define rsmimport_add(arg, key) \ 1662 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \ 1663 RSM_STATE_NEW) 1664 1665 #define rsmimport_rm(arg) \ 1666 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)) 1667 1668 /* 1669 * #define rsmimport_lookup(key) \ 1670 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT) 1671 */ 1672 1673 /* 1674 * increase the ref count and make the import segment point to the 1675 * shared data structure. Return a pointer to the share data struct 1676 * and the shared data struct is locked upon return 1677 */ 1678 static rsm_import_share_t * 1679 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter, 1680 rsmseg_t *segp) 1681 { 1682 uint_t hash; 1683 rsmresource_t *p; 1684 rsm_import_share_t *shdatap; 1685 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1686 1687 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n")); 1688 1689 hash = rsmhash(key); 1690 /* lock table */ 1691 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER); 1692 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n", 1693 key, hash)); 1694 1695 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash); 1696 1697 for (; p; p = p->rsmrc_next) { 1698 /* 1699 * Look for an entry that is importing the same exporter 1700 * with the share data structure allocated. 
1701 */ 1702 if ((p->rsmrc_key == key) && 1703 (p->rsmrc_node == node) && 1704 (p->rsmrc_adapter == adapter) && 1705 (((rsmseg_t *)p)->s_share != NULL)) { 1706 shdatap = ((rsmseg_t *)p)->s_share; 1707 break; 1708 } 1709 } 1710 1711 if (p == NULL) { 1712 /* we are the first importer, create the shared data struct */ 1713 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP); 1714 shdatap->rsmsi_state = RSMSI_STATE_NEW; 1715 shdatap->rsmsi_segid = key; 1716 shdatap->rsmsi_node = node; 1717 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL); 1718 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0); 1719 } 1720 1721 rsmseglock_acquire(segp); 1722 1723 /* we grab the shared lock before returning from this function */ 1724 mutex_enter(&shdatap->rsmsi_lock); 1725 1726 shdatap->rsmsi_refcnt++; 1727 segp->s_share = shdatap; 1728 1729 rsmseglock_release(segp); 1730 1731 rw_exit(&rsm_import_segs.rsmhash_rw); 1732 1733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n")); 1734 1735 return (shdatap); 1736 } 1737 1738 /* 1739 * the shared data structure should be locked before calling 1740 * rsmsharecv_signal(). 1741 * Change the state and signal any waiting segments. 
1742 */ 1743 void 1744 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate) 1745 { 1746 ASSERT(rsmsharelock_held(seg)); 1747 1748 if (seg->s_share->rsmsi_state == oldstate) { 1749 seg->s_share->rsmsi_state = newstate; 1750 cv_broadcast(&seg->s_share->rsmsi_cv); 1751 } 1752 } 1753 1754 /* 1755 * Add to the hash table 1756 */ 1757 static void 1758 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr, 1759 void *cookie) 1760 { 1761 1762 importing_token_t *head; 1763 importing_token_t *new_token; 1764 int index; 1765 1766 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1767 1768 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n")); 1769 1770 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP); 1771 new_token->importing_node = node; 1772 new_token->key = key; 1773 new_token->import_segment_cookie = cookie; 1774 new_token->importing_adapter_hwaddr = hwaddr; 1775 1776 index = rsmhash(key); 1777 1778 mutex_enter(&importer_list.lock); 1779 1780 head = importer_list.bucket[index]; 1781 importer_list.bucket[index] = new_token; 1782 new_token->next = head; 1783 mutex_exit(&importer_list.lock); 1784 1785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n")); 1786 } 1787 1788 static void 1789 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie) 1790 { 1791 1792 importing_token_t *prev, *token = NULL; 1793 int index; 1794 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1795 1796 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n")); 1797 1798 index = rsmhash(key); 1799 1800 mutex_enter(&importer_list.lock); 1801 1802 token = importer_list.bucket[index]; 1803 1804 prev = token; 1805 while (token != NULL) { 1806 if (token->importing_node == node && 1807 token->import_segment_cookie == cookie) { 1808 if (prev == token) 1809 importer_list.bucket[index] = token->next; 1810 else 1811 prev->next = token->next; 1812 kmem_free((void *)token, sizeof (*token)); 1813 
break; 1814 } else { 1815 prev = token; 1816 token = token->next; 1817 } 1818 } 1819 1820 mutex_exit(&importer_list.lock); 1821 1822 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n")); 1823 1824 1825 } 1826 1827 /* **************************Segment Structure Management ************* */ 1828 1829 /* 1830 * Free segment structure 1831 */ 1832 static void 1833 rsmseg_free(rsmseg_t *seg) 1834 { 1835 1836 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1837 1838 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n")); 1839 1840 /* need to take seglock here to avoid race with rsmmap_unmap() */ 1841 rsmseglock_acquire(seg); 1842 if (seg->s_ckl != NULL) { 1843 /* Segment is still busy */ 1844 seg->s_state = RSM_STATE_END; 1845 rsmseglock_release(seg); 1846 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1847 "rsmseg_free done\n")); 1848 return; 1849 } 1850 1851 rsmseglock_release(seg); 1852 1853 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW); 1854 1855 /* 1856 * If it's an importer decrement the refcount 1857 * and if its down to zero free the shared data structure. 1858 * This is where failures during rsm_connect() are unrefcounted 1859 */ 1860 if (seg->s_share != NULL) { 1861 1862 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT); 1863 1864 rsmsharelock_acquire(seg); 1865 1866 ASSERT(seg->s_share->rsmsi_refcnt > 0); 1867 1868 seg->s_share->rsmsi_refcnt--; 1869 1870 if (seg->s_share->rsmsi_refcnt == 0) { 1871 rsmsharelock_release(seg); 1872 mutex_destroy(&seg->s_share->rsmsi_lock); 1873 cv_destroy(&seg->s_share->rsmsi_cv); 1874 kmem_free((void *)(seg->s_share), 1875 sizeof (rsm_import_share_t)); 1876 } else { 1877 rsmsharelock_release(seg); 1878 } 1879 /* 1880 * The following needs to be done after any 1881 * rsmsharelock calls which use seg->s_share. 
1882 */ 1883 seg->s_share = NULL; 1884 } 1885 1886 cv_destroy(&seg->s_cv); 1887 mutex_destroy(&seg->s_lock); 1888 rsmacl_free(seg->s_acl, seg->s_acl_len); 1889 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len); 1890 if (seg->s_adapter) 1891 rsmka_release_adapter(seg->s_adapter); 1892 1893 kmem_free((void *)seg, sizeof (*seg)); 1894 1895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n")); 1896 1897 } 1898 1899 1900 static rsmseg_t * 1901 rsmseg_alloc(minor_t num, struct cred *cred) 1902 { 1903 rsmseg_t *new; 1904 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1905 1906 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n")); 1907 /* 1908 * allocate memory for new segment. This should be a segkmem cache. 1909 */ 1910 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); 1911 1912 new->s_state = RSM_STATE_NEW; 1913 new->s_minor = num; 1914 new->s_acl_len = 0; 1915 new->s_cookie = NULL; 1916 new->s_adapter = NULL; 1917 1918 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask; 1919 /* we don't have a key yet, will set at export/connect */ 1920 new->s_uid = crgetuid(cred); 1921 new->s_gid = crgetgid(cred); 1922 1923 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL); 1924 cv_init(&new->s_cv, NULL, CV_DRIVER, 0); 1925 1926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n")); 1927 1928 return (new); 1929 } 1930 1931 /* ******************************** Driver Open/Close/Poll *************** */ 1932 1933 /*ARGSUSED1*/ 1934 static int 1935 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred) 1936 { 1937 minor_t rnum; 1938 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 1939 1940 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n")); 1941 /* 1942 * Char only 1943 */ 1944 if (otyp != OTYP_CHR) { 1945 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n")); 1946 return (EINVAL); 1947 } 1948 1949 /* 1950 * Only zero can be opened, clones are used for resources. 
1951 */ 1952 if (getminor(*devp) != RSM_DRIVER_MINOR) { 1953 DBG_PRINTF((category, RSM_ERR, 1954 "rsm_open: bad minor %d\n", getminor(*devp))); 1955 return (ENODEV); 1956 } 1957 1958 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) { 1959 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n")); 1960 return (EPERM); 1961 } 1962 1963 if (!(flag & FWRITE)) { 1964 /* 1965 * The library function _rsm_librsm_init calls open for 1966 * /dev/rsm with flag set to O_RDONLY. We want a valid 1967 * file descriptor to be returned for minor device zero. 1968 */ 1969 1970 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1971 "rsm_open RDONLY done\n")); 1972 return (DDI_SUCCESS); 1973 } 1974 1975 /* 1976 * - allocate new minor number and segment. 1977 * - add segment to list of all segments. 1978 * - set minordev data to segment 1979 * - update devp argument to new device 1980 * - update s_cred to cred; make sure you do crhold(cred); 1981 */ 1982 1983 /* allocate a new resource number */ 1984 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) { 1985 /* 1986 * We will bind this minor to a specific resource in first 1987 * ioctl 1988 */ 1989 *devp = makedevice(getmajor(*devp), rnum); 1990 } else { 1991 return (EAGAIN); 1992 } 1993 1994 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n")); 1995 return (DDI_SUCCESS); 1996 } 1997 1998 static void 1999 rsmseg_close(rsmseg_t *seg, int force_flag) 2000 { 2001 int e = RSM_SUCCESS; 2002 2003 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2004 2005 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n")); 2006 2007 rsmseglock_acquire(seg); 2008 if (!force_flag && (seg->s_hdr.rsmrc_type == 2009 RSM_RESOURCE_EXPORT_SEGMENT)) { 2010 /* 2011 * If we are processing rsm_close wait for force_destroy 2012 * processing to complete since force_destroy processing 2013 * needs to finish first before we can free the segment. 
2014 * force_destroy is only for export segments 2015 */ 2016 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) { 2017 cv_wait(&seg->s_cv, &seg->s_lock); 2018 } 2019 } 2020 rsmseglock_release(seg); 2021 2022 /* It's ok to read the state without a lock */ 2023 switch (seg->s_state) { 2024 case RSM_STATE_EXPORT: 2025 case RSM_STATE_EXPORT_QUIESCING: 2026 case RSM_STATE_EXPORT_QUIESCED: 2027 e = rsm_unpublish(seg, 1); 2028 /* FALLTHRU */ 2029 case RSM_STATE_BIND_QUIESCED: 2030 /* FALLTHRU */ 2031 case RSM_STATE_BIND: 2032 e = rsm_unbind(seg); 2033 if (e != RSM_SUCCESS && force_flag == 1) 2034 return; 2035 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT); 2036 /* FALLTHRU */ 2037 case RSM_STATE_NEW_QUIESCED: 2038 rsmseglock_acquire(seg); 2039 seg->s_state = RSM_STATE_NEW; 2040 cv_broadcast(&seg->s_cv); 2041 rsmseglock_release(seg); 2042 break; 2043 case RSM_STATE_NEW: 2044 break; 2045 case RSM_STATE_ZOMBIE: 2046 /* 2047 * Segments in this state have been removed off the 2048 * exported segments list and have been unpublished 2049 * and unbind. These segments have been removed during 2050 * a callback to the rsm_export_force_destroy, which 2051 * is called for the purpose of unlocking these 2052 * exported memory segments when a process exits but 2053 * leaves the segments locked down since rsm_close is 2054 * is not called for the segments. This can happen 2055 * when a process calls fork or exec and then exits. 2056 * Once the segments are in the ZOMBIE state, all that 2057 * remains is to destroy them when rsm_close is called. 2058 * This is done here. Thus, for such segments the 2059 * the state is changed to new so that later in this 2060 * function rsmseg_free is called. 
2061 */ 2062 rsmseglock_acquire(seg); 2063 seg->s_state = RSM_STATE_NEW; 2064 rsmseglock_release(seg); 2065 break; 2066 case RSM_STATE_MAP_QUIESCE: 2067 case RSM_STATE_ACTIVE: 2068 /* Disconnect will handle the unmap */ 2069 case RSM_STATE_CONN_QUIESCE: 2070 case RSM_STATE_CONNECT: 2071 case RSM_STATE_DISCONNECT: 2072 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 2073 (void) rsm_disconnect(seg); 2074 break; 2075 case RSM_STATE_MAPPING: 2076 /*FALLTHRU*/ 2077 case RSM_STATE_END: 2078 DBG_PRINTF((category, RSM_ERR, 2079 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2080 break; 2081 default: 2082 DBG_PRINTF((category, RSM_ERR, 2083 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2084 break; 2085 } 2086 2087 /* 2088 * check state. 2089 * - make sure you do crfree(s_cred); 2090 * release segment and minor number 2091 */ 2092 ASSERT(seg->s_state == RSM_STATE_NEW); 2093 2094 /* 2095 * The export_force_destroy callback is created to unlock 2096 * the exported segments of a process 2097 * when the process does a fork or exec and then exits calls this 2098 * function with the force flag set to 1 which indicates that the 2099 * segment state must be converted to ZOMBIE. This state means that the 2100 * segments still exist and have been unlocked and most importantly the 2101 * only operation allowed is to destroy them on an rsm_close. 
2102 */ 2103 if (force_flag) { 2104 rsmseglock_acquire(seg); 2105 seg->s_state = RSM_STATE_ZOMBIE; 2106 rsmseglock_release(seg); 2107 } else { 2108 rsmseg_free(seg); 2109 } 2110 2111 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n")); 2112 } 2113 2114 static int 2115 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred) 2116 { 2117 minor_t rnum = getminor(dev); 2118 rsmresource_t *res; 2119 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2120 2121 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n")); 2122 2123 flag = flag; cred = cred; 2124 2125 if (otyp != OTYP_CHR) 2126 return (EINVAL); 2127 2128 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum)); 2129 2130 /* 2131 * At this point we are the last reference to the resource. 2132 * Free resource number from resource table. 2133 * It's ok to remove number before we free the segment. 2134 * We need to lock the resource to protect against remote calls. 2135 */ 2136 if (rnum == RSM_DRIVER_MINOR || 2137 (res = rsmresource_free(rnum)) == NULL) { 2138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2139 return (DDI_SUCCESS); 2140 } 2141 2142 switch (res->rsmrc_type) { 2143 case RSM_RESOURCE_EXPORT_SEGMENT: 2144 case RSM_RESOURCE_IMPORT_SEGMENT: 2145 rsmseg_close((rsmseg_t *)res, 0); 2146 break; 2147 case RSM_RESOURCE_BAR: 2148 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n")); 2149 break; 2150 default: 2151 break; 2152 } 2153 2154 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2155 2156 return (DDI_SUCCESS); 2157 } 2158 2159 /* 2160 * rsm_inc_pgcnt 2161 * 2162 * Description: increment rsm page counter. 2163 * 2164 * Parameters: pgcnt_t pnum; number of pages to be used 2165 * 2166 * Returns: RSM_SUCCESS if memory limit not exceeded 2167 * ENOSPC if memory limit exceeded. In this case, the 2168 * page counter remains unchanged. 
2169 * 2170 */ 2171 static int 2172 rsm_inc_pgcnt(pgcnt_t pnum) 2173 { 2174 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2175 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2176 return (RSM_SUCCESS); 2177 } 2178 2179 mutex_enter(&rsm_pgcnt_lock); 2180 2181 if (rsm_pgcnt + pnum > rsm_pgcnt_max) { 2182 /* ensure that limits have not been exceeded */ 2183 mutex_exit(&rsm_pgcnt_lock); 2184 return (RSMERR_INSUFFICIENT_MEM); 2185 } 2186 2187 rsm_pgcnt += pnum; 2188 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n", 2189 rsm_pgcnt)); 2190 mutex_exit(&rsm_pgcnt_lock); 2191 2192 return (RSM_SUCCESS); 2193 } 2194 2195 /* 2196 * rsm_dec_pgcnt 2197 * 2198 * Description: decrement rsm page counter. 2199 * 2200 * Parameters: pgcnt_t pnum; number of pages freed 2201 * 2202 */ 2203 static void 2204 rsm_dec_pgcnt(pgcnt_t pnum) 2205 { 2206 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2207 2208 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2209 return; 2210 } 2211 2212 mutex_enter(&rsm_pgcnt_lock); 2213 ASSERT(rsm_pgcnt >= pnum); 2214 rsm_pgcnt -= pnum; 2215 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n", 2216 rsm_pgcnt)); 2217 mutex_exit(&rsm_pgcnt_lock); 2218 } 2219 2220 static struct umem_callback_ops rsm_as_ops = { 2221 UMEM_CALLBACK_VERSION, /* version number */ 2222 rsm_export_force_destroy, 2223 }; 2224 2225 static int 2226 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len, 2227 proc_t *procp) 2228 { 2229 int error = RSM_SUCCESS; 2230 ulong_t pnum; 2231 struct umem_callback_ops *callbackops = &rsm_as_ops; 2232 2233 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2234 2235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n")); 2236 2237 /* 2238 * Make sure vaddr and len are aligned on a page boundary 2239 */ 2240 if ((uintptr_t)vaddr & (PAGESIZE - 1)) { 2241 return (RSMERR_BAD_ADDR); 2242 } 2243 2244 if (len & (PAGESIZE - 1)) { 2245 return (RSMERR_BAD_LENGTH); 2246 } 2247 2248 
/* 2249 * Find number of pages 2250 */ 2251 pnum = btopr(len); 2252 error = rsm_inc_pgcnt(pnum); 2253 if (error != RSM_SUCCESS) { 2254 DBG_PRINTF((category, RSM_ERR, 2255 "rsm_bind_pages:mem limit exceeded\n")); 2256 return (RSMERR_INSUFFICIENT_MEM); 2257 } 2258 2259 error = umem_lockmemory(vaddr, len, 2260 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM, 2261 cookie, 2262 callbackops, procp); 2263 2264 if (error) { 2265 rsm_dec_pgcnt(pnum); 2266 DBG_PRINTF((category, RSM_ERR, 2267 "rsm_bind_pages:ddi_umem_lock failed\n")); 2268 /* 2269 * ddi_umem_lock, in the case of failure, returns one of 2270 * the following three errors. These are translated into 2271 * the RSMERR namespace and returned. 2272 */ 2273 if (error == EFAULT) 2274 return (RSMERR_BAD_ADDR); 2275 else if (error == EACCES) 2276 return (RSMERR_PERM_DENIED); 2277 else 2278 return (RSMERR_INSUFFICIENT_MEM); 2279 } 2280 2281 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); 2282 2283 return (error); 2284 2285 } 2286 2287 static int 2288 rsm_unbind_pages(rsmseg_t *seg) 2289 { 2290 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2291 2292 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); 2293 2294 ASSERT(rsmseglock_held(seg)); 2295 2296 if (seg->s_cookie != NULL) { 2297 /* unlock address range */ 2298 ddi_umem_unlock(seg->s_cookie); 2299 rsm_dec_pgcnt(btopr(seg->s_len)); 2300 seg->s_cookie = NULL; 2301 } 2302 2303 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); 2304 2305 return (RSM_SUCCESS); 2306 } 2307 2308 2309 static int 2310 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2311 { 2312 int e; 2313 adapter_t *adapter; 2314 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2315 2316 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); 2317 2318 adapter = rsm_getadapter(msg, mode); 2319 if (adapter == NULL) { 2320 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2321 "rsm_bind done:no adapter\n")); 
2322 return (RSMERR_CTLR_NOT_PRESENT); 2323 } 2324 2325 /* lock address range */ 2326 if (msg->vaddr == NULL) { 2327 rsmka_release_adapter(adapter); 2328 DBG_PRINTF((category, RSM_ERR, 2329 "rsm: rsm_bind done: invalid vaddr\n")); 2330 return (RSMERR_BAD_ADDR); 2331 } 2332 if (msg->len <= 0) { 2333 rsmka_release_adapter(adapter); 2334 DBG_PRINTF((category, RSM_ERR, 2335 "rsm_bind: invalid length\n")); 2336 return (RSMERR_BAD_LENGTH); 2337 } 2338 2339 /* Lock segment */ 2340 rsmseglock_acquire(seg); 2341 2342 while (seg->s_state == RSM_STATE_NEW_QUIESCED) { 2343 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2344 DBG_PRINTF((category, RSM_DEBUG, 2345 "rsm_bind done: cv_wait INTERRUPTED")); 2346 rsmka_release_adapter(adapter); 2347 rsmseglock_release(seg); 2348 return (RSMERR_INTERRUPTED); 2349 } 2350 } 2351 2352 ASSERT(seg->s_state == RSM_STATE_NEW); 2353 2354 ASSERT(seg->s_cookie == NULL); 2355 2356 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); 2357 if (e == RSM_SUCCESS) { 2358 seg->s_flags |= RSM_USER_MEMORY; 2359 if (msg->perm & RSM_ALLOW_REBIND) { 2360 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; 2361 } 2362 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { 2363 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; 2364 } 2365 seg->s_region.r_vaddr = msg->vaddr; 2366 /* 2367 * Set the s_pid value in the segment structure. This is used 2368 * to identify exported segments belonging to a particular 2369 * process so that when the process exits, these segments can 2370 * be unlocked forcefully even if rsm_close is not called on 2371 * process exit since there maybe other processes referencing 2372 * them (for example on a fork or exec). 2373 * The s_pid value is also used to authenticate the process 2374 * doing a publish or unpublish on the export segment. Only 2375 * the creator of the export segment has a right to do a 2376 * publish or unpublish and unbind on the segment. 
2377 */ 2378 seg->s_pid = ddi_get_pid(); 2379 seg->s_len = msg->len; 2380 seg->s_state = RSM_STATE_BIND; 2381 seg->s_adapter = adapter; 2382 seg->s_proc = curproc; 2383 } else { 2384 rsmka_release_adapter(adapter); 2385 DBG_PRINTF((category, RSM_WARNING, 2386 "unable to lock down pages\n")); 2387 } 2388 2389 msg->rnum = seg->s_minor; 2390 /* Unlock segment */ 2391 rsmseglock_release(seg); 2392 2393 if (e == RSM_SUCCESS) { 2394 /* copyout the resource number */ 2395 #ifdef _MULTI_DATAMODEL 2396 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2397 rsm_ioctlmsg32_t msg32; 2398 2399 msg32.rnum = msg->rnum; 2400 if (ddi_copyout((caddr_t)&msg32.rnum, 2401 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum, 2402 sizeof (minor_t), mode)) { 2403 rsmka_release_adapter(adapter); 2404 e = RSMERR_BAD_ADDR; 2405 } 2406 } 2407 #endif 2408 if (ddi_copyout((caddr_t)&msg->rnum, 2409 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum, 2410 sizeof (minor_t), mode)) { 2411 rsmka_release_adapter(adapter); 2412 e = RSMERR_BAD_ADDR; 2413 } 2414 } 2415 2416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n")); 2417 2418 return (e); 2419 } 2420 2421 static void 2422 rsm_remap_local_importers(rsm_node_id_t src_nodeid, 2423 rsm_memseg_id_t ex_segid, 2424 ddi_umem_cookie_t cookie) 2425 2426 { 2427 rsmresource_t *p = NULL; 2428 rsmhash_table_t *rhash = &rsm_import_segs; 2429 uint_t index; 2430 2431 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2432 "rsm_remap_local_importers enter\n")); 2433 2434 index = rsmhash(ex_segid); 2435 2436 rw_enter(&rhash->rsmhash_rw, RW_READER); 2437 2438 p = rsmhash_getbkt(rhash, index); 2439 2440 for (; p; p = p->rsmrc_next) { 2441 rsmseg_t *seg = (rsmseg_t *)p; 2442 rsmseglock_acquire(seg); 2443 /* 2444 * Change the s_cookie value of only the local importers 2445 * which have been mapped (in state RSM_STATE_ACTIVE). 
2446 * Note that there is no need to change the s_cookie value 2447 * if the imported segment is in RSM_STATE_MAPPING since 2448 * eventually the s_cookie will be updated via the mapping 2449 * functionality. 2450 */ 2451 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) && 2452 (seg->s_state == RSM_STATE_ACTIVE)) { 2453 seg->s_cookie = cookie; 2454 } 2455 rsmseglock_release(seg); 2456 } 2457 rw_exit(&rhash->rsmhash_rw); 2458 2459 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2460 "rsm_remap_local_importers done\n")); 2461 } 2462 2463 static int 2464 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg) 2465 { 2466 int e; 2467 adapter_t *adapter; 2468 ddi_umem_cookie_t cookie; 2469 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2470 2471 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n")); 2472 2473 /* Check for permissions to rebind */ 2474 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) { 2475 return (RSMERR_REBIND_NOT_ALLOWED); 2476 } 2477 2478 if (seg->s_pid != ddi_get_pid() && 2479 ddi_get_pid() != 0) { 2480 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n")); 2481 return (RSMERR_NOT_CREATOR); 2482 } 2483 2484 /* 2485 * We will not be allowing partial rebind and hence length passed 2486 * in must be same as segment length 2487 */ 2488 if (msg->vaddr == NULL) { 2489 DBG_PRINTF((category, RSM_ERR, 2490 "rsm_rebind done: null msg->vaddr\n")); 2491 return (RSMERR_BAD_ADDR); 2492 } 2493 if (msg->len != seg->s_len) { 2494 DBG_PRINTF((category, RSM_ERR, 2495 "rsm_rebind: invalid length\n")); 2496 return (RSMERR_BAD_LENGTH); 2497 } 2498 2499 /* Lock segment */ 2500 rsmseglock_acquire(seg); 2501 2502 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) || 2503 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 2504 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) { 2505 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2506 rsmseglock_release(seg); 2507 DBG_PRINTF((category, RSM_DEBUG, 2508 "rsm_rebind done: cv_wait INTERRUPTED")); 
2509 return (RSMERR_INTERRUPTED); 2510 } 2511 } 2512 2513 /* verify segment state */ 2514 if ((seg->s_state != RSM_STATE_BIND) && 2515 (seg->s_state != RSM_STATE_EXPORT)) { 2516 /* Unlock segment */ 2517 rsmseglock_release(seg); 2518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2519 "rsm_rebind done: invalid state\n")); 2520 return (RSMERR_BAD_SEG_HNDL); 2521 } 2522 2523 ASSERT(seg->s_cookie != NULL); 2524 2525 if (msg->vaddr == seg->s_region.r_vaddr) { 2526 rsmseglock_release(seg); 2527 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2528 return (RSM_SUCCESS); 2529 } 2530 2531 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc); 2532 if (e == RSM_SUCCESS) { 2533 struct buf *xbuf; 2534 dev_t sdev = 0; 2535 rsm_memory_local_t mem; 2536 2537 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE, 2538 sdev, 0, NULL, DDI_UMEM_SLEEP); 2539 ASSERT(xbuf != NULL); 2540 2541 mem.ms_type = RSM_MEM_BUF; 2542 mem.ms_bp = xbuf; 2543 2544 adapter = seg->s_adapter; 2545 e = adapter->rsmpi_ops->rsm_rebind( 2546 seg->s_handle.out, 0, &mem, 2547 RSM_RESOURCE_DONTWAIT, NULL); 2548 2549 if (e == RSM_SUCCESS) { 2550 /* 2551 * unbind the older pages, and unload local importers; 2552 * but don't disconnect importers 2553 */ 2554 (void) rsm_unbind_pages(seg); 2555 seg->s_cookie = cookie; 2556 seg->s_region.r_vaddr = msg->vaddr; 2557 rsm_remap_local_importers(my_nodeid, seg->s_segid, 2558 cookie); 2559 } else { 2560 /* 2561 * Unbind the pages associated with "cookie" by the 2562 * rsm_bind_pages calls prior to this. This is 2563 * similar to what is done in the rsm_unbind_pages 2564 * routine for the seg->s_cookie. 2565 */ 2566 ddi_umem_unlock(cookie); 2567 rsm_dec_pgcnt(btopr(msg->len)); 2568 DBG_PRINTF((category, RSM_ERR, 2569 "rsm_rebind failed with %d\n", e)); 2570 } 2571 /* 2572 * At present there is no dependency on the existence of xbuf. 2573 * So we can free it here. If in the future this changes, it can 2574 * be freed sometime during the segment destroy. 
2575 */ 2576 freerbuf(xbuf); 2577 } 2578 2579 /* Unlock segment */ 2580 rsmseglock_release(seg); 2581 2582 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2583 2584 return (e); 2585 } 2586 2587 static int 2588 rsm_unbind(rsmseg_t *seg) 2589 { 2590 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2591 2592 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n")); 2593 2594 rsmseglock_acquire(seg); 2595 2596 /* verify segment state */ 2597 if ((seg->s_state != RSM_STATE_BIND) && 2598 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2599 rsmseglock_release(seg); 2600 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2601 "rsm_unbind: invalid state\n")); 2602 return (RSMERR_BAD_SEG_HNDL); 2603 } 2604 2605 /* unlock current range */ 2606 (void) rsm_unbind_pages(seg); 2607 2608 if (seg->s_state == RSM_STATE_BIND) { 2609 seg->s_state = RSM_STATE_NEW; 2610 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 2611 seg->s_state = RSM_STATE_NEW_QUIESCED; 2612 } 2613 2614 rsmseglock_release(seg); 2615 2616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n")); 2617 2618 return (RSM_SUCCESS); 2619 } 2620 2621 /* **************************** Exporter Access List Management ******* */ 2622 static void 2623 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len) 2624 { 2625 int acl_sz; 2626 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2627 2628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n")); 2629 2630 /* acl could be NULL */ 2631 2632 if (acl != NULL && acl_len > 0) { 2633 acl_sz = acl_len * sizeof (rsmapi_access_entry_t); 2634 kmem_free((void *)acl, acl_sz); 2635 } 2636 2637 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n")); 2638 } 2639 2640 static void 2641 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len) 2642 { 2643 int acl_sz; 2644 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2645 2646 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n")); 2647 2648 if (acl != NULL && acl_len > 0) { 2649 
acl_sz = acl_len * sizeof (rsm_access_entry_t); 2650 kmem_free((void *)acl, acl_sz); 2651 } 2652 2653 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); 2654 2655 } 2656 2657 static int 2658 rsmacl_build(rsm_ioctlmsg_t *msg, int mode, 2659 rsmapi_access_entry_t **list, int *len, int loopback) 2660 { 2661 rsmapi_access_entry_t *acl; 2662 int acl_len; 2663 int i; 2664 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2665 2666 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); 2667 2668 *len = 0; 2669 *list = NULL; 2670 2671 acl_len = msg->acl_len; 2672 if ((loopback && acl_len > 1) || (acl_len < 0) || 2673 (acl_len > MAX_NODES)) { 2674 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2675 "rsmacl_build done: acl invalid\n")); 2676 return (RSMERR_BAD_ACL); 2677 } 2678 2679 if (acl_len > 0 && acl_len <= MAX_NODES) { 2680 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); 2681 2682 acl = kmem_alloc(acl_size, KM_SLEEP); 2683 2684 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, 2685 acl_size, mode)) { 2686 kmem_free((void *) acl, acl_size); 2687 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2688 "rsmacl_build done: BAD_ADDR\n")); 2689 return (RSMERR_BAD_ADDR); 2690 } 2691 2692 /* 2693 * Verify access list 2694 */ 2695 for (i = 0; i < acl_len; i++) { 2696 if (acl[i].ae_node > MAX_NODES || 2697 (loopback && (acl[i].ae_node != my_nodeid)) || 2698 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2699 /* invalid entry */ 2700 kmem_free((void *) acl, acl_size); 2701 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2702 "rsmacl_build done: EINVAL\n")); 2703 return (RSMERR_BAD_ACL); 2704 } 2705 } 2706 2707 *len = acl_len; 2708 *list = acl; 2709 } 2710 2711 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2712 2713 return (DDI_SUCCESS); 2714 } 2715 2716 static int 2717 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2718 int acl_len, adapter_t *adapter) 2719 { 2720 rsm_access_entry_t *acl; 2721 rsm_addr_t hwaddr; 2722 int 
i; 2723 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2724 2725 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2726 2727 if (src != NULL) { 2728 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2729 acl = kmem_alloc(acl_size, KM_SLEEP); 2730 2731 /* 2732 * translate access list 2733 */ 2734 for (i = 0; i < acl_len; i++) { 2735 if (src[i].ae_node == my_nodeid) { 2736 acl[i].ae_addr = adapter->hwaddr; 2737 } else { 2738 hwaddr = get_remote_hwaddr(adapter, 2739 src[i].ae_node); 2740 if ((int64_t)hwaddr < 0) { 2741 /* invalid hwaddr */ 2742 kmem_free((void *) acl, acl_size); 2743 DBG_PRINTF((category, 2744 RSM_DEBUG_VERBOSE, 2745 "rsmpiacl_create done:" 2746 "EINVAL hwaddr\n")); 2747 return (RSMERR_INTERNAL_ERROR); 2748 } 2749 acl[i].ae_addr = hwaddr; 2750 } 2751 /* rsmpi understands only RSM_PERM_XXXX */ 2752 acl[i].ae_permission = 2753 src[i].ae_permission & RSM_PERM_RDWR; 2754 } 2755 *dest = acl; 2756 } else { 2757 *dest = NULL; 2758 } 2759 2760 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2761 2762 return (RSM_SUCCESS); 2763 } 2764 2765 static int 2766 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2767 rsmipc_reply_t *reply) 2768 { 2769 2770 int i; 2771 rsmseg_t *seg; 2772 rsm_memseg_id_t key = req->rsmipc_key; 2773 rsm_permission_t perm = req->rsmipc_perm; 2774 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2775 2776 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2777 "rsmsegacl_validate enter\n")); 2778 2779 /* 2780 * Find segment and grab its lock. The reason why we grab the segment 2781 * lock in side the search is to avoid the race when the segment is 2782 * being deleted and we already have a pointer to it. 
2783 */ 2784 seg = rsmexport_lookup(key); 2785 if (!seg) { 2786 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2787 "rsmsegacl_validate done: %u ENXIO\n", key)); 2788 return (RSMERR_SEG_NOT_PUBLISHED); 2789 } 2790 2791 ASSERT(rsmseglock_held(seg)); 2792 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2793 2794 /* 2795 * We implement a 2-level protection scheme. 2796 * First, we check if local/remote host has access rights. 2797 * Second, we check if the user has access rights. 2798 * 2799 * This routine only validates the rnode access_list 2800 */ 2801 if (seg->s_acl_len > 0) { 2802 /* 2803 * Check host access list 2804 */ 2805 ASSERT(seg->s_acl != NULL); 2806 for (i = 0; i < seg->s_acl_len; i++) { 2807 if (seg->s_acl[i].ae_node == rnode) { 2808 perm &= seg->s_acl[i].ae_permission; 2809 goto found; 2810 } 2811 } 2812 /* rnode is not found in the list */ 2813 rsmseglock_release(seg); 2814 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2815 "rsmsegacl_validate done: EPERM\n")); 2816 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2817 } else { 2818 /* use default owner creation umask */ 2819 perm &= seg->s_mode; 2820 } 2821 2822 found: 2823 /* update perm for this node */ 2824 reply->rsmipc_mode = perm; 2825 reply->rsmipc_uid = seg->s_uid; 2826 reply->rsmipc_gid = seg->s_gid; 2827 reply->rsmipc_segid = seg->s_segid; 2828 reply->rsmipc_seglen = seg->s_len; 2829 2830 /* 2831 * Perm of requesting node is valid; source will validate user 2832 */ 2833 rsmseglock_release(seg); 2834 2835 /* 2836 * Add the importer to the list right away, if connect fails 2837 * the importer will ask the exporter to remove it. 
2838 */ 2839 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2840 req->rsmipc_segment_cookie); 2841 2842 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2843 2844 return (RSM_SUCCESS); 2845 } 2846 2847 2848 /* ************************** Exporter Calls ************************* */ 2849 2850 static int 2851 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2852 { 2853 int e; 2854 int acl_len; 2855 rsmapi_access_entry_t *acl; 2856 rsm_access_entry_t *rsmpi_acl; 2857 rsm_memory_local_t mem; 2858 struct buf *xbuf; 2859 dev_t sdev = 0; 2860 adapter_t *adapter; 2861 rsm_memseg_id_t segment_id = 0; 2862 int loopback_flag = 0; 2863 int create_flags = 0; 2864 rsm_resource_callback_t callback_flag; 2865 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2866 2867 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2868 2869 if (seg->s_adapter == &loopback_adapter) 2870 loopback_flag = 1; 2871 2872 if (seg->s_pid != ddi_get_pid() && 2873 ddi_get_pid() != 0) { 2874 DBG_PRINTF((category, RSM_ERR, 2875 "rsm_publish: Not creator\n")); 2876 return (RSMERR_NOT_CREATOR); 2877 } 2878 2879 /* 2880 * Get per node access list 2881 */ 2882 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2883 if (e != DDI_SUCCESS) { 2884 DBG_PRINTF((category, RSM_ERR, 2885 "rsm_publish done: rsmacl_build failed\n")); 2886 return (e); 2887 } 2888 2889 /* 2890 * The application provided msg->key is used for resolving a 2891 * segment id according to the following: 2892 * key = 0 Kernel Agent selects the segment id 2893 * key <= RSM_DLPI_ID_END Reserved for system usage except 2894 * RSMLIB range 2895 * key < RSM_USER_APP_ID_BASE segment id = key 2896 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2897 * 2898 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2899 * overflows to zero after 0x80000000 allocations. 
2900 * An algorithm is needed which allows reinitialization and provides 2901 * for reallocation after overflow. For now, ENOMEM is returned 2902 * once the overflow condition has occurred. 2903 */ 2904 if (msg->key == 0) { 2905 mutex_enter(&rsm_lock); 2906 segment_id = rsm_nextavail_segmentid; 2907 if (segment_id != 0) { 2908 rsm_nextavail_segmentid++; 2909 mutex_exit(&rsm_lock); 2910 } else { 2911 mutex_exit(&rsm_lock); 2912 DBG_PRINTF((category, RSM_ERR, 2913 "rsm_publish done: no more keys avlbl\n")); 2914 return (RSMERR_INSUFFICIENT_RESOURCES); 2915 } 2916 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END) 2917 /* range reserved for internal use by base/ndi libraries */ 2918 segment_id = msg->key; 2919 else if (msg->key <= RSM_DLPI_ID_END) 2920 return (RSMERR_RESERVED_SEGID); 2921 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1) 2922 segment_id = msg->key; 2923 else { 2924 DBG_PRINTF((category, RSM_ERR, 2925 "rsm_publish done: invalid key %u\n", msg->key)); 2926 return (RSMERR_RESERVED_SEGID); 2927 } 2928 2929 /* Add key to exportlist; The segment lock is held on success */ 2930 e = rsmexport_add(seg, segment_id); 2931 if (e) { 2932 rsmacl_free(acl, acl_len); 2933 DBG_PRINTF((category, RSM_ERR, 2934 "rsm_publish done: export_add failed: %d\n", e)); 2935 return (e); 2936 } 2937 2938 seg->s_segid = segment_id; 2939 2940 if ((seg->s_state != RSM_STATE_BIND) && 2941 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2942 /* state changed since then, free acl and return */ 2943 rsmseglock_release(seg); 2944 rsmexport_rm(seg); 2945 rsmacl_free(acl, acl_len); 2946 DBG_PRINTF((category, RSM_ERR, 2947 "rsm_publish done: segment in wrong state: %d\n", 2948 seg->s_state)); 2949 return (RSMERR_BAD_SEG_HNDL); 2950 } 2951 2952 /* 2953 * If this is for a local memory handle and permissions are zero, 2954 * then the surrogate segment is very large and we want to skip 2955 * allocation of DVMA space. 2956 * 2957 * Careful! 
If the user didn't use an ACL list, acl will be a NULL 2958 * pointer. Check that before dereferencing it. 2959 */ 2960 if (acl != (rsmapi_access_entry_t *)NULL) { 2961 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 2962 goto skipdriver; 2963 } 2964 2965 /* create segment */ 2966 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE, 2967 sdev, 0, NULL, DDI_UMEM_SLEEP); 2968 ASSERT(xbuf != NULL); 2969 2970 mem.ms_type = RSM_MEM_BUF; 2971 mem.ms_bp = xbuf; 2972 2973 /* This call includes a bind operations */ 2974 2975 adapter = seg->s_adapter; 2976 /* 2977 * create a acl list with hwaddr for RSMPI publish 2978 */ 2979 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter); 2980 2981 if (e != RSM_SUCCESS) { 2982 rsmseglock_release(seg); 2983 rsmexport_rm(seg); 2984 rsmacl_free(acl, acl_len); 2985 freerbuf(xbuf); 2986 DBG_PRINTF((category, RSM_ERR, 2987 "rsm_publish done: rsmpiacl_create failed: %d\n", e)); 2988 return (e); 2989 } 2990 2991 if (seg->s_state == RSM_STATE_BIND) { 2992 /* create segment */ 2993 2994 /* This call includes a bind operations */ 2995 2996 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 2997 create_flags = RSM_ALLOW_UNBIND_REBIND; 2998 } 2999 3000 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 3001 callback_flag = RSM_RESOURCE_DONTWAIT; 3002 } else { 3003 callback_flag = RSM_RESOURCE_SLEEP; 3004 } 3005 3006 e = adapter->rsmpi_ops->rsm_seg_create( 3007 adapter->rsmpi_handle, 3008 &seg->s_handle.out, seg->s_len, 3009 create_flags, &mem, 3010 callback_flag, NULL); 3011 /* 3012 * At present there is no dependency on the existence of xbuf. 3013 * So we can free it here. If in the future this changes, it can 3014 * be freed sometime during the segment destroy. 
3015 */ 3016 freerbuf(xbuf); 3017 3018 if (e != RSM_SUCCESS) { 3019 rsmseglock_release(seg); 3020 rsmexport_rm(seg); 3021 rsmacl_free(acl, acl_len); 3022 rsmpiacl_free(rsmpi_acl, acl_len); 3023 DBG_PRINTF((category, RSM_ERR, 3024 "rsm_publish done: export_create failed: %d\n", e)); 3025 /* 3026 * The following assertion ensures that the two errors 3027 * related to the length and its alignment do not occur 3028 * since they have been checked during export_create 3029 */ 3030 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT && 3031 e != RSMERR_BAD_LENGTH); 3032 if (e == RSMERR_NOT_MEM) 3033 e = RSMERR_INSUFFICIENT_MEM; 3034 3035 return (e); 3036 } 3037 /* export segment, this should create an IMMU mapping */ 3038 e = adapter->rsmpi_ops->rsm_publish( 3039 seg->s_handle.out, 3040 rsmpi_acl, acl_len, 3041 seg->s_segid, 3042 RSM_RESOURCE_DONTWAIT, NULL); 3043 3044 if (e != RSM_SUCCESS) { 3045 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3046 rsmseglock_release(seg); 3047 rsmexport_rm(seg); 3048 rsmacl_free(acl, acl_len); 3049 rsmpiacl_free(rsmpi_acl, acl_len); 3050 DBG_PRINTF((category, RSM_ERR, 3051 "rsm_publish done: export_publish failed: %d\n", 3052 e)); 3053 return (e); 3054 } 3055 } 3056 3057 seg->s_acl_in = rsmpi_acl; 3058 3059 skipdriver: 3060 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */ 3061 seg->s_acl_len = acl_len; 3062 seg->s_acl = acl; 3063 3064 if (seg->s_state == RSM_STATE_BIND) { 3065 seg->s_state = RSM_STATE_EXPORT; 3066 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 3067 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 3068 cv_broadcast(&seg->s_cv); 3069 } 3070 3071 rsmseglock_release(seg); 3072 3073 /* 3074 * If the segment id was solicited, then return it in 3075 * the original incoming message. 
3076 */ 3077 if (msg->key == 0) { 3078 msg->key = segment_id; 3079 #ifdef _MULTI_DATAMODEL 3080 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 3081 rsm_ioctlmsg32_t msg32; 3082 3083 msg32.key = msg->key; 3084 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3085 "rsm_publish done\n")); 3086 return (ddi_copyout((caddr_t)&msg32, 3087 (caddr_t)dataptr, sizeof (msg32), mode)); 3088 } 3089 #endif 3090 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3091 "rsm_publish done\n")); 3092 return (ddi_copyout((caddr_t)msg, 3093 (caddr_t)dataptr, sizeof (*msg), mode)); 3094 } 3095 3096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n")); 3097 return (DDI_SUCCESS); 3098 } 3099 3100 /* 3101 * This function modifies the access control list of an already published 3102 * segment. There is no effect on import segments which are already 3103 * connected. 3104 */ 3105 static int 3106 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode) 3107 { 3108 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl; 3109 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl; 3110 int new_acl_len, old_acl_len, tmp_acl_len; 3111 int e, i; 3112 adapter_t *adapter; 3113 int loopback_flag = 0; 3114 rsm_memseg_id_t key; 3115 rsm_permission_t permission; 3116 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3117 3118 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n")); 3119 3120 if ((seg->s_state != RSM_STATE_EXPORT) && 3121 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) && 3122 (seg->s_state != RSM_STATE_EXPORT_QUIESCING)) 3123 return (RSMERR_SEG_NOT_PUBLISHED); 3124 3125 if (seg->s_pid != ddi_get_pid() && 3126 ddi_get_pid() != 0) { 3127 DBG_PRINTF((category, RSM_ERR, 3128 "rsm_republish: Not owner\n")); 3129 return (RSMERR_NOT_CREATOR); 3130 } 3131 3132 if (seg->s_adapter == &loopback_adapter) 3133 loopback_flag = 1; 3134 3135 /* 3136 * Build new list first 3137 */ 3138 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag); 3139 if (e) { 3140 DBG_PRINTF((category, 
RSM_DEBUG_VERBOSE, 3141 "rsm_republish done: rsmacl_build failed %d", e)); 3142 return (e); 3143 } 3144 3145 /* Lock segment */ 3146 rsmseglock_acquire(seg); 3147 /* 3148 * a republish is in progress - REPUBLISH message is being 3149 * sent to the importers so wait for it to complete OR 3150 * wait till DR completes 3151 */ 3152 while (((seg->s_state == RSM_STATE_EXPORT) && 3153 (seg->s_flags & RSM_REPUBLISH_WAIT)) || 3154 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) || 3155 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) { 3156 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3157 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3158 "rsm_republish done: cv_wait INTERRUPTED")); 3159 rsmseglock_release(seg); 3160 rsmacl_free(new_acl, new_acl_len); 3161 return (RSMERR_INTERRUPTED); 3162 } 3163 } 3164 3165 /* recheck if state is valid */ 3166 if (seg->s_state != RSM_STATE_EXPORT) { 3167 rsmseglock_release(seg); 3168 rsmacl_free(new_acl, new_acl_len); 3169 return (RSMERR_SEG_NOT_PUBLISHED); 3170 } 3171 3172 key = seg->s_key; 3173 old_acl = seg->s_acl; 3174 old_acl_len = seg->s_acl_len; 3175 3176 seg->s_acl = new_acl; 3177 seg->s_acl_len = new_acl_len; 3178 3179 /* 3180 * This call will only be meaningful if and when the interconnect 3181 * layer makes use of the access list 3182 */ 3183 adapter = seg->s_adapter; 3184 /* 3185 * create a acl list with hwaddr for RSMPI publish 3186 */ 3187 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter); 3188 3189 if (e != RSM_SUCCESS) { 3190 seg->s_acl = old_acl; 3191 seg->s_acl_len = old_acl_len; 3192 rsmseglock_release(seg); 3193 rsmacl_free(new_acl, new_acl_len); 3194 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3195 "rsm_republish done: rsmpiacl_create failed %d", e)); 3196 return (e); 3197 } 3198 rsmpi_old_acl = seg->s_acl_in; 3199 seg->s_acl_in = rsmpi_new_acl; 3200 3201 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out, 3202 seg->s_acl_in, seg->s_acl_len, 3203 RSM_RESOURCE_DONTWAIT, NULL); 3204 3205 if (e != 
RSM_SUCCESS) { 3206 seg->s_acl = old_acl; 3207 seg->s_acl_in = rsmpi_old_acl; 3208 seg->s_acl_len = old_acl_len; 3209 rsmseglock_release(seg); 3210 rsmacl_free(new_acl, new_acl_len); 3211 rsmpiacl_free(rsmpi_new_acl, new_acl_len); 3212 3213 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3214 "rsm_republish done: rsmpi republish failed %d\n", e)); 3215 return (e); 3216 } 3217 3218 /* create a tmp copy of the new acl */ 3219 tmp_acl_len = new_acl_len; 3220 if (tmp_acl_len > 0) { 3221 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP); 3222 for (i = 0; i < tmp_acl_len; i++) { 3223 tmp_acl[i].ae_node = new_acl[i].ae_node; 3224 tmp_acl[i].ae_permission = new_acl[i].ae_permission; 3225 } 3226 /* 3227 * The default permission of a node which was in the old 3228 * ACL but not in the new ACL is 0 ie no access. 3229 */ 3230 permission = 0; 3231 } else { 3232 /* 3233 * NULL acl means all importers can connect and 3234 * default permission will be owner creation umask 3235 */ 3236 tmp_acl = NULL; 3237 permission = seg->s_mode; 3238 } 3239 3240 /* make other republishers to wait for republish to complete */ 3241 seg->s_flags |= RSM_REPUBLISH_WAIT; 3242 3243 rsmseglock_release(seg); 3244 3245 /* send the new perms to the importing nodes */ 3246 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission); 3247 3248 rsmseglock_acquire(seg); 3249 seg->s_flags &= ~RSM_REPUBLISH_WAIT; 3250 /* wake up any one waiting for republish to complete */ 3251 cv_broadcast(&seg->s_cv); 3252 rsmseglock_release(seg); 3253 3254 rsmacl_free(tmp_acl, tmp_acl_len); 3255 rsmacl_free(old_acl, old_acl_len); 3256 rsmpiacl_free(rsmpi_old_acl, old_acl_len); 3257 3258 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n")); 3259 return (DDI_SUCCESS); 3260 } 3261 3262 static int 3263 rsm_unpublish(rsmseg_t *seg, int mode) 3264 { 3265 rsmapi_access_entry_t *acl; 3266 rsm_access_entry_t *rsmpi_acl; 3267 int acl_len; 3268 int e; 3269 clock_t ticks; 3270 adapter_t *adapter; 3271 
DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3272 3273 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n")); 3274 3275 if (seg->s_pid != ddi_get_pid() && 3276 ddi_get_pid() != 0) { 3277 DBG_PRINTF((category, RSM_ERR, 3278 "rsm_unpublish: Not creator\n")); 3279 return (RSMERR_NOT_CREATOR); 3280 } 3281 3282 rsmseglock_acquire(seg); 3283 /* 3284 * wait for QUIESCING to complete here before rsmexport_rm 3285 * is called because the SUSPEND_COMPLETE mesg which changes 3286 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and 3287 * signals the cv_wait needs to find it in the hashtable. 3288 */ 3289 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3290 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3291 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3292 rsmseglock_release(seg); 3293 DBG_PRINTF((category, RSM_ERR, 3294 "rsm_unpublish done: cv_wait INTR qscing" 3295 "getv/putv in progress")); 3296 return (RSMERR_INTERRUPTED); 3297 } 3298 } 3299 3300 /* verify segment state */ 3301 if ((seg->s_state != RSM_STATE_EXPORT) && 3302 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3303 rsmseglock_release(seg); 3304 DBG_PRINTF((category, RSM_ERR, 3305 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3306 return (RSMERR_SEG_NOT_PUBLISHED); 3307 } 3308 3309 rsmseglock_release(seg); 3310 3311 rsmexport_rm(seg); 3312 3313 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3314 3315 rsmseglock_acquire(seg); 3316 /* 3317 * wait for republish to complete 3318 */ 3319 while ((seg->s_state == RSM_STATE_EXPORT) && 3320 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3321 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3322 DBG_PRINTF((category, RSM_ERR, 3323 "rsm_unpublish done: cv_wait INTR repubing")); 3324 rsmseglock_release(seg); 3325 return (RSMERR_INTERRUPTED); 3326 } 3327 } 3328 3329 if ((seg->s_state != RSM_STATE_EXPORT) && 3330 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3331 DBG_PRINTF((category, RSM_ERR, 3332 
"rsm_unpublish done: invalid state")); 3333 rsmseglock_release(seg); 3334 return (RSMERR_SEG_NOT_PUBLISHED); 3335 } 3336 3337 /* 3338 * check for putv/get surrogate segment which was not published 3339 * to the driver. 3340 * 3341 * Be certain to see if there is an ACL first! If this segment was 3342 * not published with an ACL, acl will be a null pointer. Check 3343 * that before dereferencing it. 3344 */ 3345 acl = seg->s_acl; 3346 if (acl != (rsmapi_access_entry_t *)NULL) { 3347 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3348 goto bypass; 3349 } 3350 3351 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3352 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3353 goto bypass; 3354 3355 adapter = seg->s_adapter; 3356 for (;;) { 3357 if (seg->s_state != RSM_STATE_EXPORT) { 3358 rsmseglock_release(seg); 3359 DBG_PRINTF((category, RSM_ERR, 3360 "rsm_unpublish done: bad state %x\n", 3361 seg->s_state)); 3362 return (RSMERR_SEG_NOT_PUBLISHED); 3363 } 3364 3365 /* unpublish from adapter */ 3366 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3367 3368 if (e == RSM_SUCCESS) { 3369 break; 3370 } 3371 3372 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3373 /* 3374 * wait for unpublish to succeed, it's busy. 
3375 */ 3376 seg->s_flags |= RSM_EXPORT_WAIT; 3377 3378 /* wait for a max of 1 ms - this is an empirical */ 3379 /* value that was found by some minimal testing */ 3380 /* can be fine tuned when we have better numbers */ 3381 /* A long term fix would be to send cv_signal */ 3382 /* from the intr callback routine */ 3383 (void) drv_getparm(LBOLT, &ticks); 3384 ticks += drv_usectohz(1000); 3385 /* currently nobody signals this wait */ 3386 (void) cv_timedwait(&seg->s_cv, &seg->s_lock, ticks); 3387 3388 DBG_PRINTF((category, RSM_ERR, 3389 "rsm_unpublish: SEG_IN_USE\n")); 3390 3391 seg->s_flags &= ~RSM_EXPORT_WAIT; 3392 } else { 3393 if (mode == 1) { 3394 DBG_PRINTF((category, RSM_ERR, 3395 "rsm:rsmpi unpublish err %x\n", e)); 3396 seg->s_state = RSM_STATE_BIND; 3397 } 3398 rsmseglock_release(seg); 3399 return (e); 3400 } 3401 } 3402 3403 /* Free segment */ 3404 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3405 3406 if (e != RSM_SUCCESS) { 3407 DBG_PRINTF((category, RSM_ERR, 3408 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3409 seg->s_key, e)); 3410 } 3411 3412 bypass: 3413 acl = seg->s_acl; 3414 rsmpi_acl = seg->s_acl_in; 3415 acl_len = seg->s_acl_len; 3416 3417 seg->s_acl = NULL; 3418 seg->s_acl_in = NULL; 3419 seg->s_acl_len = 0; 3420 3421 if (seg->s_state == RSM_STATE_EXPORT) { 3422 seg->s_state = RSM_STATE_BIND; 3423 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3424 seg->s_state = RSM_STATE_BIND_QUIESCED; 3425 cv_broadcast(&seg->s_cv); 3426 } 3427 3428 rsmseglock_release(seg); 3429 3430 rsmacl_free(acl, acl_len); 3431 rsmpiacl_free(rsmpi_acl, acl_len); 3432 3433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3434 3435 return (DDI_SUCCESS); 3436 } 3437 3438 /* 3439 * Called from rsm_unpublish to force an unload and disconnection of all 3440 * importers of the unpublished segment. 3441 * 3442 * First build the list of segments requiring a force disconnect, then 3443 * send a request for each. 
3444 */ 3445 static void 3446 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3447 rsm_node_id_t ex_nodeid) 3448 { 3449 rsmipc_request_t request; 3450 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3451 importing_token_t *force_disconnect_list = NULL; 3452 int index; 3453 3454 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3455 "rsm_send_importer_disconnects enter\n")); 3456 3457 index = rsmhash(ex_segid); 3458 3459 mutex_enter(&importer_list.lock); 3460 3461 prev_token = NULL; 3462 token = importer_list.bucket[index]; 3463 3464 while (token != NULL) { 3465 if (token->key == ex_segid) { 3466 /* 3467 * take it off the importer list and add it 3468 * to the force disconnect list. 3469 */ 3470 if (prev_token == NULL) 3471 importer_list.bucket[index] = token->next; 3472 else 3473 prev_token->next = token->next; 3474 tmp_token = token; 3475 token = token->next; 3476 if (force_disconnect_list == NULL) { 3477 force_disconnect_list = tmp_token; 3478 tmp_token->next = NULL; 3479 } else { 3480 tokp = force_disconnect_list; 3481 /* 3482 * make sure that the tmp_token's node 3483 * is not already on the force disconnect 3484 * list. 
3485 */ 3486 while (tokp != NULL) { 3487 if (tokp->importing_node == 3488 tmp_token->importing_node) { 3489 break; 3490 } 3491 tokp = tokp->next; 3492 } 3493 if (tokp == NULL) { 3494 tmp_token->next = 3495 force_disconnect_list; 3496 force_disconnect_list = tmp_token; 3497 } else { 3498 kmem_free((void *)tmp_token, 3499 sizeof (*token)); 3500 } 3501 } 3502 3503 } else { 3504 prev_token = token; 3505 token = token->next; 3506 } 3507 } 3508 mutex_exit(&importer_list.lock); 3509 3510 token = force_disconnect_list; 3511 while (token != NULL) { 3512 if (token->importing_node == my_nodeid) { 3513 rsm_force_unload(ex_nodeid, ex_segid, 3514 DISCONNECT); 3515 } else { 3516 request.rsmipc_hdr.rsmipc_type = 3517 RSMIPC_MSG_DISCONNECT; 3518 request.rsmipc_key = token->key; 3519 (void) rsmipc_send(token->importing_node, 3520 &request, 3521 RSM_NO_REPLY); 3522 } 3523 tmp_token = token; 3524 token = token->next; 3525 kmem_free((void *)tmp_token, sizeof (*token)); 3526 } 3527 3528 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3529 "rsm_send_importer_disconnects done\n")); 3530 } 3531 3532 /* 3533 * This function is used as a callback for unlocking the pages locked 3534 * down by a process which then does a fork or an exec. 3535 * It marks the export segments corresponding to umem cookie given by 3536 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3537 * destroyed later when an rsm_close occurs). 3538 */ 3539 static void 3540 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3541 { 3542 rsmresource_blk_t *blk; 3543 rsmresource_t *p; 3544 rsmseg_t *eseg = NULL; 3545 int i, j; 3546 int found = 0; 3547 3548 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3549 "rsm_export_force_destroy enter\n")); 3550 3551 /* 3552 * Walk the resource list and locate the export segment (either 3553 * in the BIND or the EXPORT state) which corresponds to the 3554 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 
3555 * Change the state to ZOMBIE by calling rsmseg_close with the 3556 * force_flag argument (the second argument) set to 1. Also, 3557 * unpublish and unbind the segment, but don't free it. Free it 3558 * only on a rsm_close call for the segment. 3559 */ 3560 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3561 3562 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3563 blk = rsm_resource.rsmrc_root[i]; 3564 if (blk == NULL) { 3565 continue; 3566 } 3567 3568 for (j = 0; j < RSMRC_BLKSZ; j++) { 3569 p = blk->rsmrcblk_blks[j]; 3570 if ((p != NULL) && (p != RSMRC_RESERVED) && 3571 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3572 eseg = (rsmseg_t *)p; 3573 if (eseg->s_cookie != ck) 3574 continue; /* continue searching */ 3575 /* 3576 * Found the segment, set flag to indicate 3577 * force destroy processing is in progress 3578 */ 3579 rsmseglock_acquire(eseg); 3580 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3581 rsmseglock_release(eseg); 3582 found = 1; 3583 break; 3584 } 3585 } 3586 3587 if (found) 3588 break; 3589 } 3590 3591 rw_exit(&rsm_resource.rsmrc_lock); 3592 3593 if (found) { 3594 ASSERT(eseg != NULL); 3595 /* call rsmseg_close with force flag set to 1 */ 3596 rsmseg_close(eseg, 1); 3597 /* 3598 * force destroy processing done, clear flag and signal any 3599 * thread waiting in rsmseg_close. 
3600 */ 3601 rsmseglock_acquire(eseg); 3602 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3603 cv_broadcast(&eseg->s_cv); 3604 rsmseglock_release(eseg); 3605 } 3606 3607 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3608 "rsm_export_force_destroy done\n")); 3609 } 3610 3611 /* ******************************* Remote Calls *********************** */ 3612 static void 3613 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3614 { 3615 rsmipc_reply_t reply; 3616 DBG_DEFINE(category, 3617 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3618 3619 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3620 "rsm_intr_segconnect enter\n")); 3621 3622 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3623 3624 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3625 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3626 3627 (void) rsmipc_send(src, NULL, &reply); 3628 3629 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3630 "rsm_intr_segconnect done\n")); 3631 } 3632 3633 3634 /* 3635 * When an exported segment is unpublished the exporter sends an ipc 3636 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3637 * calls this function. The import list is scanned; segments which match the 3638 * exported segment id are unloaded and disconnected. 3639 * 3640 * Will also be called from rsm_rebind with disconnect_flag FALSE. 
3641 * 3642 */ 3643 static void 3644 rsm_force_unload(rsm_node_id_t src_nodeid, 3645 rsm_memseg_id_t ex_segid, 3646 boolean_t disconnect_flag) 3647 3648 { 3649 rsmresource_t *p = NULL; 3650 rsmhash_table_t *rhash = &rsm_import_segs; 3651 uint_t index; 3652 DBG_DEFINE(category, 3653 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3654 3655 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3656 3657 index = rsmhash(ex_segid); 3658 3659 rw_enter(&rhash->rsmhash_rw, RW_READER); 3660 3661 p = rsmhash_getbkt(rhash, index); 3662 3663 for (; p; p = p->rsmrc_next) { 3664 rsmseg_t *seg = (rsmseg_t *)p; 3665 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3666 /* 3667 * In order to make rsmseg_unload and rsm_force_unload 3668 * thread safe, acquire the segment lock here. 3669 * rsmseg_unload is responsible for releasing the lock. 3670 * rsmseg_unload releases the lock just before a call 3671 * to rsmipc_send or in case of an early exit which 3672 * occurs if the segment was in the state 3673 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3674 */ 3675 rsmseglock_acquire(seg); 3676 if (disconnect_flag) 3677 seg->s_flags |= RSM_FORCE_DISCONNECT; 3678 rsmseg_unload(seg); 3679 } 3680 } 3681 rw_exit(&rhash->rsmhash_rw); 3682 3683 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3684 } 3685 3686 static void 3687 rsm_intr_reply(rsmipc_msghdr_t *msg) 3688 { 3689 /* 3690 * Find slot for cookie in reply. 
3691 * Match sequence with sequence in cookie 3692 * If no match; return 3693 * Try to grap lock of slot, if locked return 3694 * copy data into reply slot area 3695 * signal waiter 3696 */ 3697 rsmipc_slot_t *slot; 3698 rsmipc_cookie_t *cookie; 3699 void *data = (void *) msg; 3700 size_t size = sizeof (rsmipc_reply_t); 3701 DBG_DEFINE(category, 3702 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3703 3704 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n")); 3705 3706 cookie = &msg->rsmipc_cookie; 3707 if (cookie->ic.index >= RSMIPC_SZ) { 3708 DBG_PRINTF((category, RSM_ERR, 3709 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index)); 3710 return; 3711 } 3712 3713 ASSERT(cookie->ic.index < RSMIPC_SZ); 3714 slot = &rsm_ipc.slots[cookie->ic.index]; 3715 mutex_enter(&slot->rsmipc_lock); 3716 if (slot->rsmipc_cookie.value == cookie->value) { 3717 /* found a match */ 3718 if (RSMIPC_GET(slot, RSMIPC_PENDING)) { 3719 bcopy(data, slot->rsmipc_data, size); 3720 RSMIPC_CLEAR(slot, RSMIPC_PENDING); 3721 cv_signal(&slot->rsmipc_cv); 3722 } 3723 } else { 3724 DBG_PRINTF((category, RSM_DEBUG, 3725 "rsm: rsm_intr_reply mismatched reply %d\n", 3726 cookie->ic.index)); 3727 } 3728 mutex_exit(&slot->rsmipc_lock); 3729 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n")); 3730 } 3731 3732 /* 3733 * This function gets dispatched on the worker thread when we receive 3734 * the SQREADY message. This function sends the SQREADY_ACK message. 3735 */ 3736 static void 3737 rsm_sqready_ack_deferred(void *arg) 3738 { 3739 path_t *path = (path_t *)arg; 3740 DBG_DEFINE(category, 3741 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3742 3743 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3744 "rsm_sqready_ack_deferred enter\n")); 3745 3746 mutex_enter(&path->mutex); 3747 3748 /* 3749 * If path is not active no point in sending the ACK 3750 * because the whole SQREADY protocol will again start 3751 * when the path becomes active. 
3752 */ 3753 if (path->state != RSMKA_PATH_ACTIVE) { 3754 /* 3755 * decrement the path refcnt incremented in rsm_proc_sqready 3756 */ 3757 PATH_RELE_NOLOCK(path); 3758 mutex_exit(&path->mutex); 3759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3760 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3761 return; 3762 } 3763 3764 /* send an SQREADY_ACK message */ 3765 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3766 3767 /* initialize credits to the max level */ 3768 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3769 3770 /* wake up any send that is waiting for credits */ 3771 cv_broadcast(&path->sendq_token.sendq_cv); 3772 3773 /* 3774 * decrement the path refcnt since we incremented it in 3775 * rsm_proc_sqready 3776 */ 3777 PATH_RELE_NOLOCK(path); 3778 3779 mutex_exit(&path->mutex); 3780 3781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3782 "rsm_sqready_ack_deferred done\n")); 3783 } 3784 3785 /* 3786 * Process the SQREADY message 3787 */ 3788 static void 3789 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3790 rsm_intr_hand_arg_t arg) 3791 { 3792 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3793 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3794 path_t *path; 3795 DBG_DEFINE(category, 3796 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3797 3798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3799 3800 /* look up the path - incr the path refcnt */ 3801 path = rsm_find_path(hdlr_argp->adapter_name, 3802 hdlr_argp->adapter_instance, src_hwaddr); 3803 3804 /* 3805 * No path exists or path is not active - drop the message 3806 */ 3807 if (path == NULL) { 3808 DBG_PRINTF((category, RSM_DEBUG, 3809 "rsm_proc_sqready done: msg dropped no path\n")); 3810 return; 3811 } 3812 3813 mutex_exit(&path->mutex); 3814 3815 /* drain any tasks from the previous incarnation */ 3816 taskq_wait(path->recv_taskq); 3817 3818 mutex_enter(&path->mutex); 3819 /* 3820 * If we'd sent an SQREADY message and were 
waiting for SQREADY_ACK 3821 * in the meanwhile we received an SQREADY message, blindly reset 3822 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3823 * and forget about the SQREADY that we sent. 3824 */ 3825 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3826 3827 if (path->state != RSMKA_PATH_ACTIVE) { 3828 /* decr refcnt and drop the mutex */ 3829 PATH_RELE_NOLOCK(path); 3830 mutex_exit(&path->mutex); 3831 DBG_PRINTF((category, RSM_DEBUG, 3832 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3833 return; 3834 } 3835 3836 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3837 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3838 3839 /* 3840 * The sender's local incarnation number is our remote incarnation 3841 * number save it in the path data structure 3842 */ 3843 path->remote_incn = msg->rsmipc_local_incn; 3844 path->sendq_token.msgbuf_avail = 0; 3845 path->procmsg_cnt = 0; 3846 3847 /* 3848 * path is active - dispatch task to send SQREADY_ACK - remember 3849 * RSMPI calls can't be done in interrupt context 3850 * 3851 * We can use the recv_taskq to send because the remote endpoint 3852 * cannot start sending messages till it receives SQREADY_ACK hence 3853 * at this point there are no tasks on recv_taskq. 3854 * 3855 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3856 */ 3857 (void) taskq_dispatch(path->recv_taskq, 3858 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3859 3860 mutex_exit(&path->mutex); 3861 3862 3863 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3864 } 3865 3866 /* 3867 * Process the SQREADY_ACK message 3868 */ 3869 static void 3870 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3871 rsm_intr_hand_arg_t arg) 3872 { 3873 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3874 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3875 path_t *path; 3876 DBG_DEFINE(category, 3877 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3878 3879 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3880 "rsm_proc_sqready_ack enter\n")); 3881 3882 /* look up the path - incr the path refcnt */ 3883 path = rsm_find_path(hdlr_argp->adapter_name, 3884 hdlr_argp->adapter_instance, src_hwaddr); 3885 3886 /* 3887 * drop the message if - no path exists or path is not active 3888 * or if its not waiting for SQREADY_ACK message 3889 */ 3890 if (path == NULL) { 3891 DBG_PRINTF((category, RSM_DEBUG, 3892 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3893 return; 3894 } 3895 3896 if ((path->state != RSMKA_PATH_ACTIVE) || 3897 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3898 /* decrement the refcnt */ 3899 PATH_RELE_NOLOCK(path); 3900 mutex_exit(&path->mutex); 3901 DBG_PRINTF((category, RSM_DEBUG, 3902 "rsm_proc_sqready_ack done: msg dropped\n")); 3903 return; 3904 } 3905 3906 /* 3907 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3908 * sent, if not drop it. 
3909 */ 3910 if (path->local_incn != msghdr->rsmipc_incn) { 3911 /* decrement the refcnt */ 3912 PATH_RELE_NOLOCK(path); 3913 mutex_exit(&path->mutex); 3914 DBG_PRINTF((category, RSM_DEBUG, 3915 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3916 msghdr->rsmipc_incn)); 3917 return; 3918 } 3919 3920 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3921 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3922 3923 /* 3924 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3925 */ 3926 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3927 3928 /* save the remote sendq incn number */ 3929 path->remote_incn = msg->rsmipc_local_incn; 3930 3931 /* initialize credits to the max level */ 3932 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3933 3934 /* wake up any send that is waiting for credits */ 3935 cv_broadcast(&path->sendq_token.sendq_cv); 3936 3937 /* decrement the refcnt */ 3938 PATH_RELE_NOLOCK(path); 3939 3940 mutex_exit(&path->mutex); 3941 3942 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3943 "rsm_proc_sqready_ack done\n")); 3944 } 3945 3946 /* 3947 * process the RSMIPC_MSG_CREDIT message 3948 */ 3949 static void 3950 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3951 rsm_intr_hand_arg_t arg) 3952 { 3953 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3954 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3955 path_t *path; 3956 DBG_DEFINE(category, 3957 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3958 3959 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3960 3961 /* look up the path - incr the path refcnt */ 3962 path = rsm_find_path(hdlr_argp->adapter_name, 3963 hdlr_argp->adapter_instance, src_hwaddr); 3964 3965 if (path == NULL) { 3966 DBG_PRINTF((category, RSM_DEBUG, 3967 "rsm_add_credits enter: path not found\n")); 3968 return; 3969 } 3970 3971 /* the path is not active - discard credits */ 3972 if (path->state != RSMKA_PATH_ACTIVE) { 3973 
PATH_RELE_NOLOCK(path); 3974 mutex_exit(&path->mutex); 3975 DBG_PRINTF((category, RSM_DEBUG, 3976 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3977 return; 3978 } 3979 3980 /* 3981 * Check if these credits are for current incarnation of the path. 3982 */ 3983 if (path->local_incn != msghdr->rsmipc_incn) { 3984 /* decrement the refcnt */ 3985 PATH_RELE_NOLOCK(path); 3986 mutex_exit(&path->mutex); 3987 DBG_PRINTF((category, RSM_DEBUG, 3988 "rsm_add_credits enter: old incn %lld\n", 3989 msghdr->rsmipc_incn)); 3990 return; 3991 } 3992 3993 DBG_PRINTF((category, RSM_DEBUG, 3994 "rsm_add_credits:path=%lx new-creds=%d " 3995 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 3996 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 3997 src_hwaddr)); 3998 3999 4000 /* add credits to the path's sendq */ 4001 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4002 4003 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4004 4005 /* wake up any send that is waiting for credits */ 4006 cv_broadcast(&path->sendq_token.sendq_cv); 4007 4008 /* decrement the refcnt */ 4009 PATH_RELE_NOLOCK(path); 4010 4011 mutex_exit(&path->mutex); 4012 4013 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4014 } 4015 4016 static void 4017 rsm_intr_event(rsmipc_request_t *msg) 4018 { 4019 rsmseg_t *seg; 4020 rsmresource_t *p; 4021 rsm_node_id_t src_node; 4022 DBG_DEFINE(category, 4023 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4024 4025 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4026 4027 src_node = msg->rsmipc_hdr.rsmipc_src; 4028 4029 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4030 /* This is for an import segment */ 4031 uint_t hashval = rsmhash(msg->rsmipc_key); 4032 4033 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4034 4035 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4036 4037 for (; p; p = p->rsmrc_next) { 4038 if ((p->rsmrc_key == msg->rsmipc_key) && 4039 (p->rsmrc_node 
== src_node)) { 4040 seg = (rsmseg_t *)p; 4041 rsmseglock_acquire(seg); 4042 4043 atomic_add_32(&seg->s_pollevent, 1); 4044 4045 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4046 pollwakeup(&seg->s_poll, POLLRDNORM); 4047 4048 rsmseglock_release(seg); 4049 } 4050 } 4051 4052 rw_exit(&rsm_import_segs.rsmhash_rw); 4053 } else { 4054 /* This is for an export segment */ 4055 seg = rsmexport_lookup(msg->rsmipc_key); 4056 if (!seg) { 4057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4058 "rsm_intr_event done: exp seg not found\n")); 4059 return; 4060 } 4061 4062 ASSERT(rsmseglock_held(seg)); 4063 4064 atomic_add_32(&seg->s_pollevent, 1); 4065 4066 /* 4067 * We must hold the segment lock here, or else the segment 4068 * can be freed while pollwakeup is using it. This implies 4069 * that we MUST NOT grab the segment lock during rsm_chpoll, 4070 * as outlined in the chpoll(2) man page. 4071 */ 4072 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4073 pollwakeup(&seg->s_poll, POLLRDNORM); 4074 4075 rsmseglock_release(seg); 4076 } 4077 4078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4079 } 4080 4081 /* 4082 * The exporter did a republish and changed the ACL - this change is only 4083 * visible to new importers. 4084 */ 4085 static void 4086 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4087 rsm_permission_t perm) 4088 { 4089 4090 rsmresource_t *p; 4091 rsmseg_t *seg; 4092 uint_t hashval = rsmhash(key); 4093 DBG_DEFINE(category, 4094 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4095 4096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4097 4098 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4099 4100 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4101 4102 for (; p; p = p->rsmrc_next) { 4103 /* 4104 * find the importer and update the permission in the shared 4105 * data structure. 
Any new importers will use the new perms 4106 */ 4107 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4108 seg = (rsmseg_t *)p; 4109 4110 rsmseglock_acquire(seg); 4111 rsmsharelock_acquire(seg); 4112 seg->s_share->rsmsi_mode = perm; 4113 rsmsharelock_release(seg); 4114 rsmseglock_release(seg); 4115 4116 break; 4117 } 4118 } 4119 4120 rw_exit(&rsm_import_segs.rsmhash_rw); 4121 4122 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4123 } 4124 4125 void 4126 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4127 { 4128 int done = 1; /* indicate all SUSPENDS have been acked */ 4129 list_element_t *elem; 4130 DBG_DEFINE(category, 4131 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4132 4133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4134 "rsm_suspend_complete enter\n")); 4135 4136 mutex_enter(&rsm_suspend_list.list_lock); 4137 4138 if (rsm_suspend_list.list_head == NULL) { 4139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4140 "rsm_suspend_complete done: suspend_list is empty\n")); 4141 mutex_exit(&rsm_suspend_list.list_lock); 4142 return; 4143 } 4144 4145 elem = rsm_suspend_list.list_head; 4146 while (elem != NULL) { 4147 if (elem->nodeid == src_node) { 4148 /* clear the pending flag for the node */ 4149 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4150 elem->flags |= flag; 4151 } 4152 4153 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4154 done = 0; /* still some nodes have not yet ACKED */ 4155 4156 elem = elem->next; 4157 } 4158 4159 mutex_exit(&rsm_suspend_list.list_lock); 4160 4161 if (!done) { 4162 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4163 "rsm_suspend_complete done: acks pending\n")); 4164 return; 4165 } 4166 /* 4167 * Now that we are done with suspending all the remote importers 4168 * time to quiesce the local exporters 4169 */ 4170 exporter_quiesce(); 4171 4172 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4173 "rsm_suspend_complete done\n")); 4174 } 4175 4176 static void 4177 exporter_quiesce() 4178 { 4179 int i, e; 4180 
rsmresource_t *current; 4181 rsmseg_t *seg; 4182 adapter_t *adapter; 4183 DBG_DEFINE(category, 4184 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4185 4186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4187 /* 4188 * The importers send a SUSPEND_COMPLETE to the exporter node 4189 * Unpublish, unbind the export segment and 4190 * move the segments to the EXPORT_QUIESCED state 4191 */ 4192 4193 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4194 4195 for (i = 0; i < rsm_hash_size; i++) { 4196 current = rsm_export_segs.bucket[i]; 4197 while (current != NULL) { 4198 seg = (rsmseg_t *)current; 4199 rsmseglock_acquire(seg); 4200 if (current->rsmrc_state == 4201 RSM_STATE_EXPORT_QUIESCING) { 4202 adapter = seg->s_adapter; 4203 /* 4204 * some local memory handles are not published 4205 * check if it was published 4206 */ 4207 if ((seg->s_acl == NULL) || 4208 (seg->s_acl[0].ae_node != my_nodeid) || 4209 (seg->s_acl[0].ae_permission != 0)) { 4210 4211 e = adapter->rsmpi_ops->rsm_unpublish( 4212 seg->s_handle.out); 4213 DBG_PRINTF((category, RSM_DEBUG, 4214 "exporter_quiesce:unpub %d\n", e)); 4215 4216 e = adapter->rsmpi_ops->rsm_seg_destroy( 4217 seg->s_handle.out); 4218 4219 DBG_PRINTF((category, RSM_DEBUG, 4220 "exporter_quiesce:destroy %d\n", 4221 e)); 4222 } 4223 4224 (void) rsm_unbind_pages(seg); 4225 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4226 cv_broadcast(&seg->s_cv); 4227 } 4228 rsmseglock_release(seg); 4229 current = current->rsmrc_next; 4230 } 4231 } 4232 rw_exit(&rsm_export_segs.rsmhash_rw); 4233 4234 /* 4235 * All the local segments we are done with the pre-del processing 4236 * - time to move to PREDEL_COMPLETED. 
4237 */ 4238 4239 mutex_enter(&rsm_drv_data.drv_lock); 4240 4241 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4242 4243 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4244 4245 cv_broadcast(&rsm_drv_data.drv_cv); 4246 4247 mutex_exit(&rsm_drv_data.drv_lock); 4248 4249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4250 } 4251 4252 static void 4253 importer_suspend(rsm_node_id_t src_node) 4254 { 4255 int i; 4256 int susp_flg; /* true means already suspended */ 4257 int num_importers; 4258 rsmresource_t *p = NULL, *curp; 4259 rsmhash_table_t *rhash = &rsm_import_segs; 4260 rsmseg_t *seg; 4261 rsmipc_request_t request; 4262 DBG_DEFINE(category, 4263 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4264 4265 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4266 4267 rw_enter(&rhash->rsmhash_rw, RW_READER); 4268 for (i = 0; i < rsm_hash_size; i++) { 4269 p = rhash->bucket[i]; 4270 4271 /* 4272 * Suspend all importers with same <node, key> pair. 4273 * After the last one of the shared importers has been 4274 * suspended - suspend the shared mappings/connection. 4275 */ 4276 for (; p; p = p->rsmrc_next) { 4277 rsmseg_t *first = (rsmseg_t *)p; 4278 if ((first->s_node != src_node) || 4279 (first->s_state == RSM_STATE_DISCONNECT)) 4280 continue; /* go to next entry */ 4281 /* 4282 * search the rest of the bucket for 4283 * other siblings (imprtrs with the same key) 4284 * of "first" and suspend them. 4285 * All importers with same key fall in 4286 * the same bucket. 
4287 */ 4288 num_importers = 0; 4289 for (curp = p; curp; curp = curp->rsmrc_next) { 4290 seg = (rsmseg_t *)curp; 4291 4292 rsmseglock_acquire(seg); 4293 4294 if ((seg->s_node != first->s_node) || 4295 (seg->s_key != first->s_key) || 4296 (seg->s_state == RSM_STATE_DISCONNECT)) { 4297 /* 4298 * either not a peer segment or its a 4299 * disconnected segment - skip it 4300 */ 4301 rsmseglock_release(seg); 4302 continue; 4303 } 4304 4305 rsmseg_suspend(seg, &susp_flg); 4306 4307 if (susp_flg) { /* seg already suspended */ 4308 rsmseglock_release(seg); 4309 break; /* the inner for loop */ 4310 } 4311 4312 num_importers++; 4313 rsmsharelock_acquire(seg); 4314 /* 4315 * we've processed all importers that are 4316 * siblings of "first" 4317 */ 4318 if (num_importers == 4319 seg->s_share->rsmsi_refcnt) { 4320 rsmsharelock_release(seg); 4321 rsmseglock_release(seg); 4322 break; 4323 } 4324 rsmsharelock_release(seg); 4325 rsmseglock_release(seg); 4326 } 4327 4328 /* 4329 * All the importers with the same key and 4330 * nodeid as "first" have been suspended. 4331 * Now suspend the shared connect/mapping. 4332 * This is done only once. 
4333 */ 4334 if (!susp_flg) { 4335 rsmsegshare_suspend(seg); 4336 } 4337 } 4338 } 4339 4340 rw_exit(&rhash->rsmhash_rw); 4341 4342 /* send an ACK for SUSPEND message */ 4343 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4344 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4345 4346 4347 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4348 4349 } 4350 4351 static void 4352 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4353 { 4354 int recheck_state; 4355 rsmcookie_t *hdl; 4356 DBG_DEFINE(category, 4357 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4358 4359 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4360 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4361 4362 *susp_flg = 0; 4363 4364 ASSERT(rsmseglock_held(seg)); 4365 /* wait if putv/getv is in progress */ 4366 while (seg->s_rdmacnt > 0) 4367 cv_wait(&seg->s_cv, &seg->s_lock); 4368 4369 do { 4370 recheck_state = 0; 4371 4372 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4373 "rsmseg_suspend:segment %x state=%d\n", 4374 seg->s_key, seg->s_state)); 4375 4376 switch (seg->s_state) { 4377 case RSM_STATE_NEW: 4378 /* not a valid state */ 4379 break; 4380 case RSM_STATE_CONNECTING: 4381 seg->s_state = RSM_STATE_ABORT_CONNECT; 4382 break; 4383 case RSM_STATE_ABORT_CONNECT: 4384 break; 4385 case RSM_STATE_CONNECT: 4386 seg->s_handle.in = NULL; 4387 seg->s_state = RSM_STATE_CONN_QUIESCE; 4388 break; 4389 case RSM_STATE_MAPPING: 4390 /* wait until segment leaves the mapping state */ 4391 while (seg->s_state == RSM_STATE_MAPPING) 4392 cv_wait(&seg->s_cv, &seg->s_lock); 4393 recheck_state = 1; 4394 break; 4395 case RSM_STATE_ACTIVE: 4396 /* unload the mappings */ 4397 if (seg->s_ckl != NULL) { 4398 hdl = seg->s_ckl; 4399 for (; hdl != NULL; hdl = hdl->c_next) { 4400 (void) devmap_unload(hdl->c_dhp, 4401 hdl->c_off, hdl->c_len); 4402 } 4403 } 4404 seg->s_mapinfo = NULL; 4405 seg->s_state = RSM_STATE_MAP_QUIESCE; 4406 break; 4407 case RSM_STATE_CONN_QUIESCE: 4408 /* FALLTHRU */ 4409 case 
RSM_STATE_MAP_QUIESCE: 4410 /* rsmseg_suspend already done for seg */ 4411 *susp_flg = 1; 4412 break; 4413 case RSM_STATE_DISCONNECT: 4414 break; 4415 default: 4416 ASSERT(0); /* invalid state */ 4417 } 4418 } while (recheck_state); 4419 4420 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4421 } 4422 4423 static void 4424 rsmsegshare_suspend(rsmseg_t *seg) 4425 { 4426 int e; 4427 adapter_t *adapter; 4428 rsm_import_share_t *sharedp; 4429 DBG_DEFINE(category, 4430 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4431 4432 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4433 "rsmsegshare_suspend enter\n")); 4434 4435 rsmseglock_acquire(seg); 4436 rsmsharelock_acquire(seg); 4437 4438 sharedp = seg->s_share; 4439 adapter = seg->s_adapter; 4440 switch (sharedp->rsmsi_state) { 4441 case RSMSI_STATE_NEW: 4442 break; 4443 case RSMSI_STATE_CONNECTING: 4444 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4445 break; 4446 case RSMSI_STATE_ABORT_CONNECT: 4447 break; 4448 case RSMSI_STATE_CONNECTED: 4449 /* do the rsmpi disconnect */ 4450 if (sharedp->rsmsi_node != my_nodeid) { 4451 e = adapter->rsmpi_ops-> 4452 rsm_disconnect(sharedp->rsmsi_handle); 4453 4454 DBG_PRINTF((category, RSM_DEBUG, 4455 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4456 sharedp->rsmsi_segid, e)); 4457 } 4458 4459 sharedp->rsmsi_handle = NULL; 4460 4461 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4462 break; 4463 case RSMSI_STATE_CONN_QUIESCE: 4464 break; 4465 case RSMSI_STATE_MAPPED: 4466 /* do the rsmpi unmap and disconnect */ 4467 if (sharedp->rsmsi_node != my_nodeid) { 4468 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4469 4470 DBG_PRINTF((category, RSM_DEBUG, 4471 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4472 4473 e = adapter->rsmpi_ops-> 4474 rsm_disconnect(sharedp->rsmsi_handle); 4475 DBG_PRINTF((category, RSM_DEBUG, 4476 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4477 sharedp->rsmsi_segid, e)); 4478 } 4479 4480 sharedp->rsmsi_handle = NULL; 4481 4482 
sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4483 break; 4484 case RSMSI_STATE_MAP_QUIESCE: 4485 break; 4486 case RSMSI_STATE_DISCONNECTED: 4487 break; 4488 default: 4489 ASSERT(0); /* invalid state */ 4490 } 4491 4492 rsmsharelock_release(seg); 4493 rsmseglock_release(seg); 4494 4495 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4496 "rsmsegshare_suspend done\n")); 4497 } 4498 4499 /* 4500 * This should get called on receiving a RESUME message or from 4501 * the pathmanger if the node undergoing DR dies. 4502 */ 4503 static void 4504 importer_resume(rsm_node_id_t src_node) 4505 { 4506 int i; 4507 rsmresource_t *p = NULL; 4508 rsmhash_table_t *rhash = &rsm_import_segs; 4509 void *cookie; 4510 DBG_DEFINE(category, 4511 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4512 4513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4514 4515 rw_enter(&rhash->rsmhash_rw, RW_READER); 4516 4517 for (i = 0; i < rsm_hash_size; i++) { 4518 p = rhash->bucket[i]; 4519 4520 for (; p; p = p->rsmrc_next) { 4521 rsmseg_t *seg = (rsmseg_t *)p; 4522 4523 rsmseglock_acquire(seg); 4524 4525 /* process only importers of node undergoing DR */ 4526 if (seg->s_node != src_node) { 4527 rsmseglock_release(seg); 4528 continue; 4529 } 4530 4531 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4532 rsmipc_request_t request; 4533 /* 4534 * rsmpi map/connect failed 4535 * inform the exporter so that it can 4536 * remove the importer. 
4537 */ 4538 request.rsmipc_hdr.rsmipc_type = 4539 RSMIPC_MSG_NOTIMPORTING; 4540 request.rsmipc_key = seg->s_segid; 4541 request.rsmipc_segment_cookie = cookie; 4542 rsmseglock_release(seg); 4543 (void) rsmipc_send(seg->s_node, &request, 4544 RSM_NO_REPLY); 4545 } else { 4546 rsmseglock_release(seg); 4547 } 4548 } 4549 } 4550 4551 rw_exit(&rhash->rsmhash_rw); 4552 4553 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4554 } 4555 4556 static int 4557 rsmseg_resume(rsmseg_t *seg, void **cookie) 4558 { 4559 int e; 4560 int retc; 4561 off_t dev_offset; 4562 size_t maplen; 4563 uint_t maxprot; 4564 rsm_mapinfo_t *p; 4565 rsmcookie_t *hdl; 4566 rsm_import_share_t *sharedp; 4567 DBG_DEFINE(category, 4568 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4569 4570 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4571 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4572 4573 *cookie = NULL; 4574 4575 ASSERT(rsmseglock_held(seg)); 4576 4577 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4578 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4579 return (RSM_SUCCESS); 4580 } 4581 4582 sharedp = seg->s_share; 4583 4584 rsmsharelock_acquire(seg); 4585 4586 /* resume the shared connection and/or mapping */ 4587 retc = rsmsegshare_resume(seg); 4588 4589 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4590 /* shared state can either be connected or mapped */ 4591 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4592 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4593 ASSERT(retc == RSM_SUCCESS); 4594 seg->s_handle.in = sharedp->rsmsi_handle; 4595 rsmsharelock_release(seg); 4596 seg->s_state = RSM_STATE_CONNECT; 4597 4598 } else { /* error in rsmpi connect during resume */ 4599 seg->s_handle.in = NULL; 4600 seg->s_state = RSM_STATE_DISCONNECT; 4601 4602 sharedp->rsmsi_refcnt--; 4603 cookie = (void *)sharedp->rsmsi_cookie; 4604 4605 if (sharedp->rsmsi_refcnt == 0) { 4606 ASSERT(sharedp->rsmsi_mapcnt == 0); 4607 rsmsharelock_release(seg); 4608 4609 /* clean up the 
shared data structure */ 4610 mutex_destroy(&sharedp->rsmsi_lock); 4611 cv_destroy(&sharedp->rsmsi_cv); 4612 kmem_free((void *)(sharedp), 4613 sizeof (rsm_import_share_t)); 4614 4615 } else { 4616 rsmsharelock_release(seg); 4617 } 4618 /* 4619 * The following needs to be done after any 4620 * rsmsharelock calls which use seg->s_share. 4621 */ 4622 seg->s_share = NULL; 4623 } 4624 4625 /* signal any waiting segment */ 4626 cv_broadcast(&seg->s_cv); 4627 4628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4629 "rsmseg_resume done:state=%d\n", seg->s_state)); 4630 return (retc); 4631 } 4632 4633 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4634 4635 /* Setup protections for remap */ 4636 maxprot = PROT_USER; 4637 if (seg->s_mode & RSM_PERM_READ) { 4638 maxprot |= PROT_READ; 4639 } 4640 if (seg->s_mode & RSM_PERM_WRITE) { 4641 maxprot |= PROT_WRITE; 4642 } 4643 4644 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4645 /* error in rsmpi connect or map during resume */ 4646 4647 /* remap to trash page */ 4648 ASSERT(seg->s_ckl != NULL); 4649 4650 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4651 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4652 remap_cookie, hdl->c_off, hdl->c_len, 4653 maxprot, 0, NULL); 4654 4655 DBG_PRINTF((category, RSM_ERR, 4656 "rsmseg_resume:remap=%d\n", e)); 4657 } 4658 4659 seg->s_handle.in = NULL; 4660 seg->s_state = RSM_STATE_DISCONNECT; 4661 4662 sharedp->rsmsi_refcnt--; 4663 4664 sharedp->rsmsi_mapcnt--; 4665 seg->s_mapinfo = NULL; 4666 4667 if (sharedp->rsmsi_refcnt == 0) { 4668 ASSERT(sharedp->rsmsi_mapcnt == 0); 4669 rsmsharelock_release(seg); 4670 4671 /* clean up the shared data structure */ 4672 mutex_destroy(&sharedp->rsmsi_lock); 4673 cv_destroy(&sharedp->rsmsi_cv); 4674 kmem_free((void *)(sharedp), 4675 sizeof (rsm_import_share_t)); 4676 4677 } else { 4678 rsmsharelock_release(seg); 4679 } 4680 /* 4681 * The following needs to be done after any 4682 * rsmsharelock calls which use seg->s_share. 
4683 */ 4684 seg->s_share = NULL; 4685 4686 /* signal any waiting segment */ 4687 cv_broadcast(&seg->s_cv); 4688 4689 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4690 "rsmseg_resume done:seg=%x,err=%d\n", 4691 seg->s_key, retc)); 4692 return (retc); 4693 4694 } 4695 4696 seg->s_handle.in = sharedp->rsmsi_handle; 4697 4698 if (seg->s_node == my_nodeid) { /* loopback */ 4699 ASSERT(seg->s_mapinfo == NULL); 4700 4701 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4702 e = devmap_umem_remap(hdl->c_dhp, 4703 rsm_dip, seg->s_cookie, 4704 hdl->c_off, hdl->c_len, 4705 maxprot, 0, NULL); 4706 4707 DBG_PRINTF((category, RSM_ERR, 4708 "rsmseg_resume:remap=%d\n", e)); 4709 } 4710 } else { /* remote exporter */ 4711 /* remap to the new rsmpi maps */ 4712 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4713 4714 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4715 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4716 &dev_offset, &maplen); 4717 e = devmap_devmem_remap(hdl->c_dhp, 4718 p->dip, p->dev_register, dev_offset, 4719 maplen, maxprot, 0, NULL); 4720 4721 DBG_PRINTF((category, RSM_ERR, 4722 "rsmseg_resume:remap=%d\n", e)); 4723 } 4724 } 4725 4726 rsmsharelock_release(seg); 4727 4728 seg->s_state = RSM_STATE_ACTIVE; 4729 cv_broadcast(&seg->s_cv); 4730 4731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4732 4733 return (retc); 4734 } 4735 4736 static int 4737 rsmsegshare_resume(rsmseg_t *seg) 4738 { 4739 int e = RSM_SUCCESS; 4740 adapter_t *adapter; 4741 rsm_import_share_t *sharedp; 4742 DBG_DEFINE(category, 4743 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4744 4745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4746 4747 ASSERT(rsmseglock_held(seg)); 4748 ASSERT(rsmsharelock_held(seg)); 4749 4750 sharedp = seg->s_share; 4751 4752 /* 4753 * If we are not in a xxxx_QUIESCE state that means shared 4754 * connect/mapping processing has been already been done 4755 * so return success. 
4756 */ 4757 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4758 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4759 return (RSM_SUCCESS); 4760 } 4761 4762 adapter = seg->s_adapter; 4763 4764 if (sharedp->rsmsi_node != my_nodeid) { 4765 rsm_addr_t hwaddr; 4766 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4767 4768 e = adapter->rsmpi_ops->rsm_connect( 4769 adapter->rsmpi_handle, hwaddr, 4770 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4771 4772 DBG_PRINTF((category, RSM_DEBUG, 4773 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4774 sharedp->rsmsi_segid, e)); 4775 4776 if (e != RSM_SUCCESS) { 4777 /* when do we send the NOT_IMPORTING message */ 4778 sharedp->rsmsi_handle = NULL; 4779 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4780 /* signal any waiting segment */ 4781 cv_broadcast(&sharedp->rsmsi_cv); 4782 return (e); 4783 } 4784 } 4785 4786 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4787 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4788 /* signal any waiting segment */ 4789 cv_broadcast(&sharedp->rsmsi_cv); 4790 return (e); 4791 } 4792 4793 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4794 4795 /* do the rsmpi map of the whole segment here */ 4796 if (sharedp->rsmsi_node != my_nodeid) { 4797 size_t mapped_len; 4798 rsm_mapinfo_t *p; 4799 4800 /* 4801 * We need to do rsmpi maps with <off, lens> identical to 4802 * the old mapinfo list because the segment mapping handles 4803 * dhp and such need the fragmentation of rsmpi maps to be 4804 * identical to what it was during the mmap of the segment 4805 */ 4806 p = sharedp->rsmsi_mapinfo; 4807 4808 while (p != NULL) { 4809 mapped_len = 0; 4810 4811 e = adapter->rsmpi_ops->rsm_map( 4812 sharedp->rsmsi_handle, p->start_offset, 4813 p->individual_len, &mapped_len, 4814 &p->dip, &p->dev_register, &p->dev_offset, 4815 NULL, NULL); 4816 4817 if (e != 0) { 4818 DBG_PRINTF((category, RSM_ERR, 4819 "rsmsegshare_resume: rsmpi map err=%d\n", 4820 e)); 4821 break; 
4822 } 4823 4824 if (mapped_len != p->individual_len) { 4825 DBG_PRINTF((category, RSM_ERR, 4826 "rsmsegshare_resume: rsmpi maplen" 4827 "< reqlen=%lx\n", mapped_len)); 4828 e = RSMERR_BAD_LENGTH; 4829 break; 4830 } 4831 4832 p = p->next; 4833 4834 } 4835 4836 4837 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4838 int err; 4839 /* Check if this is the first rsm_map */ 4840 if (p != sharedp->rsmsi_mapinfo) { 4841 /* 4842 * A single rsm_unmap undoes multiple rsm_maps. 4843 */ 4844 (void) seg->s_adapter->rsmpi_ops-> 4845 rsm_unmap(sharedp->rsmsi_handle); 4846 } 4847 4848 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4849 sharedp->rsmsi_mapinfo = NULL; 4850 4851 err = adapter->rsmpi_ops-> 4852 rsm_disconnect(sharedp->rsmsi_handle); 4853 4854 DBG_PRINTF((category, RSM_DEBUG, 4855 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4856 sharedp->rsmsi_segid, err)); 4857 4858 sharedp->rsmsi_handle = NULL; 4859 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4860 4861 /* signal the waiting segments */ 4862 cv_broadcast(&sharedp->rsmsi_cv); 4863 DBG_PRINTF((category, RSM_DEBUG, 4864 "rsmsegshare_resume done: rsmpi map err\n")); 4865 return (e); 4866 } 4867 } 4868 4869 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4870 4871 /* signal any waiting segment */ 4872 cv_broadcast(&sharedp->rsmsi_cv); 4873 4874 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4875 4876 return (e); 4877 } 4878 4879 /* 4880 * this is the routine that gets called by recv_taskq which is the 4881 * thread that processes messages that are flow-controlled. 
4882 */ 4883 static void 4884 rsm_intr_proc_deferred(void *arg) 4885 { 4886 path_t *path = (path_t *)arg; 4887 rsmipc_request_t *msg; 4888 rsmipc_msghdr_t *msghdr; 4889 rsm_node_id_t src_node; 4890 msgbuf_elem_t *head; 4891 int e; 4892 DBG_DEFINE(category, 4893 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4894 4895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4896 "rsm_intr_proc_deferred enter\n")); 4897 4898 mutex_enter(&path->mutex); 4899 4900 /* use the head of the msgbuf_queue */ 4901 head = rsmka_gethead_msgbuf(path); 4902 4903 mutex_exit(&path->mutex); 4904 4905 msg = (rsmipc_request_t *)&(head->msg); 4906 msghdr = (rsmipc_msghdr_t *)msg; 4907 4908 src_node = msghdr->rsmipc_src; 4909 4910 /* 4911 * messages that need to send a reply should check the message version 4912 * before processing the message. And all messages that need to 4913 * send a reply should be processed here by the worker thread. 4914 */ 4915 switch (msghdr->rsmipc_type) { 4916 case RSMIPC_MSG_SEGCONNECT: 4917 if (msghdr->rsmipc_version != RSM_VERSION) { 4918 rsmipc_reply_t reply; 4919 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4920 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4921 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4922 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4923 } else { 4924 rsm_intr_segconnect(src_node, msg); 4925 } 4926 break; 4927 case RSMIPC_MSG_DISCONNECT: 4928 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4929 break; 4930 case RSMIPC_MSG_SUSPEND: 4931 importer_suspend(src_node); 4932 break; 4933 case RSMIPC_MSG_SUSPEND_DONE: 4934 rsm_suspend_complete(src_node, 0); 4935 break; 4936 case RSMIPC_MSG_RESUME: 4937 importer_resume(src_node); 4938 break; 4939 default: 4940 ASSERT(0); 4941 } 4942 4943 mutex_enter(&path->mutex); 4944 4945 rsmka_dequeue_msgbuf(path); 4946 4947 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4948 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4949 path->procmsg_cnt++; 4950 4951 
ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4952 4953 /* No need to send credits if path is going down */ 4954 if ((path->state == RSMKA_PATH_ACTIVE) && 4955 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4956 /* 4957 * send credits and reset procmsg_cnt if success otherwise 4958 * credits will be sent after processing the next message 4959 */ 4960 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4961 if (e == 0) 4962 path->procmsg_cnt = 0; 4963 else 4964 DBG_PRINTF((category, RSM_ERR, 4965 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4966 } 4967 4968 /* 4969 * decrement the path refcnt since we incremented it in 4970 * rsm_intr_callback_dispatch 4971 */ 4972 PATH_RELE_NOLOCK(path); 4973 4974 mutex_exit(&path->mutex); 4975 4976 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4977 "rsm_intr_proc_deferred done\n")); 4978 } 4979 4980 /* 4981 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4982 */ 4983 static void 4984 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4985 rsm_intr_hand_arg_t arg) 4986 { 4987 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4988 path_t *path; 4989 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4990 DBG_DEFINE(category, 4991 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4992 4993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4994 "rsm_intr_callback_dispatch enter\n")); 4995 ASSERT(data && hdlr_argp); 4996 4997 /* look up the path - incr the path refcnt */ 4998 path = rsm_find_path(hdlr_argp->adapter_name, 4999 hdlr_argp->adapter_instance, src_hwaddr); 5000 5001 /* the path has been removed - drop this message */ 5002 if (path == NULL) { 5003 DBG_PRINTF((category, RSM_DEBUG, 5004 "rsm_intr_callback_dispatch done: msg dropped\n")); 5005 return; 5006 } 5007 /* the path is not active - don't accept new messages */ 5008 if (path->state != RSMKA_PATH_ACTIVE) { 5009 PATH_RELE_NOLOCK(path); 5010 mutex_exit(&path->mutex); 5011 DBG_PRINTF((category, RSM_DEBUG, 5012 
"rsm_intr_callback_dispatch done: msg dropped" 5013 " path=%lx !ACTIVE\n", path)); 5014 return; 5015 } 5016 5017 /* 5018 * Check if this message was sent to an older incarnation 5019 * of the path/sendq. 5020 */ 5021 if (path->local_incn != msghdr->rsmipc_incn) { 5022 /* decrement the refcnt */ 5023 PATH_RELE_NOLOCK(path); 5024 mutex_exit(&path->mutex); 5025 DBG_PRINTF((category, RSM_DEBUG, 5026 "rsm_intr_callback_dispatch done: old incn %lld\n", 5027 msghdr->rsmipc_incn)); 5028 return; 5029 } 5030 5031 /* copy and enqueue msg on the path's msgbuf queue */ 5032 rsmka_enqueue_msgbuf(path, data); 5033 5034 /* 5035 * schedule task to process messages - ignore retval from 5036 * task_dispatch because we sender cannot send more than 5037 * what receiver can handle. 5038 */ 5039 (void) taskq_dispatch(path->recv_taskq, 5040 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5041 5042 mutex_exit(&path->mutex); 5043 5044 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5045 "rsm_intr_callback_dispatch done\n")); 5046 } 5047 5048 /* 5049 * This procedure is called from rsm_srv_func when a remote node creates a 5050 * a send queue. This event is used as a hint that an earlier failed 5051 * attempt to create a send queue to that remote node may now succeed and 5052 * should be retried. Indication of an earlier failed attempt is provided 5053 * by the RSMKA_SQCREATE_PENDING flag. 
5054 */ 5055 static void 5056 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5057 { 5058 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5059 path_t *path; 5060 DBG_DEFINE(category, 5061 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5062 5063 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5064 "rsm_sqcreateop_callback enter\n")); 5065 5066 /* look up the path - incr the path refcnt */ 5067 path = rsm_find_path(hdlr_argp->adapter_name, 5068 hdlr_argp->adapter_instance, src_hwaddr); 5069 5070 if (path == NULL) { 5071 DBG_PRINTF((category, RSM_DEBUG, 5072 "rsm_sqcreateop_callback done: no path\n")); 5073 return; 5074 } 5075 5076 if ((path->state == RSMKA_PATH_UP) && 5077 (path->flags & RSMKA_SQCREATE_PENDING)) { 5078 /* 5079 * previous attempt to create sendq had failed, retry 5080 * it and move to RSMKA_PATH_ACTIVE state if successful. 5081 * the refcnt will be decremented in the do_deferred_work 5082 */ 5083 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5084 } else { 5085 /* decrement the refcnt */ 5086 PATH_RELE_NOLOCK(path); 5087 } 5088 mutex_exit(&path->mutex); 5089 5090 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5091 "rsm_sqcreateop_callback done\n")); 5092 } 5093 5094 static void 5095 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5096 { 5097 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5098 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5099 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5100 rsm_node_id_t src_node; 5101 DBG_DEFINE(category, 5102 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5103 5104 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5105 "src=%d, type=%d\n", msghdr->rsmipc_src, 5106 msghdr->rsmipc_type)); 5107 5108 /* 5109 * Check for the version number in the msg header. If it is not 5110 * RSM_VERSION, drop the message. 
In the future, we need to manage 5111 * incompatible version numbers in some way 5112 */ 5113 if (msghdr->rsmipc_version != RSM_VERSION) { 5114 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5115 /* 5116 * Drop requests that don't have a reply right here 5117 * Request with reply will send a BAD_VERSION reply 5118 * when they get processed by the worker thread. 5119 */ 5120 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5121 return; 5122 } 5123 5124 } 5125 5126 src_node = msghdr->rsmipc_src; 5127 5128 switch (msghdr->rsmipc_type) { 5129 case RSMIPC_MSG_SEGCONNECT: 5130 case RSMIPC_MSG_DISCONNECT: 5131 case RSMIPC_MSG_SUSPEND: 5132 case RSMIPC_MSG_SUSPEND_DONE: 5133 case RSMIPC_MSG_RESUME: 5134 /* 5135 * These message types are handled by a worker thread using 5136 * the flow-control algorithm. 5137 * Any message processing that does one or more of the 5138 * following should be handled in a worker thread. 5139 * - allocates resources and might sleep 5140 * - makes RSMPI calls down to the interconnect driver 5141 * this by defn include requests with reply. 
5142 * - takes a long duration of time 5143 */ 5144 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5145 break; 5146 case RSMIPC_MSG_NOTIMPORTING: 5147 importer_list_rm(src_node, msg->rsmipc_key, 5148 msg->rsmipc_segment_cookie); 5149 break; 5150 case RSMIPC_MSG_SQREADY: 5151 rsm_proc_sqready(data, src_hwaddr, arg); 5152 break; 5153 case RSMIPC_MSG_SQREADY_ACK: 5154 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5155 break; 5156 case RSMIPC_MSG_CREDIT: 5157 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5158 break; 5159 case RSMIPC_MSG_REPLY: 5160 rsm_intr_reply(msghdr); 5161 break; 5162 case RSMIPC_MSG_BELL: 5163 rsm_intr_event(msg); 5164 break; 5165 case RSMIPC_MSG_IMPORTING: 5166 importer_list_add(src_node, msg->rsmipc_key, 5167 msg->rsmipc_adapter_hwaddr, 5168 msg->rsmipc_segment_cookie); 5169 break; 5170 case RSMIPC_MSG_REPUBLISH: 5171 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5172 break; 5173 default: 5174 DBG_PRINTF((category, RSM_DEBUG, 5175 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5176 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5177 } 5178 5179 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5180 5181 } 5182 5183 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5184 rsm_intr_q_op_t opcode, rsm_addr_t src, 5185 void *data, size_t size, rsm_intr_hand_arg_t arg) 5186 { 5187 DBG_DEFINE(category, 5188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5189 5190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5191 5192 switch (opcode) { 5193 case RSM_INTR_Q_OP_CREATE: 5194 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5195 rsm_sqcreateop_callback(src, arg); 5196 break; 5197 case RSM_INTR_Q_OP_DESTROY: 5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5199 break; 5200 case RSM_INTR_Q_OP_RECEIVE: 5201 rsm_intr_callback(data, src, arg); 5202 break; 5203 default: 5204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5205 "rsm_srv_func: 
unknown opcode = %x\n", opcode)); 5206 } 5207 5208 chd = chd; 5209 size = size; 5210 5211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5212 5213 return (RSM_INTR_HAND_CLAIMED); 5214 } 5215 5216 /* *************************** IPC slots ************************* */ 5217 static rsmipc_slot_t * 5218 rsmipc_alloc() 5219 { 5220 int i; 5221 rsmipc_slot_t *slot; 5222 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5223 5224 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5225 5226 /* try to find a free slot, if not wait */ 5227 mutex_enter(&rsm_ipc.lock); 5228 5229 while (rsm_ipc.count == 0) { 5230 rsm_ipc.wanted = 1; 5231 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5232 } 5233 5234 /* An empty slot is available, find it */ 5235 slot = &rsm_ipc.slots[0]; 5236 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 5237 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5238 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5239 break; 5240 } 5241 } 5242 5243 ASSERT(i < RSMIPC_SZ); 5244 rsm_ipc.count--; /* one less is available */ 5245 rsm_ipc.sequence++; /* new sequence */ 5246 5247 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5248 slot->rsmipc_cookie.ic.index = (uint_t)i; 5249 5250 mutex_exit(&rsm_ipc.lock); 5251 5252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5253 5254 return (slot); 5255 } 5256 5257 static void 5258 rsmipc_free(rsmipc_slot_t *slot) 5259 { 5260 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5261 5262 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5263 5264 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5265 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5266 5267 mutex_enter(&rsm_ipc.lock); 5268 5269 RSMIPC_SET(slot, RSMIPC_FREE); 5270 5271 slot->rsmipc_cookie.ic.sequence = 0; 5272 5273 mutex_exit(&slot->rsmipc_lock); 5274 rsm_ipc.count++; 5275 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5276 if (rsm_ipc.wanted) { 5277 rsm_ipc.wanted = 0; 5278 cv_broadcast(&rsm_ipc.cv); 5279 } 5280 5281 
mutex_exit(&rsm_ipc.lock); 5282 5283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n")); 5284 } 5285 5286 static int 5287 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply) 5288 { 5289 int e = 0; 5290 int credit_check = 0; 5291 int retry_cnt = 0; 5292 int min_retry_cnt = 10; 5293 clock_t ticks; 5294 rsm_send_t is; 5295 rsmipc_slot_t *rslot; 5296 adapter_t *adapter; 5297 path_t *path; 5298 sendq_token_t *sendq_token; 5299 sendq_token_t *used_sendq_token = NULL; 5300 rsm_send_q_handle_t ipc_handle; 5301 DBG_DEFINE(category, 5302 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5303 5304 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d", 5305 dest)); 5306 5307 /* 5308 * Check if this is a local case 5309 */ 5310 if (dest == my_nodeid) { 5311 switch (req->rsmipc_hdr.rsmipc_type) { 5312 case RSMIPC_MSG_SEGCONNECT: 5313 reply->rsmipc_status = (short)rsmsegacl_validate( 5314 req, dest, reply); 5315 break; 5316 case RSMIPC_MSG_BELL: 5317 req->rsmipc_hdr.rsmipc_src = dest; 5318 rsm_intr_event(req); 5319 break; 5320 case RSMIPC_MSG_IMPORTING: 5321 importer_list_add(dest, req->rsmipc_key, 5322 req->rsmipc_adapter_hwaddr, 5323 req->rsmipc_segment_cookie); 5324 break; 5325 case RSMIPC_MSG_NOTIMPORTING: 5326 importer_list_rm(dest, req->rsmipc_key, 5327 req->rsmipc_segment_cookie); 5328 break; 5329 case RSMIPC_MSG_REPUBLISH: 5330 importer_update(dest, req->rsmipc_key, 5331 req->rsmipc_perm); 5332 break; 5333 case RSMIPC_MSG_SUSPEND: 5334 importer_suspend(dest); 5335 break; 5336 case RSMIPC_MSG_SUSPEND_DONE: 5337 rsm_suspend_complete(dest, 0); 5338 break; 5339 case RSMIPC_MSG_RESUME: 5340 importer_resume(dest); 5341 break; 5342 default: 5343 ASSERT(0); 5344 } 5345 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5346 "rsmipc_send done\n")); 5347 return (0); 5348 } 5349 5350 if (dest >= MAX_NODES) { 5351 DBG_PRINTF((category, RSM_ERR, 5352 "rsm: rsmipc_send bad node number %x\n", dest)); 5353 return 
(RSMERR_REMOTE_NODE_UNREACHABLE); 5354 } 5355 5356 /* 5357 * Oh boy! we are going remote. 5358 */ 5359 5360 /* 5361 * identify if we need to have credits to send this message 5362 * - only selected requests are flow controlled 5363 */ 5364 if (req != NULL) { 5365 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5366 "rsmipc_send:request type=%d\n", 5367 req->rsmipc_hdr.rsmipc_type)); 5368 5369 switch (req->rsmipc_hdr.rsmipc_type) { 5370 case RSMIPC_MSG_SEGCONNECT: 5371 case RSMIPC_MSG_DISCONNECT: 5372 case RSMIPC_MSG_IMPORTING: 5373 case RSMIPC_MSG_SUSPEND: 5374 case RSMIPC_MSG_SUSPEND_DONE: 5375 case RSMIPC_MSG_RESUME: 5376 credit_check = 1; 5377 break; 5378 default: 5379 credit_check = 0; 5380 } 5381 } 5382 5383 again: 5384 if (retry_cnt++ == min_retry_cnt) { 5385 /* backoff before further retries for 10ms */ 5386 delay(drv_usectohz(10000)); 5387 retry_cnt = 0; /* reset retry_cnt */ 5388 } 5389 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token); 5390 if (sendq_token == NULL) { 5391 DBG_PRINTF((category, RSM_ERR, 5392 "rsm: rsmipc_send no device to reach node %d\n", dest)); 5393 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5394 } 5395 5396 if ((sendq_token == used_sendq_token) && 5397 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) || 5398 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) { 5399 rele_sendq_token(sendq_token); 5400 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e)); 5401 return (RSMERR_CONN_ABORTED); 5402 } else 5403 used_sendq_token = sendq_token; 5404 5405 /* lint -save -e413 */ 5406 path = SQ_TOKEN_TO_PATH(sendq_token); 5407 adapter = path->local_adapter; 5408 /* lint -restore */ 5409 ipc_handle = sendq_token->rsmpi_sendq_handle; 5410 5411 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5412 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle)); 5413 5414 if (reply == NULL) { 5415 /* Send request without ack */ 5416 /* 5417 * Set the rsmipc_version number in the msghdr for KA 5418 * communication versioning 5419 */ 5420 
req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5421 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5422 /* 5423 * remote endpoints incn should match the value in our 5424 * path's remote_incn field. No need to grab any lock 5425 * since we have refcnted the path in rsmka_get_sendq_token 5426 */ 5427 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5428 5429 is.is_data = (void *)req; 5430 is.is_size = sizeof (*req); 5431 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5432 is.is_wait = 0; 5433 5434 if (credit_check) { 5435 mutex_enter(&path->mutex); 5436 /* 5437 * wait till we recv credits or path goes down. If path 5438 * goes down rsm_send will fail and we handle the error 5439 * then 5440 */ 5441 while ((sendq_token->msgbuf_avail == 0) && 5442 (path->state == RSMKA_PATH_ACTIVE)) { 5443 e = cv_wait_sig(&sendq_token->sendq_cv, 5444 &path->mutex); 5445 if (e == 0) { 5446 mutex_exit(&path->mutex); 5447 no_reply_cnt++; 5448 rele_sendq_token(sendq_token); 5449 DBG_PRINTF((category, RSM_DEBUG, 5450 "rsmipc_send done: " 5451 "cv_wait INTERRUPTED")); 5452 return (RSMERR_INTERRUPTED); 5453 } 5454 } 5455 5456 /* 5457 * path is not active retry on another path. 
5458 */ 5459 if (path->state != RSMKA_PATH_ACTIVE) { 5460 mutex_exit(&path->mutex); 5461 rele_sendq_token(sendq_token); 5462 e = RSMERR_CONN_ABORTED; 5463 DBG_PRINTF((category, RSM_ERR, 5464 "rsm: rsmipc_send: path !ACTIVE")); 5465 goto again; 5466 } 5467 5468 ASSERT(sendq_token->msgbuf_avail > 0); 5469 5470 /* 5471 * reserve a msgbuf 5472 */ 5473 sendq_token->msgbuf_avail--; 5474 5475 mutex_exit(&path->mutex); 5476 5477 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5478 NULL); 5479 5480 if (e != RSM_SUCCESS) { 5481 mutex_enter(&path->mutex); 5482 /* 5483 * release the reserved msgbuf since 5484 * the send failed 5485 */ 5486 sendq_token->msgbuf_avail++; 5487 cv_broadcast(&sendq_token->sendq_cv); 5488 mutex_exit(&path->mutex); 5489 } 5490 } else 5491 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5492 NULL); 5493 5494 no_reply_cnt++; 5495 rele_sendq_token(sendq_token); 5496 if (e != RSM_SUCCESS) { 5497 DBG_PRINTF((category, RSM_ERR, 5498 "rsm: rsmipc_send no reply send" 5499 " err = %d no reply count = %d\n", 5500 e, no_reply_cnt)); 5501 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5502 e != RSMERR_BAD_BARRIER_HNDL); 5503 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5504 goto again; 5505 } else { 5506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5507 "rsmipc_send done\n")); 5508 return (e); 5509 } 5510 5511 } 5512 5513 if (req == NULL) { 5514 /* Send reply - No flow control is done for reply */ 5515 /* 5516 * Set the version in the msg header for KA communication 5517 * versioning 5518 */ 5519 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5520 reply->rsmipc_hdr.rsmipc_src = my_nodeid; 5521 /* incn number is not used for reply msgs currently */ 5522 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5523 5524 is.is_data = (void *)reply; 5525 is.is_size = sizeof (*reply); 5526 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5527 is.is_wait = 0; 5528 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5529 rele_sendq_token(sendq_token); 5530 if (e != 
RSM_SUCCESS) { 5531 DBG_PRINTF((category, RSM_ERR, 5532 "rsm: rsmipc_send reply send" 5533 " err = %d\n", e)); 5534 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5535 goto again; 5536 } else { 5537 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5538 "rsmipc_send done\n")); 5539 return (e); 5540 } 5541 } 5542 5543 /* Reply needed */ 5544 rslot = rsmipc_alloc(); /* allocate a new ipc slot */ 5545 5546 mutex_enter(&rslot->rsmipc_lock); 5547 5548 rslot->rsmipc_data = (void *)reply; 5549 RSMIPC_SET(rslot, RSMIPC_PENDING); 5550 5551 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) { 5552 /* 5553 * Set the rsmipc_version number in the msghdr for KA 5554 * communication versioning 5555 */ 5556 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5557 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5558 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie; 5559 /* 5560 * remote endpoints incn should match the value in our 5561 * path's remote_incn field. No need to grab any lock 5562 * since we have refcnted the path in rsmka_get_sendq_token 5563 */ 5564 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5565 5566 is.is_data = (void *)req; 5567 is.is_size = sizeof (*req); 5568 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5569 is.is_wait = 0; 5570 if (credit_check) { 5571 5572 mutex_enter(&path->mutex); 5573 /* 5574 * wait till we recv credits or path goes down. If path 5575 * goes down rsm_send will fail and we handle the error 5576 * then. 5577 */ 5578 while ((sendq_token->msgbuf_avail == 0) && 5579 (path->state == RSMKA_PATH_ACTIVE)) { 5580 e = cv_wait_sig(&sendq_token->sendq_cv, 5581 &path->mutex); 5582 if (e == 0) { 5583 mutex_exit(&path->mutex); 5584 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5585 rsmipc_free(rslot); 5586 rele_sendq_token(sendq_token); 5587 DBG_PRINTF((category, RSM_DEBUG, 5588 "rsmipc_send done: " 5589 "cv_wait INTERRUPTED")); 5590 return (RSMERR_INTERRUPTED); 5591 } 5592 } 5593 5594 /* 5595 * path is not active retry on another path. 
5596 */ 5597 if (path->state != RSMKA_PATH_ACTIVE) { 5598 mutex_exit(&path->mutex); 5599 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5600 rsmipc_free(rslot); 5601 rele_sendq_token(sendq_token); 5602 e = RSMERR_CONN_ABORTED; 5603 DBG_PRINTF((category, RSM_ERR, 5604 "rsm: rsmipc_send: path !ACTIVE")); 5605 goto again; 5606 } 5607 5608 ASSERT(sendq_token->msgbuf_avail > 0); 5609 5610 /* 5611 * reserve a msgbuf 5612 */ 5613 sendq_token->msgbuf_avail--; 5614 5615 mutex_exit(&path->mutex); 5616 5617 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5618 NULL); 5619 5620 if (e != RSM_SUCCESS) { 5621 mutex_enter(&path->mutex); 5622 /* 5623 * release the reserved msgbuf since 5624 * the send failed 5625 */ 5626 sendq_token->msgbuf_avail++; 5627 cv_broadcast(&sendq_token->sendq_cv); 5628 mutex_exit(&path->mutex); 5629 } 5630 } else 5631 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5632 NULL); 5633 5634 if (e != RSM_SUCCESS) { 5635 DBG_PRINTF((category, RSM_ERR, 5636 "rsm: rsmipc_send rsmpi send err = %d\n", e)); 5637 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5638 rsmipc_free(rslot); 5639 rele_sendq_token(sendq_token); 5640 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5641 goto again; 5642 } 5643 5644 /* wait for a reply signal, a SIGINT, or 5 sec. 
timeout */ 5645 (void) drv_getparm(LBOLT, &ticks); 5646 ticks += drv_usectohz(5000000); 5647 e = cv_timedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock, 5648 ticks); 5649 if (e < 0) { 5650 /* timed out - retry */ 5651 e = RSMERR_TIMEOUT; 5652 } else if (e == 0) { 5653 /* signalled - return error */ 5654 e = RSMERR_INTERRUPTED; 5655 break; 5656 } else { 5657 e = RSM_SUCCESS; 5658 } 5659 } 5660 5661 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5662 rsmipc_free(rslot); 5663 rele_sendq_token(sendq_token); 5664 5665 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e)); 5666 return (e); 5667 } 5668 5669 static int 5670 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie) 5671 { 5672 rsmipc_request_t request; 5673 5674 /* 5675 * inform the exporter to delete this importer 5676 */ 5677 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 5678 request.rsmipc_key = segid; 5679 request.rsmipc_segment_cookie = cookie; 5680 return (rsmipc_send(dest, &request, RSM_NO_REPLY)); 5681 } 5682 5683 static void 5684 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl, 5685 int acl_len, rsm_permission_t default_permission) 5686 { 5687 int i; 5688 importing_token_t *token; 5689 rsmipc_request_t request; 5690 republish_token_t *republish_list = NULL; 5691 republish_token_t *rp; 5692 rsm_permission_t permission; 5693 int index; 5694 5695 /* 5696 * send the new access mode to all the nodes that have imported 5697 * this segment. 5698 * If the new acl does not have a node that was present in 5699 * the old acl a access permission of 0 is sent. 
5700 */ 5701 5702 index = rsmhash(segid); 5703 5704 /* 5705 * create a list of node/permissions to send the republish message 5706 */ 5707 mutex_enter(&importer_list.lock); 5708 5709 token = importer_list.bucket[index]; 5710 while (token != NULL) { 5711 if (segid == token->key) { 5712 permission = default_permission; 5713 5714 for (i = 0; i < acl_len; i++) { 5715 if (token->importing_node == acl[i].ae_node) { 5716 permission = acl[i].ae_permission; 5717 break; 5718 } 5719 } 5720 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP); 5721 5722 rp->key = segid; 5723 rp->importing_node = token->importing_node; 5724 rp->permission = permission; 5725 rp->next = republish_list; 5726 republish_list = rp; 5727 } 5728 token = token->next; 5729 } 5730 5731 mutex_exit(&importer_list.lock); 5732 5733 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH; 5734 request.rsmipc_key = segid; 5735 5736 while (republish_list != NULL) { 5737 request.rsmipc_perm = republish_list->permission; 5738 (void) rsmipc_send(republish_list->importing_node, 5739 &request, RSM_NO_REPLY); 5740 rp = republish_list; 5741 republish_list = republish_list->next; 5742 kmem_free(rp, sizeof (republish_token_t)); 5743 } 5744 } 5745 5746 static void 5747 rsm_send_suspend() 5748 { 5749 int i, e; 5750 rsmipc_request_t request; 5751 list_element_t *tokp; 5752 list_element_t *head = NULL; 5753 importing_token_t *token; 5754 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5755 "rsm_send_suspend enter\n")); 5756 5757 /* 5758 * create a list of node to send the suspend message 5759 * 5760 * Currently the whole importer list is scanned and we obtain 5761 * all the nodes - this basically gets all nodes that at least 5762 * import one segment from the local node. 5763 * 5764 * no need to grab the rsm_suspend_list lock here since we are 5765 * single threaded when suspend is called. 
5766 */ 5767 5768 mutex_enter(&importer_list.lock); 5769 for (i = 0; i < rsm_hash_size; i++) { 5770 5771 token = importer_list.bucket[i]; 5772 5773 while (token != NULL) { 5774 5775 tokp = head; 5776 5777 /* 5778 * make sure that the token's node 5779 * is not already on the suspend list 5780 */ 5781 while (tokp != NULL) { 5782 if (tokp->nodeid == token->importing_node) { 5783 break; 5784 } 5785 tokp = tokp->next; 5786 } 5787 5788 if (tokp == NULL) { /* not in suspend list */ 5789 tokp = kmem_zalloc(sizeof (list_element_t), 5790 KM_SLEEP); 5791 tokp->nodeid = token->importing_node; 5792 tokp->next = head; 5793 head = tokp; 5794 } 5795 5796 token = token->next; 5797 } 5798 } 5799 mutex_exit(&importer_list.lock); 5800 5801 if (head == NULL) { /* no importers so go ahead and quiesce segments */ 5802 exporter_quiesce(); 5803 return; 5804 } 5805 5806 mutex_enter(&rsm_suspend_list.list_lock); 5807 ASSERT(rsm_suspend_list.list_head == NULL); 5808 /* 5809 * update the suspend list righaway so that if a node dies the 5810 * pathmanager can set the NODE dead flag 5811 */ 5812 rsm_suspend_list.list_head = head; 5813 mutex_exit(&rsm_suspend_list.list_lock); 5814 5815 tokp = head; 5816 5817 while (tokp != NULL) { 5818 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND; 5819 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY); 5820 /* 5821 * Error in rsmipc_send currently happens due to inaccessibility 5822 * of the remote node. 5823 */ 5824 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */ 5825 tokp->flags |= RSM_SUSPEND_ACKPENDING; 5826 } 5827 5828 tokp = tokp->next; 5829 } 5830 5831 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5832 "rsm_send_suspend done\n")); 5833 5834 } 5835 5836 static void 5837 rsm_send_resume() 5838 { 5839 rsmipc_request_t request; 5840 list_element_t *elem, *head; 5841 5842 /* 5843 * save the suspend list so that we know where to send 5844 * the resume messages and make the suspend list head 5845 * NULL. 
5846 */ 5847 mutex_enter(&rsm_suspend_list.list_lock); 5848 head = rsm_suspend_list.list_head; 5849 rsm_suspend_list.list_head = NULL; 5850 mutex_exit(&rsm_suspend_list.list_lock); 5851 5852 while (head != NULL) { 5853 elem = head; 5854 head = head->next; 5855 5856 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME; 5857 5858 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY); 5859 5860 kmem_free((void *)elem, sizeof (list_element_t)); 5861 5862 } 5863 5864 } 5865 5866 /* 5867 * This function takes path and sends a message using the sendq 5868 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK 5869 * and RSMIPC_MSG_CREDIT are sent using this function. 5870 */ 5871 int 5872 rsmipc_send_controlmsg(path_t *path, int msgtype) 5873 { 5874 int e; 5875 int retry_cnt = 0; 5876 int min_retry_cnt = 10; 5877 clock_t timeout; 5878 adapter_t *adapter; 5879 rsm_send_t is; 5880 rsm_send_q_handle_t ipc_handle; 5881 rsmipc_controlmsg_t msg; 5882 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL); 5883 5884 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5885 "rsmipc_send_controlmsg enter\n")); 5886 5887 ASSERT(MUTEX_HELD(&path->mutex)); 5888 5889 adapter = path->local_adapter; 5890 5891 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx " 5892 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype, 5893 my_nodeid, adapter->hwaddr, path->remote_node, 5894 path->remote_hwaddr, path->procmsg_cnt)); 5895 5896 if (path->state != RSMKA_PATH_ACTIVE) { 5897 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5898 "rsmipc_send_controlmsg done: ! 
RSMKA_PATH_ACTIVE")); 5899 return (1); 5900 } 5901 5902 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5903 5904 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5905 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5906 msg.rsmipc_hdr.rsmipc_type = msgtype; 5907 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5908 5909 if (msgtype == RSMIPC_MSG_CREDIT) 5910 msg.rsmipc_credits = path->procmsg_cnt; 5911 5912 msg.rsmipc_local_incn = path->local_incn; 5913 5914 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5915 /* incr the sendq, path refcnt */ 5916 PATH_HOLD_NOLOCK(path); 5917 SENDQ_TOKEN_HOLD(path); 5918 5919 do { 5920 /* drop the path lock before doing the rsm_send */ 5921 mutex_exit(&path->mutex); 5922 5923 is.is_data = (void *)&msg; 5924 is.is_size = sizeof (msg); 5925 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5926 is.is_wait = 0; 5927 5928 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5929 5930 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5931 e != RSMERR_BAD_BARRIER_HNDL); 5932 5933 mutex_enter(&path->mutex); 5934 5935 if (e == RSM_SUCCESS) { 5936 break; 5937 } 5938 /* error counter for statistics */ 5939 atomic_add_64(&rsm_ctrlmsg_errcnt, 1); 5940 5941 DBG_PRINTF((category, RSM_ERR, 5942 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5943 5944 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5945 timeout = ddi_get_lbolt() + drv_usectohz(10000); 5946 (void) cv_timedwait(&path->sendq_token.sendq_cv, 5947 &path->mutex, timeout); 5948 retry_cnt = 0; 5949 } 5950 } while (path->state == RSMKA_PATH_ACTIVE); 5951 5952 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5953 SENDQ_TOKEN_RELE(path); 5954 PATH_RELE_NOLOCK(path); 5955 5956 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5957 "rsmipc_send_controlmsg done=%d", e)); 5958 return (e); 5959 } 5960 5961 /* 5962 * Called from rsm_force_unload and path_importer_disconnect. 
The memory 5963 * mapping for the imported segment is removed and the segment is 5964 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5965 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5966 * and FALSE from rsm_rebind. 5967 * 5968 * When subsequent accesses cause page faulting, the dummy page is mapped 5969 * to resolve the fault, and the mapping generation number is incremented 5970 * so that the application can be notified on a close barrier operation. 5971 * 5972 * It is important to note that the caller of rsmseg_unload is responsible for 5973 * acquiring the segment lock before making a call to rsmseg_unload. This is 5974 * required to make the caller and rsmseg_unload thread safe. The segment lock 5975 * will be released by the rsmseg_unload function. 5976 */ 5977 void 5978 rsmseg_unload(rsmseg_t *im_seg) 5979 { 5980 rsmcookie_t *hdl; 5981 void *shared_cookie; 5982 rsmipc_request_t request; 5983 uint_t maxprot; 5984 5985 DBG_DEFINE(category, 5986 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5987 5988 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5989 5990 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5991 5992 /* wait until segment leaves the mapping state */ 5993 while (im_seg->s_state == RSM_STATE_MAPPING) 5994 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 5995 /* 5996 * An unload is only necessary if the segment is connected. However, 5997 * if the segment was on the import list in state RSM_STATE_CONNECTING 5998 * then a connection was in progress. Change to RSM_STATE_NEW 5999 * here to cause an early exit from the connection process. 
6000 */ 6001 if (im_seg->s_state == RSM_STATE_NEW) { 6002 rsmseglock_release(im_seg); 6003 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6004 "rsmseg_unload done: RSM_STATE_NEW\n")); 6005 return; 6006 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6007 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6008 rsmsharelock_acquire(im_seg); 6009 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6010 rsmsharelock_release(im_seg); 6011 rsmseglock_release(im_seg); 6012 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6013 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6014 return; 6015 } 6016 6017 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6018 if (im_seg->s_ckl != NULL) { 6019 int e; 6020 /* Setup protections for remap */ 6021 maxprot = PROT_USER; 6022 if (im_seg->s_mode & RSM_PERM_READ) { 6023 maxprot |= PROT_READ; 6024 } 6025 if (im_seg->s_mode & RSM_PERM_WRITE) { 6026 maxprot |= PROT_WRITE; 6027 } 6028 hdl = im_seg->s_ckl; 6029 for (; hdl != NULL; hdl = hdl->c_next) { 6030 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6031 remap_cookie, 6032 hdl->c_off, hdl->c_len, 6033 maxprot, 0, NULL); 6034 6035 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6036 "remap returns %d\n", e)); 6037 } 6038 } 6039 6040 (void) rsm_closeconnection(im_seg, &shared_cookie); 6041 6042 if (shared_cookie != NULL) { 6043 /* 6044 * inform the exporting node so this import 6045 * can be deleted from the list of importers. 
6046 */ 6047 request.rsmipc_hdr.rsmipc_type = 6048 RSMIPC_MSG_NOTIMPORTING; 6049 request.rsmipc_key = im_seg->s_segid; 6050 request.rsmipc_segment_cookie = shared_cookie; 6051 rsmseglock_release(im_seg); 6052 (void) rsmipc_send(im_seg->s_node, &request, 6053 RSM_NO_REPLY); 6054 } else { 6055 rsmseglock_release(im_seg); 6056 } 6057 } 6058 else 6059 rsmseglock_release(im_seg); 6060 6061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6062 6063 } 6064 6065 /* ****************************** Importer Calls ************************ */ 6066 6067 static int 6068 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6069 { 6070 int shifts = 0; 6071 6072 if (crgetuid(cr) != owner) { 6073 shifts += 3; 6074 if (!groupmember(group, cr)) 6075 shifts += 3; 6076 } 6077 6078 mode &= ~(perm << shifts); 6079 6080 if (mode == 0) 6081 return (0); 6082 6083 return (secpolicy_rsm_access(cr, owner, mode)); 6084 } 6085 6086 6087 static int 6088 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6089 intptr_t dataptr, int mode) 6090 { 6091 int e; 6092 int recheck_state = 0; 6093 void *shared_cookie; 6094 rsmipc_request_t request; 6095 rsmipc_reply_t reply; 6096 rsm_permission_t access; 6097 adapter_t *adapter; 6098 rsm_addr_t addr = 0; 6099 rsm_import_share_t *sharedp; 6100 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6101 6102 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6103 6104 adapter = rsm_getadapter(msg, mode); 6105 if (adapter == NULL) { 6106 DBG_PRINTF((category, RSM_ERR, 6107 "rsm_connect done:ENODEV adapter=NULL\n")); 6108 return (RSMERR_CTLR_NOT_PRESENT); 6109 } 6110 6111 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6112 rsmka_release_adapter(adapter); 6113 DBG_PRINTF((category, RSM_ERR, 6114 "rsm_connect done:ENODEV loopback\n")); 6115 return (RSMERR_CTLR_NOT_PRESENT); 6116 } 6117 6118 6119 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6120 
ASSERT(seg->s_state == RSM_STATE_NEW); 6121 6122 /* 6123 * Translate perm to access 6124 */ 6125 if (msg->perm & ~RSM_PERM_RDWR) { 6126 rsmka_release_adapter(adapter); 6127 DBG_PRINTF((category, RSM_ERR, 6128 "rsm_connect done:EINVAL invalid perms\n")); 6129 return (RSMERR_BAD_PERMS); 6130 } 6131 access = 0; 6132 if (msg->perm & RSM_PERM_READ) 6133 access |= RSM_ACCESS_READ; 6134 if (msg->perm & RSM_PERM_WRITE) 6135 access |= RSM_ACCESS_WRITE; 6136 6137 seg->s_node = msg->nodeid; 6138 6139 /* 6140 * Adding to the import list locks the segment; release the segment 6141 * lock so we can get the reply for the send. 6142 */ 6143 e = rsmimport_add(seg, msg->key); 6144 if (e) { 6145 rsmka_release_adapter(adapter); 6146 DBG_PRINTF((category, RSM_ERR, 6147 "rsm_connect done:rsmimport_add failed %d\n", e)); 6148 return (e); 6149 } 6150 seg->s_state = RSM_STATE_CONNECTING; 6151 6152 /* 6153 * Set the s_adapter field here so as to have a valid comparison of 6154 * the adapter and the s_adapter value during rsmshare_get. 
For 6155 * any error, set s_adapter to NULL before doing a release_adapter 6156 */ 6157 seg->s_adapter = adapter; 6158 6159 rsmseglock_release(seg); 6160 6161 /* 6162 * get the pointer to the shared data structure; the 6163 * shared data is locked and refcount has been incremented 6164 */ 6165 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6166 6167 ASSERT(rsmsharelock_held(seg)); 6168 6169 do { 6170 /* flag indicates whether we need to recheck the state */ 6171 recheck_state = 0; 6172 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6173 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6174 switch (sharedp->rsmsi_state) { 6175 case RSMSI_STATE_NEW: 6176 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6177 break; 6178 case RSMSI_STATE_CONNECTING: 6179 /* FALLTHRU */ 6180 case RSMSI_STATE_CONN_QUIESCE: 6181 /* FALLTHRU */ 6182 case RSMSI_STATE_MAP_QUIESCE: 6183 /* wait for the state to change */ 6184 while ((sharedp->rsmsi_state == 6185 RSMSI_STATE_CONNECTING) || 6186 (sharedp->rsmsi_state == 6187 RSMSI_STATE_CONN_QUIESCE) || 6188 (sharedp->rsmsi_state == 6189 RSMSI_STATE_MAP_QUIESCE)) { 6190 if (cv_wait_sig(&sharedp->rsmsi_cv, 6191 &sharedp->rsmsi_lock) == 0) { 6192 /* signalled - clean up and return */ 6193 rsmsharelock_release(seg); 6194 rsmimport_rm(seg); 6195 seg->s_adapter = NULL; 6196 rsmka_release_adapter(adapter); 6197 seg->s_state = RSM_STATE_NEW; 6198 DBG_PRINTF((category, RSM_ERR, 6199 "rsm_connect done: INTERRUPTED\n")); 6200 return (RSMERR_INTERRUPTED); 6201 } 6202 } 6203 /* 6204 * the state changed, loop back and check what it is 6205 */ 6206 recheck_state = 1; 6207 break; 6208 case RSMSI_STATE_ABORT_CONNECT: 6209 /* exit the loop and clean up further down */ 6210 break; 6211 case RSMSI_STATE_CONNECTED: 6212 /* already connected, good - fall through */ 6213 case RSMSI_STATE_MAPPED: 6214 /* already mapped, wow - fall through */ 6215 /* access validation etc is done further down */ 6216 break; 6217 case RSMSI_STATE_DISCONNECTED: 6218 /* 
disconnected - so reconnect now */ 6219 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6220 break; 6221 default: 6222 ASSERT(0); /* Invalid State */ 6223 } 6224 } while (recheck_state); 6225 6226 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6227 /* we are the first to connect */ 6228 rsmsharelock_release(seg); 6229 6230 if (msg->nodeid != my_nodeid) { 6231 addr = get_remote_hwaddr(adapter, msg->nodeid); 6232 6233 if ((int64_t)addr < 0) { 6234 rsmsharelock_acquire(seg); 6235 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6236 RSMSI_STATE_NEW); 6237 rsmsharelock_release(seg); 6238 rsmimport_rm(seg); 6239 seg->s_adapter = NULL; 6240 rsmka_release_adapter(adapter); 6241 seg->s_state = RSM_STATE_NEW; 6242 DBG_PRINTF((category, RSM_ERR, 6243 "rsm_connect done: hwaddr<0\n")); 6244 return (RSMERR_INTERNAL_ERROR); 6245 } 6246 } else { 6247 addr = adapter->hwaddr; 6248 } 6249 6250 /* 6251 * send request to node [src, dest, key, msgid] and get back 6252 * [status, msgid, cookie] 6253 */ 6254 request.rsmipc_key = msg->key; 6255 /* 6256 * we need the s_mode of the exporter so pass 6257 * RSM_ACCESS_TRUSTED 6258 */ 6259 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6260 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6261 request.rsmipc_adapter_hwaddr = addr; 6262 request.rsmipc_segment_cookie = sharedp; 6263 6264 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6265 if (e) { 6266 rsmsharelock_acquire(seg); 6267 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6268 RSMSI_STATE_NEW); 6269 rsmsharelock_release(seg); 6270 rsmimport_rm(seg); 6271 seg->s_adapter = NULL; 6272 rsmka_release_adapter(adapter); 6273 seg->s_state = RSM_STATE_NEW; 6274 DBG_PRINTF((category, RSM_ERR, 6275 "rsm_connect done:rsmipc_send failed %d\n", e)); 6276 return (e); 6277 } 6278 6279 if (reply.rsmipc_status != RSM_SUCCESS) { 6280 rsmsharelock_acquire(seg); 6281 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6282 RSMSI_STATE_NEW); 6283 rsmsharelock_release(seg); 6284 rsmimport_rm(seg); 6285 
seg->s_adapter = NULL; 6286 rsmka_release_adapter(adapter); 6287 seg->s_state = RSM_STATE_NEW; 6288 DBG_PRINTF((category, RSM_ERR, 6289 "rsm_connect done:rsmipc_send reply err %d\n", 6290 reply.rsmipc_status)); 6291 return (reply.rsmipc_status); 6292 } 6293 6294 rsmsharelock_acquire(seg); 6295 /* store the information recvd into the shared data struct */ 6296 sharedp->rsmsi_mode = reply.rsmipc_mode; 6297 sharedp->rsmsi_uid = reply.rsmipc_uid; 6298 sharedp->rsmsi_gid = reply.rsmipc_gid; 6299 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6300 sharedp->rsmsi_cookie = sharedp; 6301 } 6302 6303 rsmsharelock_release(seg); 6304 6305 /* 6306 * Get the segment lock and check for a force disconnect 6307 * from the export side which would have changed the state 6308 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6309 * force disconnect will be held off until the connection 6310 * has completed. 6311 */ 6312 rsmseglock_acquire(seg); 6313 rsmsharelock_acquire(seg); 6314 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6315 seg->s_state == RSM_STATE_ABORT_CONNECT); 6316 6317 shared_cookie = sharedp->rsmsi_cookie; 6318 6319 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6320 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6321 seg->s_state = RSM_STATE_NEW; 6322 seg->s_adapter = NULL; 6323 rsmsharelock_release(seg); 6324 rsmseglock_release(seg); 6325 rsmimport_rm(seg); 6326 rsmka_release_adapter(adapter); 6327 6328 rsmsharelock_acquire(seg); 6329 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6330 /* 6331 * set a flag indicating abort handling has been 6332 * done 6333 */ 6334 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6335 rsmsharelock_release(seg); 6336 /* send a message to exporter - only once */ 6337 (void) rsm_send_notimporting(msg->nodeid, 6338 msg->key, shared_cookie); 6339 rsmsharelock_acquire(seg); 6340 /* 6341 * wake up any waiting importers and inform that 6342 * connection has been aborted 6343 */ 6344 cv_broadcast(&sharedp->rsmsi_cv); 
6345 } 6346 rsmsharelock_release(seg); 6347 6348 DBG_PRINTF((category, RSM_ERR, 6349 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6350 return (RSMERR_INTERRUPTED); 6351 } 6352 6353 6354 /* 6355 * We need to verify that this process has access 6356 */ 6357 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6358 access & sharedp->rsmsi_mode, 6359 (int)(msg->perm & RSM_PERM_RDWR), cred); 6360 if (e) { 6361 rsmsharelock_release(seg); 6362 seg->s_state = RSM_STATE_NEW; 6363 seg->s_adapter = NULL; 6364 rsmseglock_release(seg); 6365 rsmimport_rm(seg); 6366 rsmka_release_adapter(adapter); 6367 /* 6368 * No need to lock segment it has been removed 6369 * from the hash table 6370 */ 6371 rsmsharelock_acquire(seg); 6372 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6373 rsmsharelock_release(seg); 6374 /* this is the first importer */ 6375 6376 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6377 shared_cookie); 6378 rsmsharelock_acquire(seg); 6379 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6380 cv_broadcast(&sharedp->rsmsi_cv); 6381 } 6382 rsmsharelock_release(seg); 6383 6384 DBG_PRINTF((category, RSM_ERR, 6385 "rsm_connect done: ipcaccess failed\n")); 6386 return (RSMERR_PERM_DENIED); 6387 } 6388 6389 /* update state and cookie */ 6390 seg->s_segid = sharedp->rsmsi_segid; 6391 seg->s_len = sharedp->rsmsi_seglen; 6392 seg->s_mode = access & sharedp->rsmsi_mode; 6393 seg->s_pid = ddi_get_pid(); 6394 seg->s_mapinfo = NULL; 6395 6396 if (seg->s_node != my_nodeid) { 6397 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6398 e = adapter->rsmpi_ops->rsm_connect( 6399 adapter->rsmpi_handle, 6400 addr, seg->s_segid, &sharedp->rsmsi_handle); 6401 6402 if (e != RSM_SUCCESS) { 6403 seg->s_state = RSM_STATE_NEW; 6404 seg->s_adapter = NULL; 6405 rsmsharelock_release(seg); 6406 rsmseglock_release(seg); 6407 rsmimport_rm(seg); 6408 rsmka_release_adapter(adapter); 6409 /* 6410 * inform the exporter to delete this importer 6411 */ 6412 (void) 
rsm_send_notimporting(msg->nodeid, 6413 msg->key, shared_cookie); 6414 6415 /* 6416 * Now inform any waiting importers to 6417 * retry connect. This needs to be done 6418 * after sending notimporting so that 6419 * the notimporting is sent before a waiting 6420 * importer sends a segconnect while retrying 6421 * 6422 * No need to lock segment it has been removed 6423 * from the hash table 6424 */ 6425 6426 rsmsharelock_acquire(seg); 6427 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6428 cv_broadcast(&sharedp->rsmsi_cv); 6429 rsmsharelock_release(seg); 6430 6431 DBG_PRINTF((category, RSM_ERR, 6432 "rsm_connect error %d\n", e)); 6433 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6434 return ( 6435 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6436 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6437 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6438 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6439 else 6440 return (e); 6441 } 6442 6443 } 6444 seg->s_handle.in = sharedp->rsmsi_handle; 6445 6446 } 6447 6448 seg->s_state = RSM_STATE_CONNECT; 6449 6450 6451 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6452 if (bar_va) { 6453 /* increment generation number on barrier page */ 6454 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6455 /* return user off into barrier page where status will be */ 6456 msg->off = (int)seg->s_hdr.rsmrc_num; 6457 msg->gnum = bar_va[msg->off]; /* gnum race */ 6458 } else { 6459 msg->off = 0; 6460 msg->gnum = 0; /* gnum race */ 6461 } 6462 6463 msg->len = (int)sharedp->rsmsi_seglen; 6464 msg->rnum = seg->s_minor; 6465 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6466 rsmsharelock_release(seg); 6467 rsmseglock_release(seg); 6468 6469 /* Return back to user the segment size & perm in case it's needed */ 6470 6471 #ifdef _MULTI_DATAMODEL 6472 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6473 rsm_ioctlmsg32_t msg32; 6474 6475 if (msg->len > UINT_MAX) 6476 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6477 else 6478 msg32.len = msg->len; 6479 
msg32.off = msg->off; 6480 msg32.perm = msg->perm; 6481 msg32.gnum = msg->gnum; 6482 msg32.rnum = msg->rnum; 6483 6484 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6485 "rsm_connect done\n")); 6486 6487 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6488 sizeof (msg32), mode)) 6489 return (RSMERR_BAD_ADDR); 6490 else 6491 return (RSM_SUCCESS); 6492 } 6493 #endif 6494 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6495 6496 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6497 mode)) 6498 return (RSMERR_BAD_ADDR); 6499 else 6500 return (RSM_SUCCESS); 6501 } 6502 6503 static int 6504 rsm_unmap(rsmseg_t *seg) 6505 { 6506 int err; 6507 adapter_t *adapter; 6508 rsm_import_share_t *sharedp; 6509 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6510 6511 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6512 "rsm_unmap enter %u\n", seg->s_segid)); 6513 6514 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6515 6516 /* assert seg is locked */ 6517 ASSERT(rsmseglock_held(seg)); 6518 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6519 6520 if ((seg->s_state != RSM_STATE_ACTIVE) && 6521 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6522 /* segment unmap has already been done */ 6523 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6524 return (RSM_SUCCESS); 6525 } 6526 6527 sharedp = seg->s_share; 6528 6529 rsmsharelock_acquire(seg); 6530 6531 /* 6532 * - shared data struct is in MAPPED or MAP_QUIESCE state 6533 */ 6534 6535 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6536 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6537 6538 /* 6539 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6540 * the segment cookie list was NULL; but it is always NULL when 6541 * called from rsmmap_unmap and won't be NULL when called for 6542 * a force disconnect - so the check for NULL cookie list was removed 6543 */ 6544 6545 ASSERT(sharedp->rsmsi_mapcnt > 0); 6546 6547 sharedp->rsmsi_mapcnt--; 6548 6549 if 
(sharedp->rsmsi_mapcnt == 0) { 6550 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6551 /* unmap the shared RSMPI mapping */ 6552 adapter = seg->s_adapter; 6553 if (seg->s_node != my_nodeid) { 6554 ASSERT(sharedp->rsmsi_handle != NULL); 6555 err = adapter->rsmpi_ops-> 6556 rsm_unmap(sharedp->rsmsi_handle); 6557 DBG_PRINTF((category, RSM_DEBUG, 6558 "rsm_unmap: rsmpi unmap %d\n", err)); 6559 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6560 sharedp->rsmsi_mapinfo = NULL; 6561 } 6562 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6563 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6564 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6565 } 6566 } 6567 6568 rsmsharelock_release(seg); 6569 6570 /* 6571 * The s_cookie field is used to store the cookie returned from the 6572 * ddi_umem_lock when binding the pages for an export segment. This 6573 * is the primary use of the s_cookie field and does not normally 6574 * pertain to any importing segment except in the loopback case. 6575 * For the loopback case, the import segment and export segment are 6576 * on the same node, the s_cookie field of the segment structure for 6577 * the importer is initialized to the s_cookie field in the exported 6578 * segment during the map operation and is used during the call to 6579 * devmap_umem_setup for the import mapping. 6580 * Thus, during unmap, we simply need to set s_cookie to NULL to 6581 * indicate that the mapping no longer exists. 6582 */ 6583 seg->s_cookie = NULL; 6584 6585 seg->s_mapinfo = NULL; 6586 6587 if (seg->s_state == RSM_STATE_ACTIVE) 6588 seg->s_state = RSM_STATE_CONNECT; 6589 else 6590 seg->s_state = RSM_STATE_CONN_QUIESCE; 6591 6592 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6593 6594 return (RSM_SUCCESS); 6595 } 6596 6597 /* 6598 * cookie returned here if not null indicates that it is 6599 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6600 * message. 
6601 */ 6602 static int 6603 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6604 { 6605 int e; 6606 adapter_t *adapter; 6607 rsm_import_share_t *sharedp; 6608 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6609 6610 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6611 "rsm_closeconnection enter\n")); 6612 6613 *cookie = (void *)NULL; 6614 6615 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6616 6617 /* assert seg is locked */ 6618 ASSERT(rsmseglock_held(seg)); 6619 6620 if (seg->s_state == RSM_STATE_DISCONNECT) { 6621 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6622 "rsm_closeconnection done: already disconnected\n")); 6623 return (RSM_SUCCESS); 6624 } 6625 6626 /* wait for all putv/getv ops to get done */ 6627 while (seg->s_rdmacnt > 0) { 6628 cv_wait(&seg->s_cv, &seg->s_lock); 6629 } 6630 6631 (void) rsm_unmap(seg); 6632 6633 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6634 seg->s_state == RSM_STATE_CONN_QUIESCE); 6635 6636 adapter = seg->s_adapter; 6637 sharedp = seg->s_share; 6638 6639 ASSERT(sharedp != NULL); 6640 6641 rsmsharelock_acquire(seg); 6642 6643 /* 6644 * Disconnect on adapter 6645 * 6646 * The current algorithm is stateless, I don't have to contact 6647 * server when I go away. He only gives me permissions. Of course, 6648 * the adapters will talk to terminate the connect. 
6649 * 6650 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6651 */ 6652 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6653 (sharedp->rsmsi_node != my_nodeid)) { 6654 6655 if (sharedp->rsmsi_refcnt == 1) { 6656 /* this is the last importer */ 6657 ASSERT(sharedp->rsmsi_mapcnt == 0); 6658 6659 e = adapter->rsmpi_ops-> 6660 rsm_disconnect(sharedp->rsmsi_handle); 6661 if (e != RSM_SUCCESS) { 6662 DBG_PRINTF((category, RSM_DEBUG, 6663 "rsm:disconnect failed seg=%x:err=%d\n", 6664 seg->s_key, e)); 6665 } 6666 } 6667 } 6668 6669 seg->s_handle.in = NULL; 6670 6671 sharedp->rsmsi_refcnt--; 6672 6673 if (sharedp->rsmsi_refcnt == 0) { 6674 *cookie = (void *)sharedp->rsmsi_cookie; 6675 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6676 sharedp->rsmsi_handle = NULL; 6677 rsmsharelock_release(seg); 6678 6679 /* clean up the shared data structure */ 6680 mutex_destroy(&sharedp->rsmsi_lock); 6681 cv_destroy(&sharedp->rsmsi_cv); 6682 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6683 6684 } else { 6685 rsmsharelock_release(seg); 6686 } 6687 6688 /* increment generation number on barrier page */ 6689 if (bar_va) { 6690 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6691 } 6692 6693 /* 6694 * The following needs to be done after any 6695 * rsmsharelock calls which use seg->s_share. 
6696 */ 6697 seg->s_share = NULL; 6698 6699 seg->s_state = RSM_STATE_DISCONNECT; 6700 /* signal anyone waiting in the CONN_QUIESCE state */ 6701 cv_broadcast(&seg->s_cv); 6702 6703 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6704 "rsm_closeconnection done\n")); 6705 6706 return (RSM_SUCCESS); 6707 } 6708 6709 int 6710 rsm_disconnect(rsmseg_t *seg) 6711 { 6712 rsmipc_request_t request; 6713 void *shared_cookie; 6714 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6715 6716 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6717 6718 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6719 6720 /* assert seg isn't locked */ 6721 ASSERT(!rsmseglock_held(seg)); 6722 6723 6724 /* Remove segment from imported list */ 6725 rsmimport_rm(seg); 6726 6727 /* acquire the segment */ 6728 rsmseglock_acquire(seg); 6729 6730 /* wait until segment leaves the mapping state */ 6731 while (seg->s_state == RSM_STATE_MAPPING) 6732 cv_wait(&seg->s_cv, &seg->s_lock); 6733 6734 if (seg->s_state == RSM_STATE_DISCONNECT) { 6735 seg->s_state = RSM_STATE_NEW; 6736 rsmseglock_release(seg); 6737 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6738 "rsm_disconnect done: already disconnected\n")); 6739 return (RSM_SUCCESS); 6740 } 6741 6742 (void) rsm_closeconnection(seg, &shared_cookie); 6743 6744 /* update state */ 6745 seg->s_state = RSM_STATE_NEW; 6746 6747 if (shared_cookie != NULL) { 6748 /* 6749 * This is the last importer so inform the exporting node 6750 * so this import can be deleted from the list of importers. 
6751 */ 6752 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6753 request.rsmipc_key = seg->s_segid; 6754 request.rsmipc_segment_cookie = shared_cookie; 6755 rsmseglock_release(seg); 6756 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6757 } else { 6758 rsmseglock_release(seg); 6759 } 6760 6761 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6762 6763 return (DDI_SUCCESS); 6764 } 6765 6766 /*ARGSUSED*/ 6767 static int 6768 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6769 struct pollhead **phpp) 6770 { 6771 minor_t rnum; 6772 rsmresource_t *res; 6773 rsmseg_t *seg; 6774 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6775 6776 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6777 6778 /* find minor, no lock */ 6779 rnum = getminor(dev); 6780 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6781 6782 /* poll is supported only for export/import segments */ 6783 if ((res == NULL) || (res == RSMRC_RESERVED) || 6784 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6785 return (ENXIO); 6786 } 6787 6788 *reventsp = 0; 6789 6790 /* 6791 * An exported segment must be in state RSM_STATE_EXPORT; an 6792 * imported segment must be in state RSM_STATE_ACTIVE. 
6793 */ 6794 seg = (rsmseg_t *)res; 6795 6796 if (seg->s_pollevent) { 6797 *reventsp = POLLRDNORM; 6798 } else if (!anyyet) { 6799 /* cannot take segment lock here */ 6800 *phpp = &seg->s_poll; 6801 seg->s_pollflag |= RSM_SEGMENT_POLL; 6802 } 6803 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6804 return (0); 6805 } 6806 6807 6808 6809 /* ************************* IOCTL Commands ********************* */ 6810 6811 static rsmseg_t * 6812 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6813 rsm_resource_type_t type) 6814 { 6815 /* get segment from resource handle */ 6816 rsmseg_t *seg; 6817 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6818 6819 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6820 6821 6822 if (res != RSMRC_RESERVED) { 6823 seg = (rsmseg_t *)res; 6824 } else { 6825 /* Allocate segment now and bind it */ 6826 seg = rsmseg_alloc(rnum, credp); 6827 6828 /* 6829 * if DR pre-processing is going on or DR is in progress 6830 * then the new export segments should be in the NEW_QSCD state 6831 */ 6832 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6833 mutex_enter(&rsm_drv_data.drv_lock); 6834 if ((rsm_drv_data.drv_state == 6835 RSM_DRV_PREDEL_STARTED) || 6836 (rsm_drv_data.drv_state == 6837 RSM_DRV_PREDEL_COMPLETED) || 6838 (rsm_drv_data.drv_state == 6839 RSM_DRV_DR_IN_PROGRESS)) { 6840 seg->s_state = RSM_STATE_NEW_QUIESCED; 6841 } 6842 mutex_exit(&rsm_drv_data.drv_lock); 6843 } 6844 6845 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6846 } 6847 6848 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6849 6850 return (seg); 6851 } 6852 6853 static int 6854 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6855 int mode, cred_t *credp) 6856 { 6857 int error; 6858 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6859 6860 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6861 6862 arg = arg; 6863 credp 
= credp; 6864 6865 ASSERT(seg != NULL); 6866 6867 switch (cmd) { 6868 case RSM_IOCTL_BIND: 6869 error = rsm_bind(seg, msg, arg, mode); 6870 break; 6871 case RSM_IOCTL_REBIND: 6872 error = rsm_rebind(seg, msg); 6873 break; 6874 case RSM_IOCTL_UNBIND: 6875 error = ENOTSUP; 6876 break; 6877 case RSM_IOCTL_PUBLISH: 6878 error = rsm_publish(seg, msg, arg, mode); 6879 break; 6880 case RSM_IOCTL_REPUBLISH: 6881 error = rsm_republish(seg, msg, mode); 6882 break; 6883 case RSM_IOCTL_UNPUBLISH: 6884 error = rsm_unpublish(seg, 1); 6885 break; 6886 default: 6887 error = EINVAL; 6888 break; 6889 } 6890 6891 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6892 error)); 6893 6894 return (error); 6895 } 6896 static int 6897 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6898 int mode, cred_t *credp) 6899 { 6900 int error; 6901 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6902 6903 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6904 6905 ASSERT(seg); 6906 6907 switch (cmd) { 6908 case RSM_IOCTL_CONNECT: 6909 error = rsm_connect(seg, msg, credp, arg, mode); 6910 break; 6911 default: 6912 error = EINVAL; 6913 break; 6914 } 6915 6916 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6917 error)); 6918 return (error); 6919 } 6920 6921 static int 6922 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6923 int mode) 6924 { 6925 int e; 6926 adapter_t *adapter; 6927 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6928 6929 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6930 6931 6932 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6933 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6934 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6935 return (RSMERR_CONN_ABORTED); 6936 } else if (seg->s_node == my_nodeid) { 6937 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6938 "rsmbar_ioctl done: loopback\n")); 6939 return (RSM_SUCCESS); 6940 } 
6941 6942 adapter = seg->s_adapter; 6943 6944 switch (cmd) { 6945 case RSM_IOCTL_BAR_CHECK: 6946 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6947 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6948 return (bar_va ? RSM_SUCCESS : EINVAL); 6949 case RSM_IOCTL_BAR_OPEN: 6950 e = adapter->rsmpi_ops-> 6951 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6952 break; 6953 case RSM_IOCTL_BAR_ORDER: 6954 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6955 break; 6956 case RSM_IOCTL_BAR_CLOSE: 6957 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6958 break; 6959 default: 6960 e = EINVAL; 6961 break; 6962 } 6963 6964 if (e == RSM_SUCCESS) { 6965 #ifdef _MULTI_DATAMODEL 6966 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6967 rsm_ioctlmsg32_t msg32; 6968 int i; 6969 6970 for (i = 0; i < 4; i++) { 6971 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6972 } 6973 6974 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6975 "rsmbar_ioctl done\n")); 6976 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6977 sizeof (msg32), mode)) 6978 return (RSMERR_BAD_ADDR); 6979 else 6980 return (RSM_SUCCESS); 6981 } 6982 #endif 6983 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6984 "rsmbar_ioctl done\n")); 6985 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6986 sizeof (*msg), mode)) 6987 return (RSMERR_BAD_ADDR); 6988 else 6989 return (RSM_SUCCESS); 6990 } 6991 6992 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6993 "rsmbar_ioctl done: error=%d\n", e)); 6994 6995 return (e); 6996 } 6997 6998 /* 6999 * Ring the doorbell of the export segment to which this segment is 7000 * connected. 
7001 */ 7002 static int 7003 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7004 { 7005 int e = 0; 7006 rsmipc_request_t request; 7007 7008 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7009 7010 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7011 7012 request.rsmipc_key = seg->s_segid; 7013 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7014 request.rsmipc_segment_cookie = NULL; 7015 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7016 7017 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7018 "exportbell_ioctl done: %d\n", e)); 7019 7020 return (e); 7021 } 7022 7023 /* 7024 * Ring the doorbells of all segments importing this segment 7025 */ 7026 static int 7027 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7028 { 7029 importing_token_t *token = NULL; 7030 rsmipc_request_t request; 7031 int index; 7032 7033 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7034 7035 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7036 7037 ASSERT(seg->s_state != RSM_STATE_NEW && 7038 seg->s_state != RSM_STATE_NEW_QUIESCED); 7039 7040 request.rsmipc_key = seg->s_segid; 7041 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7042 7043 index = rsmhash(seg->s_segid); 7044 7045 token = importer_list.bucket[index]; 7046 7047 while (token != NULL) { 7048 if (seg->s_key == token->key) { 7049 request.rsmipc_segment_cookie = 7050 token->import_segment_cookie; 7051 (void) rsmipc_send(token->importing_node, 7052 &request, RSM_NO_REPLY); 7053 } 7054 token = token->next; 7055 } 7056 7057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7058 "importbell_ioctl done\n")); 7059 return (RSM_SUCCESS); 7060 } 7061 7062 static int 7063 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7064 rsm_poll_event_t **eventspp, int mode) 7065 { 7066 rsm_poll_event_t *evlist = NULL; 7067 size_t evlistsz; 7068 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7069 7070 #ifdef _MULTI_DATAMODEL 7071 if ((mode 
& DATAMODEL_MASK) == DATAMODEL_ILP32) { 7072 int i; 7073 rsm_consume_event_msg32_t cemsg32 = {0}; 7074 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7075 rsm_poll_event32_t *evlist32; 7076 size_t evlistsz32; 7077 7078 /* copyin the ioctl message */ 7079 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7080 sizeof (rsm_consume_event_msg32_t), mode)) { 7081 DBG_PRINTF((category, RSM_ERR, 7082 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7083 return (RSMERR_BAD_ADDR); 7084 } 7085 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7086 msgp->numents = (int)cemsg32.numents; 7087 7088 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7089 /* 7090 * If numents is large alloc events list on heap otherwise 7091 * use the address of array that was passed in. 7092 */ 7093 if (msgp->numents > RSM_MAX_POLLFDS) { 7094 if (msgp->numents > max_segs) { /* validate numents */ 7095 DBG_PRINTF((category, RSM_ERR, 7096 "consumeevent_copyin: " 7097 "RSMERR_BAD_ARGS_ERRORS\n")); 7098 return (RSMERR_BAD_ARGS_ERRORS); 7099 } 7100 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7101 } else { 7102 evlist32 = event32; 7103 } 7104 7105 /* copyin the seglist into the rsm_poll_event32_t array */ 7106 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7107 evlistsz32, mode)) { 7108 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7109 kmem_free(evlist32, evlistsz32); 7110 } 7111 DBG_PRINTF((category, RSM_ERR, 7112 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7113 return (RSMERR_BAD_ADDR); 7114 } 7115 7116 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7117 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7118 7119 if (msgp->numents > RSM_MAX_POLLFDS) { 7120 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7121 *eventspp = evlist; 7122 } else { 7123 evlist = *eventspp; 7124 } 7125 /* 7126 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7127 * array 7128 */ 7129 for (i = 0; i < msgp->numents; i++) { 7130 evlist[i].rnum = evlist32[i].rnum; 7131 
evlist[i].fdsidx = evlist32[i].fdsidx; 7132 evlist[i].revent = evlist32[i].revent; 7133 } 7134 /* free the temp 32-bit event list */ 7135 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7136 kmem_free(evlist32, evlistsz32); 7137 } 7138 7139 return (RSM_SUCCESS); 7140 } 7141 #endif 7142 /* copyin the ioctl message */ 7143 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7144 mode)) { 7145 DBG_PRINTF((category, RSM_ERR, 7146 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7147 return (RSMERR_BAD_ADDR); 7148 } 7149 /* 7150 * If numents is large alloc events list on heap otherwise 7151 * use the address of array that was passed in. 7152 */ 7153 if (msgp->numents > RSM_MAX_POLLFDS) { 7154 if (msgp->numents > max_segs) { /* validate numents */ 7155 DBG_PRINTF((category, RSM_ERR, 7156 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7157 return (RSMERR_BAD_ARGS_ERRORS); 7158 } 7159 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7160 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7161 *eventspp = evlist; 7162 } 7163 7164 /* copyin the seglist */ 7165 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7166 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7167 if (evlist) { 7168 kmem_free(evlist, evlistsz); 7169 *eventspp = NULL; 7170 } 7171 DBG_PRINTF((category, RSM_ERR, 7172 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7173 return (RSMERR_BAD_ADDR); 7174 } 7175 7176 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7177 "consumeevent_copyin done\n")); 7178 return (RSM_SUCCESS); 7179 } 7180 7181 static int 7182 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7183 rsm_poll_event_t *eventsp, int mode) 7184 { 7185 size_t evlistsz; 7186 int err = RSM_SUCCESS; 7187 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7188 7189 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7190 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7191 msgp->numents, eventsp)); 7192 7193 #ifdef _MULTI_DATAMODEL 7194 if ((mode & DATAMODEL_MASK) == 
DATAMODEL_ILP32) { 7195 int i; 7196 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7197 rsm_poll_event32_t *evlist32; 7198 size_t evlistsz32; 7199 7200 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7201 if (msgp->numents > RSM_MAX_POLLFDS) { 7202 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7203 } else { 7204 evlist32 = event32; 7205 } 7206 7207 /* 7208 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7209 * array 7210 */ 7211 for (i = 0; i < msgp->numents; i++) { 7212 evlist32[i].rnum = eventsp[i].rnum; 7213 evlist32[i].fdsidx = eventsp[i].fdsidx; 7214 evlist32[i].revent = eventsp[i].revent; 7215 } 7216 7217 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7218 evlistsz32, mode)) { 7219 err = RSMERR_BAD_ADDR; 7220 } 7221 7222 if (msgp->numents > RSM_MAX_POLLFDS) { 7223 if (evlist32) { /* free the temp 32-bit event list */ 7224 kmem_free(evlist32, evlistsz32); 7225 } 7226 /* 7227 * eventsp and evlistsz are based on rsm_poll_event_t 7228 * type 7229 */ 7230 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7231 /* event list on the heap and needs to be freed here */ 7232 if (eventsp) { 7233 kmem_free(eventsp, evlistsz); 7234 } 7235 } 7236 7237 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7238 "consumeevent_copyout done: err=%d\n", err)); 7239 return (err); 7240 } 7241 #endif 7242 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7243 7244 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7245 mode)) { 7246 err = RSMERR_BAD_ADDR; 7247 } 7248 7249 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7250 /* event list on the heap and needs to be freed here */ 7251 kmem_free(eventsp, evlistsz); 7252 } 7253 7254 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7255 "consumeevent_copyout done: err=%d\n", err)); 7256 return (err); 7257 } 7258 7259 static int 7260 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7261 { 7262 int rc; 7263 int i; 7264 minor_t rnum; 7265 rsm_consume_event_msg_t msg = {0}; 7266 rsmseg_t *seg; 7267 
rsm_poll_event_t *event_list; 7268 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7269 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7270 7271 event_list = events; 7272 7273 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) != 7274 RSM_SUCCESS) { 7275 return (rc); 7276 } 7277 7278 for (i = 0; i < msg.numents; i++) { 7279 rnum = event_list[i].rnum; 7280 event_list[i].revent = 0; 7281 /* get the segment structure */ 7282 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7283 if (seg) { 7284 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7285 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7286 seg)); 7287 if (seg->s_pollevent) { 7288 /* consume the event */ 7289 atomic_add_32(&seg->s_pollevent, -1); 7290 event_list[i].revent = POLLRDNORM; 7291 } 7292 rsmseglock_release(seg); 7293 } 7294 } 7295 7296 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7297 RSM_SUCCESS) { 7298 return (rc); 7299 } 7300 7301 return (RSM_SUCCESS); 7302 } 7303 7304 static int 7305 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7306 { 7307 int size; 7308 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7309 7310 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7311 7312 #ifdef _MULTI_DATAMODEL 7313 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7314 rsmka_iovec32_t *iovec32, *iovec32_base; 7315 int i; 7316 7317 size = count * sizeof (rsmka_iovec32_t); 7318 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7319 if (ddi_copyin((caddr_t)user_vec, 7320 (caddr_t)iovec32, size, mode)) { 7321 kmem_free(iovec32, size); 7322 DBG_PRINTF((category, RSM_DEBUG, 7323 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7324 return (RSMERR_BAD_ADDR); 7325 } 7326 7327 for (i = 0; i < count; i++, iovec++, iovec32++) { 7328 iovec->io_type = (int)iovec32->io_type; 7329 if (iovec->io_type == RSM_HANDLE_TYPE) 7330 iovec->local.segid = (rsm_memseg_id_t) 7331 iovec32->local; 7332 else 7333 iovec->local.vaddr = 7334 
(caddr_t)(uintptr_t)iovec32->local; 7335 iovec->local_offset = (size_t)iovec32->local_offset; 7336 iovec->remote_offset = (size_t)iovec32->remote_offset; 7337 iovec->transfer_len = (size_t)iovec32->transfer_len; 7338 7339 } 7340 kmem_free(iovec32_base, size); 7341 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7342 "iovec_copyin done\n")); 7343 return (DDI_SUCCESS); 7344 } 7345 #endif 7346 7347 size = count * sizeof (rsmka_iovec_t); 7348 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7350 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7351 return (RSMERR_BAD_ADDR); 7352 } 7353 7354 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7355 7356 return (DDI_SUCCESS); 7357 } 7358 7359 7360 static int 7361 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7362 { 7363 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7364 7365 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7366 7367 #ifdef _MULTI_DATAMODEL 7368 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7369 rsmka_scat_gath32_t sg_io32; 7370 7371 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7372 mode)) { 7373 DBG_PRINTF((category, RSM_DEBUG, 7374 "sgio_copyin done: returning EFAULT\n")); 7375 return (RSMERR_BAD_ADDR); 7376 } 7377 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7378 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7379 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7380 sg_io->flags = (size_t)sg_io32.flags; 7381 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7382 (uintptr_t)sg_io32.remote_handle; 7383 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7384 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7385 "sgio_copyin done\n")); 7386 return (DDI_SUCCESS); 7387 } 7388 #endif 7389 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7390 mode)) { 7391 DBG_PRINTF((category, RSM_DEBUG, 7392 "sgio_copyin done: returning 
EFAULT\n")); 7393 return (RSMERR_BAD_ADDR); 7394 } 7395 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7396 return (DDI_SUCCESS); 7397 } 7398 7399 static int 7400 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7401 { 7402 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7403 7404 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7405 "sgio_resid_copyout enter\n")); 7406 7407 #ifdef _MULTI_DATAMODEL 7408 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7409 rsmka_scat_gath32_t sg_io32; 7410 7411 sg_io32.io_residual_count = sg_io->io_residual_count; 7412 sg_io32.flags = sg_io->flags; 7413 7414 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7415 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7416 sizeof (uint32_t), mode)) { 7417 7418 DBG_PRINTF((category, RSM_ERR, 7419 "sgio_resid_copyout error: rescnt\n")); 7420 return (RSMERR_BAD_ADDR); 7421 } 7422 7423 if (ddi_copyout((caddr_t)&sg_io32.flags, 7424 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7425 sizeof (uint32_t), mode)) { 7426 7427 DBG_PRINTF((category, RSM_ERR, 7428 "sgio_resid_copyout error: flags\n")); 7429 return (RSMERR_BAD_ADDR); 7430 } 7431 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7432 "sgio_resid_copyout done\n")); 7433 return (DDI_SUCCESS); 7434 } 7435 #endif 7436 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7437 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7438 sizeof (ulong_t), mode)) { 7439 7440 DBG_PRINTF((category, RSM_ERR, 7441 "sgio_resid_copyout error:rescnt\n")); 7442 return (RSMERR_BAD_ADDR); 7443 } 7444 7445 if (ddi_copyout((caddr_t)&sg_io->flags, 7446 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7447 sizeof (uint_t), mode)) { 7448 7449 DBG_PRINTF((category, RSM_ERR, 7450 "sgio_resid_copyout error:flags\n")); 7451 return (RSMERR_BAD_ADDR); 7452 } 7453 7454 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7455 return (DDI_SUCCESS); 7456 } 7457 7458 7459 static int 7460 
rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7461 { 7462 rsmka_scat_gath_t sg_io; 7463 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7464 rsmka_iovec_t *ka_iovec; 7465 rsmka_iovec_t *ka_iovec_start; 7466 rsmpi_scat_gath_t rsmpi_sg_io; 7467 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7468 rsmpi_iovec_t *iovec; 7469 rsmpi_iovec_t *iovec_start = NULL; 7470 rsmapi_access_entry_t *acl; 7471 rsmresource_t *res; 7472 minor_t rnum; 7473 rsmseg_t *im_seg, *ex_seg; 7474 int e; 7475 int error = 0; 7476 uint_t i; 7477 uint_t iov_proc = 0; /* num of iovecs processed */ 7478 size_t size = 0; 7479 size_t ka_size; 7480 7481 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7482 7483 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7484 7485 credp = credp; 7486 7487 /* 7488 * Copyin the scatter/gather structure and build new structure 7489 * for rsmpi. 7490 */ 7491 e = sgio_copyin(arg, &sg_io, mode); 7492 if (e != DDI_SUCCESS) { 7493 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7494 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7495 return (e); 7496 } 7497 7498 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7499 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7500 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7501 sg_io.io_request_count)); 7502 return (RSMERR_BAD_SGIO); 7503 } 7504 7505 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7506 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7507 rsmpi_sg_io.io_segflg = 0; 7508 7509 /* Allocate memory and copyin io vector array */ 7510 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7511 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7512 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7513 } else { 7514 ka_iovec_start = ka_iovec = ka_iovec_arr; 7515 } 7516 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7517 sg_io.io_request_count, mode); 7518 if (e != DDI_SUCCESS) { 7519 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7520 
kmem_free(ka_iovec, ka_size); 7521 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7522 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7523 return (e); 7524 } 7525 7526 /* get the import segment descriptor */ 7527 rnum = getminor(dev); 7528 res = rsmresource_lookup(rnum, RSM_LOCK); 7529 7530 /* 7531 * The following sequence of locking may (or MAY NOT) cause a 7532 * deadlock but this is currently not addressed here since the 7533 * implementation will be changed to incorporate the use of 7534 * reference counting for both the import and the export segments. 7535 */ 7536 7537 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7538 7539 im_seg = (rsmseg_t *)res; 7540 7541 if (im_seg == NULL) { 7542 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7543 kmem_free(ka_iovec, ka_size); 7544 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7545 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7546 return (EINVAL); 7547 } 7548 /* putv/getv supported is supported only on import segments */ 7549 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7550 rsmseglock_release(im_seg); 7551 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7552 kmem_free(ka_iovec, ka_size); 7553 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7554 "rsm_iovec_ioctl done: not an import segment\n")); 7555 return (EINVAL); 7556 } 7557 7558 /* 7559 * wait for a remote DR to complete ie. for segments to get UNQUIESCED 7560 * as well as wait for a local DR to complete. 
7561 */ 7562 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7563 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7564 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7565 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7566 DBG_PRINTF((category, RSM_DEBUG, 7567 "rsm_iovec_ioctl done: cv_wait INTR")); 7568 rsmseglock_release(im_seg); 7569 return (RSMERR_INTERRUPTED); 7570 } 7571 } 7572 7573 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7574 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7575 7576 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7577 im_seg->s_state == RSM_STATE_NEW); 7578 7579 DBG_PRINTF((category, RSM_DEBUG, 7580 "rsm_iovec_ioctl done: im_seg not conn/map")); 7581 rsmseglock_release(im_seg); 7582 e = RSMERR_BAD_SGIO; 7583 goto out; 7584 } 7585 7586 im_seg->s_rdmacnt++; 7587 rsmseglock_release(im_seg); 7588 7589 /* 7590 * Allocate and set up the io vector for rsmpi 7591 */ 7592 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7593 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7594 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7595 } else { 7596 iovec_start = iovec = iovec_arr; 7597 } 7598 7599 rsmpi_sg_io.iovec = iovec; 7600 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7601 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7602 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7603 7604 if (ex_seg == NULL) { 7605 e = RSMERR_BAD_SGIO; 7606 break; 7607 } 7608 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7609 7610 acl = ex_seg->s_acl; 7611 if (acl[0].ae_permission == 0) { 7612 struct buf *xbuf; 7613 dev_t sdev = 0; 7614 7615 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7616 0, ex_seg->s_len, B_WRITE, 7617 sdev, 0, NULL, DDI_UMEM_SLEEP); 7618 7619 ASSERT(xbuf != NULL); 7620 7621 iovec->local_mem.ms_type = RSM_MEM_BUF; 7622 iovec->local_mem.ms_memory.bp = xbuf; 7623 } else { 7624 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7625 iovec->local_mem.ms_memory.handle = 7626 ex_seg->s_handle.out; 7627 } 7628 ex_seg->s_rdmacnt++; /* 
refcnt the handle */ 7629 rsmseglock_release(ex_seg); 7630 } else { 7631 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7632 iovec->local_mem.ms_memory.vr.vaddr = 7633 ka_iovec->local.vaddr; 7634 } 7635 7636 iovec->local_offset = ka_iovec->local_offset; 7637 iovec->remote_handle = im_seg->s_handle.in; 7638 iovec->remote_offset = ka_iovec->remote_offset; 7639 iovec->transfer_length = ka_iovec->transfer_len; 7640 iovec++; 7641 ka_iovec++; 7642 } 7643 7644 if (iov_proc < sg_io.io_request_count) { 7645 /* error while processing handle */ 7646 rsmseglock_acquire(im_seg); 7647 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7648 if (im_seg->s_rdmacnt == 0) { 7649 cv_broadcast(&im_seg->s_cv); 7650 } 7651 rsmseglock_release(im_seg); 7652 goto out; 7653 } 7654 7655 /* call rsmpi */ 7656 if (cmd == RSM_IOCTL_PUTV) 7657 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7658 im_seg->s_adapter->rsmpi_handle, 7659 &rsmpi_sg_io); 7660 else if (cmd == RSM_IOCTL_GETV) 7661 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7662 im_seg->s_adapter->rsmpi_handle, 7663 &rsmpi_sg_io); 7664 else { 7665 e = EINVAL; 7666 DBG_PRINTF((category, RSM_DEBUG, 7667 "iovec_ioctl: bad command = %x\n", cmd)); 7668 } 7669 7670 7671 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7672 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7673 7674 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7675 7676 /* 7677 * Check for implicit signal post flag and do the signal 7678 * post if needed 7679 */ 7680 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7681 e == RSM_SUCCESS) { 7682 rsmipc_request_t request; 7683 7684 request.rsmipc_key = im_seg->s_segid; 7685 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7686 request.rsmipc_segment_cookie = NULL; 7687 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7688 /* 7689 * Reset the implicit signal post flag to 0 to indicate 7690 * that the signal post has been done and need not be 7691 * done in the RSMAPI library 7692 */ 7693 sg_io.flags 
&= ~RSM_IMPLICIT_SIGPOST; 7694 } 7695 7696 rsmseglock_acquire(im_seg); 7697 im_seg->s_rdmacnt--; 7698 if (im_seg->s_rdmacnt == 0) { 7699 cv_broadcast(&im_seg->s_cv); 7700 } 7701 rsmseglock_release(im_seg); 7702 error = sgio_resid_copyout(arg, &sg_io, mode); 7703 out: 7704 iovec = iovec_start; 7705 ka_iovec = ka_iovec_start; 7706 for (i = 0; i < iov_proc; i++) { 7707 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7708 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7709 7710 ASSERT(ex_seg != NULL); 7711 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7712 7713 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7714 if (ex_seg->s_rdmacnt == 0) { 7715 cv_broadcast(&ex_seg->s_cv); 7716 } 7717 rsmseglock_release(ex_seg); 7718 } 7719 7720 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7721 7722 /* 7723 * At present there is no dependency on the existence of xbufs 7724 * created by ddi_umem_iosetup for each of the iovecs. So we 7725 * can these xbufs here. 7726 */ 7727 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7728 freerbuf(iovec->local_mem.ms_memory.bp); 7729 } 7730 7731 iovec++; 7732 ka_iovec++; 7733 } 7734 7735 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7736 if (iovec_start) 7737 kmem_free(iovec_start, size); 7738 kmem_free(ka_iovec_start, ka_size); 7739 } 7740 7741 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7742 "rsm_iovec_ioctl done %d\n", e)); 7743 /* if RSMPI call fails return that else return copyout's retval */ 7744 return ((e != RSM_SUCCESS) ? 
e : error); 7745 7746 } 7747 7748 7749 static int 7750 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7751 { 7752 adapter_t *adapter; 7753 rsm_addr_t addr; 7754 rsm_node_id_t node; 7755 int rval = DDI_SUCCESS; 7756 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7757 7758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7759 7760 adapter = rsm_getadapter(msg, mode); 7761 if (adapter == NULL) { 7762 DBG_PRINTF((category, RSM_DEBUG, 7763 "rsmaddr_ioctl done: adapter not found\n")); 7764 return (RSMERR_CTLR_NOT_PRESENT); 7765 } 7766 7767 switch (cmd) { 7768 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7769 /* returns the hwaddr in msg->hwaddr */ 7770 if (msg->nodeid == my_nodeid) { 7771 msg->hwaddr = adapter->hwaddr; 7772 } else { 7773 addr = get_remote_hwaddr(adapter, msg->nodeid); 7774 if ((int64_t)addr < 0) { 7775 rval = RSMERR_INTERNAL_ERROR; 7776 } else { 7777 msg->hwaddr = addr; 7778 } 7779 } 7780 break; 7781 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7782 /* returns the nodeid in msg->nodeid */ 7783 if (msg->hwaddr == adapter->hwaddr) { 7784 msg->nodeid = my_nodeid; 7785 } else { 7786 node = get_remote_nodeid(adapter, msg->hwaddr); 7787 if ((int)node < 0) { 7788 rval = RSMERR_INTERNAL_ERROR; 7789 } else { 7790 msg->nodeid = (rsm_node_id_t)node; 7791 } 7792 } 7793 break; 7794 default: 7795 rval = EINVAL; 7796 break; 7797 } 7798 7799 rsmka_release_adapter(adapter); 7800 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7801 "rsmaddr_ioctl done: %d\n", rval)); 7802 return (rval); 7803 } 7804 7805 static int 7806 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7807 { 7808 DBG_DEFINE(category, 7809 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7810 7811 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7812 7813 #ifdef _MULTI_DATAMODEL 7814 7815 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7816 rsm_ioctlmsg32_t msg32; 7817 int i; 7818 7819 if 
(ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7820 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7821 "rsm_ddi_copyin done: EFAULT\n")); 7822 return (RSMERR_BAD_ADDR); 7823 } 7824 msg->len = msg32.len; 7825 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7826 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7827 msg->key = msg32.key; 7828 msg->acl_len = msg32.acl_len; 7829 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7830 msg->cnum = msg32.cnum; 7831 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7832 msg->cname_len = msg32.cname_len; 7833 msg->nodeid = msg32.nodeid; 7834 msg->hwaddr = msg32.hwaddr; 7835 msg->perm = msg32.perm; 7836 for (i = 0; i < 4; i++) { 7837 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7838 } 7839 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7840 "rsm_ddi_copyin done\n")); 7841 return (RSM_SUCCESS); 7842 } 7843 #endif 7844 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7845 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7846 return (RSMERR_BAD_ADDR); 7847 else 7848 return (RSM_SUCCESS); 7849 } 7850 7851 static int 7852 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7853 { 7854 rsmka_int_controller_attr_t rsm_cattr; 7855 DBG_DEFINE(category, 7856 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7857 7858 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7859 "rsmattr_ddi_copyout enter\n")); 7860 /* 7861 * need to copy appropriate data from rsm_controller_attr_t 7862 * to rsmka_int_controller_attr_t 7863 */ 7864 #ifdef _MULTI_DATAMODEL 7865 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7866 rsmka_int_controller_attr32_t rsm_cattr32; 7867 7868 rsm_cattr32.attr_direct_access_sizes = 7869 adapter->rsm_attr.attr_direct_access_sizes; 7870 rsm_cattr32.attr_atomic_sizes = 7871 adapter->rsm_attr.attr_atomic_sizes; 7872 rsm_cattr32.attr_page_size = 7873 adapter->rsm_attr.attr_page_size; 7874 if (adapter->rsm_attr.attr_max_export_segment_size > 7875 UINT_MAX) 7876 
rsm_cattr32.attr_max_export_segment_size = 7877 RSM_MAXSZ_PAGE_ALIGNED; 7878 else 7879 rsm_cattr32.attr_max_export_segment_size = 7880 adapter->rsm_attr.attr_max_export_segment_size; 7881 if (adapter->rsm_attr.attr_tot_export_segment_size > 7882 UINT_MAX) 7883 rsm_cattr32.attr_tot_export_segment_size = 7884 RSM_MAXSZ_PAGE_ALIGNED; 7885 else 7886 rsm_cattr32.attr_tot_export_segment_size = 7887 adapter->rsm_attr.attr_tot_export_segment_size; 7888 if (adapter->rsm_attr.attr_max_export_segments > 7889 UINT_MAX) 7890 rsm_cattr32.attr_max_export_segments = 7891 UINT_MAX; 7892 else 7893 rsm_cattr32.attr_max_export_segments = 7894 adapter->rsm_attr.attr_max_export_segments; 7895 if (adapter->rsm_attr.attr_max_import_map_size > 7896 UINT_MAX) 7897 rsm_cattr32.attr_max_import_map_size = 7898 RSM_MAXSZ_PAGE_ALIGNED; 7899 else 7900 rsm_cattr32.attr_max_import_map_size = 7901 adapter->rsm_attr.attr_max_import_map_size; 7902 if (adapter->rsm_attr.attr_tot_import_map_size > 7903 UINT_MAX) 7904 rsm_cattr32.attr_tot_import_map_size = 7905 RSM_MAXSZ_PAGE_ALIGNED; 7906 else 7907 rsm_cattr32.attr_tot_import_map_size = 7908 adapter->rsm_attr.attr_tot_import_map_size; 7909 if (adapter->rsm_attr.attr_max_import_segments > 7910 UINT_MAX) 7911 rsm_cattr32.attr_max_import_segments = 7912 UINT_MAX; 7913 else 7914 rsm_cattr32.attr_max_import_segments = 7915 adapter->rsm_attr.attr_max_import_segments; 7916 rsm_cattr32.attr_controller_addr = 7917 adapter->rsm_attr.attr_controller_addr; 7918 7919 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7920 "rsmattr_ddi_copyout done\n")); 7921 if (ddi_copyout((caddr_t)&rsm_cattr32, arg, 7922 sizeof (rsmka_int_controller_attr32_t), mode)) { 7923 return (RSMERR_BAD_ADDR); 7924 } 7925 else 7926 return (RSM_SUCCESS); 7927 } 7928 #endif 7929 rsm_cattr.attr_direct_access_sizes = 7930 adapter->rsm_attr.attr_direct_access_sizes; 7931 rsm_cattr.attr_atomic_sizes = 7932 adapter->rsm_attr.attr_atomic_sizes; 7933 rsm_cattr.attr_page_size = 7934 
adapter->rsm_attr.attr_page_size; 7935 rsm_cattr.attr_max_export_segment_size = 7936 adapter->rsm_attr.attr_max_export_segment_size; 7937 rsm_cattr.attr_tot_export_segment_size = 7938 adapter->rsm_attr.attr_tot_export_segment_size; 7939 rsm_cattr.attr_max_export_segments = 7940 adapter->rsm_attr.attr_max_export_segments; 7941 rsm_cattr.attr_max_import_map_size = 7942 adapter->rsm_attr.attr_max_import_map_size; 7943 rsm_cattr.attr_tot_import_map_size = 7944 adapter->rsm_attr.attr_tot_import_map_size; 7945 rsm_cattr.attr_max_import_segments = 7946 adapter->rsm_attr.attr_max_import_segments; 7947 rsm_cattr.attr_controller_addr = 7948 adapter->rsm_attr.attr_controller_addr; 7949 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7950 "rsmattr_ddi_copyout done\n")); 7951 if (ddi_copyout((caddr_t)&rsm_cattr, arg, 7952 sizeof (rsmka_int_controller_attr_t), mode)) { 7953 return (RSMERR_BAD_ADDR); 7954 } 7955 else 7956 return (RSM_SUCCESS); 7957 } 7958 7959 /*ARGSUSED*/ 7960 static int 7961 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 7962 int *rvalp) 7963 { 7964 rsmseg_t *seg; 7965 rsmresource_t *res; 7966 minor_t rnum; 7967 rsm_ioctlmsg_t msg = {0}; 7968 int error; 7969 adapter_t *adapter; 7970 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7971 7972 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n")); 7973 7974 if (cmd == RSM_IOCTL_CONSUMEEVENT) { 7975 error = rsm_consumeevent_ioctl((caddr_t)arg, mode); 7976 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7977 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error)); 7978 return (error); 7979 } 7980 7981 /* topology cmd does not use the arg common to other cmds */ 7982 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) { 7983 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode); 7984 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7985 "rsm_ioctl done: %d\n", error)); 7986 return (error); 7987 } 7988 7989 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) { 7990 error = rsm_iovec_ioctl(dev, 
(caddr_t)arg, cmd, mode, credp); 7991 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7992 "rsm_ioctl done: %d\n", error)); 7993 return (error); 7994 } 7995 7996 /* 7997 * try to load arguments 7998 */ 7999 if (cmd != RSM_IOCTL_RING_BELL && 8000 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) { 8001 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8002 "rsm_ioctl done: EFAULT\n")); 8003 return (RSMERR_BAD_ADDR); 8004 } 8005 8006 if (cmd == RSM_IOCTL_ATTR) { 8007 adapter = rsm_getadapter(&msg, mode); 8008 if (adapter == NULL) { 8009 DBG_PRINTF((category, RSM_DEBUG, 8010 "rsm_ioctl done: ENODEV\n")); 8011 return (RSMERR_CTLR_NOT_PRESENT); 8012 } 8013 error = rsmattr_ddi_copyout(adapter, msg.arg, mode); 8014 rsmka_release_adapter(adapter); 8015 DBG_PRINTF((category, RSM_DEBUG, 8016 "rsm_ioctl:after copyout %d\n", error)); 8017 return (error); 8018 } 8019 8020 if (cmd == RSM_IOCTL_BAR_INFO) { 8021 /* Return library off,len of barrier page */ 8022 msg.off = barrier_offset; 8023 msg.len = (int)barrier_size; 8024 #ifdef _MULTI_DATAMODEL 8025 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8026 rsm_ioctlmsg32_t msg32; 8027 8028 if (msg.len > UINT_MAX) 8029 msg.len = RSM_MAXSZ_PAGE_ALIGNED; 8030 else 8031 msg32.len = (int32_t)msg.len; 8032 msg32.off = (int32_t)msg.off; 8033 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8034 "rsm_ioctl done\n")); 8035 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8036 sizeof (msg32), mode)) 8037 return (RSMERR_BAD_ADDR); 8038 else 8039 return (RSM_SUCCESS); 8040 } 8041 #endif 8042 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8043 "rsm_ioctl done\n")); 8044 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8045 sizeof (msg), mode)) 8046 return (RSMERR_BAD_ADDR); 8047 else 8048 return (RSM_SUCCESS); 8049 } 8050 8051 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) { 8052 /* map the nodeid or hwaddr */ 8053 error = rsmaddr_ioctl(cmd, &msg, mode); 8054 if (error == RSM_SUCCESS) { 8055 #ifdef _MULTI_DATAMODEL 8056 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8057 
rsm_ioctlmsg32_t msg32; 8058 8059 msg32.hwaddr = (uint64_t)msg.hwaddr; 8060 msg32.nodeid = (uint32_t)msg.nodeid; 8061 8062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8063 "rsm_ioctl done\n")); 8064 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8065 sizeof (msg32), mode)) 8066 return (RSMERR_BAD_ADDR); 8067 else 8068 return (RSM_SUCCESS); 8069 } 8070 #endif 8071 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8072 "rsm_ioctl done\n")); 8073 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8074 sizeof (msg), mode)) 8075 return (RSMERR_BAD_ADDR); 8076 else 8077 return (RSM_SUCCESS); 8078 } 8079 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8080 "rsm_ioctl done: %d\n", error)); 8081 return (error); 8082 } 8083 8084 /* Find resource and look it in read mode */ 8085 rnum = getminor(dev); 8086 res = rsmresource_lookup(rnum, RSM_NOLOCK); 8087 ASSERT(res != NULL); 8088 8089 /* 8090 * Find command group 8091 */ 8092 switch (RSM_IOCTL_CMDGRP(cmd)) { 8093 case RSM_IOCTL_EXPORT_SEG: 8094 /* 8095 * Export list is searched during publish, loopback and 8096 * remote lookup call. 8097 */ 8098 seg = rsmresource_seg(res, rnum, credp, 8099 RSM_RESOURCE_EXPORT_SEGMENT); 8100 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) { 8101 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode, 8102 credp); 8103 } else { /* export ioctl on an import/barrier resource */ 8104 error = RSMERR_BAD_SEG_HNDL; 8105 } 8106 break; 8107 case RSM_IOCTL_IMPORT_SEG: 8108 /* Import list is searched during remote unmap call. 
*/ 8109 seg = rsmresource_seg(res, rnum, credp, 8110 RSM_RESOURCE_IMPORT_SEGMENT); 8111 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8112 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode, 8113 credp); 8114 } else { /* import ioctl on an export/barrier resource */ 8115 error = RSMERR_BAD_SEG_HNDL; 8116 } 8117 break; 8118 case RSM_IOCTL_BAR: 8119 if (res != RSMRC_RESERVED && 8120 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8121 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg, 8122 mode); 8123 } else { /* invalid res value */ 8124 error = RSMERR_BAD_SEG_HNDL; 8125 } 8126 break; 8127 case RSM_IOCTL_BELL: 8128 if (res != RSMRC_RESERVED) { 8129 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) 8130 error = exportbell_ioctl((rsmseg_t *)res, cmd); 8131 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT) 8132 error = importbell_ioctl((rsmseg_t *)res, cmd); 8133 else /* RSM_RESOURCE_BAR */ 8134 error = RSMERR_BAD_SEG_HNDL; 8135 } else { /* invalid res value */ 8136 error = RSMERR_BAD_SEG_HNDL; 8137 } 8138 break; 8139 default: 8140 error = EINVAL; 8141 } 8142 8143 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n", 8144 error)); 8145 return (error); 8146 } 8147 8148 8149 /* **************************** Segment Mapping Operations ********* */ 8150 static rsm_mapinfo_t * 8151 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset, 8152 size_t *map_len) 8153 { 8154 rsm_mapinfo_t *p; 8155 /* 8156 * Find the correct mapinfo structure to use during the mapping 8157 * from the seg->s_mapinfo list. 8158 * The seg->s_mapinfo list contains in reverse order the mappings 8159 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to 8160 * access the correct entry within this list for the mapping 8161 * requested. 8162 * 8163 * The algorithm for selecting a list entry is as follows: 8164 * 8165 * When start_offset of an entry <= off we have found the entry 8166 * we were looking for. 
Adjust the dev_offset and map_len (needs 8167 * to be PAGESIZE aligned). 8168 */ 8169 p = seg->s_mapinfo; 8170 for (; p; p = p->next) { 8171 if (p->start_offset <= off) { 8172 *dev_offset = p->dev_offset + off - p->start_offset; 8173 *map_len = (len > p->individual_len) ? 8174 p->individual_len : ptob(btopr(len)); 8175 return (p); 8176 } 8177 p = p->next; 8178 } 8179 8180 return (NULL); 8181 } 8182 8183 static void 8184 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo) 8185 { 8186 rsm_mapinfo_t *p; 8187 8188 while (mapinfo != NULL) { 8189 p = mapinfo; 8190 mapinfo = mapinfo->next; 8191 kmem_free(p, sizeof (*p)); 8192 } 8193 } 8194 8195 static int 8196 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off, 8197 size_t len, void **pvtp) 8198 { 8199 rsmcookie_t *p; 8200 rsmresource_t *res; 8201 rsmseg_t *seg; 8202 minor_t rnum; 8203 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8204 8205 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n")); 8206 8207 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8208 "rsmmap_map: dhp = %x\n", dhp)); 8209 8210 flags = flags; 8211 8212 rnum = getminor(dev); 8213 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8214 ASSERT(res != NULL); 8215 8216 seg = (rsmseg_t *)res; 8217 8218 rsmseglock_acquire(seg); 8219 8220 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8221 8222 /* 8223 * Allocate structure and add cookie to segment list 8224 */ 8225 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8226 8227 p->c_dhp = dhp; 8228 p->c_off = off; 8229 p->c_len = len; 8230 p->c_next = seg->s_ckl; 8231 seg->s_ckl = p; 8232 8233 *pvtp = (void *)seg; 8234 8235 rsmseglock_release(seg); 8236 8237 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n")); 8238 return (DDI_SUCCESS); 8239 } 8240 8241 /* 8242 * Page fault handling is done here. 
The prerequisite mapping setup 8243 * has been done in rsm_devmap with calls to ddi_devmem_setup or 8244 * ddi_umem_setup 8245 */ 8246 static int 8247 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len, 8248 uint_t type, uint_t rw) 8249 { 8250 int e; 8251 rsmseg_t *seg = (rsmseg_t *)pvt; 8252 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8253 8254 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n")); 8255 8256 rsmseglock_acquire(seg); 8257 8258 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8259 8260 while (seg->s_state == RSM_STATE_MAP_QUIESCE) { 8261 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8262 DBG_PRINTF((category, RSM_DEBUG, 8263 "rsmmap_access done: cv_wait INTR")); 8264 rsmseglock_release(seg); 8265 return (RSMERR_INTERRUPTED); 8266 } 8267 } 8268 8269 ASSERT(seg->s_state == RSM_STATE_DISCONNECT || 8270 seg->s_state == RSM_STATE_ACTIVE); 8271 8272 if (seg->s_state == RSM_STATE_DISCONNECT) 8273 seg->s_flags |= RSM_IMPORT_DUMMY; 8274 8275 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8276 "rsmmap_access: dhp = %x\n", dhp)); 8277 8278 rsmseglock_release(seg); 8279 8280 if (e = devmap_load(dhp, offset, len, type, rw)) { 8281 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n")); 8282 } 8283 8284 8285 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n")); 8286 8287 return (e); 8288 } 8289 8290 static int 8291 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp, 8292 void **newpvt) 8293 { 8294 rsmseg_t *seg = (rsmseg_t *)oldpvt; 8295 rsmcookie_t *p, *old; 8296 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8297 8298 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n")); 8299 8300 /* 8301 * Same as map, create an entry to hold cookie and add it to 8302 * connect segment list. The oldpvt is a pointer to segment. 8303 * Return segment pointer in newpvt. 
8304 */ 8305 rsmseglock_acquire(seg); 8306 8307 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8308 8309 /* 8310 * Find old cookie 8311 */ 8312 for (old = seg->s_ckl; old != NULL; old = old->c_next) { 8313 if (old->c_dhp == dhp) { 8314 break; 8315 } 8316 } 8317 if (old == NULL) { 8318 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8319 "rsmmap_dup done: EINVAL\n")); 8320 rsmseglock_release(seg); 8321 return (EINVAL); 8322 } 8323 8324 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8325 8326 p->c_dhp = new_dhp; 8327 p->c_off = old->c_off; 8328 p->c_len = old->c_len; 8329 p->c_next = seg->s_ckl; 8330 seg->s_ckl = p; 8331 8332 *newpvt = (void *)seg; 8333 8334 rsmseglock_release(seg); 8335 8336 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n")); 8337 8338 return (DDI_SUCCESS); 8339 } 8340 8341 static void 8342 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len, 8343 devmap_cookie_t new_dhp1, void **pvtp1, 8344 devmap_cookie_t new_dhp2, void **pvtp2) 8345 { 8346 /* 8347 * Remove pvtp structure from segment list. 8348 */ 8349 rsmseg_t *seg = (rsmseg_t *)pvtp; 8350 int freeflag; 8351 8352 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8353 8354 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n")); 8355 8356 off = off; len = len; 8357 pvtp1 = pvtp1; pvtp2 = pvtp2; 8358 8359 rsmseglock_acquire(seg); 8360 8361 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8362 8363 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8364 "rsmmap_unmap: dhp = %x\n", dhp)); 8365 /* 8366 * We can go ahead and remove the dhps even if we are in 8367 * the MAPPING state because the dhps being removed here 8368 * belong to a different mmap and we are holding the segment 8369 * lock. 
8370 */ 8371 if (new_dhp1 == NULL && new_dhp2 == NULL) { 8372 /* find and remove dhp handle */ 8373 rsmcookie_t *tmp, **back = &seg->s_ckl; 8374 8375 while (*back != NULL) { 8376 tmp = *back; 8377 if (tmp->c_dhp == dhp) { 8378 *back = tmp->c_next; 8379 kmem_free(tmp, sizeof (*tmp)); 8380 break; 8381 } 8382 back = &tmp->c_next; 8383 } 8384 } else { 8385 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8386 "rsmmap_unmap:parital unmap" 8387 "new_dhp1 %lx, new_dhp2 %lx\n", 8388 (size_t)new_dhp1, (size_t)new_dhp2)); 8389 } 8390 8391 /* 8392 * rsmmap_unmap is called for each mapping cookie on the list. 8393 * When the list becomes empty and we are not in the MAPPING 8394 * state then unmap in the rsmpi driver. 8395 */ 8396 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING)) 8397 (void) rsm_unmap(seg); 8398 8399 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) { 8400 freeflag = 1; 8401 } else { 8402 freeflag = 0; 8403 } 8404 8405 rsmseglock_release(seg); 8406 8407 if (freeflag) { 8408 /* Free the segment structure */ 8409 rsmseg_free(seg); 8410 } 8411 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n")); 8412 8413 } 8414 8415 static struct devmap_callback_ctl rsmmap_ops = { 8416 DEVMAP_OPS_REV, /* devmap_ops version number */ 8417 rsmmap_map, /* devmap_ops map routine */ 8418 rsmmap_access, /* devmap_ops access routine */ 8419 rsmmap_dup, /* devmap_ops dup routine */ 8420 rsmmap_unmap, /* devmap_ops unmap routine */ 8421 }; 8422 8423 static int 8424 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len, 8425 size_t *maplen, uint_t model /*ARGSUSED*/) 8426 { 8427 struct devmap_callback_ctl *callbackops = &rsmmap_ops; 8428 int err; 8429 uint_t maxprot; 8430 minor_t rnum; 8431 rsmseg_t *seg; 8432 off_t dev_offset; 8433 size_t cur_len; 8434 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8435 8436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n")); 8437 8438 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8439 
"rsm_devmap: off = %lx, len = %lx\n", off, len)); 8440 rnum = getminor(dev); 8441 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8442 ASSERT(seg != NULL); 8443 8444 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8445 if ((off == barrier_offset) && 8446 (len == barrier_size)) { 8447 8448 ASSERT(bar_va != NULL && bar_cookie != NULL); 8449 8450 /* 8451 * The offset argument in devmap_umem_setup represents 8452 * the offset within the kernel memory defined by the 8453 * cookie. We use this offset as barrier_offset. 8454 */ 8455 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie, 8456 barrier_offset, len, PROT_USER|PROT_READ, 8457 DEVMAP_DEFAULTS, 0); 8458 8459 if (err != 0) { 8460 DBG_PRINTF((category, RSM_ERR, 8461 "rsm_devmap done: %d\n", err)); 8462 return (RSMERR_MAP_FAILED); 8463 } 8464 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8465 "rsm_devmap done: %d\n", err)); 8466 8467 *maplen = barrier_size; 8468 8469 return (err); 8470 } else { 8471 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8472 "rsm_devmap done: %d\n", err)); 8473 return (RSMERR_MAP_FAILED); 8474 } 8475 } 8476 8477 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8478 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8479 8480 /* 8481 * Make sure we still have permission for the map operation. 8482 */ 8483 maxprot = PROT_USER; 8484 if (seg->s_mode & RSM_PERM_READ) { 8485 maxprot |= PROT_READ; 8486 } 8487 8488 if (seg->s_mode & RSM_PERM_WRITE) { 8489 maxprot |= PROT_WRITE; 8490 } 8491 8492 /* 8493 * For each devmap call, rsmmap_map is called. This maintains driver 8494 * private information for the mapping. Thus, if there are multiple 8495 * devmap calls there will be multiple rsmmap_map calls and for each 8496 * call, the mapping information will be stored. 8497 * In case of an error during the processing of the devmap call, error 8498 * will be returned. This error return causes the caller of rsm_devmap 8499 * to undo all the mappings by calling rsmmap_unmap for each one. 
8500 * rsmmap_unmap will free up the private information for the requested 8501 * mapping. 8502 */ 8503 if (seg->s_node != my_nodeid) { 8504 rsm_mapinfo_t *p; 8505 8506 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len); 8507 if (p == NULL) { 8508 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8509 "rsm_devmap: incorrect mapping info\n")); 8510 return (RSMERR_MAP_FAILED); 8511 } 8512 err = devmap_devmem_setup(dhc, p->dip, 8513 callbackops, p->dev_register, 8514 dev_offset, cur_len, maxprot, 8515 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0); 8516 8517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8518 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx," 8519 "off=%lx,len=%lx\n", 8520 p->dip, p->dev_register, dev_offset, off, cur_len)); 8521 8522 if (err != 0) { 8523 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8524 "rsm_devmap: devmap_devmem_setup failed %d\n", 8525 err)); 8526 return (RSMERR_MAP_FAILED); 8527 } 8528 /* cur_len is always an integral multiple pagesize */ 8529 ASSERT((cur_len & (PAGESIZE-1)) == 0); 8530 *maplen = cur_len; 8531 return (err); 8532 8533 } else { 8534 err = devmap_umem_setup(dhc, rsm_dip, callbackops, 8535 seg->s_cookie, off, len, maxprot, 8536 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0); 8537 if (err != 0) { 8538 DBG_PRINTF((category, RSM_DEBUG, 8539 "rsm_devmap: devmap_umem_setup failed %d\n", 8540 err)); 8541 return (RSMERR_MAP_FAILED); 8542 } 8543 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8544 "rsm_devmap: loopback done\n")); 8545 8546 *maplen = ptob(btopr(len)); 8547 8548 return (err); 8549 } 8550 } 8551 8552 /* 8553 * We can use the devmap framework for mapping device memory to user space by 8554 * specifying this routine in the rsm_cb_ops structure. 
The kernel mmap 8555 * processing calls this entry point and devmap_setup is called within this 8556 * function, which eventually calls rsm_devmap 8557 */ 8558 static int 8559 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 8560 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 8561 { 8562 int error = 0; 8563 int old_state; 8564 minor_t rnum; 8565 rsmseg_t *seg, *eseg; 8566 adapter_t *adapter; 8567 rsm_import_share_t *sharedp; 8568 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8569 8570 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n")); 8571 8572 /* 8573 * find segment 8574 */ 8575 rnum = getminor(dev); 8576 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 8577 8578 if (seg == NULL) { 8579 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8580 "rsm_segmap done: invalid segment\n")); 8581 return (EINVAL); 8582 } 8583 8584 /* 8585 * the user is trying to map a resource that has not been 8586 * defined yet. The library uses this to map in the 8587 * barrier page. 
8588 */ 8589 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8590 rsmseglock_release(seg); 8591 8592 /* 8593 * The mapping for the barrier page is identified 8594 * by the special offset barrier_offset 8595 */ 8596 8597 if (off == (off_t)barrier_offset || 8598 len == (off_t)barrier_size) { 8599 if (bar_cookie == NULL || bar_va == NULL) { 8600 DBG_PRINTF((category, RSM_DEBUG, 8601 "rsm_segmap: bar cookie/va is NULL\n")); 8602 return (EINVAL); 8603 } 8604 8605 error = devmap_setup(dev, (offset_t)off, as, addrp, 8606 (size_t)len, prot, maxprot, flags, cred); 8607 8608 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8609 "rsm_segmap done: %d\n", error)); 8610 return (error); 8611 } else { 8612 DBG_PRINTF((category, RSM_DEBUG, 8613 "rsm_segmap: bad offset/length\n")); 8614 return (EINVAL); 8615 } 8616 } 8617 8618 /* Make sure you can only map imported segments */ 8619 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) { 8620 rsmseglock_release(seg); 8621 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8622 "rsm_segmap done: not an import segment\n")); 8623 return (EINVAL); 8624 } 8625 /* check means library is broken */ 8626 ASSERT(seg->s_hdr.rsmrc_num == rnum); 8627 8628 /* wait for the segment to become unquiesced */ 8629 while (seg->s_state == RSM_STATE_CONN_QUIESCE) { 8630 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8631 rsmseglock_release(seg); 8632 DBG_PRINTF((category, RSM_DEBUG, 8633 "rsm_segmap done: cv_wait INTR")); 8634 return (ENODEV); 8635 } 8636 } 8637 8638 /* wait until segment leaves the mapping state */ 8639 while (seg->s_state == RSM_STATE_MAPPING) 8640 cv_wait(&seg->s_cv, &seg->s_lock); 8641 8642 /* 8643 * we allow multiple maps of the same segment in the KA 8644 * and it works because we do an rsmpi map of the whole 8645 * segment during the first map and all the device mapping 8646 * information needed in rsm_devmap is in the mapinfo list. 
8647 */ 8648 if ((seg->s_state != RSM_STATE_CONNECT) && 8649 (seg->s_state != RSM_STATE_ACTIVE)) { 8650 rsmseglock_release(seg); 8651 DBG_PRINTF((category, RSM_DEBUG, 8652 "rsm_segmap done: segment not connected\n")); 8653 return (ENODEV); 8654 } 8655 8656 /* 8657 * Make sure we are not mapping a larger segment than what's 8658 * exported 8659 */ 8660 if ((size_t)off + ptob(btopr(len)) > seg->s_len) { 8661 rsmseglock_release(seg); 8662 DBG_PRINTF((category, RSM_DEBUG, 8663 "rsm_segmap done: off+len>seg size\n")); 8664 return (ENXIO); 8665 } 8666 8667 /* 8668 * Make sure we still have permission for the map operation. 8669 */ 8670 maxprot = PROT_USER; 8671 if (seg->s_mode & RSM_PERM_READ) { 8672 maxprot |= PROT_READ; 8673 } 8674 8675 if (seg->s_mode & RSM_PERM_WRITE) { 8676 maxprot |= PROT_WRITE; 8677 } 8678 8679 if ((prot & maxprot) != prot) { 8680 /* No permission */ 8681 rsmseglock_release(seg); 8682 DBG_PRINTF((category, RSM_DEBUG, 8683 "rsm_segmap done: no permission\n")); 8684 return (EACCES); 8685 } 8686 8687 old_state = seg->s_state; 8688 8689 ASSERT(seg->s_share != NULL); 8690 8691 rsmsharelock_acquire(seg); 8692 8693 sharedp = seg->s_share; 8694 8695 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8696 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 8697 8698 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) && 8699 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) { 8700 rsmsharelock_release(seg); 8701 rsmseglock_release(seg); 8702 DBG_PRINTF((category, RSM_DEBUG, 8703 "rsm_segmap done:RSMSI_STATE %d invalid\n", 8704 sharedp->rsmsi_state)); 8705 return (ENODEV); 8706 } 8707 8708 /* 8709 * Do the map - since we want importers to share mappings 8710 * we do the rsmpi map for the whole segment 8711 */ 8712 if (seg->s_node != my_nodeid) { 8713 uint_t dev_register; 8714 off_t dev_offset; 8715 dev_info_t *dip; 8716 size_t tmp_len; 8717 size_t total_length_mapped = 0; 8718 size_t length_to_map = seg->s_len; 8719 off_t tmp_off = 0; 8720 rsm_mapinfo_t *p; 8721 8722 
/* 8723 * length_to_map = seg->s_len is always an integral 8724 * multiple of PAGESIZE. Length mapped in each entry in mapinfo 8725 * list is a multiple of PAGESIZE - RSMPI map ensures this 8726 */ 8727 8728 adapter = seg->s_adapter; 8729 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8730 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8731 8732 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) { 8733 error = 0; 8734 /* map the whole segment */ 8735 while (total_length_mapped < seg->s_len) { 8736 tmp_len = 0; 8737 8738 error = adapter->rsmpi_ops->rsm_map( 8739 seg->s_handle.in, tmp_off, 8740 length_to_map, &tmp_len, 8741 &dip, &dev_register, &dev_offset, 8742 NULL, NULL); 8743 8744 if (error != 0) 8745 break; 8746 8747 /* 8748 * Store the mapping info obtained from rsm_map 8749 */ 8750 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8751 p->dev_register = dev_register; 8752 p->dev_offset = dev_offset; 8753 p->dip = dip; 8754 p->individual_len = tmp_len; 8755 p->start_offset = tmp_off; 8756 p->next = sharedp->rsmsi_mapinfo; 8757 sharedp->rsmsi_mapinfo = p; 8758 8759 total_length_mapped += tmp_len; 8760 length_to_map -= tmp_len; 8761 tmp_off += tmp_len; 8762 } 8763 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8764 8765 if (error != RSM_SUCCESS) { 8766 /* Check if this is the the first rsm_map */ 8767 if (sharedp->rsmsi_mapinfo != NULL) { 8768 /* 8769 * A single rsm_unmap undoes 8770 * multiple rsm_maps. 
8771 */ 8772 (void) seg->s_adapter->rsmpi_ops-> 8773 rsm_unmap(sharedp->rsmsi_handle); 8774 rsm_free_mapinfo(sharedp-> 8775 rsmsi_mapinfo); 8776 } 8777 sharedp->rsmsi_mapinfo = NULL; 8778 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8779 rsmsharelock_release(seg); 8780 rsmseglock_release(seg); 8781 DBG_PRINTF((category, RSM_DEBUG, 8782 "rsm_segmap done: rsmpi map err %d\n", 8783 error)); 8784 ASSERT(error != RSMERR_BAD_LENGTH && 8785 error != RSMERR_BAD_MEM_ALIGNMENT && 8786 error != RSMERR_BAD_SEG_HNDL); 8787 if (error == RSMERR_UNSUPPORTED_OPERATION) 8788 return (ENOTSUP); 8789 else if (error == RSMERR_INSUFFICIENT_RESOURCES) 8790 return (EAGAIN); 8791 else if (error == RSMERR_CONN_ABORTED) 8792 return (ENODEV); 8793 else 8794 return (error); 8795 } else { 8796 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8797 } 8798 } else { 8799 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8800 } 8801 8802 sharedp->rsmsi_mapcnt++; 8803 8804 rsmsharelock_release(seg); 8805 8806 /* move to an intermediate mapping state */ 8807 seg->s_state = RSM_STATE_MAPPING; 8808 rsmseglock_release(seg); 8809 8810 error = devmap_setup(dev, (offset_t)off, as, addrp, 8811 len, prot, maxprot, flags, cred); 8812 8813 rsmseglock_acquire(seg); 8814 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8815 8816 if (error == DDI_SUCCESS) { 8817 seg->s_state = RSM_STATE_ACTIVE; 8818 } else { 8819 rsmsharelock_acquire(seg); 8820 8821 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8822 8823 sharedp->rsmsi_mapcnt--; 8824 if (sharedp->rsmsi_mapcnt == 0) { 8825 /* unmap the shared RSMPI mapping */ 8826 ASSERT(sharedp->rsmsi_handle != NULL); 8827 (void) adapter->rsmpi_ops-> 8828 rsm_unmap(sharedp->rsmsi_handle); 8829 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 8830 sharedp->rsmsi_mapinfo = NULL; 8831 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8832 } 8833 8834 rsmsharelock_release(seg); 8835 seg->s_state = old_state; 8836 DBG_PRINTF((category, RSM_ERR, 8837 "rsm: devmap_setup failed %d\n", error)); 8838 } 8839 
cv_broadcast(&seg->s_cv); 8840 rsmseglock_release(seg); 8841 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n", 8842 error)); 8843 return (error); 8844 } else { 8845 /* 8846 * For loopback, the export segment mapping cookie (s_cookie) 8847 * is also used as the s_cookie value for its import segments 8848 * during mapping. 8849 * Note that reference counting for s_cookie of the export 8850 * segment is not required due to the following: 8851 * We never have a case of the export segment being destroyed, 8852 * leaving the import segments with a stale value for the 8853 * s_cookie field, since a force disconnect is done prior to a 8854 * destroy of an export segment. The force disconnect causes 8855 * the s_cookie value to be reset to NULL. Also for the 8856 * rsm_rebind operation, we change the s_cookie value of the 8857 * export segment as well as of all its local (loopback) 8858 * importers. 8859 */ 8860 DBG_ADDCATEGORY(category, RSM_LOOPBACK); 8861 8862 rsmsharelock_release(seg); 8863 /* 8864 * In order to maintain the lock ordering between the export 8865 * and import segment locks, we need to acquire the export 8866 * segment lock first and only then acquire the import 8867 * segment lock. 8868 * The above is necessary to avoid any deadlock scenarios 8869 * with rsm_rebind which also acquires both the export 8870 * and import segment locks in the above mentioned order. 8871 * Based on code inspection, there seem to be no other 8872 * situations in which both the export and import segment 8873 * locks are acquired either in the same or opposite order 8874 * as mentioned above. 8875 * Thus in order to conform to the above lock order, we 8876 * need to change the state of the import segment to 8877 * RSM_STATE_MAPPING, release the lock. Once this is done we 8878 * can now safely acquire the export segment lock first 8879 * followed by the import segment lock which is as per 8880 * the lock order mentioned above. 
8881 */ 8882 /* move to an intermediate mapping state */ 8883 seg->s_state = RSM_STATE_MAPPING; 8884 rsmseglock_release(seg); 8885 8886 eseg = rsmexport_lookup(seg->s_key); 8887 8888 if (eseg == NULL) { 8889 rsmseglock_acquire(seg); 8890 /* 8891 * Revert to old_state and signal any waiters 8892 * The shared state is not changed 8893 */ 8894 8895 seg->s_state = old_state; 8896 cv_broadcast(&seg->s_cv); 8897 rsmseglock_release(seg); 8898 DBG_PRINTF((category, RSM_DEBUG, 8899 "rsm_segmap done: key %d not found\n", seg->s_key)); 8900 return (ENODEV); 8901 } 8902 8903 rsmsharelock_acquire(seg); 8904 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8905 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8906 8907 sharedp->rsmsi_mapcnt++; 8908 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8909 rsmsharelock_release(seg); 8910 8911 ASSERT(eseg->s_cookie != NULL); 8912 8913 /* 8914 * It is not required or necessary to acquire the import 8915 * segment lock here to change the value of s_cookie since 8916 * no one will touch the import segment as long as it is 8917 * in the RSM_STATE_MAPPING state. 
8918 */ 8919 seg->s_cookie = eseg->s_cookie; 8920 8921 rsmseglock_release(eseg); 8922 8923 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len, 8924 prot, maxprot, flags, cred); 8925 8926 rsmseglock_acquire(seg); 8927 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8928 if (error == 0) { 8929 seg->s_state = RSM_STATE_ACTIVE; 8930 } else { 8931 rsmsharelock_acquire(seg); 8932 8933 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8934 8935 sharedp->rsmsi_mapcnt--; 8936 if (sharedp->rsmsi_mapcnt == 0) { 8937 sharedp->rsmsi_mapinfo = NULL; 8938 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8939 } 8940 rsmsharelock_release(seg); 8941 seg->s_state = old_state; 8942 seg->s_cookie = NULL; 8943 } 8944 cv_broadcast(&seg->s_cv); 8945 rsmseglock_release(seg); 8946 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8947 "rsm_segmap done: %d\n", error)); 8948 return (error); 8949 } 8950 } 8951 8952 int 8953 rsmka_null_seg_create( 8954 rsm_controller_handle_t argcp, 8955 rsm_memseg_export_handle_t *handle, 8956 size_t size, 8957 uint_t flags, 8958 rsm_memory_local_t *memory, 8959 rsm_resource_callback_t callback, 8960 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8961 { 8962 return (RSM_SUCCESS); 8963 } 8964 8965 8966 int 8967 rsmka_null_seg_destroy( 8968 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 8969 { 8970 return (RSM_SUCCESS); 8971 } 8972 8973 8974 int 8975 rsmka_null_bind( 8976 rsm_memseg_export_handle_t argmemseg, 8977 off_t offset, 8978 rsm_memory_local_t *argmemory, 8979 rsm_resource_callback_t callback, 8980 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8981 { 8982 return (RSM_SUCCESS); 8983 } 8984 8985 8986 int 8987 rsmka_null_unbind( 8988 rsm_memseg_export_handle_t argmemseg, 8989 off_t offset, 8990 size_t length /*ARGSUSED*/) 8991 { 8992 return (DDI_SUCCESS); 8993 } 8994 8995 int 8996 rsmka_null_rebind( 8997 rsm_memseg_export_handle_t argmemseg, 8998 off_t offset, 8999 rsm_memory_local_t *memory, 9000 rsm_resource_callback_t callback, 9001 
rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9002 { 9003 return (RSM_SUCCESS); 9004 } 9005 9006 int 9007 rsmka_null_publish( 9008 rsm_memseg_export_handle_t argmemseg, 9009 rsm_access_entry_t access_list[], 9010 uint_t access_list_length, 9011 rsm_memseg_id_t segment_id, 9012 rsm_resource_callback_t callback, 9013 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9014 { 9015 return (RSM_SUCCESS); 9016 } 9017 9018 9019 int 9020 rsmka_null_republish( 9021 rsm_memseg_export_handle_t memseg, 9022 rsm_access_entry_t access_list[], 9023 uint_t access_list_length, 9024 rsm_resource_callback_t callback, 9025 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9026 { 9027 return (RSM_SUCCESS); 9028 } 9029 9030 int 9031 rsmka_null_unpublish( 9032 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 9033 { 9034 return (RSM_SUCCESS); 9035 } 9036 9037 9038 void 9039 rsmka_init_loopback() 9040 { 9041 rsm_ops_t *ops = &null_rsmpi_ops; 9042 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK); 9043 9044 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9045 "rsmka_init_loopback enter\n")); 9046 9047 /* initialize null ops vector */ 9048 ops->rsm_seg_create = rsmka_null_seg_create; 9049 ops->rsm_seg_destroy = rsmka_null_seg_destroy; 9050 ops->rsm_bind = rsmka_null_bind; 9051 ops->rsm_unbind = rsmka_null_unbind; 9052 ops->rsm_rebind = rsmka_null_rebind; 9053 ops->rsm_publish = rsmka_null_publish; 9054 ops->rsm_unpublish = rsmka_null_unpublish; 9055 ops->rsm_republish = rsmka_null_republish; 9056 9057 /* initialize attributes for loopback adapter */ 9058 loopback_attr.attr_name = loopback_str; 9059 loopback_attr.attr_page_size = 0x8; /* 8K */ 9060 9061 /* initialize loopback adapter */ 9062 loopback_adapter.rsm_attr = loopback_attr; 9063 loopback_adapter.rsmpi_ops = &null_rsmpi_ops; 9064 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9065 "rsmka_init_loopback done\n")); 9066 } 9067 9068 /* ************** DR functions ********************************** */ 9069 static 
void 9070 rsm_quiesce_exp_seg(rsmresource_t *resp) 9071 { 9072 int recheck_state; 9073 rsmseg_t *segp = (rsmseg_t *)resp; 9074 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9075 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9076 9077 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9078 "%s enter: key=%u\n", function, segp->s_key)); 9079 9080 rsmseglock_acquire(segp); 9081 do { 9082 recheck_state = 0; 9083 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) || 9084 (segp->s_state == RSM_STATE_BIND_QUIESCED) || 9085 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) || 9086 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) { 9087 rsmseglock_release(segp); 9088 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9089 "%s done:state =%d\n", function, 9090 segp->s_state)); 9091 return; 9092 } 9093 9094 if (segp->s_state == RSM_STATE_NEW) { 9095 segp->s_state = RSM_STATE_NEW_QUIESCED; 9096 rsmseglock_release(segp); 9097 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9098 "%s done:state =%d\n", function, 9099 segp->s_state)); 9100 return; 9101 } 9102 9103 if (segp->s_state == RSM_STATE_BIND) { 9104 /* unbind */ 9105 (void) rsm_unbind_pages(segp); 9106 segp->s_state = RSM_STATE_BIND_QUIESCED; 9107 rsmseglock_release(segp); 9108 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9109 "%s done:state =%d\n", function, 9110 segp->s_state)); 9111 return; 9112 } 9113 9114 if (segp->s_state == RSM_STATE_EXPORT) { 9115 /* 9116 * wait for putv/getv to complete if the segp is 9117 * a local memory handle 9118 */ 9119 while ((segp->s_state == RSM_STATE_EXPORT) && 9120 (segp->s_rdmacnt != 0)) { 9121 cv_wait(&segp->s_cv, &segp->s_lock); 9122 } 9123 9124 if (segp->s_state != RSM_STATE_EXPORT) { 9125 /* 9126 * state changed need to see what it 9127 * should be changed to. 
9128 */ 9129 recheck_state = 1; 9130 continue; 9131 } 9132 9133 segp->s_state = RSM_STATE_EXPORT_QUIESCING; 9134 rsmseglock_release(segp); 9135 /* 9136 * send SUSPEND messages - currently it will be 9137 * done at the end 9138 */ 9139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9140 "%s done:state =%d\n", function, 9141 segp->s_state)); 9142 return; 9143 } 9144 } while (recheck_state); 9145 9146 rsmseglock_release(segp); 9147 } 9148 9149 static void 9150 rsm_unquiesce_exp_seg(rsmresource_t *resp) 9151 { 9152 int ret; 9153 rsmseg_t *segp = (rsmseg_t *)resp; 9154 rsmapi_access_entry_t *acl; 9155 rsm_access_entry_t *rsmpi_acl; 9156 int acl_len; 9157 int create_flags = 0; 9158 struct buf *xbuf; 9159 rsm_memory_local_t mem; 9160 adapter_t *adapter; 9161 dev_t sdev = 0; 9162 rsm_resource_callback_t callback_flag; 9163 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9164 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9165 9166 rsmseglock_acquire(segp); 9167 9168 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9169 "%s enter: key=%u, state=%d\n", function, segp->s_key, 9170 segp->s_state)); 9171 9172 if ((segp->s_state == RSM_STATE_NEW) || 9173 (segp->s_state == RSM_STATE_BIND) || 9174 (segp->s_state == RSM_STATE_EXPORT)) { 9175 rsmseglock_release(segp); 9176 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9177 function, segp->s_state)); 9178 return; 9179 } 9180 9181 if (segp->s_state == RSM_STATE_NEW_QUIESCED) { 9182 segp->s_state = RSM_STATE_NEW; 9183 cv_broadcast(&segp->s_cv); 9184 rsmseglock_release(segp); 9185 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9186 function, segp->s_state)); 9187 return; 9188 } 9189 9190 if (segp->s_state == RSM_STATE_BIND_QUIESCED) { 9191 /* bind the segment */ 9192 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9193 segp->s_len, segp->s_proc); 9194 if (ret == RSM_SUCCESS) { /* bind successful */ 9195 segp->s_state = RSM_STATE_BIND; 9196 } else { /* bind failed - resource unavailable */ 9197 
segp->s_state = RSM_STATE_NEW; 9198 } 9199 cv_broadcast(&segp->s_cv); 9200 rsmseglock_release(segp); 9201 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9202 "%s done: bind_qscd bind = %d\n", function, ret)); 9203 return; 9204 } 9205 9206 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) { 9207 /* wait for the segment to move to EXPORT_QUIESCED state */ 9208 cv_wait(&segp->s_cv, &segp->s_lock); 9209 } 9210 9211 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) { 9212 /* bind the segment */ 9213 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9214 segp->s_len, segp->s_proc); 9215 9216 if (ret != RSM_SUCCESS) { 9217 /* bind failed - resource unavailable */ 9218 acl_len = segp->s_acl_len; 9219 acl = segp->s_acl; 9220 rsmpi_acl = segp->s_acl_in; 9221 segp->s_acl_len = 0; 9222 segp->s_acl = NULL; 9223 segp->s_acl_in = NULL; 9224 rsmseglock_release(segp); 9225 9226 rsmexport_rm(segp); 9227 rsmacl_free(acl, acl_len); 9228 rsmpiacl_free(rsmpi_acl, acl_len); 9229 9230 rsmseglock_acquire(segp); 9231 segp->s_state = RSM_STATE_NEW; 9232 cv_broadcast(&segp->s_cv); 9233 rsmseglock_release(segp); 9234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9235 "%s done: exp_qscd bind failed = %d\n", 9236 function, ret)); 9237 return; 9238 } 9239 /* 9240 * publish the segment 9241 * if successful 9242 * segp->s_state = RSM_STATE_EXPORT; 9243 * else failed 9244 * segp->s_state = RSM_STATE_BIND; 9245 */ 9246 9247 /* check whether it is a local_memory_handle */ 9248 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) { 9249 if ((segp->s_acl[0].ae_node == my_nodeid) && 9250 (segp->s_acl[0].ae_permission == 0)) { 9251 segp->s_state = RSM_STATE_EXPORT; 9252 cv_broadcast(&segp->s_cv); 9253 rsmseglock_release(segp); 9254 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9255 "%s done:exp_qscd\n", function)); 9256 return; 9257 } 9258 } 9259 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE, 9260 sdev, 0, NULL, DDI_UMEM_SLEEP); 9261 ASSERT(xbuf != NULL); 9262 9263 mem.ms_type = RSM_MEM_BUF; 
9264 mem.ms_bp = xbuf; 9265 9266 adapter = segp->s_adapter; 9267 9268 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 9269 create_flags = RSM_ALLOW_UNBIND_REBIND; 9270 } 9271 9272 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 9273 callback_flag = RSM_RESOURCE_DONTWAIT; 9274 } else { 9275 callback_flag = RSM_RESOURCE_SLEEP; 9276 } 9277 9278 ret = adapter->rsmpi_ops->rsm_seg_create( 9279 adapter->rsmpi_handle, &segp->s_handle.out, 9280 segp->s_len, create_flags, &mem, 9281 callback_flag, NULL); 9282 9283 if (ret != RSM_SUCCESS) { 9284 acl_len = segp->s_acl_len; 9285 acl = segp->s_acl; 9286 rsmpi_acl = segp->s_acl_in; 9287 segp->s_acl_len = 0; 9288 segp->s_acl = NULL; 9289 segp->s_acl_in = NULL; 9290 rsmseglock_release(segp); 9291 9292 rsmexport_rm(segp); 9293 rsmacl_free(acl, acl_len); 9294 rsmpiacl_free(rsmpi_acl, acl_len); 9295 9296 rsmseglock_acquire(segp); 9297 segp->s_state = RSM_STATE_BIND; 9298 cv_broadcast(&segp->s_cv); 9299 rsmseglock_release(segp); 9300 DBG_PRINTF((category, RSM_ERR, 9301 "%s done: exp_qscd create failed = %d\n", 9302 function, ret)); 9303 return; 9304 } 9305 9306 ret = adapter->rsmpi_ops->rsm_publish( 9307 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len, 9308 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL); 9309 9310 if (ret != RSM_SUCCESS) { 9311 acl_len = segp->s_acl_len; 9312 acl = segp->s_acl; 9313 rsmpi_acl = segp->s_acl_in; 9314 segp->s_acl_len = 0; 9315 segp->s_acl = NULL; 9316 segp->s_acl_in = NULL; 9317 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out); 9318 rsmseglock_release(segp); 9319 9320 rsmexport_rm(segp); 9321 rsmacl_free(acl, acl_len); 9322 rsmpiacl_free(rsmpi_acl, acl_len); 9323 9324 rsmseglock_acquire(segp); 9325 segp->s_state = RSM_STATE_BIND; 9326 cv_broadcast(&segp->s_cv); 9327 rsmseglock_release(segp); 9328 DBG_PRINTF((category, RSM_ERR, 9329 "%s done: exp_qscd publish failed = %d\n", 9330 function, ret)); 9331 return; 9332 } 9333 9334 segp->s_state = RSM_STATE_EXPORT; 9335 cv_broadcast(&segp->s_cv); 
9336 rsmseglock_release(segp); 9337 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n", 9338 function)); 9339 return; 9340 } 9341 9342 rsmseglock_release(segp); 9343 9344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9345 } 9346 9347 static void 9348 rsm_quiesce_imp_seg(rsmresource_t *resp) 9349 { 9350 rsmseg_t *segp = (rsmseg_t *)resp; 9351 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9352 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg"); 9353 9354 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9355 "%s enter: key=%u\n", function, segp->s_key)); 9356 9357 rsmseglock_acquire(segp); 9358 segp->s_flags |= RSM_DR_INPROGRESS; 9359 9360 while (segp->s_rdmacnt != 0) { 9361 /* wait for the RDMA to complete */ 9362 cv_wait(&segp->s_cv, &segp->s_lock); 9363 } 9364 9365 rsmseglock_release(segp); 9366 9367 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9368 9369 } 9370 9371 static void 9372 rsm_unquiesce_imp_seg(rsmresource_t *resp) 9373 { 9374 rsmseg_t *segp = (rsmseg_t *)resp; 9375 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9376 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg"); 9377 9378 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9379 "%s enter: key=%u\n", function, segp->s_key)); 9380 9381 rsmseglock_acquire(segp); 9382 9383 segp->s_flags &= ~RSM_DR_INPROGRESS; 9384 /* wake up any waiting putv/getv ops */ 9385 cv_broadcast(&segp->s_cv); 9386 9387 rsmseglock_release(segp); 9388 9389 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9390 9391 9392 } 9393 9394 static void 9395 rsm_process_exp_seg(rsmresource_t *resp, int event) 9396 { 9397 if (event == RSM_DR_QUIESCE) 9398 rsm_quiesce_exp_seg(resp); 9399 else /* UNQUIESCE */ 9400 rsm_unquiesce_exp_seg(resp); 9401 } 9402 9403 static void 9404 rsm_process_imp_seg(rsmresource_t *resp, int event) 9405 { 9406 if (event == RSM_DR_QUIESCE) 9407 rsm_quiesce_imp_seg(resp); 9408 else /* UNQUIESCE */ 9409 rsm_unquiesce_imp_seg(resp); 9410 } 9411 9412 static 
void 9413 rsm_dr_process_local_segments(int event) 9414 { 9415 9416 int i, j; 9417 rsmresource_blk_t *blk; 9418 rsmresource_t *p; 9419 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9420 9421 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9422 "rsm_dr_process_local_segments enter\n")); 9423 9424 /* iterate through the resource structure */ 9425 9426 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 9427 9428 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 9429 blk = rsm_resource.rsmrc_root[i]; 9430 if (blk != NULL) { 9431 for (j = 0; j < RSMRC_BLKSZ; j++) { 9432 p = blk->rsmrcblk_blks[j]; 9433 if ((p != NULL) && (p != RSMRC_RESERVED)) { 9434 /* valid resource */ 9435 if (p->rsmrc_type == 9436 RSM_RESOURCE_EXPORT_SEGMENT) 9437 rsm_process_exp_seg(p, event); 9438 else if (p->rsmrc_type == 9439 RSM_RESOURCE_IMPORT_SEGMENT) 9440 rsm_process_imp_seg(p, event); 9441 } 9442 } 9443 } 9444 } 9445 9446 rw_exit(&rsm_resource.rsmrc_lock); 9447 9448 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9449 "rsm_dr_process_local_segments done\n")); 9450 } 9451 9452 /* *************** DR callback functions ************ */ 9453 static void 9454 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */) 9455 { 9456 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9457 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9458 "rsm_dr_callback_post_add is a no-op\n")); 9459 /* Noop */ 9460 } 9461 9462 static int 9463 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */) 9464 { 9465 int recheck_state = 0; 9466 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9467 9468 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9469 "rsm_dr_callback_pre_del enter\n")); 9470 9471 mutex_enter(&rsm_drv_data.drv_lock); 9472 9473 do { 9474 recheck_state = 0; 9475 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9476 "rsm_dr_callback_pre_del:state=%d\n", 9477 rsm_drv_data.drv_state)); 9478 9479 switch (rsm_drv_data.drv_state) { 9480 case RSM_DRV_NEW: 9481 /* 9482 * The state should usually never be RSM_DRV_NEW 
9483 * since in this state the callbacks have not yet 9484 * been registered. So, ASSERT. 9485 */ 9486 ASSERT(0); 9487 return (0); 9488 case RSM_DRV_REG_PROCESSING: 9489 /* 9490 * The driver is in the process of registering 9491 * with the DR framework. So, wait till the 9492 * registration process is complete. 9493 */ 9494 recheck_state = 1; 9495 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9496 break; 9497 case RSM_DRV_UNREG_PROCESSING: 9498 /* 9499 * If the state is RSM_DRV_UNREG_PROCESSING, the 9500 * module is in the process of detaching and 9501 * unregistering the callbacks from the DR 9502 * framework. So, simply return. 9503 */ 9504 mutex_exit(&rsm_drv_data.drv_lock); 9505 DBG_PRINTF((category, RSM_DEBUG, 9506 "rsm_dr_callback_pre_del:" 9507 "pre-del on NEW/UNREG\n")); 9508 return (0); 9509 case RSM_DRV_OK: 9510 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED; 9511 break; 9512 case RSM_DRV_PREDEL_STARTED: 9513 /* FALLTHRU */ 9514 case RSM_DRV_PREDEL_COMPLETED: 9515 /* FALLTHRU */ 9516 case RSM_DRV_POSTDEL_IN_PROGRESS: 9517 recheck_state = 1; 9518 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9519 break; 9520 case RSM_DRV_DR_IN_PROGRESS: 9521 rsm_drv_data.drv_memdel_cnt++; 9522 mutex_exit(&rsm_drv_data.drv_lock); 9523 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9524 "rsm_dr_callback_pre_del done\n")); 9525 return (0); 9526 /* break; */ 9527 default: 9528 ASSERT(0); 9529 break; 9530 } 9531 9532 } while (recheck_state); 9533 9534 rsm_drv_data.drv_memdel_cnt++; 9535 9536 mutex_exit(&rsm_drv_data.drv_lock); 9537 9538 /* Do all the quiescing stuff here */ 9539 DBG_PRINTF((category, RSM_DEBUG, 9540 "rsm_dr_callback_pre_del: quiesce things now\n")); 9541 9542 rsm_dr_process_local_segments(RSM_DR_QUIESCE); 9543 9544 /* 9545 * now that all local segments have been quiesced lets inform 9546 * the importers 9547 */ 9548 rsm_send_suspend(); 9549 9550 /* 9551 * In response to the suspend message the remote node(s) will process 9552 * the segments and 
send a suspend_complete message. Till all 9553 * the nodes send the suspend_complete message we wait in the 9554 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce 9555 * function we transition to the RSM_DRV_PREDEL_COMPLETED state. 9556 */ 9557 mutex_enter(&rsm_drv_data.drv_lock); 9558 9559 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) { 9560 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9561 } 9562 9563 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED); 9564 9565 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS; 9566 cv_broadcast(&rsm_drv_data.drv_cv); 9567 9568 mutex_exit(&rsm_drv_data.drv_lock); 9569 9570 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9571 "rsm_dr_callback_pre_del done\n")); 9572 9573 return (0); 9574 } 9575 9576 static void 9577 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */) 9578 { 9579 int recheck_state = 0; 9580 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9581 9582 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9583 "rsm_dr_callback_post_del enter\n")); 9584 9585 mutex_enter(&rsm_drv_data.drv_lock); 9586 9587 do { 9588 recheck_state = 0; 9589 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9590 "rsm_dr_callback_post_del:state=%d\n", 9591 rsm_drv_data.drv_state)); 9592 9593 switch (rsm_drv_data.drv_state) { 9594 case RSM_DRV_NEW: 9595 /* 9596 * The driver state cannot not be RSM_DRV_NEW 9597 * since in this state the callbacks have not 9598 * yet been registered. 9599 */ 9600 ASSERT(0); 9601 return; 9602 case RSM_DRV_REG_PROCESSING: 9603 /* 9604 * The driver is in the process of registering with 9605 * the DR framework. Wait till the registration is 9606 * complete. 9607 */ 9608 recheck_state = 1; 9609 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9610 break; 9611 case RSM_DRV_UNREG_PROCESSING: 9612 /* 9613 * RSM_DRV_UNREG_PROCESSING state means the module 9614 * is detaching and unregistering the callbacks 9615 * from the DR framework. So simply return. 
9616 */ 9617 /* FALLTHRU */ 9618 case RSM_DRV_OK: 9619 /* 9620 * RSM_DRV_OK means we missed the pre-del 9621 * corresponding to this post-del coz we had not 9622 * registered yet, so simply return. 9623 */ 9624 mutex_exit(&rsm_drv_data.drv_lock); 9625 DBG_PRINTF((category, RSM_DEBUG, 9626 "rsm_dr_callback_post_del:" 9627 "post-del on OK/UNREG\n")); 9628 return; 9629 /* break; */ 9630 case RSM_DRV_PREDEL_STARTED: 9631 /* FALLTHRU */ 9632 case RSM_DRV_PREDEL_COMPLETED: 9633 /* FALLTHRU */ 9634 case RSM_DRV_POSTDEL_IN_PROGRESS: 9635 recheck_state = 1; 9636 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9637 break; 9638 case RSM_DRV_DR_IN_PROGRESS: 9639 rsm_drv_data.drv_memdel_cnt--; 9640 if (rsm_drv_data.drv_memdel_cnt > 0) { 9641 mutex_exit(&rsm_drv_data.drv_lock); 9642 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9643 "rsm_dr_callback_post_del done:\n")); 9644 return; 9645 } 9646 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS; 9647 break; 9648 default: 9649 ASSERT(0); 9650 return; 9651 /* break; */ 9652 } 9653 } while (recheck_state); 9654 9655 mutex_exit(&rsm_drv_data.drv_lock); 9656 9657 /* Do all the unquiescing stuff here */ 9658 DBG_PRINTF((category, RSM_DEBUG, 9659 "rsm_dr_callback_post_del: unquiesce things now\n")); 9660 9661 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE); 9662 9663 /* 9664 * now that all local segments have been unquiesced lets inform 9665 * the importers 9666 */ 9667 rsm_send_resume(); 9668 9669 mutex_enter(&rsm_drv_data.drv_lock); 9670 9671 rsm_drv_data.drv_state = RSM_DRV_OK; 9672 9673 cv_broadcast(&rsm_drv_data.drv_cv); 9674 9675 mutex_exit(&rsm_drv_data.drv_lock); 9676 9677 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9678 "rsm_dr_callback_post_del done\n")); 9679 9680 return; 9681 9682 } 9683