/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 * Copyright 2017 Joyent, Inc.
 */


/*
 * Overview of the RSM Kernel Agent:
 * ---------------------------------
 *
 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
 * kernel agent is a pseudo device driver which makes use of the RSMPI
 * interface on behalf of the RSMAPI user library.
 *
 * The kernel agent functionality can be categorized into the following
 * components:
 * 1. Driver Infrastructure
 * 2. Export/Import Segment Management
 * 3. Internal resource allocation/deallocation
 *
 * The driver infrastructure includes the basic module loading entry points
 * like _init, _info, _fini to load, unload and report information about
 * the driver module. The driver infrastructure also includes the
 * autoconfiguration entry points namely, attach, detach and getinfo for
 * the device autoconfiguration.
 *
 * The kernel agent is a pseudo character device driver and exports
 * a cb_ops structure which defines the driver entry points for character
 * device access. This includes the open and close entry points. The
 * other entry points provided include ioctl, devmap and segmap and chpoll.
 * read and write entry points are not used since the device is memory
 * mapped. Also ddi_prop_op is used for the prop_op entry point.
 *
 * The ioctl entry point supports a number of commands, which are used by
 * the RSMAPI library in order to export and import segments. These
 * commands include commands for binding and rebinding the physical pages
 * allocated to the virtual address range, publishing the export segment,
 * unpublishing and republishing an export segment, creating an
 * import segment and a virtual connection from this import segment to
 * an export segment, performing scatter-gather data transfer, and barrier
 * operations.
 *
 *
 * Export and Import segments:
 * ---------------------------
 *
 * In order to create an RSM export segment a process allocates a range in its
 * virtual address space for the segment using standard Solaris interfaces.
 * The process then calls RSMAPI, which in turn makes an ioctl call to the
 * RSM kernel agent for an allocation of physical memory pages and for
 * creation of the export segment by binding these pages to the virtual
 * address range. These pages are locked in memory so that remote accesses
 * are always applied to the correct page. Then the RSM segment is published,
 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
 * is assigned to it.
 *
 * In order to import a published RSM segment, RSMAPI creates an import
 * segment and forms a virtual connection across the interconnect to the
 * export segment, via an ioctl into the kernel agent with the connect
 * command.
 * The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
 * mapping of the import segment is handled by the segmap/devmap
 * infrastructure described as follows.
 *
 * Segmap and Devmap interfaces:
 *
 * The RSM kernel agent allows device memory to be directly accessed by user
 * threads via memory mapping. In order to do so, the RSM kernel agent
 * supports the devmap and segmap entry points.
 *
 * The segmap entry point(rsm_segmap) is responsible for setting up a memory
 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
 * responsible for exporting the device memory to the user applications.
 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
 * control is transferred to the devmap_setup call which calls rsm_devmap.
 *
 * rsm_devmap validates the user mapping to the device or kernel memory
 * and passes the information to the system for setting up the mapping. The
 * actual setting up of the mapping is done by devmap_devmem_setup(for
 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
 * registered for device context management via the devmap_devmem_setup
 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
 * is created, a mapping is freed, a mapping is accessed or an existing
 * mapping is duplicated respectively. These callbacks allow the RSM kernel
 * agent to maintain state information associated with the mappings.
 * The state information is mainly in the form of a cookie list for the import
 * segment for which mapping has been done.
 *
 * Forced disconnect of import segments:
 *
 * When an exported segment is unpublished, the exporter sends a forced
 * disconnect message to all its importers. The importer segments are
 * unloaded and disconnected.
 * This involves unloading the original
 * mappings and remapping to a preallocated kernel trash page. This is
 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 * preallocated by the kernel agent during attach using ddi_umem_alloc with
 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 * due to unloading of the original mappings.
 *
 * Additionally every segment has a mapping generation number associated
 * with it. This is an entry in the barrier generation page, created
 * during attach time. This mapping generation number for the import
 * segments is incremented on a force disconnect to notify the application
 * of the force disconnect. On this notification, the application needs
 * to reconnect the segment to establish a new legitimate mapping.
 *
 *
 * Locks used in the kernel agent:
 * -------------------------------
 *
 * The kernel agent uses a variety of mutexes and condition variables for
 * mutual exclusion of the shared data structures and for synchronization
 * between the various threads. Some of the locks are described as follows.
 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it. The lock is the resource mutex,
 * rsmrc_lock.
 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
 * rsmseglock_acquire and rsmseglock_release macros. An additional
 * lock called the rsmsi_lock is used for the shared import data structure
 * that is relevant for resources representing import segments. There is
 * also a condition variable associated with the resource called s_cv. This
 * is used to wait for events like the segment state change etc.
 *
 * The resource structures are allocated from a pool of resource structures,
 * called rsm_resource. This pool is protected via a reader-writer lock,
 * called rsmrc_lock.
 *
 * There are two separate hash tables, one for the export segments and
 * one for the import segments. The export segments are inserted into the
 * export segment hash table only after they have been published and the
 * import segments are inserted in the import segments list only after they
 * have successfully connected to an exported segment. These tables are
 * protected via reader-writer locks.
 *
 * Debug Support in the kernel agent:
 * ----------------------------------
 *
 * Debugging support in the kernel agent is provided by the following
 * macros.
 *
 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 * on the definition of the category and level. All messages that belong to
 * the specified category(rsmdbg_category) and are of an equal or greater
 * severity than the specified level(rsmdbg_level) are logged. The message
 * is a string which uses the same formatting rules as the strings used in
 * printf.
 *
 * The category defines which component of the kernel agent has logged this
 * message. There are a number of categories that have been defined such as
 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
 * DBG_ADDCATEGORY is used to add in another category to the currently
 * specified category value so that the component using this new category
 * can also effectively log debug messages. Thus, the category of a specific
 * message is some combination of the available categories and we can define
 * sub-categories if we want a finer level of granularity.
 *
 * The level defines the severity of the message. Different level values are
 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 * the least severe(debug level is 0).
 *
 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 * variable or a string respectively.
 *
 *
 * NOTES:
 *
 * Special Fork and Exec Handling:
 * -------------------------------
 *
 * The backing physical pages of an exported segment are always locked down.
 * Thus, there are two cases in which a process having exported segments
 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 * forks and invokes exit before the duped file descriptors for the export
 * segments are closed in the child process. The hang is caused because the
 * address space release algorithm in Solaris VM subsystem is based on a
 * non-blocking loop which does not terminate while segments are locked
 * down. In addition to this, Solaris VM subsystem lacks a callback
 * mechanism to the rsm kernel agent to allow unlocking these export
 * segment pages.
 *
 * In order to circumvent this problem, the kernel agent does the following.
 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page(special_exit_offset) is allocated
 * by the kernel agent and is mmapped into the heap area of the process address
 * space(the mmap is done by the RSMAPI library). During the mmap processing
 * of this special page by the devmap infrastructure, a callback(the same
 * devmap context management callbacks discussed above) is registered for an
 * unmap.
 *
 * As discussed above, this page is processed by the Solaris address space
 * release code before any of the exported segments pages(which are allocated
 * from high memory). It is during this processing that the unmap callback gets
 * called and this callback is responsible for force destroying the exported
 * segments and thus eliminating the problem of locked pages.
 *
 * Flow-control:
 * ------------
 *
 * A credit based flow control algorithm is used for messages whose
 * processing cannot be done in the interrupt context because it might
 * involve invoking rsmpi calls, or might take a long time to complete
 * or might need to allocate resources. The algorithm operates on a per
 * path basis. To send a message the pathend needs to have a credit and
 * it consumes one for every message that is flow controlled. On the
 * receiving pathend the message is put on a msgbuf_queue and a task is
 * dispatched on the worker thread - recv_taskq where it is processed.
 * After processing the message, the receiving pathend dequeues the message,
 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
 * credits to the sender pathend.
 *
 * RSM_DRTEST:
 * -----------
 *
 * This is used to enable the DR testing using a test driver on test
 * platforms which do not support DR.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>

#include <sys/time.h>
#include <sys/errno.h>

#include <sys/file.h>
#include <sys/uio.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/open.h>
#include <sys/atomic.h>
#include <sys/mem_config.h>


#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/ddidevmap.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi_impldefs.h>

#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi_impldefs.h>

#include <sys/modctl.h>

#include <sys/policy.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>

#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/modctl.h>
#include <sys/debug.h>

#include <sys/tuneable.h>

/*
 * When RSM_DRTEST is defined the DR callbacks are registered with a test
 * driver instead of the kphysm interfaces (see rsm_attach/rsm_detach).
 */
#ifdef	RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
    void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
    void *arg);
#endif

/*
 * Functions implemented outside of this file.
 */
extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init(void);
extern void rsmka_pathmanager_cleanup(void);
extern void rele_sendq_token(sendq_token_t *);
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

/*
 * Data defined outside of this file.
 */
extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

/* defined elsewhere, but initialized by _init() in this file */
extern kmutex_t rsmka_buf_lock;

extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

/*
 * Character device entry points (wired into rsm_cb_ops below).
 */
static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

/*
 * Autoconfiguration entry points (wired into rsm_ops below).
 */
static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);

static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
/*
 * NOTE(review): old-style declaration kept as is; tighten to (void) once
 * the definition is confirmed to take no arguments.
 */
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

/*
 * Character/block entry point table. Initializers are positional; the
 * names of the three trailing slots follow the cb_ops(9S) layout.
 */
static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* cb_rev */
	0,			/* cb_aread */
	0			/* cb_awrite */
};

/*
 * Device operations table. Initializers are positional; the unnamed slot
 * before quiesce is the power entry point per dev_ops(9S).
 */
static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */

static struct modldrv modldrv = {
	&mod_driverops, /* Type of module.  This one is a pseudo driver */
	"Remote Shared Memory Driver",
	&rsm_ops,	/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

/*
 * Memory DR (dynamic reconfiguration) callbacks; registered in rsm_attach
 * and unregistered in rsm_detach when rsm_enable_dr is set.
 */
static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define	MAX_NODES 64

/* driver state (drv_state) together with its lock and condition variable */
static struct rsm_driver_data rsm_drv_data;
/* resource table; guarded by rsm_resource.rsmrc_lock */
static struct rsmresource_table rsm_resource;

static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t		*bar_va;
static ddi_umem_cookie_t	bar_cookie;
static off_t			barrier_offset;
static size_t			barrier_size;
static int			max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t	remap_cookie;

static rsm_memseg_id_t	rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
rsmhash_table_t rsm_import_segs;		/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;		/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);
514 515 void rsmka_init_loopback(); 516 517 int rsmka_null_seg_create( 518 rsm_controller_handle_t, 519 rsm_memseg_export_handle_t *, 520 size_t, 521 uint_t, 522 rsm_memory_local_t *, 523 rsm_resource_callback_t, 524 rsm_resource_callback_arg_t); 525 526 int rsmka_null_seg_destroy( 527 rsm_memseg_export_handle_t); 528 529 int rsmka_null_bind( 530 rsm_memseg_export_handle_t, 531 off_t, 532 rsm_memory_local_t *, 533 rsm_resource_callback_t, 534 rsm_resource_callback_arg_t); 535 536 int rsmka_null_unbind( 537 rsm_memseg_export_handle_t, 538 off_t, 539 size_t); 540 541 int rsmka_null_rebind( 542 rsm_memseg_export_handle_t, 543 off_t, 544 rsm_memory_local_t *, 545 rsm_resource_callback_t, 546 rsm_resource_callback_arg_t); 547 548 int rsmka_null_publish( 549 rsm_memseg_export_handle_t, 550 rsm_access_entry_t [], 551 uint_t, 552 rsm_memseg_id_t, 553 rsm_resource_callback_t, 554 rsm_resource_callback_arg_t); 555 556 557 int rsmka_null_republish( 558 rsm_memseg_export_handle_t, 559 rsm_access_entry_t [], 560 uint_t, 561 rsm_resource_callback_t, 562 rsm_resource_callback_arg_t); 563 564 int rsmka_null_unpublish( 565 rsm_memseg_export_handle_t); 566 567 rsm_ops_t null_rsmpi_ops; 568 569 /* 570 * data and locks to keep track of total amount of exported memory 571 */ 572 static pgcnt_t rsm_pgcnt; 573 static pgcnt_t rsm_pgcnt_max; /* max allowed */ 574 static kmutex_t rsm_pgcnt_lock; 575 576 static int rsm_enable_dr; 577 578 static char loopback_str[] = "loopback"; 579 580 int rsm_hash_size; 581 582 /* 583 * The locking model is as follows: 584 * 585 * Local operations: 586 * find resource - grab reader lock on resouce list 587 * insert rc - grab writer lock 588 * delete rc - grab writer lock and resource mutex 589 * read/write - no lock 590 * 591 * Remote invocations: 592 * find resource - grab read lock and resource mutex 593 * 594 * State: 595 * resource state - grab resource mutex 596 */ 597 598 int 599 _init(void) 600 { 601 int e; 602 603 e = mod_install(&modlinkage); 604 
if (e != 0) { 605 return (e); 606 } 607 608 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL); 609 610 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL); 611 612 613 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL); 614 615 rsm_hash_size = RSM_HASHSZ; 616 617 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 618 619 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 620 621 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL); 622 623 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL); 624 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0); 625 626 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL); 627 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0); 628 629 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL); 630 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0); 631 632 rsm_ipc.count = RSMIPC_SZ; 633 rsm_ipc.wanted = 0; 634 rsm_ipc.sequence = 0; 635 636 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL); 637 638 for (e = 0; e < RSMIPC_SZ; e++) { 639 rsmipc_slot_t *slot = &rsm_ipc.slots[e]; 640 641 RSMIPC_SET(slot, RSMIPC_FREE); 642 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL); 643 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0); 644 } 645 646 /* 647 * Initialize the suspend message list 648 */ 649 rsm_suspend_list.list_head = NULL; 650 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL); 651 652 /* 653 * It is assumed here that configuration data is available 654 * during system boot since _init may be called at that time. 
655 */ 656 657 rsmka_pathmanager_init(); 658 659 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 660 "rsm: _init done\n")); 661 662 return (DDI_SUCCESS); 663 664 } 665 666 int 667 _info(struct modinfo *modinfop) 668 { 669 670 return (mod_info(&modlinkage, modinfop)); 671 } 672 673 int 674 _fini(void) 675 { 676 int e; 677 678 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 679 "rsm: _fini enter\n")); 680 681 /* 682 * The rsmka_modunloadok flag is simply used to help with 683 * the PIT testing. Make this flag 0 to disallow modunload. 684 */ 685 if (rsmka_modunloadok == 0) 686 return (EBUSY); 687 688 /* rsm_detach will be called as a result of mod_remove */ 689 e = mod_remove(&modlinkage); 690 if (e) { 691 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR, 692 "Unable to fini RSM %x\n", e)); 693 return (e); 694 } 695 696 rsmka_pathmanager_cleanup(); 697 698 rw_destroy(&rsm_resource.rsmrc_lock); 699 700 rw_destroy(&rsm_export_segs.rsmhash_rw); 701 rw_destroy(&rsm_import_segs.rsmhash_rw); 702 rw_destroy(&rsm_event_queues.rsmhash_rw); 703 704 mutex_destroy(&importer_list.lock); 705 706 mutex_destroy(&rsm_ipc.lock); 707 cv_destroy(&rsm_ipc.cv); 708 709 (void) mutex_destroy(&rsm_suspend_list.list_lock); 710 711 (void) mutex_destroy(&rsm_pgcnt_lock); 712 713 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n")); 714 715 return (DDI_SUCCESS); 716 717 } 718 719 /*ARGSUSED1*/ 720 static int 721 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 722 { 723 minor_t rnum; 724 int percent; 725 int ret; 726 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 727 728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n")); 729 730 switch (cmd) { 731 case DDI_ATTACH: 732 break; 733 case DDI_RESUME: 734 default: 735 DBG_PRINTF((category, RSM_ERR, 736 "rsm:rsm_attach - cmd not supported\n")); 737 return (DDI_FAILURE); 738 } 739 740 if (rsm_dip != NULL) { 741 DBG_PRINTF((category, RSM_ERR, 742 "rsm:rsm_attach - supports only " 743 "one instance\n")); 744 return (DDI_FAILURE); 745 
} 746 747 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 748 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 749 "enable-dynamic-reconfiguration", 1); 750 751 mutex_enter(&rsm_drv_data.drv_lock); 752 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING; 753 mutex_exit(&rsm_drv_data.drv_lock); 754 755 if (rsm_enable_dr) { 756 #ifdef RSM_DRTEST 757 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec, 758 (void *)NULL); 759 #else 760 ret = kphysm_setup_func_register(&rsm_dr_callback_vec, 761 (void *)NULL); 762 #endif 763 if (ret != 0) { 764 mutex_exit(&rsm_drv_data.drv_lock); 765 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic " 766 "reconfiguration setup failed\n"); 767 return (DDI_FAILURE); 768 } 769 } 770 771 mutex_enter(&rsm_drv_data.drv_lock); 772 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING); 773 rsm_drv_data.drv_state = RSM_DRV_OK; 774 cv_broadcast(&rsm_drv_data.drv_cv); 775 mutex_exit(&rsm_drv_data.drv_lock); 776 777 /* 778 * page_list_read_lock(); 779 * xx_setup(); 780 * page_list_read_unlock(); 781 */ 782 783 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 784 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 785 "segment-hashtable-size", RSM_HASHSZ); 786 if (rsm_hash_size == 0) { 787 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 788 "rsm: segment-hashtable-size in rsm.conf " 789 "must be greater than 0, defaulting to 128\n")); 790 rsm_hash_size = RSM_HASHSZ; 791 } 792 793 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n", 794 rsm_hash_size)); 795 796 rsm_pgcnt = 0; 797 798 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 799 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 800 "max-exported-memory", 0); 801 if (percent < 0) { 802 DBG_PRINTF((category, RSM_ERR, 803 "rsm:rsm_attach not enough memory available to " 804 "export, or max-exported-memory set incorrectly.\n")); 805 return (DDI_FAILURE); 806 } 807 /* 0 indicates no fixed upper limit. 
maxmem is the max */ 808 /* available pageable physical mem */ 809 rsm_pgcnt_max = (percent*maxmem)/100; 810 811 if (rsm_pgcnt_max > 0) { 812 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 813 "rsm: Available physical memory = %lu pages, " 814 "Max exportable memory = %lu pages", 815 maxmem, rsm_pgcnt_max)); 816 } 817 818 /* 819 * Create minor number 820 */ 821 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) { 822 DBG_PRINTF((category, RSM_ERR, 823 "rsm: rsm_attach - Unable to get " 824 "minor number\n")); 825 return (DDI_FAILURE); 826 } 827 828 ASSERT(rnum == RSM_DRIVER_MINOR); 829 830 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR, 831 rnum, DDI_PSEUDO, 0) == DDI_FAILURE) { 832 DBG_PRINTF((category, RSM_ERR, 833 "rsm: rsm_attach - unable to allocate " 834 "minor #\n")); 835 return (DDI_FAILURE); 836 } 837 838 rsm_dip = devi; 839 /* 840 * Allocate the hashtables 841 */ 842 rsmhash_alloc(&rsm_export_segs, rsm_hash_size); 843 rsmhash_alloc(&rsm_import_segs, rsm_hash_size); 844 845 importer_list.bucket = (importing_token_t **) 846 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP); 847 848 /* 849 * Allocate a resource struct 850 */ 851 { 852 rsmresource_t *p; 853 854 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP); 855 856 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL); 857 858 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR); 859 } 860 861 /* 862 * Based on the rsm.conf property max-segments, determine the maximum 863 * number of segments that can be exported/imported. This is then used 864 * to determine the size for barrier failure pages. 
865 */ 866 867 /* First get the max number of segments from the rsm.conf file */ 868 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 869 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 870 "max-segments", 0); 871 if (max_segs == 0) { 872 /* Use default number of segments */ 873 max_segs = RSM_MAX_NUM_SEG; 874 } 875 876 /* 877 * Based on the max number of segments allowed, determine the barrier 878 * page size. add 1 to max_segs since the barrier page itself uses 879 * a slot 880 */ 881 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t), 882 PAGESIZE); 883 884 /* 885 * allocation of the barrier failure page 886 */ 887 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size, 888 DDI_UMEM_SLEEP, &bar_cookie); 889 890 /* 891 * Set the barrier_offset 892 */ 893 barrier_offset = 0; 894 895 /* 896 * Allocate a trash memory and get a cookie for it. This will be used 897 * when remapping segments during force disconnects. Allocate the 898 * trash memory with a large size which is page aligned. 899 */ 900 (void) ddi_umem_alloc((size_t)TRASHSIZE, 901 DDI_UMEM_TRASH, &remap_cookie); 902 903 /* initialize user segment id allocation variable */ 904 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE; 905 906 /* 907 * initialize the null_rsmpi_ops vector and the loopback adapter 908 */ 909 rsmka_init_loopback(); 910 911 912 ddi_report_dev(devi); 913 914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n")); 915 916 return (DDI_SUCCESS); 917 } 918 919 /* 920 * The call to mod_remove in the _fine routine will cause the system 921 * to call rsm_detach 922 */ 923 /*ARGSUSED*/ 924 static int 925 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 926 { 927 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 928 929 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n")); 930 931 switch (cmd) { 932 case DDI_DETACH: 933 break; 934 default: 935 DBG_PRINTF((category, RSM_ERR, 936 "rsm:rsm_detach - cmd %x not supported\n", 937 cmd)); 938 return (DDI_FAILURE); 939 } 940 
941 mutex_enter(&rsm_drv_data.drv_lock); 942 while (rsm_drv_data.drv_state != RSM_DRV_OK) 943 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 944 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING; 945 mutex_exit(&rsm_drv_data.drv_lock); 946 947 /* 948 * Unregister the DR callback functions 949 */ 950 if (rsm_enable_dr) { 951 #ifdef RSM_DRTEST 952 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec, 953 (void *)NULL); 954 #else 955 kphysm_setup_func_unregister(&rsm_dr_callback_vec, 956 (void *)NULL); 957 #endif 958 } 959 960 mutex_enter(&rsm_drv_data.drv_lock); 961 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING); 962 rsm_drv_data.drv_state = RSM_DRV_NEW; 963 mutex_exit(&rsm_drv_data.drv_lock); 964 965 ASSERT(rsm_suspend_list.list_head == NULL); 966 967 /* 968 * Release all resources, seglist, controller, ... 969 */ 970 971 /* remove intersend queues */ 972 /* remove registered services */ 973 974 975 ddi_remove_minor_node(dip, DRIVER_NAME); 976 rsm_dip = NULL; 977 978 /* 979 * Free minor zero resource 980 */ 981 { 982 rsmresource_t *p; 983 984 p = rsmresource_free(RSM_DRIVER_MINOR); 985 if (p) { 986 mutex_destroy(&p->rsmrc_lock); 987 kmem_free((void *)p, sizeof (*p)); 988 } 989 } 990 991 /* 992 * Free resource table 993 */ 994 995 rsmresource_destroy(); 996 997 /* 998 * Free the hash tables 999 */ 1000 rsmhash_free(&rsm_export_segs, rsm_hash_size); 1001 rsmhash_free(&rsm_import_segs, rsm_hash_size); 1002 1003 kmem_free((void *)importer_list.bucket, 1004 rsm_hash_size * sizeof (importing_token_t *)); 1005 importer_list.bucket = NULL; 1006 1007 1008 /* free barrier page */ 1009 if (bar_cookie != NULL) { 1010 ddi_umem_free(bar_cookie); 1011 } 1012 bar_va = NULL; 1013 bar_cookie = NULL; 1014 1015 /* 1016 * Free the memory allocated for the trash 1017 */ 1018 if (remap_cookie != NULL) { 1019 ddi_umem_free(remap_cookie); 1020 } 1021 remap_cookie = NULL; 1022 1023 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n")); 1024 1025 return 
(DDI_SUCCESS); 1026 } 1027 1028 /*ARGSUSED*/ 1029 static int 1030 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1031 { 1032 register int error; 1033 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 1034 1035 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n")); 1036 1037 switch (infocmd) { 1038 case DDI_INFO_DEVT2DEVINFO: 1039 if (rsm_dip == NULL) 1040 error = DDI_FAILURE; 1041 else { 1042 *result = (void *)rsm_dip; 1043 error = DDI_SUCCESS; 1044 } 1045 break; 1046 case DDI_INFO_DEVT2INSTANCE: 1047 *result = (void *)0; 1048 error = DDI_SUCCESS; 1049 break; 1050 default: 1051 error = DDI_FAILURE; 1052 } 1053 1054 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n")); 1055 return (error); 1056 } 1057 1058 adapter_t * 1059 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode) 1060 { 1061 adapter_t *adapter; 1062 char adapter_devname[MAXNAMELEN]; 1063 int instance; 1064 DBG_DEFINE(category, 1065 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL); 1066 1067 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n")); 1068 1069 instance = msg->cnum; 1070 1071 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) { 1072 return (NULL); 1073 } 1074 1075 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode)) 1076 return (NULL); 1077 1078 if (strcmp(adapter_devname, "loopback") == 0) 1079 return (&loopback_adapter); 1080 1081 adapter = rsmka_lookup_adapter(adapter_devname, instance); 1082 1083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n")); 1084 1085 return (adapter); 1086 } 1087 1088 1089 /* 1090 * *********************** Resource Number Management ******************** 1091 * All resources are stored in a simple hash table. The table is an array 1092 * of pointers to resource blks. Each blk contains: 1093 * base - base number of this blk 1094 * used - number of used slots in this blk. 1095 * blks - array of pointers to resource items. 
1096 * An entry in a resource blk is empty if it's NULL. 1097 * 1098 * We start with no resource array. Each time we run out of slots, we 1099 * reallocate a new larger array and copy the pointer to the new array and 1100 * a new resource blk is allocated and added to the hash table. 1101 * 1102 * The resource control block contains: 1103 * root - array of pointer of resource blks 1104 * sz - current size of array. 1105 * len - last valid entry in array. 1106 * 1107 * A search operation based on a resource number is as follows: 1108 * index = rnum / RESOURCE_BLKSZ; 1109 * ASSERT(index < resource_block.len); 1110 * ASSERT(index < resource_block.sz); 1111 * offset = rnum % RESOURCE_BLKSZ; 1112 * ASSERT(offset >= resource_block.root[index]->base); 1113 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 1114 * return resource_block.root[index]->blks[offset]; 1115 * 1116 * A resource blk is freed with its used count reachs zero. 1117 */ 1118 static int 1119 rsmresource_alloc(minor_t *rnum) 1120 { 1121 1122 /* search for available resource slot */ 1123 int i, j, empty = -1; 1124 rsmresource_blk_t *blk; 1125 1126 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1127 "rsmresource_alloc enter\n")); 1128 1129 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1130 1131 /* Try to find an empty slot */ 1132 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1133 blk = rsm_resource.rsmrc_root[i]; 1134 if (blk != NULL && blk->rsmrcblk_avail > 0) { 1135 /* found an empty slot in this blk */ 1136 for (j = 0; j < RSMRC_BLKSZ; j++) { 1137 if (blk->rsmrcblk_blks[j] == NULL) { 1138 *rnum = (minor_t) 1139 (j + (i * RSMRC_BLKSZ)); 1140 /* 1141 * obey gen page limits 1142 */ 1143 if (*rnum >= max_segs + 1) { 1144 if (empty < 0) { 1145 rw_exit(&rsm_resource. 
1146 rsmrc_lock); 1147 DBG_PRINTF(( 1148 RSM_KERNEL_ALL, 1149 RSM_ERR, 1150 "rsmresource" 1151 "_alloc failed:" 1152 "not enough res" 1153 "%d\n", *rnum)); 1154 return (RSMERR_INSUFFICIENT_RESOURCES); 1155 } else { 1156 /* use empty slot */ 1157 break; 1158 } 1159 1160 } 1161 1162 blk->rsmrcblk_blks[j] = RSMRC_RESERVED; 1163 blk->rsmrcblk_avail--; 1164 rw_exit(&rsm_resource.rsmrc_lock); 1165 DBG_PRINTF((RSM_KERNEL_ALL, 1166 RSM_DEBUG_VERBOSE, 1167 "rsmresource_alloc done\n")); 1168 return (RSM_SUCCESS); 1169 } 1170 } 1171 } else if (blk == NULL && empty < 0) { 1172 /* remember first empty slot */ 1173 empty = i; 1174 } 1175 } 1176 1177 /* Couldn't find anything, allocate a new blk */ 1178 /* 1179 * Do we need to reallocate the root array 1180 */ 1181 if (empty < 0) { 1182 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) { 1183 /* 1184 * Allocate new array and copy current stuff into it 1185 */ 1186 rsmresource_blk_t **p; 1187 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz + 1188 RSMRC_BLKSZ; 1189 /* 1190 * Don't allocate more that max valid rnum 1191 */ 1192 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >= 1193 max_segs + 1) { 1194 rw_exit(&rsm_resource.rsmrc_lock); 1195 return (RSMERR_INSUFFICIENT_RESOURCES); 1196 } 1197 1198 p = (rsmresource_blk_t **)kmem_zalloc( 1199 newsz * sizeof (*p), 1200 KM_SLEEP); 1201 1202 if (rsm_resource.rsmrc_root) { 1203 uint_t oldsz; 1204 1205 oldsz = (uint_t)(rsm_resource.rsmrc_sz * 1206 (int)sizeof (*p)); 1207 1208 /* 1209 * Copy old data into new space and 1210 * free old stuff 1211 */ 1212 bcopy(rsm_resource.rsmrc_root, p, oldsz); 1213 kmem_free(rsm_resource.rsmrc_root, oldsz); 1214 } 1215 1216 rsm_resource.rsmrc_root = p; 1217 rsm_resource.rsmrc_sz = (int)newsz; 1218 } 1219 1220 empty = rsm_resource.rsmrc_len; 1221 rsm_resource.rsmrc_len++; 1222 } 1223 1224 /* 1225 * Allocate a new blk 1226 */ 1227 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP); 1228 ASSERT(rsm_resource.rsmrc_root[empty] == NULL); 1229 
rsm_resource.rsmrc_root[empty] = blk; 1230 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1; 1231 1232 /* 1233 * Allocate slot 1234 */ 1235 1236 *rnum = (minor_t)(empty * RSMRC_BLKSZ); 1237 1238 /* 1239 * watch out not to exceed bounds of barrier page 1240 */ 1241 if (*rnum >= max_segs + 1) { 1242 rw_exit(&rsm_resource.rsmrc_lock); 1243 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR, 1244 "rsmresource_alloc failed %d\n", *rnum)); 1245 1246 return (RSMERR_INSUFFICIENT_RESOURCES); 1247 } 1248 blk->rsmrcblk_blks[0] = RSMRC_RESERVED; 1249 1250 1251 rw_exit(&rsm_resource.rsmrc_lock); 1252 1253 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1254 "rsmresource_alloc done\n")); 1255 1256 return (RSM_SUCCESS); 1257 } 1258 1259 static rsmresource_t * 1260 rsmresource_free(minor_t rnum) 1261 { 1262 1263 /* search for available resource slot */ 1264 int i, j; 1265 rsmresource_blk_t *blk; 1266 rsmresource_t *p; 1267 1268 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1269 "rsmresource_free enter\n")); 1270 1271 i = (int)(rnum / RSMRC_BLKSZ); 1272 j = (int)(rnum % RSMRC_BLKSZ); 1273 1274 if (i >= rsm_resource.rsmrc_len) { 1275 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1276 "rsmresource_free done\n")); 1277 return (NULL); 1278 } 1279 1280 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1281 1282 ASSERT(rsm_resource.rsmrc_root); 1283 ASSERT(i < rsm_resource.rsmrc_len); 1284 ASSERT(i < rsm_resource.rsmrc_sz); 1285 blk = rsm_resource.rsmrc_root[i]; 1286 if (blk == NULL) { 1287 rw_exit(&rsm_resource.rsmrc_lock); 1288 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1289 "rsmresource_free done\n")); 1290 return (NULL); 1291 } 1292 1293 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */ 1294 1295 p = blk->rsmrcblk_blks[j]; 1296 if (p == RSMRC_RESERVED) { 1297 p = NULL; 1298 } 1299 1300 blk->rsmrcblk_blks[j] = NULL; 1301 blk->rsmrcblk_avail++; 1302 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) { 1303 /* free this blk */ 1304 kmem_free(blk, sizeof (*blk)); 1305 rsm_resource.rsmrc_root[i] = NULL; 1306 } 1307 
1308 rw_exit(&rsm_resource.rsmrc_lock); 1309 1310 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1311 "rsmresource_free done\n")); 1312 1313 return (p); 1314 } 1315 1316 static rsmresource_t * 1317 rsmresource_lookup(minor_t rnum, int lock) 1318 { 1319 int i, j; 1320 rsmresource_blk_t *blk; 1321 rsmresource_t *p; 1322 1323 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1324 "rsmresource_lookup enter\n")); 1325 1326 /* Find resource and lock it in READER mode */ 1327 /* search for available resource slot */ 1328 1329 i = (int)(rnum / RSMRC_BLKSZ); 1330 j = (int)(rnum % RSMRC_BLKSZ); 1331 1332 if (i >= rsm_resource.rsmrc_len) { 1333 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1334 "rsmresource_lookup done\n")); 1335 return (NULL); 1336 } 1337 1338 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1339 1340 blk = rsm_resource.rsmrc_root[i]; 1341 if (blk != NULL) { 1342 ASSERT(i < rsm_resource.rsmrc_len); 1343 ASSERT(i < rsm_resource.rsmrc_sz); 1344 1345 p = blk->rsmrcblk_blks[j]; 1346 if (lock == RSM_LOCK) { 1347 if (p != RSMRC_RESERVED) { 1348 mutex_enter(&p->rsmrc_lock); 1349 } else { 1350 p = NULL; 1351 } 1352 } 1353 } else { 1354 p = NULL; 1355 } 1356 rw_exit(&rsm_resource.rsmrc_lock); 1357 1358 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1359 "rsmresource_lookup done\n")); 1360 1361 return (p); 1362 } 1363 1364 static void 1365 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type) 1366 { 1367 /* Find resource and lock it in READER mode */ 1368 /* Caller can upgrade if need be */ 1369 /* search for available resource slot */ 1370 int i, j; 1371 rsmresource_blk_t *blk; 1372 1373 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1374 "rsmresource_insert enter\n")); 1375 1376 i = (int)(rnum / RSMRC_BLKSZ); 1377 j = (int)(rnum % RSMRC_BLKSZ); 1378 1379 p->rsmrc_type = type; 1380 p->rsmrc_num = rnum; 1381 1382 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1383 1384 ASSERT(rsm_resource.rsmrc_root); 1385 ASSERT(i < rsm_resource.rsmrc_len); 1386 
ASSERT(i < rsm_resource.rsmrc_sz); 1387 1388 blk = rsm_resource.rsmrc_root[i]; 1389 ASSERT(blk); 1390 1391 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED); 1392 1393 blk->rsmrcblk_blks[j] = p; 1394 1395 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1396 "rsmresource_insert done\n")); 1397 1398 rw_exit(&rsm_resource.rsmrc_lock); 1399 } 1400 1401 static void 1402 rsmresource_destroy() 1403 { 1404 int i, j; 1405 1406 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1407 "rsmresource_destroy enter\n")); 1408 1409 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1410 1411 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1412 rsmresource_blk_t *blk; 1413 1414 blk = rsm_resource.rsmrc_root[i]; 1415 if (blk == NULL) { 1416 continue; 1417 } 1418 for (j = 0; j < RSMRC_BLKSZ; j++) { 1419 if (blk->rsmrcblk_blks[j] != NULL) { 1420 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1421 "Not null slot %d, %lx\n", j, 1422 (size_t)blk->rsmrcblk_blks[j])); 1423 } 1424 } 1425 kmem_free(blk, sizeof (*blk)); 1426 rsm_resource.rsmrc_root[i] = NULL; 1427 } 1428 if (rsm_resource.rsmrc_root) { 1429 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *); 1430 kmem_free(rsm_resource.rsmrc_root, (uint_t)i); 1431 rsm_resource.rsmrc_root = NULL; 1432 rsm_resource.rsmrc_len = 0; 1433 rsm_resource.rsmrc_sz = 0; 1434 } 1435 1436 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1437 "rsmresource_destroy done\n")); 1438 1439 rw_exit(&rsm_resource.rsmrc_lock); 1440 } 1441 1442 1443 /* ******************** Generic Key Hash Table Management ********* */ 1444 static rsmresource_t * 1445 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key, 1446 rsm_resource_state_t state) 1447 { 1448 rsmresource_t *p; 1449 uint_t hashval; 1450 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1451 1452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n")); 1453 1454 hashval = rsmhash(key); 1455 1456 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n", 1457 key, hashval)); 1458 1459 
rw_enter(&rhash->rsmhash_rw, RW_READER); 1460 1461 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1462 1463 for (; p; p = p->rsmrc_next) { 1464 if (p->rsmrc_key == key) { 1465 /* acquire resource lock */ 1466 RSMRC_LOCK(p); 1467 break; 1468 } 1469 } 1470 1471 rw_exit(&rhash->rsmhash_rw); 1472 1473 if (p != NULL && p->rsmrc_state != state) { 1474 /* state changed, release lock and return null */ 1475 RSMRC_UNLOCK(p); 1476 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1477 "rsmhash_lookup done: state changed\n")); 1478 return (NULL); 1479 } 1480 1481 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n")); 1482 1483 return (p); 1484 } 1485 1486 static void 1487 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm) 1488 { 1489 rsmresource_t *p, **back; 1490 uint_t hashval; 1491 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1492 1493 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n")); 1494 1495 hashval = rsmhash(rcelm->rsmrc_key); 1496 1497 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n", 1498 rcelm->rsmrc_key, hashval)); 1499 1500 /* 1501 * It's ok not to find the segment. 
1502 */ 1503 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1504 1505 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1506 1507 for (; (p = *back) != NULL; back = &p->rsmrc_next) { 1508 if (p == rcelm) { 1509 *back = rcelm->rsmrc_next; 1510 break; 1511 } 1512 } 1513 1514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n")); 1515 1516 rw_exit(&rhash->rsmhash_rw); 1517 } 1518 1519 static int 1520 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key, 1521 int dup_check, rsm_resource_state_t state) 1522 { 1523 rsmresource_t *p = NULL, **bktp; 1524 uint_t hashval; 1525 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1526 1527 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n")); 1528 1529 /* lock table */ 1530 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1531 1532 /* 1533 * If the current resource state is other than the state passed in 1534 * then the resource is (probably) already on the list. eg. for an 1535 * import segment if the state is not RSM_STATE_NEW then it's on the 1536 * list already. 1537 */ 1538 RSMRC_LOCK(new); 1539 if (new->rsmrc_state != state) { 1540 RSMRC_UNLOCK(new); 1541 rw_exit(&rhash->rsmhash_rw); 1542 return (RSMERR_BAD_SEG_HNDL); 1543 } 1544 1545 hashval = rsmhash(key); 1546 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval)); 1547 1548 if (dup_check) { 1549 /* 1550 * Used for checking export segments; don't want to have 1551 * the same key used for multiple segments. 
1552 */ 1553 1554 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1555 1556 for (; p; p = p->rsmrc_next) { 1557 if (p->rsmrc_key == key) { 1558 RSMRC_UNLOCK(new); 1559 break; 1560 } 1561 } 1562 } 1563 1564 if (p == NULL) { 1565 /* Key doesn't exist, add it */ 1566 1567 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1568 1569 new->rsmrc_key = key; 1570 new->rsmrc_next = *bktp; 1571 *bktp = new; 1572 } 1573 1574 rw_exit(&rhash->rsmhash_rw); 1575 1576 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n")); 1577 1578 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE); 1579 } 1580 1581 /* 1582 * XOR each byte of the key. 1583 */ 1584 static uint_t 1585 rsmhash(rsm_memseg_id_t key) 1586 { 1587 uint_t hash = key; 1588 1589 hash ^= (key >> 8); 1590 hash ^= (key >> 16); 1591 hash ^= (key >> 24); 1592 1593 return (hash % rsm_hash_size); 1594 1595 } 1596 1597 /* 1598 * generic function to get a specific bucket 1599 */ 1600 static void * 1601 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval) 1602 { 1603 1604 if (rhash->bucket == NULL) 1605 return (NULL); 1606 else 1607 return ((void *)rhash->bucket[hashval]); 1608 } 1609 1610 /* 1611 * generic function to get a specific bucket's address 1612 */ 1613 static void ** 1614 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval) 1615 { 1616 if (rhash->bucket == NULL) 1617 return (NULL); 1618 else 1619 return ((void **)&(rhash->bucket[hashval])); 1620 } 1621 1622 /* 1623 * generic function to alloc a hash table 1624 */ 1625 static void 1626 rsmhash_alloc(rsmhash_table_t *rhash, int size) 1627 { 1628 rhash->bucket = (rsmresource_t **) 1629 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP); 1630 } 1631 1632 /* 1633 * generic function to free a hash table 1634 */ 1635 static void 1636 rsmhash_free(rsmhash_table_t *rhash, int size) 1637 { 1638 1639 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t)); 1640 rhash->bucket = NULL; 1641 1642 } 1643 /* *********************** Exported Segment Key 
Management ************ */ 1644 1645 #define rsmexport_add(new, key) \ 1646 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \ 1647 RSM_STATE_BIND) 1648 1649 #define rsmexport_rm(arg) \ 1650 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)) 1651 1652 #define rsmexport_lookup(key) \ 1653 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT) 1654 1655 /* ************************** Import Segment List Management ********** */ 1656 1657 /* 1658 * Add segment to import list. This will be useful for paging and loopback 1659 * segment unloading. 1660 */ 1661 #define rsmimport_add(arg, key) \ 1662 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \ 1663 RSM_STATE_NEW) 1664 1665 #define rsmimport_rm(arg) \ 1666 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)) 1667 1668 /* 1669 * #define rsmimport_lookup(key) \ 1670 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT) 1671 */ 1672 1673 /* 1674 * increase the ref count and make the import segment point to the 1675 * shared data structure. Return a pointer to the share data struct 1676 * and the shared data struct is locked upon return 1677 */ 1678 static rsm_import_share_t * 1679 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter, 1680 rsmseg_t *segp) 1681 { 1682 uint_t hash; 1683 rsmresource_t *p; 1684 rsm_import_share_t *shdatap; 1685 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1686 1687 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n")); 1688 1689 hash = rsmhash(key); 1690 /* lock table */ 1691 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER); 1692 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n", 1693 key, hash)); 1694 1695 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash); 1696 1697 for (; p; p = p->rsmrc_next) { 1698 /* 1699 * Look for an entry that is importing the same exporter 1700 * with the share data structure allocated. 
1701 */ 1702 if ((p->rsmrc_key == key) && 1703 (p->rsmrc_node == node) && 1704 (p->rsmrc_adapter == adapter) && 1705 (((rsmseg_t *)p)->s_share != NULL)) { 1706 shdatap = ((rsmseg_t *)p)->s_share; 1707 break; 1708 } 1709 } 1710 1711 if (p == NULL) { 1712 /* we are the first importer, create the shared data struct */ 1713 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP); 1714 shdatap->rsmsi_state = RSMSI_STATE_NEW; 1715 shdatap->rsmsi_segid = key; 1716 shdatap->rsmsi_node = node; 1717 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL); 1718 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0); 1719 } 1720 1721 rsmseglock_acquire(segp); 1722 1723 /* we grab the shared lock before returning from this function */ 1724 mutex_enter(&shdatap->rsmsi_lock); 1725 1726 shdatap->rsmsi_refcnt++; 1727 segp->s_share = shdatap; 1728 1729 rsmseglock_release(segp); 1730 1731 rw_exit(&rsm_import_segs.rsmhash_rw); 1732 1733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n")); 1734 1735 return (shdatap); 1736 } 1737 1738 /* 1739 * the shared data structure should be locked before calling 1740 * rsmsharecv_signal(). 1741 * Change the state and signal any waiting segments. 
1742 */ 1743 void 1744 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate) 1745 { 1746 ASSERT(rsmsharelock_held(seg)); 1747 1748 if (seg->s_share->rsmsi_state == oldstate) { 1749 seg->s_share->rsmsi_state = newstate; 1750 cv_broadcast(&seg->s_share->rsmsi_cv); 1751 } 1752 } 1753 1754 /* 1755 * Add to the hash table 1756 */ 1757 static void 1758 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr, 1759 void *cookie) 1760 { 1761 1762 importing_token_t *head; 1763 importing_token_t *new_token; 1764 int index; 1765 1766 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1767 1768 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n")); 1769 1770 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP); 1771 new_token->importing_node = node; 1772 new_token->key = key; 1773 new_token->import_segment_cookie = cookie; 1774 new_token->importing_adapter_hwaddr = hwaddr; 1775 1776 index = rsmhash(key); 1777 1778 mutex_enter(&importer_list.lock); 1779 1780 head = importer_list.bucket[index]; 1781 importer_list.bucket[index] = new_token; 1782 new_token->next = head; 1783 mutex_exit(&importer_list.lock); 1784 1785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n")); 1786 } 1787 1788 static void 1789 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie) 1790 { 1791 1792 importing_token_t *prev, *token = NULL; 1793 int index; 1794 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1795 1796 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n")); 1797 1798 index = rsmhash(key); 1799 1800 mutex_enter(&importer_list.lock); 1801 1802 token = importer_list.bucket[index]; 1803 1804 prev = token; 1805 while (token != NULL) { 1806 if (token->importing_node == node && 1807 token->import_segment_cookie == cookie) { 1808 if (prev == token) 1809 importer_list.bucket[index] = token->next; 1810 else 1811 prev->next = token->next; 1812 kmem_free((void *)token, sizeof (*token)); 1813 
break; 1814 } else { 1815 prev = token; 1816 token = token->next; 1817 } 1818 } 1819 1820 mutex_exit(&importer_list.lock); 1821 1822 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n")); 1823 1824 1825 } 1826 1827 /* **************************Segment Structure Management ************* */ 1828 1829 /* 1830 * Free segment structure 1831 */ 1832 static void 1833 rsmseg_free(rsmseg_t *seg) 1834 { 1835 1836 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1837 1838 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n")); 1839 1840 /* need to take seglock here to avoid race with rsmmap_unmap() */ 1841 rsmseglock_acquire(seg); 1842 if (seg->s_ckl != NULL) { 1843 /* Segment is still busy */ 1844 seg->s_state = RSM_STATE_END; 1845 rsmseglock_release(seg); 1846 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1847 "rsmseg_free done\n")); 1848 return; 1849 } 1850 1851 rsmseglock_release(seg); 1852 1853 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW); 1854 1855 /* 1856 * If it's an importer decrement the refcount 1857 * and if its down to zero free the shared data structure. 1858 * This is where failures during rsm_connect() are unrefcounted 1859 */ 1860 if (seg->s_share != NULL) { 1861 1862 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT); 1863 1864 rsmsharelock_acquire(seg); 1865 1866 ASSERT(seg->s_share->rsmsi_refcnt > 0); 1867 1868 seg->s_share->rsmsi_refcnt--; 1869 1870 if (seg->s_share->rsmsi_refcnt == 0) { 1871 rsmsharelock_release(seg); 1872 mutex_destroy(&seg->s_share->rsmsi_lock); 1873 cv_destroy(&seg->s_share->rsmsi_cv); 1874 kmem_free((void *)(seg->s_share), 1875 sizeof (rsm_import_share_t)); 1876 } else { 1877 rsmsharelock_release(seg); 1878 } 1879 /* 1880 * The following needs to be done after any 1881 * rsmsharelock calls which use seg->s_share. 
1882 */ 1883 seg->s_share = NULL; 1884 } 1885 1886 cv_destroy(&seg->s_cv); 1887 mutex_destroy(&seg->s_lock); 1888 rsmacl_free(seg->s_acl, seg->s_acl_len); 1889 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len); 1890 if (seg->s_adapter) 1891 rsmka_release_adapter(seg->s_adapter); 1892 1893 kmem_free((void *)seg, sizeof (*seg)); 1894 1895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n")); 1896 1897 } 1898 1899 1900 static rsmseg_t * 1901 rsmseg_alloc(minor_t num, struct cred *cred) 1902 { 1903 rsmseg_t *new; 1904 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1905 1906 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n")); 1907 /* 1908 * allocate memory for new segment. This should be a segkmem cache. 1909 */ 1910 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); 1911 1912 new->s_state = RSM_STATE_NEW; 1913 new->s_minor = num; 1914 new->s_acl_len = 0; 1915 new->s_cookie = NULL; 1916 new->s_adapter = NULL; 1917 1918 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask; 1919 /* we don't have a key yet, will set at export/connect */ 1920 new->s_uid = crgetuid(cred); 1921 new->s_gid = crgetgid(cred); 1922 1923 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL); 1924 cv_init(&new->s_cv, NULL, CV_DRIVER, 0); 1925 1926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n")); 1927 1928 return (new); 1929 } 1930 1931 /* ******************************** Driver Open/Close/Poll *************** */ 1932 1933 /*ARGSUSED1*/ 1934 static int 1935 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred) 1936 { 1937 minor_t rnum; 1938 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 1939 1940 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n")); 1941 /* 1942 * Char only 1943 */ 1944 if (otyp != OTYP_CHR) { 1945 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n")); 1946 return (EINVAL); 1947 } 1948 1949 /* 1950 * Only zero can be opened, clones are used for resources. 
1951 */ 1952 if (getminor(*devp) != RSM_DRIVER_MINOR) { 1953 DBG_PRINTF((category, RSM_ERR, 1954 "rsm_open: bad minor %d\n", getminor(*devp))); 1955 return (ENODEV); 1956 } 1957 1958 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) { 1959 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n")); 1960 return (EPERM); 1961 } 1962 1963 if (!(flag & FWRITE)) { 1964 /* 1965 * The library function _rsm_librsm_init calls open for 1966 * /dev/rsm with flag set to O_RDONLY. We want a valid 1967 * file descriptor to be returned for minor device zero. 1968 */ 1969 1970 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1971 "rsm_open RDONLY done\n")); 1972 return (DDI_SUCCESS); 1973 } 1974 1975 /* 1976 * - allocate new minor number and segment. 1977 * - add segment to list of all segments. 1978 * - set minordev data to segment 1979 * - update devp argument to new device 1980 * - update s_cred to cred; make sure you do crhold(cred); 1981 */ 1982 1983 /* allocate a new resource number */ 1984 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) { 1985 /* 1986 * We will bind this minor to a specific resource in first 1987 * ioctl 1988 */ 1989 *devp = makedevice(getmajor(*devp), rnum); 1990 } else { 1991 return (EAGAIN); 1992 } 1993 1994 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n")); 1995 return (DDI_SUCCESS); 1996 } 1997 1998 static void 1999 rsmseg_close(rsmseg_t *seg, int force_flag) 2000 { 2001 int e = RSM_SUCCESS; 2002 2003 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2004 2005 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n")); 2006 2007 rsmseglock_acquire(seg); 2008 if (!force_flag && (seg->s_hdr.rsmrc_type == 2009 RSM_RESOURCE_EXPORT_SEGMENT)) { 2010 /* 2011 * If we are processing rsm_close wait for force_destroy 2012 * processing to complete since force_destroy processing 2013 * needs to finish first before we can free the segment. 
2014 * force_destroy is only for export segments 2015 */ 2016 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) { 2017 cv_wait(&seg->s_cv, &seg->s_lock); 2018 } 2019 } 2020 rsmseglock_release(seg); 2021 2022 /* It's ok to read the state without a lock */ 2023 switch (seg->s_state) { 2024 case RSM_STATE_EXPORT: 2025 case RSM_STATE_EXPORT_QUIESCING: 2026 case RSM_STATE_EXPORT_QUIESCED: 2027 e = rsm_unpublish(seg, 1); 2028 /* FALLTHRU */ 2029 case RSM_STATE_BIND_QUIESCED: 2030 /* FALLTHRU */ 2031 case RSM_STATE_BIND: 2032 e = rsm_unbind(seg); 2033 if (e != RSM_SUCCESS && force_flag == 1) 2034 return; 2035 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT); 2036 /* FALLTHRU */ 2037 case RSM_STATE_NEW_QUIESCED: 2038 rsmseglock_acquire(seg); 2039 seg->s_state = RSM_STATE_NEW; 2040 cv_broadcast(&seg->s_cv); 2041 rsmseglock_release(seg); 2042 break; 2043 case RSM_STATE_NEW: 2044 break; 2045 case RSM_STATE_ZOMBIE: 2046 /* 2047 * Segments in this state have been removed off the 2048 * exported segments list and have been unpublished 2049 * and unbind. These segments have been removed during 2050 * a callback to the rsm_export_force_destroy, which 2051 * is called for the purpose of unlocking these 2052 * exported memory segments when a process exits but 2053 * leaves the segments locked down since rsm_close is 2054 * is not called for the segments. This can happen 2055 * when a process calls fork or exec and then exits. 2056 * Once the segments are in the ZOMBIE state, all that 2057 * remains is to destroy them when rsm_close is called. 2058 * This is done here. Thus, for such segments the 2059 * the state is changed to new so that later in this 2060 * function rsmseg_free is called. 
2061 */ 2062 rsmseglock_acquire(seg); 2063 seg->s_state = RSM_STATE_NEW; 2064 rsmseglock_release(seg); 2065 break; 2066 case RSM_STATE_MAP_QUIESCE: 2067 case RSM_STATE_ACTIVE: 2068 /* Disconnect will handle the unmap */ 2069 case RSM_STATE_CONN_QUIESCE: 2070 case RSM_STATE_CONNECT: 2071 case RSM_STATE_DISCONNECT: 2072 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 2073 (void) rsm_disconnect(seg); 2074 break; 2075 case RSM_STATE_MAPPING: 2076 /*FALLTHRU*/ 2077 case RSM_STATE_END: 2078 DBG_PRINTF((category, RSM_ERR, 2079 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2080 break; 2081 default: 2082 DBG_PRINTF((category, RSM_ERR, 2083 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2084 break; 2085 } 2086 2087 /* 2088 * check state. 2089 * - make sure you do crfree(s_cred); 2090 * release segment and minor number 2091 */ 2092 ASSERT(seg->s_state == RSM_STATE_NEW); 2093 2094 /* 2095 * The export_force_destroy callback is created to unlock 2096 * the exported segments of a process 2097 * when the process does a fork or exec and then exits calls this 2098 * function with the force flag set to 1 which indicates that the 2099 * segment state must be converted to ZOMBIE. This state means that the 2100 * segments still exist and have been unlocked and most importantly the 2101 * only operation allowed is to destroy them on an rsm_close. 
2102 */ 2103 if (force_flag) { 2104 rsmseglock_acquire(seg); 2105 seg->s_state = RSM_STATE_ZOMBIE; 2106 rsmseglock_release(seg); 2107 } else { 2108 rsmseg_free(seg); 2109 } 2110 2111 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n")); 2112 } 2113 2114 static int 2115 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred) 2116 { 2117 minor_t rnum = getminor(dev); 2118 rsmresource_t *res; 2119 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2120 2121 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n")); 2122 2123 flag = flag; cred = cred; 2124 2125 if (otyp != OTYP_CHR) 2126 return (EINVAL); 2127 2128 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum)); 2129 2130 /* 2131 * At this point we are the last reference to the resource. 2132 * Free resource number from resource table. 2133 * It's ok to remove number before we free the segment. 2134 * We need to lock the resource to protect against remote calls. 2135 */ 2136 if (rnum == RSM_DRIVER_MINOR || 2137 (res = rsmresource_free(rnum)) == NULL) { 2138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2139 return (DDI_SUCCESS); 2140 } 2141 2142 switch (res->rsmrc_type) { 2143 case RSM_RESOURCE_EXPORT_SEGMENT: 2144 case RSM_RESOURCE_IMPORT_SEGMENT: 2145 rsmseg_close((rsmseg_t *)res, 0); 2146 break; 2147 case RSM_RESOURCE_BAR: 2148 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n")); 2149 break; 2150 default: 2151 break; 2152 } 2153 2154 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2155 2156 return (DDI_SUCCESS); 2157 } 2158 2159 /* 2160 * rsm_inc_pgcnt 2161 * 2162 * Description: increment rsm page counter. 2163 * 2164 * Parameters: pgcnt_t pnum; number of pages to be used 2165 * 2166 * Returns: RSM_SUCCESS if memory limit not exceeded 2167 * ENOSPC if memory limit exceeded. In this case, the 2168 * page counter remains unchanged. 
2169 * 2170 */ 2171 static int 2172 rsm_inc_pgcnt(pgcnt_t pnum) 2173 { 2174 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2175 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2176 return (RSM_SUCCESS); 2177 } 2178 2179 mutex_enter(&rsm_pgcnt_lock); 2180 2181 if (rsm_pgcnt + pnum > rsm_pgcnt_max) { 2182 /* ensure that limits have not been exceeded */ 2183 mutex_exit(&rsm_pgcnt_lock); 2184 return (RSMERR_INSUFFICIENT_MEM); 2185 } 2186 2187 rsm_pgcnt += pnum; 2188 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n", 2189 rsm_pgcnt)); 2190 mutex_exit(&rsm_pgcnt_lock); 2191 2192 return (RSM_SUCCESS); 2193 } 2194 2195 /* 2196 * rsm_dec_pgcnt 2197 * 2198 * Description: decrement rsm page counter. 2199 * 2200 * Parameters: pgcnt_t pnum; number of pages freed 2201 * 2202 */ 2203 static void 2204 rsm_dec_pgcnt(pgcnt_t pnum) 2205 { 2206 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2207 2208 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2209 return; 2210 } 2211 2212 mutex_enter(&rsm_pgcnt_lock); 2213 ASSERT(rsm_pgcnt >= pnum); 2214 rsm_pgcnt -= pnum; 2215 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n", 2216 rsm_pgcnt)); 2217 mutex_exit(&rsm_pgcnt_lock); 2218 } 2219 2220 static struct umem_callback_ops rsm_as_ops = { 2221 UMEM_CALLBACK_VERSION, /* version number */ 2222 rsm_export_force_destroy, 2223 }; 2224 2225 static int 2226 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len, 2227 proc_t *procp) 2228 { 2229 int error = RSM_SUCCESS; 2230 ulong_t pnum; 2231 struct umem_callback_ops *callbackops = &rsm_as_ops; 2232 2233 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2234 2235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n")); 2236 2237 /* 2238 * Make sure vaddr and len are aligned on a page boundary 2239 */ 2240 if ((uintptr_t)vaddr & (PAGESIZE - 1)) { 2241 return (RSMERR_BAD_ADDR); 2242 } 2243 2244 if (len & (PAGESIZE - 1)) { 2245 return (RSMERR_BAD_LENGTH); 2246 } 2247 2248 
/* 2249 * Find number of pages 2250 */ 2251 pnum = btopr(len); 2252 error = rsm_inc_pgcnt(pnum); 2253 if (error != RSM_SUCCESS) { 2254 DBG_PRINTF((category, RSM_ERR, 2255 "rsm_bind_pages:mem limit exceeded\n")); 2256 return (RSMERR_INSUFFICIENT_MEM); 2257 } 2258 2259 error = umem_lockmemory(vaddr, len, 2260 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM, 2261 cookie, 2262 callbackops, procp); 2263 2264 if (error) { 2265 rsm_dec_pgcnt(pnum); 2266 DBG_PRINTF((category, RSM_ERR, 2267 "rsm_bind_pages:ddi_umem_lock failed\n")); 2268 /* 2269 * ddi_umem_lock, in the case of failure, returns one of 2270 * the following three errors. These are translated into 2271 * the RSMERR namespace and returned. 2272 */ 2273 if (error == EFAULT) 2274 return (RSMERR_BAD_ADDR); 2275 else if (error == EACCES) 2276 return (RSMERR_PERM_DENIED); 2277 else 2278 return (RSMERR_INSUFFICIENT_MEM); 2279 } 2280 2281 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); 2282 2283 return (error); 2284 2285 } 2286 2287 static int 2288 rsm_unbind_pages(rsmseg_t *seg) 2289 { 2290 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2291 2292 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); 2293 2294 ASSERT(rsmseglock_held(seg)); 2295 2296 if (seg->s_cookie != NULL) { 2297 /* unlock address range */ 2298 ddi_umem_unlock(seg->s_cookie); 2299 rsm_dec_pgcnt(btopr(seg->s_len)); 2300 seg->s_cookie = NULL; 2301 } 2302 2303 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); 2304 2305 return (RSM_SUCCESS); 2306 } 2307 2308 2309 static int 2310 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2311 { 2312 int e; 2313 adapter_t *adapter; 2314 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2315 2316 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); 2317 2318 adapter = rsm_getadapter(msg, mode); 2319 if (adapter == NULL) { 2320 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2321 "rsm_bind done:no adapter\n")); 
2322 return (RSMERR_CTLR_NOT_PRESENT); 2323 } 2324 2325 /* lock address range */ 2326 if (msg->vaddr == NULL) { 2327 rsmka_release_adapter(adapter); 2328 DBG_PRINTF((category, RSM_ERR, 2329 "rsm: rsm_bind done: invalid vaddr\n")); 2330 return (RSMERR_BAD_ADDR); 2331 } 2332 if (msg->len <= 0) { 2333 rsmka_release_adapter(adapter); 2334 DBG_PRINTF((category, RSM_ERR, 2335 "rsm_bind: invalid length\n")); 2336 return (RSMERR_BAD_LENGTH); 2337 } 2338 2339 /* Lock segment */ 2340 rsmseglock_acquire(seg); 2341 2342 while (seg->s_state == RSM_STATE_NEW_QUIESCED) { 2343 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2344 DBG_PRINTF((category, RSM_DEBUG, 2345 "rsm_bind done: cv_wait INTERRUPTED")); 2346 rsmka_release_adapter(adapter); 2347 rsmseglock_release(seg); 2348 return (RSMERR_INTERRUPTED); 2349 } 2350 } 2351 2352 ASSERT(seg->s_state == RSM_STATE_NEW); 2353 2354 ASSERT(seg->s_cookie == NULL); 2355 2356 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); 2357 if (e == RSM_SUCCESS) { 2358 seg->s_flags |= RSM_USER_MEMORY; 2359 if (msg->perm & RSM_ALLOW_REBIND) { 2360 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; 2361 } 2362 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { 2363 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; 2364 } 2365 seg->s_region.r_vaddr = msg->vaddr; 2366 /* 2367 * Set the s_pid value in the segment structure. This is used 2368 * to identify exported segments belonging to a particular 2369 * process so that when the process exits, these segments can 2370 * be unlocked forcefully even if rsm_close is not called on 2371 * process exit since there maybe other processes referencing 2372 * them (for example on a fork or exec). 2373 * The s_pid value is also used to authenticate the process 2374 * doing a publish or unpublish on the export segment. Only 2375 * the creator of the export segment has a right to do a 2376 * publish or unpublish and unbind on the segment. 
2377 */ 2378 seg->s_pid = ddi_get_pid(); 2379 seg->s_len = msg->len; 2380 seg->s_state = RSM_STATE_BIND; 2381 seg->s_adapter = adapter; 2382 seg->s_proc = curproc; 2383 } else { 2384 rsmka_release_adapter(adapter); 2385 DBG_PRINTF((category, RSM_WARNING, 2386 "unable to lock down pages\n")); 2387 } 2388 2389 msg->rnum = seg->s_minor; 2390 /* Unlock segment */ 2391 rsmseglock_release(seg); 2392 2393 if (e == RSM_SUCCESS) { 2394 /* copyout the resource number */ 2395 #ifdef _MULTI_DATAMODEL 2396 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2397 rsm_ioctlmsg32_t msg32; 2398 2399 msg32.rnum = msg->rnum; 2400 if (ddi_copyout((caddr_t)&msg32.rnum, 2401 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum, 2402 sizeof (minor_t), mode)) { 2403 rsmka_release_adapter(adapter); 2404 e = RSMERR_BAD_ADDR; 2405 } 2406 } 2407 #endif 2408 if (ddi_copyout((caddr_t)&msg->rnum, 2409 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum, 2410 sizeof (minor_t), mode)) { 2411 rsmka_release_adapter(adapter); 2412 e = RSMERR_BAD_ADDR; 2413 } 2414 } 2415 2416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n")); 2417 2418 return (e); 2419 } 2420 2421 static void 2422 rsm_remap_local_importers(rsm_node_id_t src_nodeid, 2423 rsm_memseg_id_t ex_segid, ddi_umem_cookie_t cookie) 2424 { 2425 rsmresource_t *p = NULL; 2426 rsmhash_table_t *rhash = &rsm_import_segs; 2427 uint_t index; 2428 2429 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2430 "rsm_remap_local_importers enter\n")); 2431 2432 index = rsmhash(ex_segid); 2433 2434 rw_enter(&rhash->rsmhash_rw, RW_READER); 2435 2436 p = rsmhash_getbkt(rhash, index); 2437 2438 for (; p; p = p->rsmrc_next) { 2439 rsmseg_t *seg = (rsmseg_t *)p; 2440 rsmseglock_acquire(seg); 2441 /* 2442 * Change the s_cookie value of only the local importers 2443 * which have been mapped (in state RSM_STATE_ACTIVE). 
/*
 * rsm_rebind
 *
 * Description: replace the memory backing a bound or exported segment with
 *		a new address range of the same length. The new range is
 *		locked down first, the adapter is asked to rebind, and only
 *		when that succeeds are the old pages released and local
 *		importers pointed at the new cookie.
 *
 * Parameters:	rsmseg_t *seg;		segment to rebind
 *		rsm_ioctlmsg_t *msg;	vaddr and len of the new range
 *
 * Returns:	RSM_SUCCESS on success, or when msg->vaddr already equals
 *		the segment's current address (nothing is done)
 *		RSMERR_REBIND_NOT_ALLOWED if RSMKA_ALLOW_UNBIND_REBIND is
 *		not set on the segment
 *		RSMERR_NOT_CREATOR if the caller's pid is neither the
 *		segment's s_pid nor 0
 *		RSMERR_BAD_ADDR if msg->vaddr is NULL
 *		RSMERR_BAD_LENGTH if msg->len differs from the segment length
 *		RSMERR_INTERRUPTED if a signal arrives while waiting for the
 *		segment to leave a quiesced/quiescing state
 *		RSMERR_BAD_SEG_HNDL if the segment is neither in
 *		RSM_STATE_BIND nor in RSM_STATE_EXPORT
 *		otherwise the error returned by rsm_bind_pages or by the
 *		adapter's rsm_rebind operation
 */
static int
rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
{
	int			e;
	adapter_t		*adapter;
	ddi_umem_cookie_t	cookie;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));

	/*
	 * Check for permissions to rebind.
	 * Note: this and the following argument checks read segment fields
	 * before the segment lock is taken.
	 */
	if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
		return (RSMERR_REBIND_NOT_ALLOWED);
	}

	/* only the pid recorded in s_pid, or pid 0, may rebind */
	if (seg->s_pid != ddi_get_pid() &&
	    ddi_get_pid() != 0) {
		DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
		return (RSMERR_NOT_CREATOR);
	}

	/*
	 * We will not be allowing partial rebind and hence length passed
	 * in must be same as segment length
	 */
	if (msg->vaddr == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_rebind done: null msg->vaddr\n"));
		return (RSMERR_BAD_ADDR);
	}
	if (msg->len != seg->s_len) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_rebind: invalid length\n"));
		return (RSMERR_BAD_LENGTH);
	}

	/* Lock segment */
	rsmseglock_acquire(seg);

	/* wait, interruptibly, until the segment is no longer quiesced */
	while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_rebind done: cv_wait INTERRUPTED"));
			return (RSMERR_INTERRUPTED);
		}
	}

	/* verify segment state */
	if ((seg->s_state != RSM_STATE_BIND) &&
	    (seg->s_state != RSM_STATE_EXPORT)) {
		/* Unlock segment */
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_rebind done: invalid state\n"));
		return (RSMERR_BAD_SEG_HNDL);
	}

	ASSERT(seg->s_cookie != NULL);

	/* rebinding to the address already in use is a no-op */
	if (msg->vaddr == seg->s_region.r_vaddr) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
		return (RSM_SUCCESS);
	}

	/*
	 * Lock down the new range into a separate cookie; the segment keeps
	 * its current cookie until the adapter has accepted the rebind.
	 */
	e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
	if (e == RSM_SUCCESS) {
		struct buf *xbuf;
		dev_t sdev = 0;
		rsm_memory_local_t mem;

		/* describe the new pages to the adapter through a buf */
		xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
		    sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = seg->s_adapter;
		e = adapter->rsmpi_ops->rsm_rebind(
		    seg->s_handle.out, 0, &mem,
		    RSM_RESOURCE_DONTWAIT, NULL);

		if (e == RSM_SUCCESS) {
			/*
			 * unbind the older pages, and unload local importers;
			 * but don't disconnect importers
			 */
			(void) rsm_unbind_pages(seg);
			seg->s_cookie = cookie;
			seg->s_region.r_vaddr = msg->vaddr;
			rsm_remap_local_importers(my_nodeid, seg->s_segid,
			    cookie);
		} else {
			/*
			 * Unbind the pages associated with "cookie" by the
			 * rsm_bind_pages calls prior to this. This is
			 * similar to what is done in the rsm_unbind_pages
			 * routine for the seg->s_cookie.
			 */
			ddi_umem_unlock(cookie);
			rsm_dec_pgcnt(btopr(msg->len));
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_rebind failed with %d\n", e));
		}
		/*
		 * At present there is no dependency on the existence of xbuf.
		 * So we can free it here. If in the future this changes, it can
		 * be freed sometime during the segment destroy.
		 */
		freerbuf(xbuf);
	}

	/* Unlock segment */
	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));

	return (e);
}
acl_sz = acl_len * sizeof (rsm_access_entry_t); 2648 kmem_free((void *)acl, acl_sz); 2649 } 2650 2651 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); 2652 2653 } 2654 2655 static int 2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode, 2657 rsmapi_access_entry_t **list, int *len, int loopback) 2658 { 2659 rsmapi_access_entry_t *acl; 2660 int acl_len; 2661 int i; 2662 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2663 2664 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); 2665 2666 *len = 0; 2667 *list = NULL; 2668 2669 acl_len = msg->acl_len; 2670 if ((loopback && acl_len > 1) || (acl_len < 0) || 2671 (acl_len > MAX_NODES)) { 2672 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2673 "rsmacl_build done: acl invalid\n")); 2674 return (RSMERR_BAD_ACL); 2675 } 2676 2677 if (acl_len > 0 && acl_len <= MAX_NODES) { 2678 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); 2679 2680 acl = kmem_alloc(acl_size, KM_SLEEP); 2681 2682 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, 2683 acl_size, mode)) { 2684 kmem_free((void *) acl, acl_size); 2685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2686 "rsmacl_build done: BAD_ADDR\n")); 2687 return (RSMERR_BAD_ADDR); 2688 } 2689 2690 /* 2691 * Verify access list 2692 */ 2693 for (i = 0; i < acl_len; i++) { 2694 if (acl[i].ae_node > MAX_NODES || 2695 (loopback && (acl[i].ae_node != my_nodeid)) || 2696 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2697 /* invalid entry */ 2698 kmem_free((void *) acl, acl_size); 2699 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2700 "rsmacl_build done: EINVAL\n")); 2701 return (RSMERR_BAD_ACL); 2702 } 2703 } 2704 2705 *len = acl_len; 2706 *list = acl; 2707 } 2708 2709 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2710 2711 return (DDI_SUCCESS); 2712 } 2713 2714 static int 2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2716 int acl_len, adapter_t *adapter) 2717 { 2718 rsm_access_entry_t *acl; 2719 rsm_addr_t hwaddr; 2720 int 
i; 2721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2722 2723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2724 2725 if (src != NULL) { 2726 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2727 acl = kmem_alloc(acl_size, KM_SLEEP); 2728 2729 /* 2730 * translate access list 2731 */ 2732 for (i = 0; i < acl_len; i++) { 2733 if (src[i].ae_node == my_nodeid) { 2734 acl[i].ae_addr = adapter->hwaddr; 2735 } else { 2736 hwaddr = get_remote_hwaddr(adapter, 2737 src[i].ae_node); 2738 if ((int64_t)hwaddr < 0) { 2739 /* invalid hwaddr */ 2740 kmem_free((void *) acl, acl_size); 2741 DBG_PRINTF((category, 2742 RSM_DEBUG_VERBOSE, 2743 "rsmpiacl_create done:" 2744 "EINVAL hwaddr\n")); 2745 return (RSMERR_INTERNAL_ERROR); 2746 } 2747 acl[i].ae_addr = hwaddr; 2748 } 2749 /* rsmpi understands only RSM_PERM_XXXX */ 2750 acl[i].ae_permission = 2751 src[i].ae_permission & RSM_PERM_RDWR; 2752 } 2753 *dest = acl; 2754 } else { 2755 *dest = NULL; 2756 } 2757 2758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2759 2760 return (RSM_SUCCESS); 2761 } 2762 2763 static int 2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2765 rsmipc_reply_t *reply) 2766 { 2767 2768 int i; 2769 rsmseg_t *seg; 2770 rsm_memseg_id_t key = req->rsmipc_key; 2771 rsm_permission_t perm = req->rsmipc_perm; 2772 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2773 2774 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2775 "rsmsegacl_validate enter\n")); 2776 2777 /* 2778 * Find segment and grab its lock. The reason why we grab the segment 2779 * lock in side the search is to avoid the race when the segment is 2780 * being deleted and we already have a pointer to it. 
2781 */ 2782 seg = rsmexport_lookup(key); 2783 if (!seg) { 2784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2785 "rsmsegacl_validate done: %u ENXIO\n", key)); 2786 return (RSMERR_SEG_NOT_PUBLISHED); 2787 } 2788 2789 ASSERT(rsmseglock_held(seg)); 2790 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2791 2792 /* 2793 * We implement a 2-level protection scheme. 2794 * First, we check if local/remote host has access rights. 2795 * Second, we check if the user has access rights. 2796 * 2797 * This routine only validates the rnode access_list 2798 */ 2799 if (seg->s_acl_len > 0) { 2800 /* 2801 * Check host access list 2802 */ 2803 ASSERT(seg->s_acl != NULL); 2804 for (i = 0; i < seg->s_acl_len; i++) { 2805 if (seg->s_acl[i].ae_node == rnode) { 2806 perm &= seg->s_acl[i].ae_permission; 2807 goto found; 2808 } 2809 } 2810 /* rnode is not found in the list */ 2811 rsmseglock_release(seg); 2812 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2813 "rsmsegacl_validate done: EPERM\n")); 2814 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2815 } else { 2816 /* use default owner creation umask */ 2817 perm &= seg->s_mode; 2818 } 2819 2820 found: 2821 /* update perm for this node */ 2822 reply->rsmipc_mode = perm; 2823 reply->rsmipc_uid = seg->s_uid; 2824 reply->rsmipc_gid = seg->s_gid; 2825 reply->rsmipc_segid = seg->s_segid; 2826 reply->rsmipc_seglen = seg->s_len; 2827 2828 /* 2829 * Perm of requesting node is valid; source will validate user 2830 */ 2831 rsmseglock_release(seg); 2832 2833 /* 2834 * Add the importer to the list right away, if connect fails 2835 * the importer will ask the exporter to remove it. 
2836 */ 2837 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2838 req->rsmipc_segment_cookie); 2839 2840 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2841 2842 return (RSM_SUCCESS); 2843 } 2844 2845 2846 /* ************************** Exporter Calls ************************* */ 2847 2848 static int 2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2850 { 2851 int e; 2852 int acl_len; 2853 rsmapi_access_entry_t *acl; 2854 rsm_access_entry_t *rsmpi_acl; 2855 rsm_memory_local_t mem; 2856 struct buf *xbuf; 2857 dev_t sdev = 0; 2858 adapter_t *adapter; 2859 rsm_memseg_id_t segment_id = 0; 2860 int loopback_flag = 0; 2861 int create_flags = 0; 2862 rsm_resource_callback_t callback_flag; 2863 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2864 2865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2866 2867 if (seg->s_adapter == &loopback_adapter) 2868 loopback_flag = 1; 2869 2870 if (seg->s_pid != ddi_get_pid() && 2871 ddi_get_pid() != 0) { 2872 DBG_PRINTF((category, RSM_ERR, 2873 "rsm_publish: Not creator\n")); 2874 return (RSMERR_NOT_CREATOR); 2875 } 2876 2877 /* 2878 * Get per node access list 2879 */ 2880 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2881 if (e != DDI_SUCCESS) { 2882 DBG_PRINTF((category, RSM_ERR, 2883 "rsm_publish done: rsmacl_build failed\n")); 2884 return (e); 2885 } 2886 2887 /* 2888 * The application provided msg->key is used for resolving a 2889 * segment id according to the following: 2890 * key = 0 Kernel Agent selects the segment id 2891 * key <= RSM_DLPI_ID_END Reserved for system usage except 2892 * RSMLIB range 2893 * key < RSM_USER_APP_ID_BASE segment id = key 2894 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2895 * 2896 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2897 * overflows to zero after 0x80000000 allocations. 
2898 * An algorithm is needed which allows reinitialization and provides 2899 * for reallocation after overflow. For now, ENOMEM is returned 2900 * once the overflow condition has occurred. 2901 */ 2902 if (msg->key == 0) { 2903 mutex_enter(&rsm_lock); 2904 segment_id = rsm_nextavail_segmentid; 2905 if (segment_id != 0) { 2906 rsm_nextavail_segmentid++; 2907 mutex_exit(&rsm_lock); 2908 } else { 2909 mutex_exit(&rsm_lock); 2910 DBG_PRINTF((category, RSM_ERR, 2911 "rsm_publish done: no more keys avlbl\n")); 2912 return (RSMERR_INSUFFICIENT_RESOURCES); 2913 } 2914 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END) 2915 /* range reserved for internal use by base/ndi libraries */ 2916 segment_id = msg->key; 2917 else if (msg->key <= RSM_DLPI_ID_END) 2918 return (RSMERR_RESERVED_SEGID); 2919 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1) 2920 segment_id = msg->key; 2921 else { 2922 DBG_PRINTF((category, RSM_ERR, 2923 "rsm_publish done: invalid key %u\n", msg->key)); 2924 return (RSMERR_RESERVED_SEGID); 2925 } 2926 2927 /* Add key to exportlist; The segment lock is held on success */ 2928 e = rsmexport_add(seg, segment_id); 2929 if (e) { 2930 rsmacl_free(acl, acl_len); 2931 DBG_PRINTF((category, RSM_ERR, 2932 "rsm_publish done: export_add failed: %d\n", e)); 2933 return (e); 2934 } 2935 2936 seg->s_segid = segment_id; 2937 2938 if ((seg->s_state != RSM_STATE_BIND) && 2939 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2940 /* state changed since then, free acl and return */ 2941 rsmseglock_release(seg); 2942 rsmexport_rm(seg); 2943 rsmacl_free(acl, acl_len); 2944 DBG_PRINTF((category, RSM_ERR, 2945 "rsm_publish done: segment in wrong state: %d\n", 2946 seg->s_state)); 2947 return (RSMERR_BAD_SEG_HNDL); 2948 } 2949 2950 /* 2951 * If this is for a local memory handle and permissions are zero, 2952 * then the surrogate segment is very large and we want to skip 2953 * allocation of DVMA space. 2954 * 2955 * Careful! 
If the user didn't use an ACL list, acl will be a NULL 2956 * pointer. Check that before dereferencing it. 2957 */ 2958 if (acl != (rsmapi_access_entry_t *)NULL) { 2959 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 2960 goto skipdriver; 2961 } 2962 2963 /* create segment */ 2964 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE, 2965 sdev, 0, NULL, DDI_UMEM_SLEEP); 2966 ASSERT(xbuf != NULL); 2967 2968 mem.ms_type = RSM_MEM_BUF; 2969 mem.ms_bp = xbuf; 2970 2971 /* This call includes a bind operations */ 2972 2973 adapter = seg->s_adapter; 2974 /* 2975 * create a acl list with hwaddr for RSMPI publish 2976 */ 2977 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter); 2978 2979 if (e != RSM_SUCCESS) { 2980 rsmseglock_release(seg); 2981 rsmexport_rm(seg); 2982 rsmacl_free(acl, acl_len); 2983 freerbuf(xbuf); 2984 DBG_PRINTF((category, RSM_ERR, 2985 "rsm_publish done: rsmpiacl_create failed: %d\n", e)); 2986 return (e); 2987 } 2988 2989 if (seg->s_state == RSM_STATE_BIND) { 2990 /* create segment */ 2991 2992 /* This call includes a bind operations */ 2993 2994 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 2995 create_flags = RSM_ALLOW_UNBIND_REBIND; 2996 } 2997 2998 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 2999 callback_flag = RSM_RESOURCE_DONTWAIT; 3000 } else { 3001 callback_flag = RSM_RESOURCE_SLEEP; 3002 } 3003 3004 e = adapter->rsmpi_ops->rsm_seg_create( 3005 adapter->rsmpi_handle, 3006 &seg->s_handle.out, seg->s_len, 3007 create_flags, &mem, 3008 callback_flag, NULL); 3009 /* 3010 * At present there is no dependency on the existence of xbuf. 3011 * So we can free it here. If in the future this changes, it can 3012 * be freed sometime during the segment destroy. 
3013 */ 3014 freerbuf(xbuf); 3015 3016 if (e != RSM_SUCCESS) { 3017 rsmseglock_release(seg); 3018 rsmexport_rm(seg); 3019 rsmacl_free(acl, acl_len); 3020 rsmpiacl_free(rsmpi_acl, acl_len); 3021 DBG_PRINTF((category, RSM_ERR, 3022 "rsm_publish done: export_create failed: %d\n", e)); 3023 /* 3024 * The following assertion ensures that the two errors 3025 * related to the length and its alignment do not occur 3026 * since they have been checked during export_create 3027 */ 3028 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT && 3029 e != RSMERR_BAD_LENGTH); 3030 if (e == RSMERR_NOT_MEM) 3031 e = RSMERR_INSUFFICIENT_MEM; 3032 3033 return (e); 3034 } 3035 /* export segment, this should create an IMMU mapping */ 3036 e = adapter->rsmpi_ops->rsm_publish( 3037 seg->s_handle.out, 3038 rsmpi_acl, acl_len, 3039 seg->s_segid, 3040 RSM_RESOURCE_DONTWAIT, NULL); 3041 3042 if (e != RSM_SUCCESS) { 3043 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3044 rsmseglock_release(seg); 3045 rsmexport_rm(seg); 3046 rsmacl_free(acl, acl_len); 3047 rsmpiacl_free(rsmpi_acl, acl_len); 3048 DBG_PRINTF((category, RSM_ERR, 3049 "rsm_publish done: export_publish failed: %d\n", 3050 e)); 3051 return (e); 3052 } 3053 } 3054 3055 seg->s_acl_in = rsmpi_acl; 3056 3057 skipdriver: 3058 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */ 3059 seg->s_acl_len = acl_len; 3060 seg->s_acl = acl; 3061 3062 if (seg->s_state == RSM_STATE_BIND) { 3063 seg->s_state = RSM_STATE_EXPORT; 3064 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 3065 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 3066 cv_broadcast(&seg->s_cv); 3067 } 3068 3069 rsmseglock_release(seg); 3070 3071 /* 3072 * If the segment id was solicited, then return it in 3073 * the original incoming message. 
3074 */ 3075 if (msg->key == 0) { 3076 msg->key = segment_id; 3077 #ifdef _MULTI_DATAMODEL 3078 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 3079 rsm_ioctlmsg32_t msg32; 3080 3081 msg32.key = msg->key; 3082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3083 "rsm_publish done\n")); 3084 return (ddi_copyout((caddr_t)&msg32, 3085 (caddr_t)dataptr, sizeof (msg32), mode)); 3086 } 3087 #endif 3088 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3089 "rsm_publish done\n")); 3090 return (ddi_copyout((caddr_t)msg, 3091 (caddr_t)dataptr, sizeof (*msg), mode)); 3092 } 3093 3094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n")); 3095 return (DDI_SUCCESS); 3096 } 3097 3098 /* 3099 * This function modifies the access control list of an already published 3100 * segment. There is no effect on import segments which are already 3101 * connected. 3102 */ 3103 static int 3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode) 3105 { 3106 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl; 3107 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl; 3108 int new_acl_len, old_acl_len, tmp_acl_len; 3109 int e, i; 3110 adapter_t *adapter; 3111 int loopback_flag = 0; 3112 rsm_memseg_id_t key; 3113 rsm_permission_t permission; 3114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3115 3116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n")); 3117 3118 if ((seg->s_state != RSM_STATE_EXPORT) && 3119 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) && 3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCING)) 3121 return (RSMERR_SEG_NOT_PUBLISHED); 3122 3123 if (seg->s_pid != ddi_get_pid() && 3124 ddi_get_pid() != 0) { 3125 DBG_PRINTF((category, RSM_ERR, 3126 "rsm_republish: Not owner\n")); 3127 return (RSMERR_NOT_CREATOR); 3128 } 3129 3130 if (seg->s_adapter == &loopback_adapter) 3131 loopback_flag = 1; 3132 3133 /* 3134 * Build new list first 3135 */ 3136 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag); 3137 if (e) { 3138 DBG_PRINTF((category, 
RSM_DEBUG_VERBOSE, 3139 "rsm_republish done: rsmacl_build failed %d", e)); 3140 return (e); 3141 } 3142 3143 /* Lock segment */ 3144 rsmseglock_acquire(seg); 3145 /* 3146 * a republish is in progress - REPUBLISH message is being 3147 * sent to the importers so wait for it to complete OR 3148 * wait till DR completes 3149 */ 3150 while (((seg->s_state == RSM_STATE_EXPORT) && 3151 (seg->s_flags & RSM_REPUBLISH_WAIT)) || 3152 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) || 3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) { 3154 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3155 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3156 "rsm_republish done: cv_wait INTERRUPTED")); 3157 rsmseglock_release(seg); 3158 rsmacl_free(new_acl, new_acl_len); 3159 return (RSMERR_INTERRUPTED); 3160 } 3161 } 3162 3163 /* recheck if state is valid */ 3164 if (seg->s_state != RSM_STATE_EXPORT) { 3165 rsmseglock_release(seg); 3166 rsmacl_free(new_acl, new_acl_len); 3167 return (RSMERR_SEG_NOT_PUBLISHED); 3168 } 3169 3170 key = seg->s_key; 3171 old_acl = seg->s_acl; 3172 old_acl_len = seg->s_acl_len; 3173 3174 seg->s_acl = new_acl; 3175 seg->s_acl_len = new_acl_len; 3176 3177 /* 3178 * This call will only be meaningful if and when the interconnect 3179 * layer makes use of the access list 3180 */ 3181 adapter = seg->s_adapter; 3182 /* 3183 * create a acl list with hwaddr for RSMPI publish 3184 */ 3185 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter); 3186 3187 if (e != RSM_SUCCESS) { 3188 seg->s_acl = old_acl; 3189 seg->s_acl_len = old_acl_len; 3190 rsmseglock_release(seg); 3191 rsmacl_free(new_acl, new_acl_len); 3192 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3193 "rsm_republish done: rsmpiacl_create failed %d", e)); 3194 return (e); 3195 } 3196 rsmpi_old_acl = seg->s_acl_in; 3197 seg->s_acl_in = rsmpi_new_acl; 3198 3199 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out, 3200 seg->s_acl_in, seg->s_acl_len, 3201 RSM_RESOURCE_DONTWAIT, NULL); 3202 3203 if (e != 
RSM_SUCCESS) { 3204 seg->s_acl = old_acl; 3205 seg->s_acl_in = rsmpi_old_acl; 3206 seg->s_acl_len = old_acl_len; 3207 rsmseglock_release(seg); 3208 rsmacl_free(new_acl, new_acl_len); 3209 rsmpiacl_free(rsmpi_new_acl, new_acl_len); 3210 3211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3212 "rsm_republish done: rsmpi republish failed %d\n", e)); 3213 return (e); 3214 } 3215 3216 /* create a tmp copy of the new acl */ 3217 tmp_acl_len = new_acl_len; 3218 if (tmp_acl_len > 0) { 3219 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP); 3220 for (i = 0; i < tmp_acl_len; i++) { 3221 tmp_acl[i].ae_node = new_acl[i].ae_node; 3222 tmp_acl[i].ae_permission = new_acl[i].ae_permission; 3223 } 3224 /* 3225 * The default permission of a node which was in the old 3226 * ACL but not in the new ACL is 0 ie no access. 3227 */ 3228 permission = 0; 3229 } else { 3230 /* 3231 * NULL acl means all importers can connect and 3232 * default permission will be owner creation umask 3233 */ 3234 tmp_acl = NULL; 3235 permission = seg->s_mode; 3236 } 3237 3238 /* make other republishers to wait for republish to complete */ 3239 seg->s_flags |= RSM_REPUBLISH_WAIT; 3240 3241 rsmseglock_release(seg); 3242 3243 /* send the new perms to the importing nodes */ 3244 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission); 3245 3246 rsmseglock_acquire(seg); 3247 seg->s_flags &= ~RSM_REPUBLISH_WAIT; 3248 /* wake up any one waiting for republish to complete */ 3249 cv_broadcast(&seg->s_cv); 3250 rsmseglock_release(seg); 3251 3252 rsmacl_free(tmp_acl, tmp_acl_len); 3253 rsmacl_free(old_acl, old_acl_len); 3254 rsmpiacl_free(rsmpi_old_acl, old_acl_len); 3255 3256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n")); 3257 return (DDI_SUCCESS); 3258 } 3259 3260 static int 3261 rsm_unpublish(rsmseg_t *seg, int mode) 3262 { 3263 rsmapi_access_entry_t *acl; 3264 rsm_access_entry_t *rsmpi_acl; 3265 int acl_len; 3266 int e; 3267 adapter_t *adapter; 3268 DBG_DEFINE(category, RSM_KERNEL_AGENT | 
RSM_EXPORT); 3269 3270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n")); 3271 3272 if (seg->s_pid != ddi_get_pid() && 3273 ddi_get_pid() != 0) { 3274 DBG_PRINTF((category, RSM_ERR, 3275 "rsm_unpublish: Not creator\n")); 3276 return (RSMERR_NOT_CREATOR); 3277 } 3278 3279 rsmseglock_acquire(seg); 3280 /* 3281 * wait for QUIESCING to complete here before rsmexport_rm 3282 * is called because the SUSPEND_COMPLETE mesg which changes 3283 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and 3284 * signals the cv_wait needs to find it in the hashtable. 3285 */ 3286 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3287 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3288 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3289 rsmseglock_release(seg); 3290 DBG_PRINTF((category, RSM_ERR, 3291 "rsm_unpublish done: cv_wait INTR qscing" 3292 "getv/putv in progress")); 3293 return (RSMERR_INTERRUPTED); 3294 } 3295 } 3296 3297 /* verify segment state */ 3298 if ((seg->s_state != RSM_STATE_EXPORT) && 3299 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3300 rsmseglock_release(seg); 3301 DBG_PRINTF((category, RSM_ERR, 3302 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3303 return (RSMERR_SEG_NOT_PUBLISHED); 3304 } 3305 3306 rsmseglock_release(seg); 3307 3308 rsmexport_rm(seg); 3309 3310 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3311 3312 rsmseglock_acquire(seg); 3313 /* 3314 * wait for republish to complete 3315 */ 3316 while ((seg->s_state == RSM_STATE_EXPORT) && 3317 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3318 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3319 DBG_PRINTF((category, RSM_ERR, 3320 "rsm_unpublish done: cv_wait INTR repubing")); 3321 rsmseglock_release(seg); 3322 return (RSMERR_INTERRUPTED); 3323 } 3324 } 3325 3326 if ((seg->s_state != RSM_STATE_EXPORT) && 3327 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3328 DBG_PRINTF((category, RSM_ERR, 3329 "rsm_unpublish done: invalid state")); 3330 
rsmseglock_release(seg); 3331 return (RSMERR_SEG_NOT_PUBLISHED); 3332 } 3333 3334 /* 3335 * check for putv/get surrogate segment which was not published 3336 * to the driver. 3337 * 3338 * Be certain to see if there is an ACL first! If this segment was 3339 * not published with an ACL, acl will be a null pointer. Check 3340 * that before dereferencing it. 3341 */ 3342 acl = seg->s_acl; 3343 if (acl != (rsmapi_access_entry_t *)NULL) { 3344 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3345 goto bypass; 3346 } 3347 3348 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3349 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3350 goto bypass; 3351 3352 adapter = seg->s_adapter; 3353 for (;;) { 3354 if (seg->s_state != RSM_STATE_EXPORT) { 3355 rsmseglock_release(seg); 3356 DBG_PRINTF((category, RSM_ERR, 3357 "rsm_unpublish done: bad state %x\n", 3358 seg->s_state)); 3359 return (RSMERR_SEG_NOT_PUBLISHED); 3360 } 3361 3362 /* unpublish from adapter */ 3363 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3364 3365 if (e == RSM_SUCCESS) { 3366 break; 3367 } 3368 3369 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3370 /* 3371 * wait for unpublish to succeed, it's busy. 
3372 */ 3373 seg->s_flags |= RSM_EXPORT_WAIT; 3374 3375 /* wait for a max of 1 ms - this is an empirical */ 3376 /* value that was found by some minimal testing */ 3377 /* can be fine tuned when we have better numbers */ 3378 /* A long term fix would be to send cv_signal */ 3379 /* from the intr callback routine */ 3380 /* currently nobody signals this wait */ 3381 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock, 3382 drv_usectohz(1000), TR_CLOCK_TICK); 3383 3384 DBG_PRINTF((category, RSM_ERR, 3385 "rsm_unpublish: SEG_IN_USE\n")); 3386 3387 seg->s_flags &= ~RSM_EXPORT_WAIT; 3388 } else { 3389 if (mode == 1) { 3390 DBG_PRINTF((category, RSM_ERR, 3391 "rsm:rsmpi unpublish err %x\n", e)); 3392 seg->s_state = RSM_STATE_BIND; 3393 } 3394 rsmseglock_release(seg); 3395 return (e); 3396 } 3397 } 3398 3399 /* Free segment */ 3400 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3401 3402 if (e != RSM_SUCCESS) { 3403 DBG_PRINTF((category, RSM_ERR, 3404 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3405 seg->s_key, e)); 3406 } 3407 3408 bypass: 3409 acl = seg->s_acl; 3410 rsmpi_acl = seg->s_acl_in; 3411 acl_len = seg->s_acl_len; 3412 3413 seg->s_acl = NULL; 3414 seg->s_acl_in = NULL; 3415 seg->s_acl_len = 0; 3416 3417 if (seg->s_state == RSM_STATE_EXPORT) { 3418 seg->s_state = RSM_STATE_BIND; 3419 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3420 seg->s_state = RSM_STATE_BIND_QUIESCED; 3421 cv_broadcast(&seg->s_cv); 3422 } 3423 3424 rsmseglock_release(seg); 3425 3426 rsmacl_free(acl, acl_len); 3427 rsmpiacl_free(rsmpi_acl, acl_len); 3428 3429 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3430 3431 return (DDI_SUCCESS); 3432 } 3433 3434 /* 3435 * Called from rsm_unpublish to force an unload and disconnection of all 3436 * importers of the unpublished segment. 3437 * 3438 * First build the list of segments requiring a force disconnect, then 3439 * send a request for each. 
3440 */ 3441 static void 3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3443 rsm_node_id_t ex_nodeid) 3444 { 3445 rsmipc_request_t request; 3446 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3447 importing_token_t *force_disconnect_list = NULL; 3448 int index; 3449 3450 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3451 "rsm_send_importer_disconnects enter\n")); 3452 3453 index = rsmhash(ex_segid); 3454 3455 mutex_enter(&importer_list.lock); 3456 3457 prev_token = NULL; 3458 token = importer_list.bucket[index]; 3459 3460 while (token != NULL) { 3461 if (token->key == ex_segid) { 3462 /* 3463 * take it off the importer list and add it 3464 * to the force disconnect list. 3465 */ 3466 if (prev_token == NULL) 3467 importer_list.bucket[index] = token->next; 3468 else 3469 prev_token->next = token->next; 3470 tmp_token = token; 3471 token = token->next; 3472 if (force_disconnect_list == NULL) { 3473 force_disconnect_list = tmp_token; 3474 tmp_token->next = NULL; 3475 } else { 3476 tokp = force_disconnect_list; 3477 /* 3478 * make sure that the tmp_token's node 3479 * is not already on the force disconnect 3480 * list. 
3481 */ 3482 while (tokp != NULL) { 3483 if (tokp->importing_node == 3484 tmp_token->importing_node) { 3485 break; 3486 } 3487 tokp = tokp->next; 3488 } 3489 if (tokp == NULL) { 3490 tmp_token->next = 3491 force_disconnect_list; 3492 force_disconnect_list = tmp_token; 3493 } else { 3494 kmem_free((void *)tmp_token, 3495 sizeof (*token)); 3496 } 3497 } 3498 3499 } else { 3500 prev_token = token; 3501 token = token->next; 3502 } 3503 } 3504 mutex_exit(&importer_list.lock); 3505 3506 token = force_disconnect_list; 3507 while (token != NULL) { 3508 if (token->importing_node == my_nodeid) { 3509 rsm_force_unload(ex_nodeid, ex_segid, 3510 DISCONNECT); 3511 } else { 3512 request.rsmipc_hdr.rsmipc_type = 3513 RSMIPC_MSG_DISCONNECT; 3514 request.rsmipc_key = token->key; 3515 for (;;) { 3516 if (rsmipc_send(token->importing_node, 3517 &request, 3518 RSM_NO_REPLY) == RSM_SUCCESS) { 3519 break; 3520 } else { 3521 delay(drv_usectohz(10000)); 3522 } 3523 } 3524 } 3525 tmp_token = token; 3526 token = token->next; 3527 kmem_free((void *)tmp_token, sizeof (*token)); 3528 } 3529 3530 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3531 "rsm_send_importer_disconnects done\n")); 3532 } 3533 3534 /* 3535 * This function is used as a callback for unlocking the pages locked 3536 * down by a process which then does a fork or an exec. 3537 * It marks the export segments corresponding to umem cookie given by 3538 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3539 * destroyed later when an rsm_close occurs). 
3540 */ 3541 static void 3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3543 { 3544 rsmresource_blk_t *blk; 3545 rsmresource_t *p; 3546 rsmseg_t *eseg = NULL; 3547 int i, j; 3548 int found = 0; 3549 3550 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3551 "rsm_export_force_destroy enter\n")); 3552 3553 /* 3554 * Walk the resource list and locate the export segment (either 3555 * in the BIND or the EXPORT state) which corresponds to the 3556 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 3557 * Change the state to ZOMBIE by calling rsmseg_close with the 3558 * force_flag argument (the second argument) set to 1. Also, 3559 * unpublish and unbind the segment, but don't free it. Free it 3560 * only on a rsm_close call for the segment. 3561 */ 3562 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3563 3564 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3565 blk = rsm_resource.rsmrc_root[i]; 3566 if (blk == NULL) { 3567 continue; 3568 } 3569 3570 for (j = 0; j < RSMRC_BLKSZ; j++) { 3571 p = blk->rsmrcblk_blks[j]; 3572 if ((p != NULL) && (p != RSMRC_RESERVED) && 3573 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3574 eseg = (rsmseg_t *)p; 3575 if (eseg->s_cookie != ck) 3576 continue; /* continue searching */ 3577 /* 3578 * Found the segment, set flag to indicate 3579 * force destroy processing is in progress 3580 */ 3581 rsmseglock_acquire(eseg); 3582 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3583 rsmseglock_release(eseg); 3584 found = 1; 3585 break; 3586 } 3587 } 3588 3589 if (found) 3590 break; 3591 } 3592 3593 rw_exit(&rsm_resource.rsmrc_lock); 3594 3595 if (found) { 3596 ASSERT(eseg != NULL); 3597 /* call rsmseg_close with force flag set to 1 */ 3598 rsmseg_close(eseg, 1); 3599 /* 3600 * force destroy processing done, clear flag and signal any 3601 * thread waiting in rsmseg_close. 
3602 */ 3603 rsmseglock_acquire(eseg); 3604 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3605 cv_broadcast(&eseg->s_cv); 3606 rsmseglock_release(eseg); 3607 } 3608 3609 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3610 "rsm_export_force_destroy done\n")); 3611 } 3612 3613 /* ******************************* Remote Calls *********************** */ 3614 static void 3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3616 { 3617 rsmipc_reply_t reply; 3618 DBG_DEFINE(category, 3619 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3620 3621 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3622 "rsm_intr_segconnect enter\n")); 3623 3624 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3625 3626 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3627 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3628 3629 (void) rsmipc_send(src, NULL, &reply); 3630 3631 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3632 "rsm_intr_segconnect done\n")); 3633 } 3634 3635 3636 /* 3637 * When an exported segment is unpublished the exporter sends an ipc 3638 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3639 * calls this function. The import list is scanned; segments which match the 3640 * exported segment id are unloaded and disconnected. 3641 * 3642 * Will also be called from rsm_rebind with disconnect_flag FALSE. 
3643 * 3644 */ 3645 static void 3646 rsm_force_unload(rsm_node_id_t src_nodeid, rsm_memseg_id_t ex_segid, 3647 boolean_t disconnect_flag) 3648 { 3649 rsmresource_t *p = NULL; 3650 rsmhash_table_t *rhash = &rsm_import_segs; 3651 uint_t index; 3652 DBG_DEFINE(category, 3653 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3654 3655 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3656 3657 index = rsmhash(ex_segid); 3658 3659 rw_enter(&rhash->rsmhash_rw, RW_READER); 3660 3661 p = rsmhash_getbkt(rhash, index); 3662 3663 for (; p; p = p->rsmrc_next) { 3664 rsmseg_t *seg = (rsmseg_t *)p; 3665 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3666 /* 3667 * In order to make rsmseg_unload and rsm_force_unload 3668 * thread safe, acquire the segment lock here. 3669 * rsmseg_unload is responsible for releasing the lock. 3670 * rsmseg_unload releases the lock just before a call 3671 * to rsmipc_send or in case of an early exit which 3672 * occurs if the segment was in the state 3673 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3674 */ 3675 rsmseglock_acquire(seg); 3676 if (disconnect_flag) 3677 seg->s_flags |= RSM_FORCE_DISCONNECT; 3678 rsmseg_unload(seg); 3679 } 3680 } 3681 rw_exit(&rhash->rsmhash_rw); 3682 3683 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3684 } 3685 3686 static void 3687 rsm_intr_reply(rsmipc_msghdr_t *msg) 3688 { 3689 /* 3690 * Find slot for cookie in reply. 
3691 * Match sequence with sequence in cookie 3692 * If no match; return 3693 * Try to grap lock of slot, if locked return 3694 * copy data into reply slot area 3695 * signal waiter 3696 */ 3697 rsmipc_slot_t *slot; 3698 rsmipc_cookie_t *cookie; 3699 void *data = (void *) msg; 3700 size_t size = sizeof (rsmipc_reply_t); 3701 DBG_DEFINE(category, 3702 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3703 3704 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n")); 3705 3706 cookie = &msg->rsmipc_cookie; 3707 if (cookie->ic.index >= RSMIPC_SZ) { 3708 DBG_PRINTF((category, RSM_ERR, 3709 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index)); 3710 return; 3711 } 3712 3713 ASSERT(cookie->ic.index < RSMIPC_SZ); 3714 slot = &rsm_ipc.slots[cookie->ic.index]; 3715 mutex_enter(&slot->rsmipc_lock); 3716 if (slot->rsmipc_cookie.value == cookie->value) { 3717 /* found a match */ 3718 if (RSMIPC_GET(slot, RSMIPC_PENDING)) { 3719 bcopy(data, slot->rsmipc_data, size); 3720 RSMIPC_CLEAR(slot, RSMIPC_PENDING); 3721 cv_signal(&slot->rsmipc_cv); 3722 } 3723 } else { 3724 DBG_PRINTF((category, RSM_DEBUG, 3725 "rsm: rsm_intr_reply mismatched reply %d\n", 3726 cookie->ic.index)); 3727 } 3728 mutex_exit(&slot->rsmipc_lock); 3729 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n")); 3730 } 3731 3732 /* 3733 * This function gets dispatched on the worker thread when we receive 3734 * the SQREADY message. This function sends the SQREADY_ACK message. 3735 */ 3736 static void 3737 rsm_sqready_ack_deferred(void *arg) 3738 { 3739 path_t *path = (path_t *)arg; 3740 DBG_DEFINE(category, 3741 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3742 3743 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3744 "rsm_sqready_ack_deferred enter\n")); 3745 3746 mutex_enter(&path->mutex); 3747 3748 /* 3749 * If path is not active no point in sending the ACK 3750 * because the whole SQREADY protocol will again start 3751 * when the path becomes active. 
3752 */ 3753 if (path->state != RSMKA_PATH_ACTIVE) { 3754 /* 3755 * decrement the path refcnt incremented in rsm_proc_sqready 3756 */ 3757 PATH_RELE_NOLOCK(path); 3758 mutex_exit(&path->mutex); 3759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3760 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3761 return; 3762 } 3763 3764 /* send an SQREADY_ACK message */ 3765 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3766 3767 /* initialize credits to the max level */ 3768 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3769 3770 /* wake up any send that is waiting for credits */ 3771 cv_broadcast(&path->sendq_token.sendq_cv); 3772 3773 /* 3774 * decrement the path refcnt since we incremented it in 3775 * rsm_proc_sqready 3776 */ 3777 PATH_RELE_NOLOCK(path); 3778 3779 mutex_exit(&path->mutex); 3780 3781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3782 "rsm_sqready_ack_deferred done\n")); 3783 } 3784 3785 /* 3786 * Process the SQREADY message 3787 */ 3788 static void 3789 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3790 rsm_intr_hand_arg_t arg) 3791 { 3792 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3793 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3794 path_t *path; 3795 DBG_DEFINE(category, 3796 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3797 3798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3799 3800 /* look up the path - incr the path refcnt */ 3801 path = rsm_find_path(hdlr_argp->adapter_name, 3802 hdlr_argp->adapter_instance, src_hwaddr); 3803 3804 /* 3805 * No path exists or path is not active - drop the message 3806 */ 3807 if (path == NULL) { 3808 DBG_PRINTF((category, RSM_DEBUG, 3809 "rsm_proc_sqready done: msg dropped no path\n")); 3810 return; 3811 } 3812 3813 mutex_exit(&path->mutex); 3814 3815 /* drain any tasks from the previous incarnation */ 3816 taskq_wait(path->recv_taskq); 3817 3818 mutex_enter(&path->mutex); 3819 /* 3820 * If we'd sent an SQREADY message and were 
waiting for SQREADY_ACK 3821 * in the meanwhile we received an SQREADY message, blindly reset 3822 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3823 * and forget about the SQREADY that we sent. 3824 */ 3825 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3826 3827 if (path->state != RSMKA_PATH_ACTIVE) { 3828 /* decr refcnt and drop the mutex */ 3829 PATH_RELE_NOLOCK(path); 3830 mutex_exit(&path->mutex); 3831 DBG_PRINTF((category, RSM_DEBUG, 3832 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3833 return; 3834 } 3835 3836 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3837 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3838 3839 /* 3840 * The sender's local incarnation number is our remote incarnation 3841 * number save it in the path data structure 3842 */ 3843 path->remote_incn = msg->rsmipc_local_incn; 3844 path->sendq_token.msgbuf_avail = 0; 3845 path->procmsg_cnt = 0; 3846 3847 /* 3848 * path is active - dispatch task to send SQREADY_ACK - remember 3849 * RSMPI calls can't be done in interrupt context 3850 * 3851 * We can use the recv_taskq to send because the remote endpoint 3852 * cannot start sending messages till it receives SQREADY_ACK hence 3853 * at this point there are no tasks on recv_taskq. 3854 * 3855 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3856 */ 3857 (void) taskq_dispatch(path->recv_taskq, 3858 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3859 3860 mutex_exit(&path->mutex); 3861 3862 3863 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3864 } 3865 3866 /* 3867 * Process the SQREADY_ACK message 3868 */ 3869 static void 3870 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3871 rsm_intr_hand_arg_t arg) 3872 { 3873 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3874 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3875 path_t *path; 3876 DBG_DEFINE(category, 3877 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3878 3879 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3880 "rsm_proc_sqready_ack enter\n")); 3881 3882 /* look up the path - incr the path refcnt */ 3883 path = rsm_find_path(hdlr_argp->adapter_name, 3884 hdlr_argp->adapter_instance, src_hwaddr); 3885 3886 /* 3887 * drop the message if - no path exists or path is not active 3888 * or if its not waiting for SQREADY_ACK message 3889 */ 3890 if (path == NULL) { 3891 DBG_PRINTF((category, RSM_DEBUG, 3892 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3893 return; 3894 } 3895 3896 if ((path->state != RSMKA_PATH_ACTIVE) || 3897 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3898 /* decrement the refcnt */ 3899 PATH_RELE_NOLOCK(path); 3900 mutex_exit(&path->mutex); 3901 DBG_PRINTF((category, RSM_DEBUG, 3902 "rsm_proc_sqready_ack done: msg dropped\n")); 3903 return; 3904 } 3905 3906 /* 3907 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3908 * sent, if not drop it. 
3909 */ 3910 if (path->local_incn != msghdr->rsmipc_incn) { 3911 /* decrement the refcnt */ 3912 PATH_RELE_NOLOCK(path); 3913 mutex_exit(&path->mutex); 3914 DBG_PRINTF((category, RSM_DEBUG, 3915 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3916 msghdr->rsmipc_incn)); 3917 return; 3918 } 3919 3920 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3921 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3922 3923 /* 3924 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3925 */ 3926 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3927 3928 /* save the remote sendq incn number */ 3929 path->remote_incn = msg->rsmipc_local_incn; 3930 3931 /* initialize credits to the max level */ 3932 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3933 3934 /* wake up any send that is waiting for credits */ 3935 cv_broadcast(&path->sendq_token.sendq_cv); 3936 3937 /* decrement the refcnt */ 3938 PATH_RELE_NOLOCK(path); 3939 3940 mutex_exit(&path->mutex); 3941 3942 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3943 "rsm_proc_sqready_ack done\n")); 3944 } 3945 3946 /* 3947 * process the RSMIPC_MSG_CREDIT message 3948 */ 3949 static void 3950 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3951 rsm_intr_hand_arg_t arg) 3952 { 3953 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3954 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3955 path_t *path; 3956 DBG_DEFINE(category, 3957 RSM_KERNEL_AGENT | RSM_FUNC_ALL | 3958 RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3959 3960 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3961 3962 /* look up the path - incr the path refcnt */ 3963 path = rsm_find_path(hdlr_argp->adapter_name, 3964 hdlr_argp->adapter_instance, src_hwaddr); 3965 3966 if (path == NULL) { 3967 DBG_PRINTF((category, RSM_DEBUG, 3968 "rsm_add_credits enter: path not found\n")); 3969 return; 3970 } 3971 3972 /* the path is not active - discard credits */ 3973 if (path->state != RSMKA_PATH_ACTIVE) { 3974 
PATH_RELE_NOLOCK(path); 3975 mutex_exit(&path->mutex); 3976 DBG_PRINTF((category, RSM_DEBUG, 3977 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3978 return; 3979 } 3980 3981 /* 3982 * Check if these credits are for current incarnation of the path. 3983 */ 3984 if (path->local_incn != msghdr->rsmipc_incn) { 3985 /* decrement the refcnt */ 3986 PATH_RELE_NOLOCK(path); 3987 mutex_exit(&path->mutex); 3988 DBG_PRINTF((category, RSM_DEBUG, 3989 "rsm_add_credits enter: old incn %lld\n", 3990 msghdr->rsmipc_incn)); 3991 return; 3992 } 3993 3994 DBG_PRINTF((category, RSM_DEBUG, 3995 "rsm_add_credits:path=%lx new-creds=%d " 3996 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 3997 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 3998 src_hwaddr)); 3999 4000 4001 /* add credits to the path's sendq */ 4002 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4003 4004 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4005 4006 /* wake up any send that is waiting for credits */ 4007 cv_broadcast(&path->sendq_token.sendq_cv); 4008 4009 /* decrement the refcnt */ 4010 PATH_RELE_NOLOCK(path); 4011 4012 mutex_exit(&path->mutex); 4013 4014 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4015 } 4016 4017 static void 4018 rsm_intr_event(rsmipc_request_t *msg) 4019 { 4020 rsmseg_t *seg; 4021 rsmresource_t *p; 4022 rsm_node_id_t src_node; 4023 DBG_DEFINE(category, 4024 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4025 4026 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4027 4028 src_node = msg->rsmipc_hdr.rsmipc_src; 4029 4030 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4031 /* This is for an import segment */ 4032 uint_t hashval = rsmhash(msg->rsmipc_key); 4033 4034 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4035 4036 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4037 4038 for (; p; p = p->rsmrc_next) { 4039 if ((p->rsmrc_key == msg->rsmipc_key) && 4040 (p->rsmrc_node 
== src_node)) { 4041 seg = (rsmseg_t *)p; 4042 rsmseglock_acquire(seg); 4043 4044 atomic_inc_32(&seg->s_pollevent); 4045 4046 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4047 pollwakeup(&seg->s_poll, POLLRDNORM); 4048 4049 rsmseglock_release(seg); 4050 } 4051 } 4052 4053 rw_exit(&rsm_import_segs.rsmhash_rw); 4054 } else { 4055 /* This is for an export segment */ 4056 seg = rsmexport_lookup(msg->rsmipc_key); 4057 if (!seg) { 4058 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4059 "rsm_intr_event done: exp seg not found\n")); 4060 return; 4061 } 4062 4063 ASSERT(rsmseglock_held(seg)); 4064 4065 atomic_inc_32(&seg->s_pollevent); 4066 4067 /* 4068 * We must hold the segment lock here, or else the segment 4069 * can be freed while pollwakeup is using it. This implies 4070 * that we MUST NOT grab the segment lock during rsm_chpoll, 4071 * as outlined in the chpoll(2) man page. 4072 */ 4073 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4074 pollwakeup(&seg->s_poll, POLLRDNORM); 4075 4076 rsmseglock_release(seg); 4077 } 4078 4079 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4080 } 4081 4082 /* 4083 * The exporter did a republish and changed the ACL - this change is only 4084 * visible to new importers. 4085 */ 4086 static void 4087 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4088 rsm_permission_t perm) 4089 { 4090 4091 rsmresource_t *p; 4092 rsmseg_t *seg; 4093 uint_t hashval = rsmhash(key); 4094 DBG_DEFINE(category, 4095 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4096 4097 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4098 4099 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4100 4101 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4102 4103 for (; p; p = p->rsmrc_next) { 4104 /* 4105 * find the importer and update the permission in the shared 4106 * data structure. 
Any new importers will use the new perms 4107 */ 4108 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4109 seg = (rsmseg_t *)p; 4110 4111 rsmseglock_acquire(seg); 4112 rsmsharelock_acquire(seg); 4113 seg->s_share->rsmsi_mode = perm; 4114 rsmsharelock_release(seg); 4115 rsmseglock_release(seg); 4116 4117 break; 4118 } 4119 } 4120 4121 rw_exit(&rsm_import_segs.rsmhash_rw); 4122 4123 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4124 } 4125 4126 void 4127 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4128 { 4129 int done = 1; /* indicate all SUSPENDS have been acked */ 4130 list_element_t *elem; 4131 DBG_DEFINE(category, 4132 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4133 4134 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4135 "rsm_suspend_complete enter\n")); 4136 4137 mutex_enter(&rsm_suspend_list.list_lock); 4138 4139 if (rsm_suspend_list.list_head == NULL) { 4140 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4141 "rsm_suspend_complete done: suspend_list is empty\n")); 4142 mutex_exit(&rsm_suspend_list.list_lock); 4143 return; 4144 } 4145 4146 elem = rsm_suspend_list.list_head; 4147 while (elem != NULL) { 4148 if (elem->nodeid == src_node) { 4149 /* clear the pending flag for the node */ 4150 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4151 elem->flags |= flag; 4152 } 4153 4154 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4155 done = 0; /* still some nodes have not yet ACKED */ 4156 4157 elem = elem->next; 4158 } 4159 4160 mutex_exit(&rsm_suspend_list.list_lock); 4161 4162 if (!done) { 4163 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4164 "rsm_suspend_complete done: acks pending\n")); 4165 return; 4166 } 4167 /* 4168 * Now that we are done with suspending all the remote importers 4169 * time to quiesce the local exporters 4170 */ 4171 exporter_quiesce(); 4172 4173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4174 "rsm_suspend_complete done\n")); 4175 } 4176 4177 static void 4178 exporter_quiesce() 4179 { 4180 int i, e; 4181 
rsmresource_t *current; 4182 rsmseg_t *seg; 4183 adapter_t *adapter; 4184 DBG_DEFINE(category, 4185 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4186 4187 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4188 /* 4189 * The importers send a SUSPEND_COMPLETE to the exporter node 4190 * Unpublish, unbind the export segment and 4191 * move the segments to the EXPORT_QUIESCED state 4192 */ 4193 4194 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4195 4196 for (i = 0; i < rsm_hash_size; i++) { 4197 current = rsm_export_segs.bucket[i]; 4198 while (current != NULL) { 4199 seg = (rsmseg_t *)current; 4200 rsmseglock_acquire(seg); 4201 if (current->rsmrc_state == 4202 RSM_STATE_EXPORT_QUIESCING) { 4203 adapter = seg->s_adapter; 4204 /* 4205 * some local memory handles are not published 4206 * check if it was published 4207 */ 4208 if ((seg->s_acl == NULL) || 4209 (seg->s_acl[0].ae_node != my_nodeid) || 4210 (seg->s_acl[0].ae_permission != 0)) { 4211 4212 e = adapter->rsmpi_ops->rsm_unpublish( 4213 seg->s_handle.out); 4214 DBG_PRINTF((category, RSM_DEBUG, 4215 "exporter_quiesce:unpub %d\n", e)); 4216 4217 e = adapter->rsmpi_ops->rsm_seg_destroy( 4218 seg->s_handle.out); 4219 4220 DBG_PRINTF((category, RSM_DEBUG, 4221 "exporter_quiesce:destroy %d\n", 4222 e)); 4223 } 4224 4225 (void) rsm_unbind_pages(seg); 4226 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4227 cv_broadcast(&seg->s_cv); 4228 } 4229 rsmseglock_release(seg); 4230 current = current->rsmrc_next; 4231 } 4232 } 4233 rw_exit(&rsm_export_segs.rsmhash_rw); 4234 4235 /* 4236 * All the local segments we are done with the pre-del processing 4237 * - time to move to PREDEL_COMPLETED. 
4238 */ 4239 4240 mutex_enter(&rsm_drv_data.drv_lock); 4241 4242 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4243 4244 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4245 4246 cv_broadcast(&rsm_drv_data.drv_cv); 4247 4248 mutex_exit(&rsm_drv_data.drv_lock); 4249 4250 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4251 } 4252 4253 static void 4254 importer_suspend(rsm_node_id_t src_node) 4255 { 4256 int i; 4257 int susp_flg; /* true means already suspended */ 4258 int num_importers; 4259 rsmresource_t *p = NULL, *curp; 4260 rsmhash_table_t *rhash = &rsm_import_segs; 4261 rsmseg_t *seg; 4262 rsmipc_request_t request; 4263 DBG_DEFINE(category, 4264 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4265 4266 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4267 4268 rw_enter(&rhash->rsmhash_rw, RW_READER); 4269 for (i = 0; i < rsm_hash_size; i++) { 4270 p = rhash->bucket[i]; 4271 4272 /* 4273 * Suspend all importers with same <node, key> pair. 4274 * After the last one of the shared importers has been 4275 * suspended - suspend the shared mappings/connection. 4276 */ 4277 for (; p; p = p->rsmrc_next) { 4278 rsmseg_t *first = (rsmseg_t *)p; 4279 if ((first->s_node != src_node) || 4280 (first->s_state == RSM_STATE_DISCONNECT)) 4281 continue; /* go to next entry */ 4282 /* 4283 * search the rest of the bucket for 4284 * other siblings (imprtrs with the same key) 4285 * of "first" and suspend them. 4286 * All importers with same key fall in 4287 * the same bucket. 
4288 */ 4289 num_importers = 0; 4290 for (curp = p; curp; curp = curp->rsmrc_next) { 4291 seg = (rsmseg_t *)curp; 4292 4293 rsmseglock_acquire(seg); 4294 4295 if ((seg->s_node != first->s_node) || 4296 (seg->s_key != first->s_key) || 4297 (seg->s_state == RSM_STATE_DISCONNECT)) { 4298 /* 4299 * either not a peer segment or its a 4300 * disconnected segment - skip it 4301 */ 4302 rsmseglock_release(seg); 4303 continue; 4304 } 4305 4306 rsmseg_suspend(seg, &susp_flg); 4307 4308 if (susp_flg) { /* seg already suspended */ 4309 rsmseglock_release(seg); 4310 break; /* the inner for loop */ 4311 } 4312 4313 num_importers++; 4314 rsmsharelock_acquire(seg); 4315 /* 4316 * we've processed all importers that are 4317 * siblings of "first" 4318 */ 4319 if (num_importers == 4320 seg->s_share->rsmsi_refcnt) { 4321 rsmsharelock_release(seg); 4322 rsmseglock_release(seg); 4323 break; 4324 } 4325 rsmsharelock_release(seg); 4326 rsmseglock_release(seg); 4327 } 4328 4329 /* 4330 * All the importers with the same key and 4331 * nodeid as "first" have been suspended. 4332 * Now suspend the shared connect/mapping. 4333 * This is done only once. 
4334 */ 4335 if (!susp_flg) { 4336 rsmsegshare_suspend(seg); 4337 } 4338 } 4339 } 4340 4341 rw_exit(&rhash->rsmhash_rw); 4342 4343 /* send an ACK for SUSPEND message */ 4344 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4345 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4346 4347 4348 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4349 4350 } 4351 4352 static void 4353 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4354 { 4355 int recheck_state; 4356 rsmcookie_t *hdl; 4357 DBG_DEFINE(category, 4358 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4359 4360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4361 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4362 4363 *susp_flg = 0; 4364 4365 ASSERT(rsmseglock_held(seg)); 4366 /* wait if putv/getv is in progress */ 4367 while (seg->s_rdmacnt > 0) 4368 cv_wait(&seg->s_cv, &seg->s_lock); 4369 4370 do { 4371 recheck_state = 0; 4372 4373 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4374 "rsmseg_suspend:segment %x state=%d\n", 4375 seg->s_key, seg->s_state)); 4376 4377 switch (seg->s_state) { 4378 case RSM_STATE_NEW: 4379 /* not a valid state */ 4380 break; 4381 case RSM_STATE_CONNECTING: 4382 seg->s_state = RSM_STATE_ABORT_CONNECT; 4383 break; 4384 case RSM_STATE_ABORT_CONNECT: 4385 break; 4386 case RSM_STATE_CONNECT: 4387 seg->s_handle.in = NULL; 4388 seg->s_state = RSM_STATE_CONN_QUIESCE; 4389 break; 4390 case RSM_STATE_MAPPING: 4391 /* wait until segment leaves the mapping state */ 4392 while (seg->s_state == RSM_STATE_MAPPING) 4393 cv_wait(&seg->s_cv, &seg->s_lock); 4394 recheck_state = 1; 4395 break; 4396 case RSM_STATE_ACTIVE: 4397 /* unload the mappings */ 4398 if (seg->s_ckl != NULL) { 4399 hdl = seg->s_ckl; 4400 for (; hdl != NULL; hdl = hdl->c_next) { 4401 (void) devmap_unload(hdl->c_dhp, 4402 hdl->c_off, hdl->c_len); 4403 } 4404 } 4405 seg->s_mapinfo = NULL; 4406 seg->s_state = RSM_STATE_MAP_QUIESCE; 4407 break; 4408 case RSM_STATE_CONN_QUIESCE: 4409 /* FALLTHRU */ 4410 case 
RSM_STATE_MAP_QUIESCE: 4411 /* rsmseg_suspend already done for seg */ 4412 *susp_flg = 1; 4413 break; 4414 case RSM_STATE_DISCONNECT: 4415 break; 4416 default: 4417 ASSERT(0); /* invalid state */ 4418 } 4419 } while (recheck_state); 4420 4421 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4422 } 4423 4424 static void 4425 rsmsegshare_suspend(rsmseg_t *seg) 4426 { 4427 int e; 4428 adapter_t *adapter; 4429 rsm_import_share_t *sharedp; 4430 DBG_DEFINE(category, 4431 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4432 4433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4434 "rsmsegshare_suspend enter\n")); 4435 4436 rsmseglock_acquire(seg); 4437 rsmsharelock_acquire(seg); 4438 4439 sharedp = seg->s_share; 4440 adapter = seg->s_adapter; 4441 switch (sharedp->rsmsi_state) { 4442 case RSMSI_STATE_NEW: 4443 break; 4444 case RSMSI_STATE_CONNECTING: 4445 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4446 break; 4447 case RSMSI_STATE_ABORT_CONNECT: 4448 break; 4449 case RSMSI_STATE_CONNECTED: 4450 /* do the rsmpi disconnect */ 4451 if (sharedp->rsmsi_node != my_nodeid) { 4452 e = adapter->rsmpi_ops-> 4453 rsm_disconnect(sharedp->rsmsi_handle); 4454 4455 DBG_PRINTF((category, RSM_DEBUG, 4456 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4457 sharedp->rsmsi_segid, e)); 4458 } 4459 4460 sharedp->rsmsi_handle = NULL; 4461 4462 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4463 break; 4464 case RSMSI_STATE_CONN_QUIESCE: 4465 break; 4466 case RSMSI_STATE_MAPPED: 4467 /* do the rsmpi unmap and disconnect */ 4468 if (sharedp->rsmsi_node != my_nodeid) { 4469 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4470 4471 DBG_PRINTF((category, RSM_DEBUG, 4472 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4473 4474 e = adapter->rsmpi_ops-> 4475 rsm_disconnect(sharedp->rsmsi_handle); 4476 DBG_PRINTF((category, RSM_DEBUG, 4477 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4478 sharedp->rsmsi_segid, e)); 4479 } 4480 4481 sharedp->rsmsi_handle = NULL; 4482 4483 
sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4484 break; 4485 case RSMSI_STATE_MAP_QUIESCE: 4486 break; 4487 case RSMSI_STATE_DISCONNECTED: 4488 break; 4489 default: 4490 ASSERT(0); /* invalid state */ 4491 } 4492 4493 rsmsharelock_release(seg); 4494 rsmseglock_release(seg); 4495 4496 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4497 "rsmsegshare_suspend done\n")); 4498 } 4499 4500 /* 4501 * This should get called on receiving a RESUME message or from 4502 * the pathmanger if the node undergoing DR dies. 4503 */ 4504 static void 4505 importer_resume(rsm_node_id_t src_node) 4506 { 4507 int i; 4508 rsmresource_t *p = NULL; 4509 rsmhash_table_t *rhash = &rsm_import_segs; 4510 void *cookie; 4511 DBG_DEFINE(category, 4512 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4513 4514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4515 4516 rw_enter(&rhash->rsmhash_rw, RW_READER); 4517 4518 for (i = 0; i < rsm_hash_size; i++) { 4519 p = rhash->bucket[i]; 4520 4521 for (; p; p = p->rsmrc_next) { 4522 rsmseg_t *seg = (rsmseg_t *)p; 4523 4524 rsmseglock_acquire(seg); 4525 4526 /* process only importers of node undergoing DR */ 4527 if (seg->s_node != src_node) { 4528 rsmseglock_release(seg); 4529 continue; 4530 } 4531 4532 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4533 rsmipc_request_t request; 4534 /* 4535 * rsmpi map/connect failed 4536 * inform the exporter so that it can 4537 * remove the importer. 
4538 */ 4539 request.rsmipc_hdr.rsmipc_type = 4540 RSMIPC_MSG_NOTIMPORTING; 4541 request.rsmipc_key = seg->s_segid; 4542 request.rsmipc_segment_cookie = cookie; 4543 rsmseglock_release(seg); 4544 (void) rsmipc_send(seg->s_node, &request, 4545 RSM_NO_REPLY); 4546 } else { 4547 rsmseglock_release(seg); 4548 } 4549 } 4550 } 4551 4552 rw_exit(&rhash->rsmhash_rw); 4553 4554 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4555 } 4556 4557 static int 4558 rsmseg_resume(rsmseg_t *seg, void **cookie) 4559 { 4560 int e; 4561 int retc; 4562 off_t dev_offset; 4563 size_t maplen; 4564 uint_t maxprot; 4565 rsm_mapinfo_t *p; 4566 rsmcookie_t *hdl; 4567 rsm_import_share_t *sharedp; 4568 DBG_DEFINE(category, 4569 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4570 4571 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4572 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4573 4574 *cookie = NULL; 4575 4576 ASSERT(rsmseglock_held(seg)); 4577 4578 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4579 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4580 return (RSM_SUCCESS); 4581 } 4582 4583 sharedp = seg->s_share; 4584 4585 rsmsharelock_acquire(seg); 4586 4587 /* resume the shared connection and/or mapping */ 4588 retc = rsmsegshare_resume(seg); 4589 4590 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4591 /* shared state can either be connected or mapped */ 4592 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4593 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4594 ASSERT(retc == RSM_SUCCESS); 4595 seg->s_handle.in = sharedp->rsmsi_handle; 4596 rsmsharelock_release(seg); 4597 seg->s_state = RSM_STATE_CONNECT; 4598 4599 } else { /* error in rsmpi connect during resume */ 4600 seg->s_handle.in = NULL; 4601 seg->s_state = RSM_STATE_DISCONNECT; 4602 4603 sharedp->rsmsi_refcnt--; 4604 cookie = (void *)sharedp->rsmsi_cookie; 4605 4606 if (sharedp->rsmsi_refcnt == 0) { 4607 ASSERT(sharedp->rsmsi_mapcnt == 0); 4608 rsmsharelock_release(seg); 4609 4610 /* clean up the 
shared data structure */ 4611 mutex_destroy(&sharedp->rsmsi_lock); 4612 cv_destroy(&sharedp->rsmsi_cv); 4613 kmem_free((void *)(sharedp), 4614 sizeof (rsm_import_share_t)); 4615 4616 } else { 4617 rsmsharelock_release(seg); 4618 } 4619 /* 4620 * The following needs to be done after any 4621 * rsmsharelock calls which use seg->s_share. 4622 */ 4623 seg->s_share = NULL; 4624 } 4625 4626 /* signal any waiting segment */ 4627 cv_broadcast(&seg->s_cv); 4628 4629 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4630 "rsmseg_resume done:state=%d\n", seg->s_state)); 4631 return (retc); 4632 } 4633 4634 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4635 4636 /* Setup protections for remap */ 4637 maxprot = PROT_USER; 4638 if (seg->s_mode & RSM_PERM_READ) { 4639 maxprot |= PROT_READ; 4640 } 4641 if (seg->s_mode & RSM_PERM_WRITE) { 4642 maxprot |= PROT_WRITE; 4643 } 4644 4645 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4646 /* error in rsmpi connect or map during resume */ 4647 4648 /* remap to trash page */ 4649 ASSERT(seg->s_ckl != NULL); 4650 4651 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4652 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4653 remap_cookie, hdl->c_off, hdl->c_len, 4654 maxprot, 0, NULL); 4655 4656 DBG_PRINTF((category, RSM_ERR, 4657 "rsmseg_resume:remap=%d\n", e)); 4658 } 4659 4660 seg->s_handle.in = NULL; 4661 seg->s_state = RSM_STATE_DISCONNECT; 4662 4663 sharedp->rsmsi_refcnt--; 4664 4665 sharedp->rsmsi_mapcnt--; 4666 seg->s_mapinfo = NULL; 4667 4668 if (sharedp->rsmsi_refcnt == 0) { 4669 ASSERT(sharedp->rsmsi_mapcnt == 0); 4670 rsmsharelock_release(seg); 4671 4672 /* clean up the shared data structure */ 4673 mutex_destroy(&sharedp->rsmsi_lock); 4674 cv_destroy(&sharedp->rsmsi_cv); 4675 kmem_free((void *)(sharedp), 4676 sizeof (rsm_import_share_t)); 4677 4678 } else { 4679 rsmsharelock_release(seg); 4680 } 4681 /* 4682 * The following needs to be done after any 4683 * rsmsharelock calls which use seg->s_share. 
4684 */ 4685 seg->s_share = NULL; 4686 4687 /* signal any waiting segment */ 4688 cv_broadcast(&seg->s_cv); 4689 4690 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4691 "rsmseg_resume done:seg=%x,err=%d\n", 4692 seg->s_key, retc)); 4693 return (retc); 4694 4695 } 4696 4697 seg->s_handle.in = sharedp->rsmsi_handle; 4698 4699 if (seg->s_node == my_nodeid) { /* loopback */ 4700 ASSERT(seg->s_mapinfo == NULL); 4701 4702 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4703 e = devmap_umem_remap(hdl->c_dhp, 4704 rsm_dip, seg->s_cookie, 4705 hdl->c_off, hdl->c_len, 4706 maxprot, 0, NULL); 4707 4708 DBG_PRINTF((category, RSM_ERR, 4709 "rsmseg_resume:remap=%d\n", e)); 4710 } 4711 } else { /* remote exporter */ 4712 /* remap to the new rsmpi maps */ 4713 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4714 4715 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4716 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4717 &dev_offset, &maplen); 4718 e = devmap_devmem_remap(hdl->c_dhp, 4719 p->dip, p->dev_register, dev_offset, 4720 maplen, maxprot, 0, NULL); 4721 4722 DBG_PRINTF((category, RSM_ERR, 4723 "rsmseg_resume:remap=%d\n", e)); 4724 } 4725 } 4726 4727 rsmsharelock_release(seg); 4728 4729 seg->s_state = RSM_STATE_ACTIVE; 4730 cv_broadcast(&seg->s_cv); 4731 4732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4733 4734 return (retc); 4735 } 4736 4737 static int 4738 rsmsegshare_resume(rsmseg_t *seg) 4739 { 4740 int e = RSM_SUCCESS; 4741 adapter_t *adapter; 4742 rsm_import_share_t *sharedp; 4743 DBG_DEFINE(category, 4744 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4745 4746 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4747 4748 ASSERT(rsmseglock_held(seg)); 4749 ASSERT(rsmsharelock_held(seg)); 4750 4751 sharedp = seg->s_share; 4752 4753 /* 4754 * If we are not in a xxxx_QUIESCE state that means shared 4755 * connect/mapping processing has been already been done 4756 * so return success. 
4757 */ 4758 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4759 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4760 return (RSM_SUCCESS); 4761 } 4762 4763 adapter = seg->s_adapter; 4764 4765 if (sharedp->rsmsi_node != my_nodeid) { 4766 rsm_addr_t hwaddr; 4767 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4768 4769 e = adapter->rsmpi_ops->rsm_connect( 4770 adapter->rsmpi_handle, hwaddr, 4771 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4772 4773 DBG_PRINTF((category, RSM_DEBUG, 4774 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4775 sharedp->rsmsi_segid, e)); 4776 4777 if (e != RSM_SUCCESS) { 4778 /* when do we send the NOT_IMPORTING message */ 4779 sharedp->rsmsi_handle = NULL; 4780 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4781 /* signal any waiting segment */ 4782 cv_broadcast(&sharedp->rsmsi_cv); 4783 return (e); 4784 } 4785 } 4786 4787 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4788 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4789 /* signal any waiting segment */ 4790 cv_broadcast(&sharedp->rsmsi_cv); 4791 return (e); 4792 } 4793 4794 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4795 4796 /* do the rsmpi map of the whole segment here */ 4797 if (sharedp->rsmsi_node != my_nodeid) { 4798 size_t mapped_len; 4799 rsm_mapinfo_t *p; 4800 4801 /* 4802 * We need to do rsmpi maps with <off, lens> identical to 4803 * the old mapinfo list because the segment mapping handles 4804 * dhp and such need the fragmentation of rsmpi maps to be 4805 * identical to what it was during the mmap of the segment 4806 */ 4807 p = sharedp->rsmsi_mapinfo; 4808 4809 while (p != NULL) { 4810 mapped_len = 0; 4811 4812 e = adapter->rsmpi_ops->rsm_map( 4813 sharedp->rsmsi_handle, p->start_offset, 4814 p->individual_len, &mapped_len, 4815 &p->dip, &p->dev_register, &p->dev_offset, 4816 NULL, NULL); 4817 4818 if (e != 0) { 4819 DBG_PRINTF((category, RSM_ERR, 4820 "rsmsegshare_resume: rsmpi map err=%d\n", 4821 e)); 4822 break; 
4823 } 4824 4825 if (mapped_len != p->individual_len) { 4826 DBG_PRINTF((category, RSM_ERR, 4827 "rsmsegshare_resume: rsmpi maplen" 4828 "< reqlen=%lx\n", mapped_len)); 4829 e = RSMERR_BAD_LENGTH; 4830 break; 4831 } 4832 4833 p = p->next; 4834 4835 } 4836 4837 4838 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4839 int err; 4840 /* Check if this is the first rsm_map */ 4841 if (p != sharedp->rsmsi_mapinfo) { 4842 /* 4843 * A single rsm_unmap undoes multiple rsm_maps. 4844 */ 4845 (void) seg->s_adapter->rsmpi_ops-> 4846 rsm_unmap(sharedp->rsmsi_handle); 4847 } 4848 4849 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4850 sharedp->rsmsi_mapinfo = NULL; 4851 4852 err = adapter->rsmpi_ops-> 4853 rsm_disconnect(sharedp->rsmsi_handle); 4854 4855 DBG_PRINTF((category, RSM_DEBUG, 4856 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4857 sharedp->rsmsi_segid, err)); 4858 4859 sharedp->rsmsi_handle = NULL; 4860 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4861 4862 /* signal the waiting segments */ 4863 cv_broadcast(&sharedp->rsmsi_cv); 4864 DBG_PRINTF((category, RSM_DEBUG, 4865 "rsmsegshare_resume done: rsmpi map err\n")); 4866 return (e); 4867 } 4868 } 4869 4870 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4871 4872 /* signal any waiting segment */ 4873 cv_broadcast(&sharedp->rsmsi_cv); 4874 4875 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4876 4877 return (e); 4878 } 4879 4880 /* 4881 * this is the routine that gets called by recv_taskq which is the 4882 * thread that processes messages that are flow-controlled. 
4883 */ 4884 static void 4885 rsm_intr_proc_deferred(void *arg) 4886 { 4887 path_t *path = (path_t *)arg; 4888 rsmipc_request_t *msg; 4889 rsmipc_msghdr_t *msghdr; 4890 rsm_node_id_t src_node; 4891 msgbuf_elem_t *head; 4892 int e; 4893 DBG_DEFINE(category, 4894 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4895 4896 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4897 "rsm_intr_proc_deferred enter\n")); 4898 4899 mutex_enter(&path->mutex); 4900 4901 /* use the head of the msgbuf_queue */ 4902 head = rsmka_gethead_msgbuf(path); 4903 4904 mutex_exit(&path->mutex); 4905 4906 msg = (rsmipc_request_t *)&(head->msg); 4907 msghdr = (rsmipc_msghdr_t *)msg; 4908 4909 src_node = msghdr->rsmipc_src; 4910 4911 /* 4912 * messages that need to send a reply should check the message version 4913 * before processing the message. And all messages that need to 4914 * send a reply should be processed here by the worker thread. 4915 */ 4916 switch (msghdr->rsmipc_type) { 4917 case RSMIPC_MSG_SEGCONNECT: 4918 if (msghdr->rsmipc_version != RSM_VERSION) { 4919 rsmipc_reply_t reply; 4920 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4921 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4922 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4923 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4924 } else { 4925 rsm_intr_segconnect(src_node, msg); 4926 } 4927 break; 4928 case RSMIPC_MSG_DISCONNECT: 4929 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4930 break; 4931 case RSMIPC_MSG_SUSPEND: 4932 importer_suspend(src_node); 4933 break; 4934 case RSMIPC_MSG_SUSPEND_DONE: 4935 rsm_suspend_complete(src_node, 0); 4936 break; 4937 case RSMIPC_MSG_RESUME: 4938 importer_resume(src_node); 4939 break; 4940 default: 4941 ASSERT(0); 4942 } 4943 4944 mutex_enter(&path->mutex); 4945 4946 rsmka_dequeue_msgbuf(path); 4947 4948 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4949 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4950 path->procmsg_cnt++; 4951 4952 
ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4953 4954 /* No need to send credits if path is going down */ 4955 if ((path->state == RSMKA_PATH_ACTIVE) && 4956 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4957 /* 4958 * send credits and reset procmsg_cnt if success otherwise 4959 * credits will be sent after processing the next message 4960 */ 4961 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4962 if (e == 0) 4963 path->procmsg_cnt = 0; 4964 else 4965 DBG_PRINTF((category, RSM_ERR, 4966 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4967 } 4968 4969 /* 4970 * decrement the path refcnt since we incremented it in 4971 * rsm_intr_callback_dispatch 4972 */ 4973 PATH_RELE_NOLOCK(path); 4974 4975 mutex_exit(&path->mutex); 4976 4977 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4978 "rsm_intr_proc_deferred done\n")); 4979 } 4980 4981 /* 4982 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4983 */ 4984 static void 4985 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4986 rsm_intr_hand_arg_t arg) 4987 { 4988 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4989 path_t *path; 4990 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4991 DBG_DEFINE(category, 4992 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4993 4994 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4995 "rsm_intr_callback_dispatch enter\n")); 4996 ASSERT(data && hdlr_argp); 4997 4998 /* look up the path - incr the path refcnt */ 4999 path = rsm_find_path(hdlr_argp->adapter_name, 5000 hdlr_argp->adapter_instance, src_hwaddr); 5001 5002 /* the path has been removed - drop this message */ 5003 if (path == NULL) { 5004 DBG_PRINTF((category, RSM_DEBUG, 5005 "rsm_intr_callback_dispatch done: msg dropped\n")); 5006 return; 5007 } 5008 /* the path is not active - don't accept new messages */ 5009 if (path->state != RSMKA_PATH_ACTIVE) { 5010 PATH_RELE_NOLOCK(path); 5011 mutex_exit(&path->mutex); 5012 DBG_PRINTF((category, RSM_DEBUG, 5013 
"rsm_intr_callback_dispatch done: msg dropped" 5014 " path=%lx !ACTIVE\n", path)); 5015 return; 5016 } 5017 5018 /* 5019 * Check if this message was sent to an older incarnation 5020 * of the path/sendq. 5021 */ 5022 if (path->local_incn != msghdr->rsmipc_incn) { 5023 /* decrement the refcnt */ 5024 PATH_RELE_NOLOCK(path); 5025 mutex_exit(&path->mutex); 5026 DBG_PRINTF((category, RSM_DEBUG, 5027 "rsm_intr_callback_dispatch done: old incn %lld\n", 5028 msghdr->rsmipc_incn)); 5029 return; 5030 } 5031 5032 /* copy and enqueue msg on the path's msgbuf queue */ 5033 rsmka_enqueue_msgbuf(path, data); 5034 5035 /* 5036 * schedule task to process messages - ignore retval from 5037 * task_dispatch because we sender cannot send more than 5038 * what receiver can handle. 5039 */ 5040 (void) taskq_dispatch(path->recv_taskq, 5041 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5042 5043 mutex_exit(&path->mutex); 5044 5045 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5046 "rsm_intr_callback_dispatch done\n")); 5047 } 5048 5049 /* 5050 * This procedure is called from rsm_srv_func when a remote node creates a 5051 * a send queue. This event is used as a hint that an earlier failed 5052 * attempt to create a send queue to that remote node may now succeed and 5053 * should be retried. Indication of an earlier failed attempt is provided 5054 * by the RSMKA_SQCREATE_PENDING flag. 
5055 */ 5056 static void 5057 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5058 { 5059 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5060 path_t *path; 5061 DBG_DEFINE(category, 5062 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5063 5064 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5065 "rsm_sqcreateop_callback enter\n")); 5066 5067 /* look up the path - incr the path refcnt */ 5068 path = rsm_find_path(hdlr_argp->adapter_name, 5069 hdlr_argp->adapter_instance, src_hwaddr); 5070 5071 if (path == NULL) { 5072 DBG_PRINTF((category, RSM_DEBUG, 5073 "rsm_sqcreateop_callback done: no path\n")); 5074 return; 5075 } 5076 5077 if ((path->state == RSMKA_PATH_UP) && 5078 (path->flags & RSMKA_SQCREATE_PENDING)) { 5079 /* 5080 * previous attempt to create sendq had failed, retry 5081 * it and move to RSMKA_PATH_ACTIVE state if successful. 5082 * the refcnt will be decremented in the do_deferred_work 5083 */ 5084 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5085 } else { 5086 /* decrement the refcnt */ 5087 PATH_RELE_NOLOCK(path); 5088 } 5089 mutex_exit(&path->mutex); 5090 5091 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5092 "rsm_sqcreateop_callback done\n")); 5093 } 5094 5095 static void 5096 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5097 { 5098 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5099 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5100 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5101 rsm_node_id_t src_node; 5102 DBG_DEFINE(category, 5103 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5104 5105 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5106 "src=%d, type=%d\n", msghdr->rsmipc_src, 5107 msghdr->rsmipc_type)); 5108 5109 /* 5110 * Check for the version number in the msg header. If it is not 5111 * RSM_VERSION, drop the message. 
In the future, we need to manage 5112 * incompatible version numbers in some way 5113 */ 5114 if (msghdr->rsmipc_version != RSM_VERSION) { 5115 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5116 /* 5117 * Drop requests that don't have a reply right here 5118 * Request with reply will send a BAD_VERSION reply 5119 * when they get processed by the worker thread. 5120 */ 5121 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5122 return; 5123 } 5124 5125 } 5126 5127 src_node = msghdr->rsmipc_src; 5128 5129 switch (msghdr->rsmipc_type) { 5130 case RSMIPC_MSG_SEGCONNECT: 5131 case RSMIPC_MSG_DISCONNECT: 5132 case RSMIPC_MSG_SUSPEND: 5133 case RSMIPC_MSG_SUSPEND_DONE: 5134 case RSMIPC_MSG_RESUME: 5135 /* 5136 * These message types are handled by a worker thread using 5137 * the flow-control algorithm. 5138 * Any message processing that does one or more of the 5139 * following should be handled in a worker thread. 5140 * - allocates resources and might sleep 5141 * - makes RSMPI calls down to the interconnect driver 5142 * this by defn include requests with reply. 
5143 * - takes a long duration of time 5144 */ 5145 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5146 break; 5147 case RSMIPC_MSG_NOTIMPORTING: 5148 importer_list_rm(src_node, msg->rsmipc_key, 5149 msg->rsmipc_segment_cookie); 5150 break; 5151 case RSMIPC_MSG_SQREADY: 5152 rsm_proc_sqready(data, src_hwaddr, arg); 5153 break; 5154 case RSMIPC_MSG_SQREADY_ACK: 5155 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5156 break; 5157 case RSMIPC_MSG_CREDIT: 5158 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5159 break; 5160 case RSMIPC_MSG_REPLY: 5161 rsm_intr_reply(msghdr); 5162 break; 5163 case RSMIPC_MSG_BELL: 5164 rsm_intr_event(msg); 5165 break; 5166 case RSMIPC_MSG_IMPORTING: 5167 importer_list_add(src_node, msg->rsmipc_key, 5168 msg->rsmipc_adapter_hwaddr, 5169 msg->rsmipc_segment_cookie); 5170 break; 5171 case RSMIPC_MSG_REPUBLISH: 5172 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5173 break; 5174 default: 5175 DBG_PRINTF((category, RSM_DEBUG, 5176 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5177 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5178 } 5179 5180 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5181 5182 } 5183 5184 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5185 rsm_intr_q_op_t opcode, rsm_addr_t src, 5186 void *data, size_t size, rsm_intr_hand_arg_t arg) 5187 { 5188 DBG_DEFINE(category, 5189 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5190 5191 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5192 5193 switch (opcode) { 5194 case RSM_INTR_Q_OP_CREATE: 5195 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5196 rsm_sqcreateop_callback(src, arg); 5197 break; 5198 case RSM_INTR_Q_OP_DESTROY: 5199 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5200 break; 5201 case RSM_INTR_Q_OP_RECEIVE: 5202 rsm_intr_callback(data, src, arg); 5203 break; 5204 default: 5205 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5206 "rsm_srv_func: 
unknown opcode = %x\n", opcode)); 5207 } 5208 5209 chd = chd; 5210 size = size; 5211 5212 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5213 5214 return (RSM_INTR_HAND_CLAIMED); 5215 } 5216 5217 /* *************************** IPC slots ************************* */ 5218 static rsmipc_slot_t * 5219 rsmipc_alloc() 5220 { 5221 int i; 5222 rsmipc_slot_t *slot; 5223 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5224 5225 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5226 5227 /* try to find a free slot, if not wait */ 5228 mutex_enter(&rsm_ipc.lock); 5229 5230 while (rsm_ipc.count == 0) { 5231 rsm_ipc.wanted = 1; 5232 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5233 } 5234 5235 /* An empty slot is available, find it */ 5236 slot = &rsm_ipc.slots[0]; 5237 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 5238 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5239 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5240 break; 5241 } 5242 } 5243 5244 ASSERT(i < RSMIPC_SZ); 5245 rsm_ipc.count--; /* one less is available */ 5246 rsm_ipc.sequence++; /* new sequence */ 5247 5248 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5249 slot->rsmipc_cookie.ic.index = (uint_t)i; 5250 5251 mutex_exit(&rsm_ipc.lock); 5252 5253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5254 5255 return (slot); 5256 } 5257 5258 static void 5259 rsmipc_free(rsmipc_slot_t *slot) 5260 { 5261 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5262 5263 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5264 5265 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5266 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5267 5268 mutex_enter(&rsm_ipc.lock); 5269 5270 RSMIPC_SET(slot, RSMIPC_FREE); 5271 5272 slot->rsmipc_cookie.ic.sequence = 0; 5273 5274 mutex_exit(&slot->rsmipc_lock); 5275 rsm_ipc.count++; 5276 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5277 if (rsm_ipc.wanted) { 5278 rsm_ipc.wanted = 0; 5279 cv_broadcast(&rsm_ipc.cv); 5280 } 5281 5282 
mutex_exit(&rsm_ipc.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
}

/*
 * rsmipc_send - deliver a kernel agent IPC message to node `dest'.
 *
 * The (req, reply) pair selects one of three modes:
 *   req != NULL, reply == NULL  - send a request, do not wait for a reply
 *   req == NULL, reply != NULL  - send a reply message
 *   req != NULL, reply != NULL  - send a request and block until the
 *				   reply slot is completed, a signal
 *				   arrives or the 5 second wait expires
 *				   (on expiry the request is resent)
 *
 * If dest is the local node the request is dispatched directly to the
 * matching handler and 0 is returned; req is dereferenced unconditionally
 * on that path, so it must not be NULL for a local destination.
 *
 * For selected request types (see the switch that sets credit_check) a
 * message buffer credit on the sendq token is reserved before sending and
 * given back if the send fails.
 *
 * Failed sends are retried from the `again' label, which asks
 * rsmka_get_sendq_token for a token again; every min_retry_cnt passes
 * through the label are followed by a 10ms delay.
 *
 * Returns RSM_SUCCESS (0) on success, RSMERR_REMOTE_NODE_UNREACHABLE when
 * dest is out of range or no sendq token can be obtained,
 * RSMERR_CONN_ABORTED when the same token comes back after a
 * CONN_ABORTED/TIMEOUT/COMM_ERR_MAYBE_DELIVERED failure, and
 * RSMERR_INTERRUPTED when a wait is interrupted by a signal.
 */
static int
rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
{
	int		e = 0;
	int		credit_check = 0;
	int		retry_cnt = 0;
	int		min_retry_cnt = 10;
	rsm_send_t	is;
	rsmipc_slot_t	*rslot;
	adapter_t	*adapter;
	path_t		*path;
	sendq_token_t	*sendq_token;
	sendq_token_t	*used_sendq_token = NULL;
	rsm_send_q_handle_t	ipc_handle;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
	    dest));

	/*
	 * Check if this is a local case
	 */
	if (dest == my_nodeid) {
		/* local destination: call the handler directly, no send */
		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
			reply->rsmipc_status = (short)rsmsegacl_validate(
			    req, dest, reply);
			break;
		case RSMIPC_MSG_BELL:
			req->rsmipc_hdr.rsmipc_src = dest;
			rsm_intr_event(req);
			break;
		case RSMIPC_MSG_IMPORTING:
			importer_list_add(dest, req->rsmipc_key,
			    req->rsmipc_adapter_hwaddr,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_NOTIMPORTING:
			importer_list_rm(dest, req->rsmipc_key,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_REPUBLISH:
			importer_update(dest, req->rsmipc_key,
			    req->rsmipc_perm);
			break;
		case RSMIPC_MSG_SUSPEND:
			importer_suspend(dest);
			break;
		case RSMIPC_MSG_SUSPEND_DONE:
			rsm_suspend_complete(dest, 0);
			break;
		case RSMIPC_MSG_RESUME:
			importer_resume(dest);
			break;
		default:
			ASSERT(0);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send done\n"));
		return (0);
	}

	if (dest >= MAX_NODES) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send bad node number %x\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Oh boy! we are going remote.
	 */

	/*
	 * identify if we need to have credits to send this message
	 * - only selected requests are flow controlled
	 */
	if (req != NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send:request type=%d\n",
		    req->rsmipc_hdr.rsmipc_type));

		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
		case RSMIPC_MSG_DISCONNECT:
		case RSMIPC_MSG_IMPORTING:
		case RSMIPC_MSG_SUSPEND:
		case RSMIPC_MSG_SUSPEND_DONE:
		case RSMIPC_MSG_RESUME:
			credit_check = 1;
			break;
		default:
			credit_check = 0;
		}
	}

again:
	if (retry_cnt++ == min_retry_cnt) {
		/* backoff before further retries for 10ms */
		delay(drv_usectohz(10000));
		retry_cnt = 0; /* reset retry_cnt */
	}
	sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
	if (sendq_token == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send no device to reach node %d\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Getting back the token that was used for the previous attempt
	 * after one of these failures ends the retries: the token is
	 * released and the caller sees RSMERR_CONN_ABORTED.
	 */
	if ((sendq_token == used_sendq_token) &&
	    ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
	    (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
		rele_sendq_token(sendq_token);
		DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
		return (RSMERR_CONN_ABORTED);
	} else
		used_sendq_token = sendq_token;

	/* lint -save -e413 */
	path = SQ_TOKEN_TO_PATH(sendq_token);
	adapter = path->local_adapter;
	/* lint -restore */
	ipc_handle = sendq_token->rsmpi_sendq_handle;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));

	if (reply == NULL) {
		/* Send request without ack */
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;

		if (credit_check) {
			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				if (e == 0) {
					/* cv_wait_sig returned 0: give up */
					mutex_exit(&path->mutex);
					no_reply_cnt++;
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* path->mutex is not held across the rsm_send call */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		no_reply_cnt++;
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send no reply send"
			    " err = %d no reply count = %d\n",
			    e, no_reply_cnt));
			ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
			    e != RSMERR_BAD_BARRIER_HNDL);
			atomic_inc_64(&rsm_ipcsend_errcnt);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}

	}

	if (req == NULL) {
		/* Send reply - No flow control is done for reply */
		/*
		 * Set the version in the msg header for KA communication
		 * versioning
		 */
		reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		reply->rsmipc_hdr.rsmipc_src = my_nodeid;
		/* incn number is not used for reply msgs currently */
		reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)reply;
		is.is_size = sizeof (*reply);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send reply send"
			    " err = %d\n", e));
			atomic_inc_64(&rsm_ipcsend_errcnt);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}
	}

	/* Reply needed */
	rslot = rsmipc_alloc(); /* allocate a new ipc slot */

	mutex_enter(&rslot->rsmipc_lock);

	/* the slot records where the reply is to be stored */
	rslot->rsmipc_data = (void *)reply;
	RSMIPC_SET(rslot, RSMIPC_PENDING);

	/* (re)send the request for as long as the slot is still PENDING */
	while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		if (credit_check) {

			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then.
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				if (e == 0) {
					mutex_exit(&path->mutex);
					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
					rsmipc_free(rslot);
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				/* slot is freed here; `again' allocates anew */
				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
				rsmipc_free(rslot);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
			rsmipc_free(rslot);
			rele_sendq_token(sendq_token);
			atomic_inc_64(&rsm_ipcsend_errcnt);
			goto again;
		}

		/* wait for a reply signal, a SIGINT, or 5 sec.
timeout */ 5645 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock, 5646 drv_usectohz(5000000), TR_CLOCK_TICK); 5647 if (e < 0) { 5648 /* timed out - retry */ 5649 e = RSMERR_TIMEOUT; 5650 } else if (e == 0) { 5651 /* signalled - return error */ 5652 e = RSMERR_INTERRUPTED; 5653 break; 5654 } else { 5655 e = RSM_SUCCESS; 5656 } 5657 } 5658 5659 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5660 rsmipc_free(rslot); 5661 rele_sendq_token(sendq_token); 5662 5663 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e)); 5664 return (e); 5665 } 5666 5667 static int 5668 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie) 5669 { 5670 rsmipc_request_t request; 5671 5672 /* 5673 * inform the exporter to delete this importer 5674 */ 5675 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 5676 request.rsmipc_key = segid; 5677 request.rsmipc_segment_cookie = cookie; 5678 return (rsmipc_send(dest, &request, RSM_NO_REPLY)); 5679 } 5680 5681 static void 5682 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl, 5683 int acl_len, rsm_permission_t default_permission) 5684 { 5685 int i; 5686 importing_token_t *token; 5687 rsmipc_request_t request; 5688 republish_token_t *republish_list = NULL; 5689 republish_token_t *rp; 5690 rsm_permission_t permission; 5691 int index; 5692 5693 /* 5694 * send the new access mode to all the nodes that have imported 5695 * this segment. 5696 * If the new acl does not have a node that was present in 5697 * the old acl a access permission of 0 is sent. 
5698 */ 5699 5700 index = rsmhash(segid); 5701 5702 /* 5703 * create a list of node/permissions to send the republish message 5704 */ 5705 mutex_enter(&importer_list.lock); 5706 5707 token = importer_list.bucket[index]; 5708 while (token != NULL) { 5709 if (segid == token->key) { 5710 permission = default_permission; 5711 5712 for (i = 0; i < acl_len; i++) { 5713 if (token->importing_node == acl[i].ae_node) { 5714 permission = acl[i].ae_permission; 5715 break; 5716 } 5717 } 5718 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP); 5719 5720 rp->key = segid; 5721 rp->importing_node = token->importing_node; 5722 rp->permission = permission; 5723 rp->next = republish_list; 5724 republish_list = rp; 5725 } 5726 token = token->next; 5727 } 5728 5729 mutex_exit(&importer_list.lock); 5730 5731 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH; 5732 request.rsmipc_key = segid; 5733 5734 while (republish_list != NULL) { 5735 request.rsmipc_perm = republish_list->permission; 5736 (void) rsmipc_send(republish_list->importing_node, 5737 &request, RSM_NO_REPLY); 5738 rp = republish_list; 5739 republish_list = republish_list->next; 5740 kmem_free(rp, sizeof (republish_token_t)); 5741 } 5742 } 5743 5744 static void 5745 rsm_send_suspend() 5746 { 5747 int i, e; 5748 rsmipc_request_t request; 5749 list_element_t *tokp; 5750 list_element_t *head = NULL; 5751 importing_token_t *token; 5752 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5753 "rsm_send_suspend enter\n")); 5754 5755 /* 5756 * create a list of node to send the suspend message 5757 * 5758 * Currently the whole importer list is scanned and we obtain 5759 * all the nodes - this basically gets all nodes that at least 5760 * import one segment from the local node. 5761 * 5762 * no need to grab the rsm_suspend_list lock here since we are 5763 * single threaded when suspend is called. 
5764 */ 5765 5766 mutex_enter(&importer_list.lock); 5767 for (i = 0; i < rsm_hash_size; i++) { 5768 5769 token = importer_list.bucket[i]; 5770 5771 while (token != NULL) { 5772 5773 tokp = head; 5774 5775 /* 5776 * make sure that the token's node 5777 * is not already on the suspend list 5778 */ 5779 while (tokp != NULL) { 5780 if (tokp->nodeid == token->importing_node) { 5781 break; 5782 } 5783 tokp = tokp->next; 5784 } 5785 5786 if (tokp == NULL) { /* not in suspend list */ 5787 tokp = kmem_zalloc(sizeof (list_element_t), 5788 KM_SLEEP); 5789 tokp->nodeid = token->importing_node; 5790 tokp->next = head; 5791 head = tokp; 5792 } 5793 5794 token = token->next; 5795 } 5796 } 5797 mutex_exit(&importer_list.lock); 5798 5799 if (head == NULL) { /* no importers so go ahead and quiesce segments */ 5800 exporter_quiesce(); 5801 return; 5802 } 5803 5804 mutex_enter(&rsm_suspend_list.list_lock); 5805 ASSERT(rsm_suspend_list.list_head == NULL); 5806 /* 5807 * update the suspend list righaway so that if a node dies the 5808 * pathmanager can set the NODE dead flag 5809 */ 5810 rsm_suspend_list.list_head = head; 5811 mutex_exit(&rsm_suspend_list.list_lock); 5812 5813 tokp = head; 5814 5815 while (tokp != NULL) { 5816 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND; 5817 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY); 5818 /* 5819 * Error in rsmipc_send currently happens due to inaccessibility 5820 * of the remote node. 5821 */ 5822 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */ 5823 tokp->flags |= RSM_SUSPEND_ACKPENDING; 5824 } 5825 5826 tokp = tokp->next; 5827 } 5828 5829 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5830 "rsm_send_suspend done\n")); 5831 5832 } 5833 5834 static void 5835 rsm_send_resume() 5836 { 5837 rsmipc_request_t request; 5838 list_element_t *elem, *head; 5839 5840 /* 5841 * save the suspend list so that we know where to send 5842 * the resume messages and make the suspend list head 5843 * NULL. 
5844 */ 5845 mutex_enter(&rsm_suspend_list.list_lock); 5846 head = rsm_suspend_list.list_head; 5847 rsm_suspend_list.list_head = NULL; 5848 mutex_exit(&rsm_suspend_list.list_lock); 5849 5850 while (head != NULL) { 5851 elem = head; 5852 head = head->next; 5853 5854 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME; 5855 5856 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY); 5857 5858 kmem_free((void *)elem, sizeof (list_element_t)); 5859 5860 } 5861 5862 } 5863 5864 /* 5865 * This function takes path and sends a message using the sendq 5866 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK 5867 * and RSMIPC_MSG_CREDIT are sent using this function. 5868 */ 5869 int 5870 rsmipc_send_controlmsg(path_t *path, int msgtype) 5871 { 5872 int e; 5873 int retry_cnt = 0; 5874 int min_retry_cnt = 10; 5875 adapter_t *adapter; 5876 rsm_send_t is; 5877 rsm_send_q_handle_t ipc_handle; 5878 rsmipc_controlmsg_t msg; 5879 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL); 5880 5881 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5882 "rsmipc_send_controlmsg enter\n")); 5883 5884 ASSERT(MUTEX_HELD(&path->mutex)); 5885 5886 adapter = path->local_adapter; 5887 5888 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx " 5889 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype, 5890 my_nodeid, adapter->hwaddr, path->remote_node, 5891 path->remote_hwaddr, path->procmsg_cnt)); 5892 5893 if (path->state != RSMKA_PATH_ACTIVE) { 5894 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5895 "rsmipc_send_controlmsg done: ! 
RSMKA_PATH_ACTIVE")); 5896 return (1); 5897 } 5898 5899 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5900 5901 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5902 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5903 msg.rsmipc_hdr.rsmipc_type = msgtype; 5904 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5905 5906 if (msgtype == RSMIPC_MSG_CREDIT) 5907 msg.rsmipc_credits = path->procmsg_cnt; 5908 5909 msg.rsmipc_local_incn = path->local_incn; 5910 5911 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5912 /* incr the sendq, path refcnt */ 5913 PATH_HOLD_NOLOCK(path); 5914 SENDQ_TOKEN_HOLD(path); 5915 5916 do { 5917 /* drop the path lock before doing the rsm_send */ 5918 mutex_exit(&path->mutex); 5919 5920 is.is_data = (void *)&msg; 5921 is.is_size = sizeof (msg); 5922 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5923 is.is_wait = 0; 5924 5925 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5926 5927 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5928 e != RSMERR_BAD_BARRIER_HNDL); 5929 5930 mutex_enter(&path->mutex); 5931 5932 if (e == RSM_SUCCESS) { 5933 break; 5934 } 5935 /* error counter for statistics */ 5936 atomic_inc_64(&rsm_ctrlmsg_errcnt); 5937 5938 DBG_PRINTF((category, RSM_ERR, 5939 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5940 5941 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5942 (void) cv_reltimedwait(&path->sendq_token.sendq_cv, 5943 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK); 5944 retry_cnt = 0; 5945 } 5946 } while (path->state == RSMKA_PATH_ACTIVE); 5947 5948 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5949 SENDQ_TOKEN_RELE(path); 5950 PATH_RELE_NOLOCK(path); 5951 5952 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5953 "rsmipc_send_controlmsg done=%d", e)); 5954 return (e); 5955 } 5956 5957 /* 5958 * Called from rsm_force_unload and path_importer_disconnect. 
The memory 5959 * mapping for the imported segment is removed and the segment is 5960 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5961 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5962 * and FALSE from rsm_rebind. 5963 * 5964 * When subsequent accesses cause page faulting, the dummy page is mapped 5965 * to resolve the fault, and the mapping generation number is incremented 5966 * so that the application can be notified on a close barrier operation. 5967 * 5968 * It is important to note that the caller of rsmseg_unload is responsible for 5969 * acquiring the segment lock before making a call to rsmseg_unload. This is 5970 * required to make the caller and rsmseg_unload thread safe. The segment lock 5971 * will be released by the rsmseg_unload function. 5972 */ 5973 void 5974 rsmseg_unload(rsmseg_t *im_seg) 5975 { 5976 rsmcookie_t *hdl; 5977 void *shared_cookie; 5978 rsmipc_request_t request; 5979 uint_t maxprot; 5980 5981 DBG_DEFINE(category, 5982 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5983 5984 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5985 5986 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5987 5988 /* wait until segment leaves the mapping state */ 5989 while (im_seg->s_state == RSM_STATE_MAPPING) 5990 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 5991 /* 5992 * An unload is only necessary if the segment is connected. However, 5993 * if the segment was on the import list in state RSM_STATE_CONNECTING 5994 * then a connection was in progress. Change to RSM_STATE_NEW 5995 * here to cause an early exit from the connection process. 
5996 */ 5997 if (im_seg->s_state == RSM_STATE_NEW) { 5998 rsmseglock_release(im_seg); 5999 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6000 "rsmseg_unload done: RSM_STATE_NEW\n")); 6001 return; 6002 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6003 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6004 rsmsharelock_acquire(im_seg); 6005 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6006 rsmsharelock_release(im_seg); 6007 rsmseglock_release(im_seg); 6008 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6009 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6010 return; 6011 } 6012 6013 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6014 if (im_seg->s_ckl != NULL) { 6015 int e; 6016 /* Setup protections for remap */ 6017 maxprot = PROT_USER; 6018 if (im_seg->s_mode & RSM_PERM_READ) { 6019 maxprot |= PROT_READ; 6020 } 6021 if (im_seg->s_mode & RSM_PERM_WRITE) { 6022 maxprot |= PROT_WRITE; 6023 } 6024 hdl = im_seg->s_ckl; 6025 for (; hdl != NULL; hdl = hdl->c_next) { 6026 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6027 remap_cookie, 6028 hdl->c_off, hdl->c_len, 6029 maxprot, 0, NULL); 6030 6031 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6032 "remap returns %d\n", e)); 6033 } 6034 } 6035 6036 (void) rsm_closeconnection(im_seg, &shared_cookie); 6037 6038 if (shared_cookie != NULL) { 6039 /* 6040 * inform the exporting node so this import 6041 * can be deleted from the list of importers. 
6042 */ 6043 request.rsmipc_hdr.rsmipc_type = 6044 RSMIPC_MSG_NOTIMPORTING; 6045 request.rsmipc_key = im_seg->s_segid; 6046 request.rsmipc_segment_cookie = shared_cookie; 6047 rsmseglock_release(im_seg); 6048 (void) rsmipc_send(im_seg->s_node, &request, 6049 RSM_NO_REPLY); 6050 } else { 6051 rsmseglock_release(im_seg); 6052 } 6053 } 6054 else 6055 rsmseglock_release(im_seg); 6056 6057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6058 6059 } 6060 6061 /* ****************************** Importer Calls ************************ */ 6062 6063 static int 6064 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6065 { 6066 int shifts = 0; 6067 6068 if (crgetuid(cr) != owner) { 6069 shifts += 3; 6070 if (!groupmember(group, cr)) 6071 shifts += 3; 6072 } 6073 6074 mode &= ~(perm << shifts); 6075 6076 if (mode == 0) 6077 return (0); 6078 6079 return (secpolicy_rsm_access(cr, owner, mode)); 6080 } 6081 6082 6083 static int 6084 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6085 intptr_t dataptr, int mode) 6086 { 6087 int e; 6088 int recheck_state = 0; 6089 void *shared_cookie; 6090 rsmipc_request_t request; 6091 rsmipc_reply_t reply; 6092 rsm_permission_t access; 6093 adapter_t *adapter; 6094 rsm_addr_t addr = 0; 6095 rsm_import_share_t *sharedp; 6096 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6097 6098 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6099 6100 adapter = rsm_getadapter(msg, mode); 6101 if (adapter == NULL) { 6102 DBG_PRINTF((category, RSM_ERR, 6103 "rsm_connect done:ENODEV adapter=NULL\n")); 6104 return (RSMERR_CTLR_NOT_PRESENT); 6105 } 6106 6107 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6108 rsmka_release_adapter(adapter); 6109 DBG_PRINTF((category, RSM_ERR, 6110 "rsm_connect done:ENODEV loopback\n")); 6111 return (RSMERR_CTLR_NOT_PRESENT); 6112 } 6113 6114 6115 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6116 
ASSERT(seg->s_state == RSM_STATE_NEW); 6117 6118 /* 6119 * Translate perm to access 6120 */ 6121 if (msg->perm & ~RSM_PERM_RDWR) { 6122 rsmka_release_adapter(adapter); 6123 DBG_PRINTF((category, RSM_ERR, 6124 "rsm_connect done:EINVAL invalid perms\n")); 6125 return (RSMERR_BAD_PERMS); 6126 } 6127 access = 0; 6128 if (msg->perm & RSM_PERM_READ) 6129 access |= RSM_ACCESS_READ; 6130 if (msg->perm & RSM_PERM_WRITE) 6131 access |= RSM_ACCESS_WRITE; 6132 6133 seg->s_node = msg->nodeid; 6134 6135 /* 6136 * Adding to the import list locks the segment; release the segment 6137 * lock so we can get the reply for the send. 6138 */ 6139 e = rsmimport_add(seg, msg->key); 6140 if (e) { 6141 rsmka_release_adapter(adapter); 6142 DBG_PRINTF((category, RSM_ERR, 6143 "rsm_connect done:rsmimport_add failed %d\n", e)); 6144 return (e); 6145 } 6146 seg->s_state = RSM_STATE_CONNECTING; 6147 6148 /* 6149 * Set the s_adapter field here so as to have a valid comparison of 6150 * the adapter and the s_adapter value during rsmshare_get. 
For 6151 * any error, set s_adapter to NULL before doing a release_adapter 6152 */ 6153 seg->s_adapter = adapter; 6154 6155 rsmseglock_release(seg); 6156 6157 /* 6158 * get the pointer to the shared data structure; the 6159 * shared data is locked and refcount has been incremented 6160 */ 6161 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6162 6163 ASSERT(rsmsharelock_held(seg)); 6164 6165 do { 6166 /* flag indicates whether we need to recheck the state */ 6167 recheck_state = 0; 6168 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6169 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6170 switch (sharedp->rsmsi_state) { 6171 case RSMSI_STATE_NEW: 6172 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6173 break; 6174 case RSMSI_STATE_CONNECTING: 6175 /* FALLTHRU */ 6176 case RSMSI_STATE_CONN_QUIESCE: 6177 /* FALLTHRU */ 6178 case RSMSI_STATE_MAP_QUIESCE: 6179 /* wait for the state to change */ 6180 while ((sharedp->rsmsi_state == 6181 RSMSI_STATE_CONNECTING) || 6182 (sharedp->rsmsi_state == 6183 RSMSI_STATE_CONN_QUIESCE) || 6184 (sharedp->rsmsi_state == 6185 RSMSI_STATE_MAP_QUIESCE)) { 6186 if (cv_wait_sig(&sharedp->rsmsi_cv, 6187 &sharedp->rsmsi_lock) == 0) { 6188 /* signalled - clean up and return */ 6189 rsmsharelock_release(seg); 6190 rsmimport_rm(seg); 6191 seg->s_adapter = NULL; 6192 rsmka_release_adapter(adapter); 6193 seg->s_state = RSM_STATE_NEW; 6194 DBG_PRINTF((category, RSM_ERR, 6195 "rsm_connect done: INTERRUPTED\n")); 6196 return (RSMERR_INTERRUPTED); 6197 } 6198 } 6199 /* 6200 * the state changed, loop back and check what it is 6201 */ 6202 recheck_state = 1; 6203 break; 6204 case RSMSI_STATE_ABORT_CONNECT: 6205 /* exit the loop and clean up further down */ 6206 break; 6207 case RSMSI_STATE_CONNECTED: 6208 /* already connected, good - fall through */ 6209 case RSMSI_STATE_MAPPED: 6210 /* already mapped, wow - fall through */ 6211 /* access validation etc is done further down */ 6212 break; 6213 case RSMSI_STATE_DISCONNECTED: 6214 /* 
disconnected - so reconnect now */ 6215 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6216 break; 6217 default: 6218 ASSERT(0); /* Invalid State */ 6219 } 6220 } while (recheck_state); 6221 6222 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6223 /* we are the first to connect */ 6224 rsmsharelock_release(seg); 6225 6226 if (msg->nodeid != my_nodeid) { 6227 addr = get_remote_hwaddr(adapter, msg->nodeid); 6228 6229 if ((int64_t)addr < 0) { 6230 rsmsharelock_acquire(seg); 6231 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6232 RSMSI_STATE_NEW); 6233 rsmsharelock_release(seg); 6234 rsmimport_rm(seg); 6235 seg->s_adapter = NULL; 6236 rsmka_release_adapter(adapter); 6237 seg->s_state = RSM_STATE_NEW; 6238 DBG_PRINTF((category, RSM_ERR, 6239 "rsm_connect done: hwaddr<0\n")); 6240 return (RSMERR_INTERNAL_ERROR); 6241 } 6242 } else { 6243 addr = adapter->hwaddr; 6244 } 6245 6246 /* 6247 * send request to node [src, dest, key, msgid] and get back 6248 * [status, msgid, cookie] 6249 */ 6250 request.rsmipc_key = msg->key; 6251 /* 6252 * we need the s_mode of the exporter so pass 6253 * RSM_ACCESS_TRUSTED 6254 */ 6255 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6256 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6257 request.rsmipc_adapter_hwaddr = addr; 6258 request.rsmipc_segment_cookie = sharedp; 6259 6260 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6261 if (e) { 6262 rsmsharelock_acquire(seg); 6263 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6264 RSMSI_STATE_NEW); 6265 rsmsharelock_release(seg); 6266 rsmimport_rm(seg); 6267 seg->s_adapter = NULL; 6268 rsmka_release_adapter(adapter); 6269 seg->s_state = RSM_STATE_NEW; 6270 DBG_PRINTF((category, RSM_ERR, 6271 "rsm_connect done:rsmipc_send failed %d\n", e)); 6272 return (e); 6273 } 6274 6275 if (reply.rsmipc_status != RSM_SUCCESS) { 6276 rsmsharelock_acquire(seg); 6277 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6278 RSMSI_STATE_NEW); 6279 rsmsharelock_release(seg); 6280 rsmimport_rm(seg); 6281 
seg->s_adapter = NULL; 6282 rsmka_release_adapter(adapter); 6283 seg->s_state = RSM_STATE_NEW; 6284 DBG_PRINTF((category, RSM_ERR, 6285 "rsm_connect done:rsmipc_send reply err %d\n", 6286 reply.rsmipc_status)); 6287 return (reply.rsmipc_status); 6288 } 6289 6290 rsmsharelock_acquire(seg); 6291 /* store the information recvd into the shared data struct */ 6292 sharedp->rsmsi_mode = reply.rsmipc_mode; 6293 sharedp->rsmsi_uid = reply.rsmipc_uid; 6294 sharedp->rsmsi_gid = reply.rsmipc_gid; 6295 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6296 sharedp->rsmsi_cookie = sharedp; 6297 } 6298 6299 rsmsharelock_release(seg); 6300 6301 /* 6302 * Get the segment lock and check for a force disconnect 6303 * from the export side which would have changed the state 6304 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6305 * force disconnect will be held off until the connection 6306 * has completed. 6307 */ 6308 rsmseglock_acquire(seg); 6309 rsmsharelock_acquire(seg); 6310 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6311 seg->s_state == RSM_STATE_ABORT_CONNECT); 6312 6313 shared_cookie = sharedp->rsmsi_cookie; 6314 6315 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6316 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6317 seg->s_state = RSM_STATE_NEW; 6318 seg->s_adapter = NULL; 6319 rsmsharelock_release(seg); 6320 rsmseglock_release(seg); 6321 rsmimport_rm(seg); 6322 rsmka_release_adapter(adapter); 6323 6324 rsmsharelock_acquire(seg); 6325 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6326 /* 6327 * set a flag indicating abort handling has been 6328 * done 6329 */ 6330 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6331 rsmsharelock_release(seg); 6332 /* send a message to exporter - only once */ 6333 (void) rsm_send_notimporting(msg->nodeid, 6334 msg->key, shared_cookie); 6335 rsmsharelock_acquire(seg); 6336 /* 6337 * wake up any waiting importers and inform that 6338 * connection has been aborted 6339 */ 6340 cv_broadcast(&sharedp->rsmsi_cv); 
6341 } 6342 rsmsharelock_release(seg); 6343 6344 DBG_PRINTF((category, RSM_ERR, 6345 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6346 return (RSMERR_INTERRUPTED); 6347 } 6348 6349 6350 /* 6351 * We need to verify that this process has access 6352 */ 6353 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6354 access & sharedp->rsmsi_mode, 6355 (int)(msg->perm & RSM_PERM_RDWR), cred); 6356 if (e) { 6357 rsmsharelock_release(seg); 6358 seg->s_state = RSM_STATE_NEW; 6359 seg->s_adapter = NULL; 6360 rsmseglock_release(seg); 6361 rsmimport_rm(seg); 6362 rsmka_release_adapter(adapter); 6363 /* 6364 * No need to lock segment it has been removed 6365 * from the hash table 6366 */ 6367 rsmsharelock_acquire(seg); 6368 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6369 rsmsharelock_release(seg); 6370 /* this is the first importer */ 6371 6372 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6373 shared_cookie); 6374 rsmsharelock_acquire(seg); 6375 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6376 cv_broadcast(&sharedp->rsmsi_cv); 6377 } 6378 rsmsharelock_release(seg); 6379 6380 DBG_PRINTF((category, RSM_ERR, 6381 "rsm_connect done: ipcaccess failed\n")); 6382 return (RSMERR_PERM_DENIED); 6383 } 6384 6385 /* update state and cookie */ 6386 seg->s_segid = sharedp->rsmsi_segid; 6387 seg->s_len = sharedp->rsmsi_seglen; 6388 seg->s_mode = access & sharedp->rsmsi_mode; 6389 seg->s_pid = ddi_get_pid(); 6390 seg->s_mapinfo = NULL; 6391 6392 if (seg->s_node != my_nodeid) { 6393 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6394 e = adapter->rsmpi_ops->rsm_connect( 6395 adapter->rsmpi_handle, 6396 addr, seg->s_segid, &sharedp->rsmsi_handle); 6397 6398 if (e != RSM_SUCCESS) { 6399 seg->s_state = RSM_STATE_NEW; 6400 seg->s_adapter = NULL; 6401 rsmsharelock_release(seg); 6402 rsmseglock_release(seg); 6403 rsmimport_rm(seg); 6404 rsmka_release_adapter(adapter); 6405 /* 6406 * inform the exporter to delete this importer 6407 */ 6408 (void) 
rsm_send_notimporting(msg->nodeid, 6409 msg->key, shared_cookie); 6410 6411 /* 6412 * Now inform any waiting importers to 6413 * retry connect. This needs to be done 6414 * after sending notimporting so that 6415 * the notimporting is sent before a waiting 6416 * importer sends a segconnect while retrying 6417 * 6418 * No need to lock segment it has been removed 6419 * from the hash table 6420 */ 6421 6422 rsmsharelock_acquire(seg); 6423 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6424 cv_broadcast(&sharedp->rsmsi_cv); 6425 rsmsharelock_release(seg); 6426 6427 DBG_PRINTF((category, RSM_ERR, 6428 "rsm_connect error %d\n", e)); 6429 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6430 return ( 6431 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6432 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6433 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6434 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6435 else 6436 return (e); 6437 } 6438 6439 } 6440 seg->s_handle.in = sharedp->rsmsi_handle; 6441 6442 } 6443 6444 seg->s_state = RSM_STATE_CONNECT; 6445 6446 6447 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6448 if (bar_va) { 6449 /* increment generation number on barrier page */ 6450 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6451 /* return user off into barrier page where status will be */ 6452 msg->off = (int)seg->s_hdr.rsmrc_num; 6453 msg->gnum = bar_va[msg->off]; /* gnum race */ 6454 } else { 6455 msg->off = 0; 6456 msg->gnum = 0; /* gnum race */ 6457 } 6458 6459 msg->len = (int)sharedp->rsmsi_seglen; 6460 msg->rnum = seg->s_minor; 6461 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6462 rsmsharelock_release(seg); 6463 rsmseglock_release(seg); 6464 6465 /* Return back to user the segment size & perm in case it's needed */ 6466 6467 #ifdef _MULTI_DATAMODEL 6468 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6469 rsm_ioctlmsg32_t msg32; 6470 6471 if (msg->len > UINT_MAX) 6472 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6473 else 6474 msg32.len = msg->len; 6475 msg32.off = 
msg->off; 6476 msg32.perm = msg->perm; 6477 msg32.gnum = msg->gnum; 6478 msg32.rnum = msg->rnum; 6479 6480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6481 "rsm_connect done\n")); 6482 6483 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6484 sizeof (msg32), mode)) 6485 return (RSMERR_BAD_ADDR); 6486 else 6487 return (RSM_SUCCESS); 6488 } 6489 #endif 6490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6491 6492 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6493 mode)) 6494 return (RSMERR_BAD_ADDR); 6495 else 6496 return (RSM_SUCCESS); 6497 } 6498 6499 static int 6500 rsm_unmap(rsmseg_t *seg) 6501 { 6502 int err; 6503 adapter_t *adapter; 6504 rsm_import_share_t *sharedp; 6505 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6506 6507 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6508 "rsm_unmap enter %u\n", seg->s_segid)); 6509 6510 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6511 6512 /* assert seg is locked */ 6513 ASSERT(rsmseglock_held(seg)); 6514 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6515 6516 if ((seg->s_state != RSM_STATE_ACTIVE) && 6517 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6518 /* segment unmap has already been done */ 6519 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6520 return (RSM_SUCCESS); 6521 } 6522 6523 sharedp = seg->s_share; 6524 6525 rsmsharelock_acquire(seg); 6526 6527 /* 6528 * - shared data struct is in MAPPED or MAP_QUIESCE state 6529 */ 6530 6531 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6532 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6533 6534 /* 6535 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6536 * the segment cookie list was NULL; but it is always NULL when 6537 * called from rsmmap_unmap and won't be NULL when called for 6538 * a force disconnect - so the check for NULL cookie list was removed 6539 */ 6540 6541 ASSERT(sharedp->rsmsi_mapcnt > 0); 6542 6543 sharedp->rsmsi_mapcnt--; 6544 6545 if 
(sharedp->rsmsi_mapcnt == 0) { 6546 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6547 /* unmap the shared RSMPI mapping */ 6548 adapter = seg->s_adapter; 6549 if (seg->s_node != my_nodeid) { 6550 ASSERT(sharedp->rsmsi_handle != NULL); 6551 err = adapter->rsmpi_ops-> 6552 rsm_unmap(sharedp->rsmsi_handle); 6553 DBG_PRINTF((category, RSM_DEBUG, 6554 "rsm_unmap: rsmpi unmap %d\n", err)); 6555 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6556 sharedp->rsmsi_mapinfo = NULL; 6557 } 6558 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6559 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6560 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6561 } 6562 } 6563 6564 rsmsharelock_release(seg); 6565 6566 /* 6567 * The s_cookie field is used to store the cookie returned from the 6568 * ddi_umem_lock when binding the pages for an export segment. This 6569 * is the primary use of the s_cookie field and does not normally 6570 * pertain to any importing segment except in the loopback case. 6571 * For the loopback case, the import segment and export segment are 6572 * on the same node, the s_cookie field of the segment structure for 6573 * the importer is initialized to the s_cookie field in the exported 6574 * segment during the map operation and is used during the call to 6575 * devmap_umem_setup for the import mapping. 6576 * Thus, during unmap, we simply need to set s_cookie to NULL to 6577 * indicate that the mapping no longer exists. 6578 */ 6579 seg->s_cookie = NULL; 6580 6581 seg->s_mapinfo = NULL; 6582 6583 if (seg->s_state == RSM_STATE_ACTIVE) 6584 seg->s_state = RSM_STATE_CONNECT; 6585 else 6586 seg->s_state = RSM_STATE_CONN_QUIESCE; 6587 6588 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6589 6590 return (RSM_SUCCESS); 6591 } 6592 6593 /* 6594 * cookie returned here if not null indicates that it is 6595 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6596 * message. 
6597 */ 6598 static int 6599 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6600 { 6601 int e; 6602 adapter_t *adapter; 6603 rsm_import_share_t *sharedp; 6604 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6605 6606 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6607 "rsm_closeconnection enter\n")); 6608 6609 *cookie = (void *)NULL; 6610 6611 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6612 6613 /* assert seg is locked */ 6614 ASSERT(rsmseglock_held(seg)); 6615 6616 if (seg->s_state == RSM_STATE_DISCONNECT) { 6617 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6618 "rsm_closeconnection done: already disconnected\n")); 6619 return (RSM_SUCCESS); 6620 } 6621 6622 /* wait for all putv/getv ops to get done */ 6623 while (seg->s_rdmacnt > 0) { 6624 cv_wait(&seg->s_cv, &seg->s_lock); 6625 } 6626 6627 (void) rsm_unmap(seg); 6628 6629 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6630 seg->s_state == RSM_STATE_CONN_QUIESCE); 6631 6632 adapter = seg->s_adapter; 6633 sharedp = seg->s_share; 6634 6635 ASSERT(sharedp != NULL); 6636 6637 rsmsharelock_acquire(seg); 6638 6639 /* 6640 * Disconnect on adapter 6641 * 6642 * The current algorithm is stateless, I don't have to contact 6643 * server when I go away. It only gives me permissions. Of course, 6644 * the adapters will talk to terminate the connect. 
6645 * 6646 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6647 */ 6648 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6649 (sharedp->rsmsi_node != my_nodeid)) { 6650 6651 if (sharedp->rsmsi_refcnt == 1) { 6652 /* this is the last importer */ 6653 ASSERT(sharedp->rsmsi_mapcnt == 0); 6654 6655 e = adapter->rsmpi_ops-> 6656 rsm_disconnect(sharedp->rsmsi_handle); 6657 if (e != RSM_SUCCESS) { 6658 DBG_PRINTF((category, RSM_DEBUG, 6659 "rsm:disconnect failed seg=%x:err=%d\n", 6660 seg->s_key, e)); 6661 } 6662 } 6663 } 6664 6665 seg->s_handle.in = NULL; 6666 6667 sharedp->rsmsi_refcnt--; 6668 6669 if (sharedp->rsmsi_refcnt == 0) { 6670 *cookie = (void *)sharedp->rsmsi_cookie; 6671 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6672 sharedp->rsmsi_handle = NULL; 6673 rsmsharelock_release(seg); 6674 6675 /* clean up the shared data structure */ 6676 mutex_destroy(&sharedp->rsmsi_lock); 6677 cv_destroy(&sharedp->rsmsi_cv); 6678 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6679 6680 } else { 6681 rsmsharelock_release(seg); 6682 } 6683 6684 /* increment generation number on barrier page */ 6685 if (bar_va) { 6686 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6687 } 6688 6689 /* 6690 * The following needs to be done after any 6691 * rsmsharelock calls which use seg->s_share. 
6692 */ 6693 seg->s_share = NULL; 6694 6695 seg->s_state = RSM_STATE_DISCONNECT; 6696 /* signal anyone waiting in the CONN_QUIESCE state */ 6697 cv_broadcast(&seg->s_cv); 6698 6699 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6700 "rsm_closeconnection done\n")); 6701 6702 return (RSM_SUCCESS); 6703 } 6704 6705 int 6706 rsm_disconnect(rsmseg_t *seg) 6707 { 6708 rsmipc_request_t request; 6709 void *shared_cookie; 6710 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6711 6712 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6713 6714 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6715 6716 /* assert seg isn't locked */ 6717 ASSERT(!rsmseglock_held(seg)); 6718 6719 6720 /* Remove segment from imported list */ 6721 rsmimport_rm(seg); 6722 6723 /* acquire the segment */ 6724 rsmseglock_acquire(seg); 6725 6726 /* wait until segment leaves the mapping state */ 6727 while (seg->s_state == RSM_STATE_MAPPING) 6728 cv_wait(&seg->s_cv, &seg->s_lock); 6729 6730 if (seg->s_state == RSM_STATE_DISCONNECT) { 6731 seg->s_state = RSM_STATE_NEW; 6732 rsmseglock_release(seg); 6733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6734 "rsm_disconnect done: already disconnected\n")); 6735 return (RSM_SUCCESS); 6736 } 6737 6738 (void) rsm_closeconnection(seg, &shared_cookie); 6739 6740 /* update state */ 6741 seg->s_state = RSM_STATE_NEW; 6742 6743 if (shared_cookie != NULL) { 6744 /* 6745 * This is the last importer so inform the exporting node 6746 * so this import can be deleted from the list of importers. 
6747 */ 6748 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6749 request.rsmipc_key = seg->s_segid; 6750 request.rsmipc_segment_cookie = shared_cookie; 6751 rsmseglock_release(seg); 6752 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6753 } else { 6754 rsmseglock_release(seg); 6755 } 6756 6757 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6758 6759 return (DDI_SUCCESS); 6760 } 6761 6762 static int 6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6764 struct pollhead **phpp) 6765 { 6766 minor_t rnum; 6767 rsmresource_t *res; 6768 rsmseg_t *seg; 6769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6770 6771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6772 6773 /* find minor, no lock */ 6774 rnum = getminor(dev); 6775 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6776 6777 /* poll is supported only for export/import segments */ 6778 if ((res == NULL) || (res == RSMRC_RESERVED) || 6779 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6780 return (ENXIO); 6781 } 6782 6783 /* 6784 * An exported segment must be in state RSM_STATE_EXPORT; an 6785 * imported segment must be in state RSM_STATE_ACTIVE. 
6786 */ 6787 seg = (rsmseg_t *)res; 6788 6789 if (seg->s_pollevent) { 6790 *reventsp = POLLRDNORM; 6791 } else { 6792 *reventsp = 0; 6793 } 6794 6795 if ((*reventsp == 0 && !anyyet) || (events & POLLET)) { 6796 /* cannot take segment lock here */ 6797 *phpp = &seg->s_poll; 6798 seg->s_pollflag |= RSM_SEGMENT_POLL; 6799 } 6800 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6801 return (0); 6802 } 6803 6804 6805 6806 /* ************************* IOCTL Commands ********************* */ 6807 6808 static rsmseg_t * 6809 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6810 rsm_resource_type_t type) 6811 { 6812 /* get segment from resource handle */ 6813 rsmseg_t *seg; 6814 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6815 6816 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6817 6818 6819 if (res != RSMRC_RESERVED) { 6820 seg = (rsmseg_t *)res; 6821 } else { 6822 /* Allocate segment now and bind it */ 6823 seg = rsmseg_alloc(rnum, credp); 6824 6825 /* 6826 * if DR pre-processing is going on or DR is in progress 6827 * then the new export segments should be in the NEW_QSCD state 6828 */ 6829 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6830 mutex_enter(&rsm_drv_data.drv_lock); 6831 if ((rsm_drv_data.drv_state == 6832 RSM_DRV_PREDEL_STARTED) || 6833 (rsm_drv_data.drv_state == 6834 RSM_DRV_PREDEL_COMPLETED) || 6835 (rsm_drv_data.drv_state == 6836 RSM_DRV_DR_IN_PROGRESS)) { 6837 seg->s_state = RSM_STATE_NEW_QUIESCED; 6838 } 6839 mutex_exit(&rsm_drv_data.drv_lock); 6840 } 6841 6842 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6843 } 6844 6845 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6846 6847 return (seg); 6848 } 6849 6850 static int 6851 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6852 int mode, cred_t *credp) 6853 { 6854 int error; 6855 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6856 6857 DBG_PRINTF((category, 
RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6858 6859 arg = arg; 6860 credp = credp; 6861 6862 ASSERT(seg != NULL); 6863 6864 switch (cmd) { 6865 case RSM_IOCTL_BIND: 6866 error = rsm_bind(seg, msg, arg, mode); 6867 break; 6868 case RSM_IOCTL_REBIND: 6869 error = rsm_rebind(seg, msg); 6870 break; 6871 case RSM_IOCTL_UNBIND: 6872 error = ENOTSUP; 6873 break; 6874 case RSM_IOCTL_PUBLISH: 6875 error = rsm_publish(seg, msg, arg, mode); 6876 break; 6877 case RSM_IOCTL_REPUBLISH: 6878 error = rsm_republish(seg, msg, mode); 6879 break; 6880 case RSM_IOCTL_UNPUBLISH: 6881 error = rsm_unpublish(seg, 1); 6882 break; 6883 default: 6884 error = EINVAL; 6885 break; 6886 } 6887 6888 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6889 error)); 6890 6891 return (error); 6892 } 6893 static int 6894 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6895 int mode, cred_t *credp) 6896 { 6897 int error; 6898 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6899 6900 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6901 6902 ASSERT(seg); 6903 6904 switch (cmd) { 6905 case RSM_IOCTL_CONNECT: 6906 error = rsm_connect(seg, msg, credp, arg, mode); 6907 break; 6908 default: 6909 error = EINVAL; 6910 break; 6911 } 6912 6913 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6914 error)); 6915 return (error); 6916 } 6917 6918 static int 6919 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6920 int mode) 6921 { 6922 int e; 6923 adapter_t *adapter; 6924 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6925 6926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6927 6928 6929 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6930 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6931 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6932 return (RSMERR_CONN_ABORTED); 6933 } else if (seg->s_node == my_nodeid) { 6934 DBG_PRINTF((category, 
RSM_DEBUG_VERBOSE, 6935 "rsmbar_ioctl done: loopback\n")); 6936 return (RSM_SUCCESS); 6937 } 6938 6939 adapter = seg->s_adapter; 6940 6941 switch (cmd) { 6942 case RSM_IOCTL_BAR_CHECK: 6943 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6944 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6945 return (bar_va ? RSM_SUCCESS : EINVAL); 6946 case RSM_IOCTL_BAR_OPEN: 6947 e = adapter->rsmpi_ops-> 6948 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6949 break; 6950 case RSM_IOCTL_BAR_ORDER: 6951 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6952 break; 6953 case RSM_IOCTL_BAR_CLOSE: 6954 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6955 break; 6956 default: 6957 e = EINVAL; 6958 break; 6959 } 6960 6961 if (e == RSM_SUCCESS) { 6962 #ifdef _MULTI_DATAMODEL 6963 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6964 rsm_ioctlmsg32_t msg32; 6965 int i; 6966 6967 for (i = 0; i < 4; i++) { 6968 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6969 } 6970 6971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6972 "rsmbar_ioctl done\n")); 6973 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6974 sizeof (msg32), mode)) 6975 return (RSMERR_BAD_ADDR); 6976 else 6977 return (RSM_SUCCESS); 6978 } 6979 #endif 6980 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6981 "rsmbar_ioctl done\n")); 6982 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6983 sizeof (*msg), mode)) 6984 return (RSMERR_BAD_ADDR); 6985 else 6986 return (RSM_SUCCESS); 6987 } 6988 6989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6990 "rsmbar_ioctl done: error=%d\n", e)); 6991 6992 return (e); 6993 } 6994 6995 /* 6996 * Ring the doorbell of the export segment to which this segment is 6997 * connected. 
6998 */ 6999 static int 7000 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7001 { 7002 int e = 0; 7003 rsmipc_request_t request; 7004 7005 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7006 7007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7008 7009 request.rsmipc_key = seg->s_segid; 7010 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7011 request.rsmipc_segment_cookie = NULL; 7012 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7013 7014 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7015 "exportbell_ioctl done: %d\n", e)); 7016 7017 return (e); 7018 } 7019 7020 /* 7021 * Ring the doorbells of all segments importing this segment 7022 */ 7023 static int 7024 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7025 { 7026 importing_token_t *token = NULL; 7027 rsmipc_request_t request; 7028 int index; 7029 7030 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7031 7032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7033 7034 ASSERT(seg->s_state != RSM_STATE_NEW && 7035 seg->s_state != RSM_STATE_NEW_QUIESCED); 7036 7037 request.rsmipc_key = seg->s_segid; 7038 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7039 7040 index = rsmhash(seg->s_segid); 7041 7042 token = importer_list.bucket[index]; 7043 7044 while (token != NULL) { 7045 if (seg->s_key == token->key) { 7046 request.rsmipc_segment_cookie = 7047 token->import_segment_cookie; 7048 (void) rsmipc_send(token->importing_node, 7049 &request, RSM_NO_REPLY); 7050 } 7051 token = token->next; 7052 } 7053 7054 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7055 "importbell_ioctl done\n")); 7056 return (RSM_SUCCESS); 7057 } 7058 7059 static int 7060 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7061 rsm_poll_event_t **eventspp, int mode) 7062 { 7063 rsm_poll_event_t *evlist = NULL; 7064 size_t evlistsz; 7065 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7066 7067 #ifdef _MULTI_DATAMODEL 7068 if ((mode 
& DATAMODEL_MASK) == DATAMODEL_ILP32) { 7069 int i; 7070 rsm_consume_event_msg32_t cemsg32 = {0}; 7071 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7072 rsm_poll_event32_t *evlist32; 7073 size_t evlistsz32; 7074 7075 /* copyin the ioctl message */ 7076 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7077 sizeof (rsm_consume_event_msg32_t), mode)) { 7078 DBG_PRINTF((category, RSM_ERR, 7079 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7080 return (RSMERR_BAD_ADDR); 7081 } 7082 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7083 msgp->numents = (int)cemsg32.numents; 7084 7085 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7086 /* 7087 * If numents is large alloc events list on heap otherwise 7088 * use the address of array that was passed in. 7089 */ 7090 if (msgp->numents > RSM_MAX_POLLFDS) { 7091 if (msgp->numents > max_segs) { /* validate numents */ 7092 DBG_PRINTF((category, RSM_ERR, 7093 "consumeevent_copyin: " 7094 "RSMERR_BAD_ARGS_ERRORS\n")); 7095 return (RSMERR_BAD_ARGS_ERRORS); 7096 } 7097 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7098 } else { 7099 evlist32 = event32; 7100 } 7101 7102 /* copyin the seglist into the rsm_poll_event32_t array */ 7103 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7104 evlistsz32, mode)) { 7105 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7106 kmem_free(evlist32, evlistsz32); 7107 } 7108 DBG_PRINTF((category, RSM_ERR, 7109 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7110 return (RSMERR_BAD_ADDR); 7111 } 7112 7113 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7114 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7115 7116 if (msgp->numents > RSM_MAX_POLLFDS) { 7117 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7118 *eventspp = evlist; 7119 } else { 7120 evlist = *eventspp; 7121 } 7122 /* 7123 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7124 * array 7125 */ 7126 for (i = 0; i < msgp->numents; i++) { 7127 evlist[i].rnum = evlist32[i].rnum; 7128 
evlist[i].fdsidx = evlist32[i].fdsidx; 7129 evlist[i].revent = evlist32[i].revent; 7130 } 7131 /* free the temp 32-bit event list */ 7132 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7133 kmem_free(evlist32, evlistsz32); 7134 } 7135 7136 return (RSM_SUCCESS); 7137 } 7138 #endif 7139 /* copyin the ioctl message */ 7140 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7141 mode)) { 7142 DBG_PRINTF((category, RSM_ERR, 7143 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7144 return (RSMERR_BAD_ADDR); 7145 } 7146 /* 7147 * If numents is large alloc events list on heap otherwise 7148 * use the address of array that was passed in. 7149 */ 7150 if (msgp->numents > RSM_MAX_POLLFDS) { 7151 if (msgp->numents > max_segs) { /* validate numents */ 7152 DBG_PRINTF((category, RSM_ERR, 7153 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7154 return (RSMERR_BAD_ARGS_ERRORS); 7155 } 7156 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7157 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7158 *eventspp = evlist; 7159 } 7160 7161 /* copyin the seglist */ 7162 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7163 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7164 if (evlist) { 7165 kmem_free(evlist, evlistsz); 7166 *eventspp = NULL; 7167 } 7168 DBG_PRINTF((category, RSM_ERR, 7169 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7170 return (RSMERR_BAD_ADDR); 7171 } 7172 7173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7174 "consumeevent_copyin done\n")); 7175 return (RSM_SUCCESS); 7176 } 7177 7178 static int 7179 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7180 rsm_poll_event_t *eventsp, int mode) 7181 { 7182 size_t evlistsz; 7183 int err = RSM_SUCCESS; 7184 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7185 7186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7187 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7188 msgp->numents, eventsp)); 7189 7190 #ifdef _MULTI_DATAMODEL 7191 if ((mode & DATAMODEL_MASK) == 
DATAMODEL_ILP32) { 7192 int i; 7193 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7194 rsm_poll_event32_t *evlist32; 7195 size_t evlistsz32; 7196 7197 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7198 if (msgp->numents > RSM_MAX_POLLFDS) { 7199 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7200 } else { 7201 evlist32 = event32; 7202 } 7203 7204 /* 7205 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7206 * array 7207 */ 7208 for (i = 0; i < msgp->numents; i++) { 7209 evlist32[i].rnum = eventsp[i].rnum; 7210 evlist32[i].fdsidx = eventsp[i].fdsidx; 7211 evlist32[i].revent = eventsp[i].revent; 7212 } 7213 7214 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7215 evlistsz32, mode)) { 7216 err = RSMERR_BAD_ADDR; 7217 } 7218 7219 if (msgp->numents > RSM_MAX_POLLFDS) { 7220 if (evlist32) { /* free the temp 32-bit event list */ 7221 kmem_free(evlist32, evlistsz32); 7222 } 7223 /* 7224 * eventsp and evlistsz are based on rsm_poll_event_t 7225 * type 7226 */ 7227 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7228 /* event list on the heap and needs to be freed here */ 7229 if (eventsp) { 7230 kmem_free(eventsp, evlistsz); 7231 } 7232 } 7233 7234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7235 "consumeevent_copyout done: err=%d\n", err)); 7236 return (err); 7237 } 7238 #endif 7239 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7240 7241 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7242 mode)) { 7243 err = RSMERR_BAD_ADDR; 7244 } 7245 7246 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7247 /* event list on the heap and needs to be freed here */ 7248 kmem_free(eventsp, evlistsz); 7249 } 7250 7251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7252 "consumeevent_copyout done: err=%d\n", err)); 7253 return (err); 7254 } 7255 7256 static int 7257 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7258 { 7259 int rc; 7260 int i; 7261 minor_t rnum; 7262 rsm_consume_event_msg_t msg = {0}; 7263 rsmseg_t *seg; 7264 
rsm_poll_event_t *event_list; 7265 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7266 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7267 7268 event_list = events; 7269 7270 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) != 7271 RSM_SUCCESS) { 7272 return (rc); 7273 } 7274 7275 for (i = 0; i < msg.numents; i++) { 7276 rnum = event_list[i].rnum; 7277 event_list[i].revent = 0; 7278 /* get the segment structure */ 7279 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7280 if (seg) { 7281 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7282 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7283 seg)); 7284 if (seg->s_pollevent) { 7285 /* consume the event */ 7286 atomic_dec_32(&seg->s_pollevent); 7287 event_list[i].revent = POLLRDNORM; 7288 } 7289 rsmseglock_release(seg); 7290 } 7291 } 7292 7293 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7294 RSM_SUCCESS) { 7295 return (rc); 7296 } 7297 7298 return (RSM_SUCCESS); 7299 } 7300 7301 static int 7302 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7303 { 7304 int size; 7305 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7306 7307 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7308 7309 #ifdef _MULTI_DATAMODEL 7310 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7311 rsmka_iovec32_t *iovec32, *iovec32_base; 7312 int i; 7313 7314 size = count * sizeof (rsmka_iovec32_t); 7315 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7316 if (ddi_copyin((caddr_t)user_vec, 7317 (caddr_t)iovec32, size, mode)) { 7318 kmem_free(iovec32, size); 7319 DBG_PRINTF((category, RSM_DEBUG, 7320 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7321 return (RSMERR_BAD_ADDR); 7322 } 7323 7324 for (i = 0; i < count; i++, iovec++, iovec32++) { 7325 iovec->io_type = (int)iovec32->io_type; 7326 if (iovec->io_type == RSM_HANDLE_TYPE) 7327 iovec->local.segid = (rsm_memseg_id_t) 7328 iovec32->local; 7329 else 7330 iovec->local.vaddr = 7331 
(caddr_t)(uintptr_t)iovec32->local; 7332 iovec->local_offset = (size_t)iovec32->local_offset; 7333 iovec->remote_offset = (size_t)iovec32->remote_offset; 7334 iovec->transfer_len = (size_t)iovec32->transfer_len; 7335 7336 } 7337 kmem_free(iovec32_base, size); 7338 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7339 "iovec_copyin done\n")); 7340 return (DDI_SUCCESS); 7341 } 7342 #endif 7343 7344 size = count * sizeof (rsmka_iovec_t); 7345 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7346 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7347 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7348 return (RSMERR_BAD_ADDR); 7349 } 7350 7351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7352 7353 return (DDI_SUCCESS); 7354 } 7355 7356 7357 static int 7358 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7359 { 7360 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7361 7362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7363 7364 #ifdef _MULTI_DATAMODEL 7365 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7366 rsmka_scat_gath32_t sg_io32; 7367 7368 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7369 mode)) { 7370 DBG_PRINTF((category, RSM_DEBUG, 7371 "sgio_copyin done: returning EFAULT\n")); 7372 return (RSMERR_BAD_ADDR); 7373 } 7374 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7375 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7376 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7377 sg_io->flags = (size_t)sg_io32.flags; 7378 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7379 (uintptr_t)sg_io32.remote_handle; 7380 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7381 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7382 "sgio_copyin done\n")); 7383 return (DDI_SUCCESS); 7384 } 7385 #endif 7386 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7387 mode)) { 7388 DBG_PRINTF((category, RSM_DEBUG, 7389 "sgio_copyin done: returning 
EFAULT\n")); 7390 return (RSMERR_BAD_ADDR); 7391 } 7392 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7393 return (DDI_SUCCESS); 7394 } 7395 7396 static int 7397 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7398 { 7399 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7400 7401 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7402 "sgio_resid_copyout enter\n")); 7403 7404 #ifdef _MULTI_DATAMODEL 7405 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7406 rsmka_scat_gath32_t sg_io32; 7407 7408 sg_io32.io_residual_count = sg_io->io_residual_count; 7409 sg_io32.flags = sg_io->flags; 7410 7411 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7412 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7413 sizeof (uint32_t), mode)) { 7414 7415 DBG_PRINTF((category, RSM_ERR, 7416 "sgio_resid_copyout error: rescnt\n")); 7417 return (RSMERR_BAD_ADDR); 7418 } 7419 7420 if (ddi_copyout((caddr_t)&sg_io32.flags, 7421 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7422 sizeof (uint32_t), mode)) { 7423 7424 DBG_PRINTF((category, RSM_ERR, 7425 "sgio_resid_copyout error: flags\n")); 7426 return (RSMERR_BAD_ADDR); 7427 } 7428 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7429 "sgio_resid_copyout done\n")); 7430 return (DDI_SUCCESS); 7431 } 7432 #endif 7433 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7434 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7435 sizeof (ulong_t), mode)) { 7436 7437 DBG_PRINTF((category, RSM_ERR, 7438 "sgio_resid_copyout error:rescnt\n")); 7439 return (RSMERR_BAD_ADDR); 7440 } 7441 7442 if (ddi_copyout((caddr_t)&sg_io->flags, 7443 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7444 sizeof (uint_t), mode)) { 7445 7446 DBG_PRINTF((category, RSM_ERR, 7447 "sgio_resid_copyout error:flags\n")); 7448 return (RSMERR_BAD_ADDR); 7449 } 7450 7451 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7452 return (DDI_SUCCESS); 7453 } 7454 7455 7456 static int 7457 
rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7458 { 7459 rsmka_scat_gath_t sg_io; 7460 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7461 rsmka_iovec_t *ka_iovec; 7462 rsmka_iovec_t *ka_iovec_start; 7463 rsmpi_scat_gath_t rsmpi_sg_io; 7464 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7465 rsmpi_iovec_t *iovec; 7466 rsmpi_iovec_t *iovec_start = NULL; 7467 rsmapi_access_entry_t *acl; 7468 rsmresource_t *res; 7469 minor_t rnum; 7470 rsmseg_t *im_seg, *ex_seg; 7471 int e; 7472 int error = 0; 7473 uint_t i; 7474 uint_t iov_proc = 0; /* num of iovecs processed */ 7475 size_t size = 0; 7476 size_t ka_size; 7477 7478 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7479 7480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7481 7482 credp = credp; 7483 7484 /* 7485 * Copyin the scatter/gather structure and build new structure 7486 * for rsmpi. 7487 */ 7488 e = sgio_copyin(arg, &sg_io, mode); 7489 if (e != DDI_SUCCESS) { 7490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7491 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7492 return (e); 7493 } 7494 7495 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7496 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7497 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7498 sg_io.io_request_count)); 7499 return (RSMERR_BAD_SGIO); 7500 } 7501 7502 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7503 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7504 rsmpi_sg_io.io_segflg = 0; 7505 7506 /* Allocate memory and copyin io vector array */ 7507 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7508 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7509 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7510 } else { 7511 ka_iovec_start = ka_iovec = ka_iovec_arr; 7512 } 7513 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7514 sg_io.io_request_count, mode); 7515 if (e != DDI_SUCCESS) { 7516 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7517 
kmem_free(ka_iovec, ka_size); 7518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7519 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7520 return (e); 7521 } 7522 7523 /* get the import segment descriptor */ 7524 rnum = getminor(dev); 7525 res = rsmresource_lookup(rnum, RSM_LOCK); 7526 7527 /* 7528 * The following sequence of locking may (or MAY NOT) cause a 7529 * deadlock but this is currently not addressed here since the 7530 * implementation will be changed to incorporate the use of 7531 * reference counting for both the import and the export segments. 7532 */ 7533 7534 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7535 7536 im_seg = (rsmseg_t *)res; 7537 7538 if (im_seg == NULL) { 7539 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7540 kmem_free(ka_iovec, ka_size); 7541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7542 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7543 return (EINVAL); 7544 } 7545 /* putv/getv supported is supported only on import segments */ 7546 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7547 rsmseglock_release(im_seg); 7548 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7549 kmem_free(ka_iovec, ka_size); 7550 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7551 "rsm_iovec_ioctl done: not an import segment\n")); 7552 return (EINVAL); 7553 } 7554 7555 /* 7556 * wait for a remote DR to complete ie. for segments to get UNQUIESCED 7557 * as well as wait for a local DR to complete. 
7558 */ 7559 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7560 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7561 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7562 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7563 DBG_PRINTF((category, RSM_DEBUG, 7564 "rsm_iovec_ioctl done: cv_wait INTR")); 7565 rsmseglock_release(im_seg); 7566 return (RSMERR_INTERRUPTED); 7567 } 7568 } 7569 7570 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7571 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7572 7573 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7574 im_seg->s_state == RSM_STATE_NEW); 7575 7576 DBG_PRINTF((category, RSM_DEBUG, 7577 "rsm_iovec_ioctl done: im_seg not conn/map")); 7578 rsmseglock_release(im_seg); 7579 e = RSMERR_BAD_SGIO; 7580 goto out; 7581 } 7582 7583 im_seg->s_rdmacnt++; 7584 rsmseglock_release(im_seg); 7585 7586 /* 7587 * Allocate and set up the io vector for rsmpi 7588 */ 7589 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7590 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7591 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7592 } else { 7593 iovec_start = iovec = iovec_arr; 7594 } 7595 7596 rsmpi_sg_io.iovec = iovec; 7597 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7598 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7599 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7600 7601 if (ex_seg == NULL) { 7602 e = RSMERR_BAD_SGIO; 7603 break; 7604 } 7605 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7606 7607 acl = ex_seg->s_acl; 7608 if (acl[0].ae_permission == 0) { 7609 struct buf *xbuf; 7610 dev_t sdev = 0; 7611 7612 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7613 0, ex_seg->s_len, B_WRITE, 7614 sdev, 0, NULL, DDI_UMEM_SLEEP); 7615 7616 ASSERT(xbuf != NULL); 7617 7618 iovec->local_mem.ms_type = RSM_MEM_BUF; 7619 iovec->local_mem.ms_memory.bp = xbuf; 7620 } else { 7621 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7622 iovec->local_mem.ms_memory.handle = 7623 ex_seg->s_handle.out; 7624 } 7625 ex_seg->s_rdmacnt++; /* 
refcnt the handle */ 7626 rsmseglock_release(ex_seg); 7627 } else { 7628 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7629 iovec->local_mem.ms_memory.vr.vaddr = 7630 ka_iovec->local.vaddr; 7631 } 7632 7633 iovec->local_offset = ka_iovec->local_offset; 7634 iovec->remote_handle = im_seg->s_handle.in; 7635 iovec->remote_offset = ka_iovec->remote_offset; 7636 iovec->transfer_length = ka_iovec->transfer_len; 7637 iovec++; 7638 ka_iovec++; 7639 } 7640 7641 if (iov_proc < sg_io.io_request_count) { 7642 /* error while processing handle */ 7643 rsmseglock_acquire(im_seg); 7644 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7645 if (im_seg->s_rdmacnt == 0) { 7646 cv_broadcast(&im_seg->s_cv); 7647 } 7648 rsmseglock_release(im_seg); 7649 goto out; 7650 } 7651 7652 /* call rsmpi */ 7653 if (cmd == RSM_IOCTL_PUTV) 7654 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7655 im_seg->s_adapter->rsmpi_handle, 7656 &rsmpi_sg_io); 7657 else if (cmd == RSM_IOCTL_GETV) 7658 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7659 im_seg->s_adapter->rsmpi_handle, 7660 &rsmpi_sg_io); 7661 else { 7662 e = EINVAL; 7663 DBG_PRINTF((category, RSM_DEBUG, 7664 "iovec_ioctl: bad command = %x\n", cmd)); 7665 } 7666 7667 7668 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7669 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7670 7671 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7672 7673 /* 7674 * Check for implicit signal post flag and do the signal 7675 * post if needed 7676 */ 7677 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7678 e == RSM_SUCCESS) { 7679 rsmipc_request_t request; 7680 7681 request.rsmipc_key = im_seg->s_segid; 7682 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7683 request.rsmipc_segment_cookie = NULL; 7684 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7685 /* 7686 * Reset the implicit signal post flag to 0 to indicate 7687 * that the signal post has been done and need not be 7688 * done in the RSMAPI library 7689 */ 7690 sg_io.flags 
&= ~RSM_IMPLICIT_SIGPOST; 7691 } 7692 7693 rsmseglock_acquire(im_seg); 7694 im_seg->s_rdmacnt--; 7695 if (im_seg->s_rdmacnt == 0) { 7696 cv_broadcast(&im_seg->s_cv); 7697 } 7698 rsmseglock_release(im_seg); 7699 error = sgio_resid_copyout(arg, &sg_io, mode); 7700 out: 7701 iovec = iovec_start; 7702 ka_iovec = ka_iovec_start; 7703 for (i = 0; i < iov_proc; i++) { 7704 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7705 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7706 7707 ASSERT(ex_seg != NULL); 7708 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7709 7710 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7711 if (ex_seg->s_rdmacnt == 0) { 7712 cv_broadcast(&ex_seg->s_cv); 7713 } 7714 rsmseglock_release(ex_seg); 7715 } 7716 7717 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7718 7719 /* 7720 * At present there is no dependency on the existence of xbufs 7721 * created by ddi_umem_iosetup for each of the iovecs. So we 7722 * can these xbufs here. 7723 */ 7724 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7725 freerbuf(iovec->local_mem.ms_memory.bp); 7726 } 7727 7728 iovec++; 7729 ka_iovec++; 7730 } 7731 7732 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7733 if (iovec_start) 7734 kmem_free(iovec_start, size); 7735 kmem_free(ka_iovec_start, ka_size); 7736 } 7737 7738 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7739 "rsm_iovec_ioctl done %d\n", e)); 7740 /* if RSMPI call fails return that else return copyout's retval */ 7741 return ((e != RSM_SUCCESS) ? 
e : error); 7742 7743 } 7744 7745 7746 static int 7747 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7748 { 7749 adapter_t *adapter; 7750 rsm_addr_t addr; 7751 rsm_node_id_t node; 7752 int rval = DDI_SUCCESS; 7753 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7754 7755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7756 7757 adapter = rsm_getadapter(msg, mode); 7758 if (adapter == NULL) { 7759 DBG_PRINTF((category, RSM_DEBUG, 7760 "rsmaddr_ioctl done: adapter not found\n")); 7761 return (RSMERR_CTLR_NOT_PRESENT); 7762 } 7763 7764 switch (cmd) { 7765 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7766 /* returns the hwaddr in msg->hwaddr */ 7767 if (msg->nodeid == my_nodeid) { 7768 msg->hwaddr = adapter->hwaddr; 7769 } else { 7770 addr = get_remote_hwaddr(adapter, msg->nodeid); 7771 if ((int64_t)addr < 0) { 7772 rval = RSMERR_INTERNAL_ERROR; 7773 } else { 7774 msg->hwaddr = addr; 7775 } 7776 } 7777 break; 7778 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7779 /* returns the nodeid in msg->nodeid */ 7780 if (msg->hwaddr == adapter->hwaddr) { 7781 msg->nodeid = my_nodeid; 7782 } else { 7783 node = get_remote_nodeid(adapter, msg->hwaddr); 7784 if ((int)node < 0) { 7785 rval = RSMERR_INTERNAL_ERROR; 7786 } else { 7787 msg->nodeid = (rsm_node_id_t)node; 7788 } 7789 } 7790 break; 7791 default: 7792 rval = EINVAL; 7793 break; 7794 } 7795 7796 rsmka_release_adapter(adapter); 7797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7798 "rsmaddr_ioctl done: %d\n", rval)); 7799 return (rval); 7800 } 7801 7802 static int 7803 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7804 { 7805 DBG_DEFINE(category, 7806 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7807 7808 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7809 7810 #ifdef _MULTI_DATAMODEL 7811 7812 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7813 rsm_ioctlmsg32_t msg32; 7814 int i; 7815 7816 if 
(ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7818 "rsm_ddi_copyin done: EFAULT\n")); 7819 return (RSMERR_BAD_ADDR); 7820 } 7821 msg->len = msg32.len; 7822 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7823 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7824 msg->key = msg32.key; 7825 msg->acl_len = msg32.acl_len; 7826 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7827 msg->cnum = msg32.cnum; 7828 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7829 msg->cname_len = msg32.cname_len; 7830 msg->nodeid = msg32.nodeid; 7831 msg->hwaddr = msg32.hwaddr; 7832 msg->perm = msg32.perm; 7833 for (i = 0; i < 4; i++) { 7834 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7835 } 7836 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7837 "rsm_ddi_copyin done\n")); 7838 return (RSM_SUCCESS); 7839 } 7840 #endif 7841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7842 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7843 return (RSMERR_BAD_ADDR); 7844 else 7845 return (RSM_SUCCESS); 7846 } 7847 7848 static int 7849 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7850 { 7851 rsmka_int_controller_attr_t rsm_cattr; 7852 DBG_DEFINE(category, 7853 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7854 7855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7856 "rsmattr_ddi_copyout enter\n")); 7857 /* 7858 * need to copy appropriate data from rsm_controller_attr_t 7859 * to rsmka_int_controller_attr_t 7860 */ 7861 #ifdef _MULTI_DATAMODEL 7862 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7863 rsmka_int_controller_attr32_t rsm_cattr32; 7864 7865 rsm_cattr32.attr_direct_access_sizes = 7866 adapter->rsm_attr.attr_direct_access_sizes; 7867 rsm_cattr32.attr_atomic_sizes = 7868 adapter->rsm_attr.attr_atomic_sizes; 7869 rsm_cattr32.attr_page_size = 7870 adapter->rsm_attr.attr_page_size; 7871 if (adapter->rsm_attr.attr_max_export_segment_size > 7872 UINT_MAX) 7873 
rsm_cattr32.attr_max_export_segment_size = 7874 RSM_MAXSZ_PAGE_ALIGNED; 7875 else 7876 rsm_cattr32.attr_max_export_segment_size = 7877 adapter->rsm_attr.attr_max_export_segment_size; 7878 if (adapter->rsm_attr.attr_tot_export_segment_size > 7879 UINT_MAX) 7880 rsm_cattr32.attr_tot_export_segment_size = 7881 RSM_MAXSZ_PAGE_ALIGNED; 7882 else 7883 rsm_cattr32.attr_tot_export_segment_size = 7884 adapter->rsm_attr.attr_tot_export_segment_size; 7885 if (adapter->rsm_attr.attr_max_export_segments > 7886 UINT_MAX) 7887 rsm_cattr32.attr_max_export_segments = 7888 UINT_MAX; 7889 else 7890 rsm_cattr32.attr_max_export_segments = 7891 adapter->rsm_attr.attr_max_export_segments; 7892 if (adapter->rsm_attr.attr_max_import_map_size > 7893 UINT_MAX) 7894 rsm_cattr32.attr_max_import_map_size = 7895 RSM_MAXSZ_PAGE_ALIGNED; 7896 else 7897 rsm_cattr32.attr_max_import_map_size = 7898 adapter->rsm_attr.attr_max_import_map_size; 7899 if (adapter->rsm_attr.attr_tot_import_map_size > 7900 UINT_MAX) 7901 rsm_cattr32.attr_tot_import_map_size = 7902 RSM_MAXSZ_PAGE_ALIGNED; 7903 else 7904 rsm_cattr32.attr_tot_import_map_size = 7905 adapter->rsm_attr.attr_tot_import_map_size; 7906 if (adapter->rsm_attr.attr_max_import_segments > 7907 UINT_MAX) 7908 rsm_cattr32.attr_max_import_segments = 7909 UINT_MAX; 7910 else 7911 rsm_cattr32.attr_max_import_segments = 7912 adapter->rsm_attr.attr_max_import_segments; 7913 rsm_cattr32.attr_controller_addr = 7914 adapter->rsm_attr.attr_controller_addr; 7915 7916 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7917 "rsmattr_ddi_copyout done\n")); 7918 if (ddi_copyout((caddr_t)&rsm_cattr32, arg, 7919 sizeof (rsmka_int_controller_attr32_t), mode)) { 7920 return (RSMERR_BAD_ADDR); 7921 } 7922 else 7923 return (RSM_SUCCESS); 7924 } 7925 #endif 7926 rsm_cattr.attr_direct_access_sizes = 7927 adapter->rsm_attr.attr_direct_access_sizes; 7928 rsm_cattr.attr_atomic_sizes = 7929 adapter->rsm_attr.attr_atomic_sizes; 7930 rsm_cattr.attr_page_size = 7931 
adapter->rsm_attr.attr_page_size; 7932 rsm_cattr.attr_max_export_segment_size = 7933 adapter->rsm_attr.attr_max_export_segment_size; 7934 rsm_cattr.attr_tot_export_segment_size = 7935 adapter->rsm_attr.attr_tot_export_segment_size; 7936 rsm_cattr.attr_max_export_segments = 7937 adapter->rsm_attr.attr_max_export_segments; 7938 rsm_cattr.attr_max_import_map_size = 7939 adapter->rsm_attr.attr_max_import_map_size; 7940 rsm_cattr.attr_tot_import_map_size = 7941 adapter->rsm_attr.attr_tot_import_map_size; 7942 rsm_cattr.attr_max_import_segments = 7943 adapter->rsm_attr.attr_max_import_segments; 7944 rsm_cattr.attr_controller_addr = 7945 adapter->rsm_attr.attr_controller_addr; 7946 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7947 "rsmattr_ddi_copyout done\n")); 7948 if (ddi_copyout((caddr_t)&rsm_cattr, arg, 7949 sizeof (rsmka_int_controller_attr_t), mode)) { 7950 return (RSMERR_BAD_ADDR); 7951 } 7952 else 7953 return (RSM_SUCCESS); 7954 } 7955 7956 /*ARGSUSED*/ 7957 static int 7958 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 7959 int *rvalp) 7960 { 7961 rsmseg_t *seg; 7962 rsmresource_t *res; 7963 minor_t rnum; 7964 rsm_ioctlmsg_t msg = {0}; 7965 int error; 7966 adapter_t *adapter; 7967 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7968 7969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n")); 7970 7971 if (cmd == RSM_IOCTL_CONSUMEEVENT) { 7972 error = rsm_consumeevent_ioctl((caddr_t)arg, mode); 7973 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7974 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error)); 7975 return (error); 7976 } 7977 7978 /* topology cmd does not use the arg common to other cmds */ 7979 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) { 7980 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode); 7981 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7982 "rsm_ioctl done: %d\n", error)); 7983 return (error); 7984 } 7985 7986 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) { 7987 error = rsm_iovec_ioctl(dev, 
(caddr_t)arg, cmd, mode, credp); 7988 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7989 "rsm_ioctl done: %d\n", error)); 7990 return (error); 7991 } 7992 7993 /* 7994 * try to load arguments 7995 */ 7996 if (cmd != RSM_IOCTL_RING_BELL && 7997 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) { 7998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7999 "rsm_ioctl done: EFAULT\n")); 8000 return (RSMERR_BAD_ADDR); 8001 } 8002 8003 if (cmd == RSM_IOCTL_ATTR) { 8004 adapter = rsm_getadapter(&msg, mode); 8005 if (adapter == NULL) { 8006 DBG_PRINTF((category, RSM_DEBUG, 8007 "rsm_ioctl done: ENODEV\n")); 8008 return (RSMERR_CTLR_NOT_PRESENT); 8009 } 8010 error = rsmattr_ddi_copyout(adapter, msg.arg, mode); 8011 rsmka_release_adapter(adapter); 8012 DBG_PRINTF((category, RSM_DEBUG, 8013 "rsm_ioctl:after copyout %d\n", error)); 8014 return (error); 8015 } 8016 8017 if (cmd == RSM_IOCTL_BAR_INFO) { 8018 /* Return library off,len of barrier page */ 8019 msg.off = barrier_offset; 8020 msg.len = (int)barrier_size; 8021 #ifdef _MULTI_DATAMODEL 8022 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8023 rsm_ioctlmsg32_t msg32; 8024 8025 if (msg.len > UINT_MAX) 8026 msg.len = RSM_MAXSZ_PAGE_ALIGNED; 8027 else 8028 msg32.len = (int32_t)msg.len; 8029 msg32.off = (int32_t)msg.off; 8030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8031 "rsm_ioctl done\n")); 8032 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8033 sizeof (msg32), mode)) 8034 return (RSMERR_BAD_ADDR); 8035 else 8036 return (RSM_SUCCESS); 8037 } 8038 #endif 8039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8040 "rsm_ioctl done\n")); 8041 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8042 sizeof (msg), mode)) 8043 return (RSMERR_BAD_ADDR); 8044 else 8045 return (RSM_SUCCESS); 8046 } 8047 8048 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) { 8049 /* map the nodeid or hwaddr */ 8050 error = rsmaddr_ioctl(cmd, &msg, mode); 8051 if (error == RSM_SUCCESS) { 8052 #ifdef _MULTI_DATAMODEL 8053 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8054 
rsm_ioctlmsg32_t msg32; 8055 8056 msg32.hwaddr = (uint64_t)msg.hwaddr; 8057 msg32.nodeid = (uint32_t)msg.nodeid; 8058 8059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8060 "rsm_ioctl done\n")); 8061 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8062 sizeof (msg32), mode)) 8063 return (RSMERR_BAD_ADDR); 8064 else 8065 return (RSM_SUCCESS); 8066 } 8067 #endif 8068 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8069 "rsm_ioctl done\n")); 8070 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8071 sizeof (msg), mode)) 8072 return (RSMERR_BAD_ADDR); 8073 else 8074 return (RSM_SUCCESS); 8075 } 8076 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8077 "rsm_ioctl done: %d\n", error)); 8078 return (error); 8079 } 8080 8081 /* Find resource and look it in read mode */ 8082 rnum = getminor(dev); 8083 res = rsmresource_lookup(rnum, RSM_NOLOCK); 8084 ASSERT(res != NULL); 8085 8086 /* 8087 * Find command group 8088 */ 8089 switch (RSM_IOCTL_CMDGRP(cmd)) { 8090 case RSM_IOCTL_EXPORT_SEG: 8091 /* 8092 * Export list is searched during publish, loopback and 8093 * remote lookup call. 8094 */ 8095 seg = rsmresource_seg(res, rnum, credp, 8096 RSM_RESOURCE_EXPORT_SEGMENT); 8097 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) { 8098 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode, 8099 credp); 8100 } else { /* export ioctl on an import/barrier resource */ 8101 error = RSMERR_BAD_SEG_HNDL; 8102 } 8103 break; 8104 case RSM_IOCTL_IMPORT_SEG: 8105 /* Import list is searched during remote unmap call. 
*/ 8106 seg = rsmresource_seg(res, rnum, credp, 8107 RSM_RESOURCE_IMPORT_SEGMENT); 8108 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8109 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode, 8110 credp); 8111 } else { /* import ioctl on an export/barrier resource */ 8112 error = RSMERR_BAD_SEG_HNDL; 8113 } 8114 break; 8115 case RSM_IOCTL_BAR: 8116 if (res != RSMRC_RESERVED && 8117 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8118 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg, 8119 mode); 8120 } else { /* invalid res value */ 8121 error = RSMERR_BAD_SEG_HNDL; 8122 } 8123 break; 8124 case RSM_IOCTL_BELL: 8125 if (res != RSMRC_RESERVED) { 8126 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) 8127 error = exportbell_ioctl((rsmseg_t *)res, cmd); 8128 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT) 8129 error = importbell_ioctl((rsmseg_t *)res, cmd); 8130 else /* RSM_RESOURCE_BAR */ 8131 error = RSMERR_BAD_SEG_HNDL; 8132 } else { /* invalid res value */ 8133 error = RSMERR_BAD_SEG_HNDL; 8134 } 8135 break; 8136 default: 8137 error = EINVAL; 8138 } 8139 8140 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n", 8141 error)); 8142 return (error); 8143 } 8144 8145 8146 /* **************************** Segment Mapping Operations ********* */ 8147 static rsm_mapinfo_t * 8148 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset, 8149 size_t *map_len) 8150 { 8151 rsm_mapinfo_t *p; 8152 /* 8153 * Find the correct mapinfo structure to use during the mapping 8154 * from the seg->s_mapinfo list. 8155 * The seg->s_mapinfo list contains in reverse order the mappings 8156 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to 8157 * access the correct entry within this list for the mapping 8158 * requested. 8159 * 8160 * The algorithm for selecting a list entry is as follows: 8161 * 8162 * When start_offset of an entry <= off we have found the entry 8163 * we were looking for. 
Adjust the dev_offset and map_len (needs 8164 * to be PAGESIZE aligned). 8165 */ 8166 p = seg->s_mapinfo; 8167 for (; p; p = p->next) { 8168 if (p->start_offset <= off) { 8169 *dev_offset = p->dev_offset + off - p->start_offset; 8170 *map_len = (len > p->individual_len) ? 8171 p->individual_len : ptob(btopr(len)); 8172 return (p); 8173 } 8174 p = p->next; 8175 } 8176 8177 return (NULL); 8178 } 8179 8180 static void 8181 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo) 8182 { 8183 rsm_mapinfo_t *p; 8184 8185 while (mapinfo != NULL) { 8186 p = mapinfo; 8187 mapinfo = mapinfo->next; 8188 kmem_free(p, sizeof (*p)); 8189 } 8190 } 8191 8192 static int 8193 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off, 8194 size_t len, void **pvtp) 8195 { 8196 rsmcookie_t *p; 8197 rsmresource_t *res; 8198 rsmseg_t *seg; 8199 minor_t rnum; 8200 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8201 8202 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n")); 8203 8204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8205 "rsmmap_map: dhp = %x\n", dhp)); 8206 8207 flags = flags; 8208 8209 rnum = getminor(dev); 8210 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8211 ASSERT(res != NULL); 8212 8213 seg = (rsmseg_t *)res; 8214 8215 rsmseglock_acquire(seg); 8216 8217 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8218 8219 /* 8220 * Allocate structure and add cookie to segment list 8221 */ 8222 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8223 8224 p->c_dhp = dhp; 8225 p->c_off = off; 8226 p->c_len = len; 8227 p->c_next = seg->s_ckl; 8228 seg->s_ckl = p; 8229 8230 *pvtp = (void *)seg; 8231 8232 rsmseglock_release(seg); 8233 8234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n")); 8235 return (DDI_SUCCESS); 8236 } 8237 8238 /* 8239 * Page fault handling is done here. 
The prerequisite mapping setup 8240 * has been done in rsm_devmap with calls to ddi_devmem_setup or 8241 * ddi_umem_setup 8242 */ 8243 static int 8244 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len, 8245 uint_t type, uint_t rw) 8246 { 8247 int e; 8248 rsmseg_t *seg = (rsmseg_t *)pvt; 8249 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8250 8251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n")); 8252 8253 rsmseglock_acquire(seg); 8254 8255 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8256 8257 while (seg->s_state == RSM_STATE_MAP_QUIESCE) { 8258 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8259 DBG_PRINTF((category, RSM_DEBUG, 8260 "rsmmap_access done: cv_wait INTR")); 8261 rsmseglock_release(seg); 8262 return (RSMERR_INTERRUPTED); 8263 } 8264 } 8265 8266 ASSERT(seg->s_state == RSM_STATE_DISCONNECT || 8267 seg->s_state == RSM_STATE_ACTIVE); 8268 8269 if (seg->s_state == RSM_STATE_DISCONNECT) 8270 seg->s_flags |= RSM_IMPORT_DUMMY; 8271 8272 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8273 "rsmmap_access: dhp = %x\n", dhp)); 8274 8275 rsmseglock_release(seg); 8276 8277 if (e = devmap_load(dhp, offset, len, type, rw)) { 8278 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n")); 8279 } 8280 8281 8282 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n")); 8283 8284 return (e); 8285 } 8286 8287 static int 8288 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp, 8289 void **newpvt) 8290 { 8291 rsmseg_t *seg = (rsmseg_t *)oldpvt; 8292 rsmcookie_t *p, *old; 8293 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8294 8295 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n")); 8296 8297 /* 8298 * Same as map, create an entry to hold cookie and add it to 8299 * connect segment list. The oldpvt is a pointer to segment. 8300 * Return segment pointer in newpvt. 
8301 */ 8302 rsmseglock_acquire(seg); 8303 8304 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8305 8306 /* 8307 * Find old cookie 8308 */ 8309 for (old = seg->s_ckl; old != NULL; old = old->c_next) { 8310 if (old->c_dhp == dhp) { 8311 break; 8312 } 8313 } 8314 if (old == NULL) { 8315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8316 "rsmmap_dup done: EINVAL\n")); 8317 rsmseglock_release(seg); 8318 return (EINVAL); 8319 } 8320 8321 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8322 8323 p->c_dhp = new_dhp; 8324 p->c_off = old->c_off; 8325 p->c_len = old->c_len; 8326 p->c_next = seg->s_ckl; 8327 seg->s_ckl = p; 8328 8329 *newpvt = (void *)seg; 8330 8331 rsmseglock_release(seg); 8332 8333 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n")); 8334 8335 return (DDI_SUCCESS); 8336 } 8337 8338 static void 8339 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len, 8340 devmap_cookie_t new_dhp1, void **pvtp1, 8341 devmap_cookie_t new_dhp2, void **pvtp2) 8342 { 8343 /* 8344 * Remove pvtp structure from segment list. 8345 */ 8346 rsmseg_t *seg = (rsmseg_t *)pvtp; 8347 int freeflag; 8348 8349 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8350 8351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n")); 8352 8353 off = off; len = len; 8354 pvtp1 = pvtp1; pvtp2 = pvtp2; 8355 8356 rsmseglock_acquire(seg); 8357 8358 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8359 8360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8361 "rsmmap_unmap: dhp = %x\n", dhp)); 8362 /* 8363 * We can go ahead and remove the dhps even if we are in 8364 * the MAPPING state because the dhps being removed here 8365 * belong to a different mmap and we are holding the segment 8366 * lock. 
8367 */ 8368 if (new_dhp1 == NULL && new_dhp2 == NULL) { 8369 /* find and remove dhp handle */ 8370 rsmcookie_t *tmp, **back = &seg->s_ckl; 8371 8372 while (*back != NULL) { 8373 tmp = *back; 8374 if (tmp->c_dhp == dhp) { 8375 *back = tmp->c_next; 8376 kmem_free(tmp, sizeof (*tmp)); 8377 break; 8378 } 8379 back = &tmp->c_next; 8380 } 8381 } else { 8382 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8383 "rsmmap_unmap:parital unmap" 8384 "new_dhp1 %lx, new_dhp2 %lx\n", 8385 (size_t)new_dhp1, (size_t)new_dhp2)); 8386 } 8387 8388 /* 8389 * rsmmap_unmap is called for each mapping cookie on the list. 8390 * When the list becomes empty and we are not in the MAPPING 8391 * state then unmap in the rsmpi driver. 8392 */ 8393 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING)) 8394 (void) rsm_unmap(seg); 8395 8396 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) { 8397 freeflag = 1; 8398 } else { 8399 freeflag = 0; 8400 } 8401 8402 rsmseglock_release(seg); 8403 8404 if (freeflag) { 8405 /* Free the segment structure */ 8406 rsmseg_free(seg); 8407 } 8408 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n")); 8409 8410 } 8411 8412 static struct devmap_callback_ctl rsmmap_ops = { 8413 DEVMAP_OPS_REV, /* devmap_ops version number */ 8414 rsmmap_map, /* devmap_ops map routine */ 8415 rsmmap_access, /* devmap_ops access routine */ 8416 rsmmap_dup, /* devmap_ops dup routine */ 8417 rsmmap_unmap, /* devmap_ops unmap routine */ 8418 }; 8419 8420 static int 8421 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len, 8422 size_t *maplen, uint_t model /*ARGSUSED*/) 8423 { 8424 struct devmap_callback_ctl *callbackops = &rsmmap_ops; 8425 int err; 8426 uint_t maxprot; 8427 minor_t rnum; 8428 rsmseg_t *seg; 8429 off_t dev_offset; 8430 size_t cur_len; 8431 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8432 8433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n")); 8434 8435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8436 
"rsm_devmap: off = %lx, len = %lx\n", off, len)); 8437 rnum = getminor(dev); 8438 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8439 ASSERT(seg != NULL); 8440 8441 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8442 if ((off == barrier_offset) && 8443 (len == barrier_size)) { 8444 8445 ASSERT(bar_va != NULL && bar_cookie != NULL); 8446 8447 /* 8448 * The offset argument in devmap_umem_setup represents 8449 * the offset within the kernel memory defined by the 8450 * cookie. We use this offset as barrier_offset. 8451 */ 8452 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie, 8453 barrier_offset, len, PROT_USER|PROT_READ, 8454 DEVMAP_DEFAULTS, 0); 8455 8456 if (err != 0) { 8457 DBG_PRINTF((category, RSM_ERR, 8458 "rsm_devmap done: %d\n", err)); 8459 return (RSMERR_MAP_FAILED); 8460 } 8461 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8462 "rsm_devmap done: %d\n", err)); 8463 8464 *maplen = barrier_size; 8465 8466 return (err); 8467 } else { 8468 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8469 "rsm_devmap done: %d\n", err)); 8470 return (RSMERR_MAP_FAILED); 8471 } 8472 } 8473 8474 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8475 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8476 8477 /* 8478 * Make sure we still have permission for the map operation. 8479 */ 8480 maxprot = PROT_USER; 8481 if (seg->s_mode & RSM_PERM_READ) { 8482 maxprot |= PROT_READ; 8483 } 8484 8485 if (seg->s_mode & RSM_PERM_WRITE) { 8486 maxprot |= PROT_WRITE; 8487 } 8488 8489 /* 8490 * For each devmap call, rsmmap_map is called. This maintains driver 8491 * private information for the mapping. Thus, if there are multiple 8492 * devmap calls there will be multiple rsmmap_map calls and for each 8493 * call, the mapping information will be stored. 8494 * In case of an error during the processing of the devmap call, error 8495 * will be returned. This error return causes the caller of rsm_devmap 8496 * to undo all the mappings by calling rsmmap_unmap for each one. 
8497 * rsmmap_unmap will free up the private information for the requested 8498 * mapping. 8499 */ 8500 if (seg->s_node != my_nodeid) { 8501 rsm_mapinfo_t *p; 8502 8503 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len); 8504 if (p == NULL) { 8505 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8506 "rsm_devmap: incorrect mapping info\n")); 8507 return (RSMERR_MAP_FAILED); 8508 } 8509 err = devmap_devmem_setup(dhc, p->dip, 8510 callbackops, p->dev_register, 8511 dev_offset, cur_len, maxprot, 8512 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0); 8513 8514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8515 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx," 8516 "off=%lx,len=%lx\n", 8517 p->dip, p->dev_register, dev_offset, off, cur_len)); 8518 8519 if (err != 0) { 8520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8521 "rsm_devmap: devmap_devmem_setup failed %d\n", 8522 err)); 8523 return (RSMERR_MAP_FAILED); 8524 } 8525 /* cur_len is always an integral multiple pagesize */ 8526 ASSERT((cur_len & (PAGESIZE-1)) == 0); 8527 *maplen = cur_len; 8528 return (err); 8529 8530 } else { 8531 err = devmap_umem_setup(dhc, rsm_dip, callbackops, 8532 seg->s_cookie, off, len, maxprot, 8533 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0); 8534 if (err != 0) { 8535 DBG_PRINTF((category, RSM_DEBUG, 8536 "rsm_devmap: devmap_umem_setup failed %d\n", 8537 err)); 8538 return (RSMERR_MAP_FAILED); 8539 } 8540 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8541 "rsm_devmap: loopback done\n")); 8542 8543 *maplen = ptob(btopr(len)); 8544 8545 return (err); 8546 } 8547 } 8548 8549 /* 8550 * We can use the devmap framework for mapping device memory to user space by 8551 * specifying this routine in the rsm_cb_ops structure. 
The kernel mmap 8552 * processing calls this entry point and devmap_setup is called within this 8553 * function, which eventually calls rsm_devmap 8554 */ 8555 static int 8556 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 8557 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 8558 { 8559 int error = 0; 8560 int old_state; 8561 minor_t rnum; 8562 rsmseg_t *seg, *eseg; 8563 adapter_t *adapter; 8564 rsm_import_share_t *sharedp; 8565 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8566 8567 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n")); 8568 8569 /* 8570 * find segment 8571 */ 8572 rnum = getminor(dev); 8573 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 8574 8575 if (seg == NULL) { 8576 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8577 "rsm_segmap done: invalid segment\n")); 8578 return (EINVAL); 8579 } 8580 8581 /* 8582 * the user is trying to map a resource that has not been 8583 * defined yet. The library uses this to map in the 8584 * barrier page. 
8585 */ 8586 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8587 rsmseglock_release(seg); 8588 8589 /* 8590 * The mapping for the barrier page is identified 8591 * by the special offset barrier_offset 8592 */ 8593 8594 if (off == (off_t)barrier_offset || 8595 len == (off_t)barrier_size) { 8596 if (bar_cookie == NULL || bar_va == NULL) { 8597 DBG_PRINTF((category, RSM_DEBUG, 8598 "rsm_segmap: bar cookie/va is NULL\n")); 8599 return (EINVAL); 8600 } 8601 8602 error = devmap_setup(dev, (offset_t)off, as, addrp, 8603 (size_t)len, prot, maxprot, flags, cred); 8604 8605 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8606 "rsm_segmap done: %d\n", error)); 8607 return (error); 8608 } else { 8609 DBG_PRINTF((category, RSM_DEBUG, 8610 "rsm_segmap: bad offset/length\n")); 8611 return (EINVAL); 8612 } 8613 } 8614 8615 /* Make sure you can only map imported segments */ 8616 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) { 8617 rsmseglock_release(seg); 8618 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8619 "rsm_segmap done: not an import segment\n")); 8620 return (EINVAL); 8621 } 8622 /* check means library is broken */ 8623 ASSERT(seg->s_hdr.rsmrc_num == rnum); 8624 8625 /* wait for the segment to become unquiesced */ 8626 while (seg->s_state == RSM_STATE_CONN_QUIESCE) { 8627 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8628 rsmseglock_release(seg); 8629 DBG_PRINTF((category, RSM_DEBUG, 8630 "rsm_segmap done: cv_wait INTR")); 8631 return (ENODEV); 8632 } 8633 } 8634 8635 /* wait until segment leaves the mapping state */ 8636 while (seg->s_state == RSM_STATE_MAPPING) 8637 cv_wait(&seg->s_cv, &seg->s_lock); 8638 8639 /* 8640 * we allow multiple maps of the same segment in the KA 8641 * and it works because we do an rsmpi map of the whole 8642 * segment during the first map and all the device mapping 8643 * information needed in rsm_devmap is in the mapinfo list. 
8644 */ 8645 if ((seg->s_state != RSM_STATE_CONNECT) && 8646 (seg->s_state != RSM_STATE_ACTIVE)) { 8647 rsmseglock_release(seg); 8648 DBG_PRINTF((category, RSM_DEBUG, 8649 "rsm_segmap done: segment not connected\n")); 8650 return (ENODEV); 8651 } 8652 8653 /* 8654 * Make sure we are not mapping a larger segment than what's 8655 * exported 8656 */ 8657 if ((size_t)off + ptob(btopr(len)) > seg->s_len) { 8658 rsmseglock_release(seg); 8659 DBG_PRINTF((category, RSM_DEBUG, 8660 "rsm_segmap done: off+len>seg size\n")); 8661 return (ENXIO); 8662 } 8663 8664 /* 8665 * Make sure we still have permission for the map operation. 8666 */ 8667 maxprot = PROT_USER; 8668 if (seg->s_mode & RSM_PERM_READ) { 8669 maxprot |= PROT_READ; 8670 } 8671 8672 if (seg->s_mode & RSM_PERM_WRITE) { 8673 maxprot |= PROT_WRITE; 8674 } 8675 8676 if ((prot & maxprot) != prot) { 8677 /* No permission */ 8678 rsmseglock_release(seg); 8679 DBG_PRINTF((category, RSM_DEBUG, 8680 "rsm_segmap done: no permission\n")); 8681 return (EACCES); 8682 } 8683 8684 old_state = seg->s_state; 8685 8686 ASSERT(seg->s_share != NULL); 8687 8688 rsmsharelock_acquire(seg); 8689 8690 sharedp = seg->s_share; 8691 8692 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8693 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 8694 8695 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) && 8696 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) { 8697 rsmsharelock_release(seg); 8698 rsmseglock_release(seg); 8699 DBG_PRINTF((category, RSM_DEBUG, 8700 "rsm_segmap done:RSMSI_STATE %d invalid\n", 8701 sharedp->rsmsi_state)); 8702 return (ENODEV); 8703 } 8704 8705 /* 8706 * Do the map - since we want importers to share mappings 8707 * we do the rsmpi map for the whole segment 8708 */ 8709 if (seg->s_node != my_nodeid) { 8710 uint_t dev_register; 8711 off_t dev_offset; 8712 dev_info_t *dip; 8713 size_t tmp_len; 8714 size_t total_length_mapped = 0; 8715 size_t length_to_map = seg->s_len; 8716 off_t tmp_off = 0; 8717 rsm_mapinfo_t *p; 8718 8719 
/* 8720 * length_to_map = seg->s_len is always an integral 8721 * multiple of PAGESIZE. Length mapped in each entry in mapinfo 8722 * list is a multiple of PAGESIZE - RSMPI map ensures this 8723 */ 8724 8725 adapter = seg->s_adapter; 8726 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8727 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8728 8729 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) { 8730 error = 0; 8731 /* map the whole segment */ 8732 while (total_length_mapped < seg->s_len) { 8733 tmp_len = 0; 8734 8735 error = adapter->rsmpi_ops->rsm_map( 8736 seg->s_handle.in, tmp_off, 8737 length_to_map, &tmp_len, 8738 &dip, &dev_register, &dev_offset, 8739 NULL, NULL); 8740 8741 if (error != 0) 8742 break; 8743 8744 /* 8745 * Store the mapping info obtained from rsm_map 8746 */ 8747 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8748 p->dev_register = dev_register; 8749 p->dev_offset = dev_offset; 8750 p->dip = dip; 8751 p->individual_len = tmp_len; 8752 p->start_offset = tmp_off; 8753 p->next = sharedp->rsmsi_mapinfo; 8754 sharedp->rsmsi_mapinfo = p; 8755 8756 total_length_mapped += tmp_len; 8757 length_to_map -= tmp_len; 8758 tmp_off += tmp_len; 8759 } 8760 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8761 8762 if (error != RSM_SUCCESS) { 8763 /* Check if this is the the first rsm_map */ 8764 if (sharedp->rsmsi_mapinfo != NULL) { 8765 /* 8766 * A single rsm_unmap undoes 8767 * multiple rsm_maps. 
8768 */ 8769 (void) seg->s_adapter->rsmpi_ops-> 8770 rsm_unmap(sharedp->rsmsi_handle); 8771 rsm_free_mapinfo(sharedp-> 8772 rsmsi_mapinfo); 8773 } 8774 sharedp->rsmsi_mapinfo = NULL; 8775 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8776 rsmsharelock_release(seg); 8777 rsmseglock_release(seg); 8778 DBG_PRINTF((category, RSM_DEBUG, 8779 "rsm_segmap done: rsmpi map err %d\n", 8780 error)); 8781 ASSERT(error != RSMERR_BAD_LENGTH && 8782 error != RSMERR_BAD_MEM_ALIGNMENT && 8783 error != RSMERR_BAD_SEG_HNDL); 8784 if (error == RSMERR_UNSUPPORTED_OPERATION) 8785 return (ENOTSUP); 8786 else if (error == RSMERR_INSUFFICIENT_RESOURCES) 8787 return (EAGAIN); 8788 else if (error == RSMERR_CONN_ABORTED) 8789 return (ENODEV); 8790 else 8791 return (error); 8792 } else { 8793 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8794 } 8795 } else { 8796 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8797 } 8798 8799 sharedp->rsmsi_mapcnt++; 8800 8801 rsmsharelock_release(seg); 8802 8803 /* move to an intermediate mapping state */ 8804 seg->s_state = RSM_STATE_MAPPING; 8805 rsmseglock_release(seg); 8806 8807 error = devmap_setup(dev, (offset_t)off, as, addrp, 8808 len, prot, maxprot, flags, cred); 8809 8810 rsmseglock_acquire(seg); 8811 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8812 8813 if (error == DDI_SUCCESS) { 8814 seg->s_state = RSM_STATE_ACTIVE; 8815 } else { 8816 rsmsharelock_acquire(seg); 8817 8818 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8819 8820 sharedp->rsmsi_mapcnt--; 8821 if (sharedp->rsmsi_mapcnt == 0) { 8822 /* unmap the shared RSMPI mapping */ 8823 ASSERT(sharedp->rsmsi_handle != NULL); 8824 (void) adapter->rsmpi_ops-> 8825 rsm_unmap(sharedp->rsmsi_handle); 8826 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 8827 sharedp->rsmsi_mapinfo = NULL; 8828 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8829 } 8830 8831 rsmsharelock_release(seg); 8832 seg->s_state = old_state; 8833 DBG_PRINTF((category, RSM_ERR, 8834 "rsm: devmap_setup failed %d\n", error)); 8835 } 8836 
cv_broadcast(&seg->s_cv); 8837 rsmseglock_release(seg); 8838 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n", 8839 error)); 8840 return (error); 8841 } else { 8842 /* 8843 * For loopback, the export segment mapping cookie (s_cookie) 8844 * is also used as the s_cookie value for its import segments 8845 * during mapping. 8846 * Note that reference counting for s_cookie of the export 8847 * segment is not required due to the following: 8848 * We never have a case of the export segment being destroyed, 8849 * leaving the import segments with a stale value for the 8850 * s_cookie field, since a force disconnect is done prior to a 8851 * destroy of an export segment. The force disconnect causes 8852 * the s_cookie value to be reset to NULL. Also for the 8853 * rsm_rebind operation, we change the s_cookie value of the 8854 * export segment as well as of all its local (loopback) 8855 * importers. 8856 */ 8857 DBG_ADDCATEGORY(category, RSM_LOOPBACK); 8858 8859 rsmsharelock_release(seg); 8860 /* 8861 * In order to maintain the lock ordering between the export 8862 * and import segment locks, we need to acquire the export 8863 * segment lock first and only then acquire the import 8864 * segment lock. 8865 * The above is necessary to avoid any deadlock scenarios 8866 * with rsm_rebind which also acquires both the export 8867 * and import segment locks in the above mentioned order. 8868 * Based on code inspection, there seem to be no other 8869 * situations in which both the export and import segment 8870 * locks are acquired either in the same or opposite order 8871 * as mentioned above. 8872 * Thus in order to conform to the above lock order, we 8873 * need to change the state of the import segment to 8874 * RSM_STATE_MAPPING, release the lock. Once this is done we 8875 * can now safely acquire the export segment lock first 8876 * followed by the import segment lock which is as per 8877 * the lock order mentioned above. 
8878 */ 8879 /* move to an intermediate mapping state */ 8880 seg->s_state = RSM_STATE_MAPPING; 8881 rsmseglock_release(seg); 8882 8883 eseg = rsmexport_lookup(seg->s_key); 8884 8885 if (eseg == NULL) { 8886 rsmseglock_acquire(seg); 8887 /* 8888 * Revert to old_state and signal any waiters 8889 * The shared state is not changed 8890 */ 8891 8892 seg->s_state = old_state; 8893 cv_broadcast(&seg->s_cv); 8894 rsmseglock_release(seg); 8895 DBG_PRINTF((category, RSM_DEBUG, 8896 "rsm_segmap done: key %d not found\n", seg->s_key)); 8897 return (ENODEV); 8898 } 8899 8900 rsmsharelock_acquire(seg); 8901 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8902 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8903 8904 sharedp->rsmsi_mapcnt++; 8905 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8906 rsmsharelock_release(seg); 8907 8908 ASSERT(eseg->s_cookie != NULL); 8909 8910 /* 8911 * It is not required or necessary to acquire the import 8912 * segment lock here to change the value of s_cookie since 8913 * no one will touch the import segment as long as it is 8914 * in the RSM_STATE_MAPPING state. 
		 */
		seg->s_cookie = eseg->s_cookie;

		rsmseglock_release(eseg);

		error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
		    prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);
		if (error == 0) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			/*
			 * devmap_setup failed: drop the map count on the
			 * shared structure and, if this was the last mapping,
			 * move the shared state back to CONNECTED.
			 */
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}
			rsmsharelock_release(seg);
			/* restore the import segment to its pre-map state */
			seg->s_state = old_state;
			seg->s_cookie = NULL;
		}
		/* wake up anyone waiting for the segment to leave MAPPING */
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsm_segmap done: %d\n", error));
		return (error);
	}
}

/*
 * Loopback ("null") RSMPI entry points.
 *
 * Each of the rsmka_null_* routines below ignores all of its arguments,
 * performs no work and reports success.  rsmka_init_loopback() installs
 * them in the null_rsmpi_ops vector used by the loopback adapter.
 */

/* Null segment-create entry point: always returns RSM_SUCCESS. */
int
rsmka_null_seg_create(
    rsm_controller_handle_t argcp,
    rsm_memseg_export_handle_t *handle,
    size_t size,
    uint_t flags,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


/* Null segment-destroy entry point: always returns RSM_SUCCESS. */
int
rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


/* Null bind entry point: always returns RSM_SUCCESS. */
int
rsmka_null_bind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *argmemory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


/*
 * Null unbind entry point.  Unlike its siblings this one returns
 * DDI_SUCCESS rather than RSM_SUCCESS.
 */
int
rsmka_null_unbind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    size_t length /*ARGSUSED*/)
{
	return (DDI_SUCCESS);
}

/* Null rebind entry point: always returns RSM_SUCCESS. */
int
rsmka_null_rebind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}

/* Null publish entry point: always returns RSM_SUCCESS. */
int
rsmka_null_publish(
    rsm_memseg_export_handle_t argmemseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_memseg_id_t segment_id,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


/* Null republish entry point: always returns RSM_SUCCESS. */
int
rsmka_null_republish(
    rsm_memseg_export_handle_t memseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}

/* Null unpublish entry point: always returns RSM_SUCCESS. */
int
rsmka_null_unpublish(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


/*
 * rsmka_init_loopback
 *
 * Set up the loopback adapter: point every export-side slot of
 * null_rsmpi_ops at the matching rsmka_null_* stub, fill in the loopback
 * attributes (name and page size) and attach both to loopback_adapter.
 */
void
rsmka_init_loopback()
{
	rsm_ops_t	*ops = &null_rsmpi_ops;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmka_init_loopback enter\n"));

	/* initialize null ops vector */
	ops->rsm_seg_create = rsmka_null_seg_create;
	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
	ops->rsm_bind = rsmka_null_bind;
	ops->rsm_unbind = rsmka_null_unbind;
	ops->rsm_rebind = rsmka_null_rebind;
	ops->rsm_publish = rsmka_null_publish;
	ops->rsm_unpublish = rsmka_null_unpublish;
	ops->rsm_republish = rsmka_null_republish;

	/* initialize attributes for loopback adapter */
	loopback_attr.attr_name = loopback_str;
	loopback_attr.attr_page_size = 0x8; /* 8K */

	/* initialize loopback adapter */
	loopback_adapter.rsm_attr = loopback_attr;
	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmka_init_loopback done\n"));
}

/* ************** DR functions ********************************** */
static
void 9067 rsm_quiesce_exp_seg(rsmresource_t *resp) 9068 { 9069 int recheck_state; 9070 rsmseg_t *segp = (rsmseg_t *)resp; 9071 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9072 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9073 9074 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9075 "%s enter: key=%u\n", function, segp->s_key)); 9076 9077 rsmseglock_acquire(segp); 9078 do { 9079 recheck_state = 0; 9080 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) || 9081 (segp->s_state == RSM_STATE_BIND_QUIESCED) || 9082 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) || 9083 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) { 9084 rsmseglock_release(segp); 9085 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9086 "%s done:state =%d\n", function, 9087 segp->s_state)); 9088 return; 9089 } 9090 9091 if (segp->s_state == RSM_STATE_NEW) { 9092 segp->s_state = RSM_STATE_NEW_QUIESCED; 9093 rsmseglock_release(segp); 9094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9095 "%s done:state =%d\n", function, 9096 segp->s_state)); 9097 return; 9098 } 9099 9100 if (segp->s_state == RSM_STATE_BIND) { 9101 /* unbind */ 9102 (void) rsm_unbind_pages(segp); 9103 segp->s_state = RSM_STATE_BIND_QUIESCED; 9104 rsmseglock_release(segp); 9105 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9106 "%s done:state =%d\n", function, 9107 segp->s_state)); 9108 return; 9109 } 9110 9111 if (segp->s_state == RSM_STATE_EXPORT) { 9112 /* 9113 * wait for putv/getv to complete if the segp is 9114 * a local memory handle 9115 */ 9116 while ((segp->s_state == RSM_STATE_EXPORT) && 9117 (segp->s_rdmacnt != 0)) { 9118 cv_wait(&segp->s_cv, &segp->s_lock); 9119 } 9120 9121 if (segp->s_state != RSM_STATE_EXPORT) { 9122 /* 9123 * state changed need to see what it 9124 * should be changed to. 
9125 */ 9126 recheck_state = 1; 9127 continue; 9128 } 9129 9130 segp->s_state = RSM_STATE_EXPORT_QUIESCING; 9131 rsmseglock_release(segp); 9132 /* 9133 * send SUSPEND messages - currently it will be 9134 * done at the end 9135 */ 9136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9137 "%s done:state =%d\n", function, 9138 segp->s_state)); 9139 return; 9140 } 9141 } while (recheck_state); 9142 9143 rsmseglock_release(segp); 9144 } 9145 9146 static void 9147 rsm_unquiesce_exp_seg(rsmresource_t *resp) 9148 { 9149 int ret; 9150 rsmseg_t *segp = (rsmseg_t *)resp; 9151 rsmapi_access_entry_t *acl; 9152 rsm_access_entry_t *rsmpi_acl; 9153 int acl_len; 9154 int create_flags = 0; 9155 struct buf *xbuf; 9156 rsm_memory_local_t mem; 9157 adapter_t *adapter; 9158 dev_t sdev = 0; 9159 rsm_resource_callback_t callback_flag; 9160 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9161 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9162 9163 rsmseglock_acquire(segp); 9164 9165 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9166 "%s enter: key=%u, state=%d\n", function, segp->s_key, 9167 segp->s_state)); 9168 9169 if ((segp->s_state == RSM_STATE_NEW) || 9170 (segp->s_state == RSM_STATE_BIND) || 9171 (segp->s_state == RSM_STATE_EXPORT)) { 9172 rsmseglock_release(segp); 9173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9174 function, segp->s_state)); 9175 return; 9176 } 9177 9178 if (segp->s_state == RSM_STATE_NEW_QUIESCED) { 9179 segp->s_state = RSM_STATE_NEW; 9180 cv_broadcast(&segp->s_cv); 9181 rsmseglock_release(segp); 9182 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9183 function, segp->s_state)); 9184 return; 9185 } 9186 9187 if (segp->s_state == RSM_STATE_BIND_QUIESCED) { 9188 /* bind the segment */ 9189 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9190 segp->s_len, segp->s_proc); 9191 if (ret == RSM_SUCCESS) { /* bind successful */ 9192 segp->s_state = RSM_STATE_BIND; 9193 } else { /* bind failed - resource unavailable */ 9194 
segp->s_state = RSM_STATE_NEW; 9195 } 9196 cv_broadcast(&segp->s_cv); 9197 rsmseglock_release(segp); 9198 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9199 "%s done: bind_qscd bind = %d\n", function, ret)); 9200 return; 9201 } 9202 9203 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) { 9204 /* wait for the segment to move to EXPORT_QUIESCED state */ 9205 cv_wait(&segp->s_cv, &segp->s_lock); 9206 } 9207 9208 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) { 9209 /* bind the segment */ 9210 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9211 segp->s_len, segp->s_proc); 9212 9213 if (ret != RSM_SUCCESS) { 9214 /* bind failed - resource unavailable */ 9215 acl_len = segp->s_acl_len; 9216 acl = segp->s_acl; 9217 rsmpi_acl = segp->s_acl_in; 9218 segp->s_acl_len = 0; 9219 segp->s_acl = NULL; 9220 segp->s_acl_in = NULL; 9221 rsmseglock_release(segp); 9222 9223 rsmexport_rm(segp); 9224 rsmacl_free(acl, acl_len); 9225 rsmpiacl_free(rsmpi_acl, acl_len); 9226 9227 rsmseglock_acquire(segp); 9228 segp->s_state = RSM_STATE_NEW; 9229 cv_broadcast(&segp->s_cv); 9230 rsmseglock_release(segp); 9231 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9232 "%s done: exp_qscd bind failed = %d\n", 9233 function, ret)); 9234 return; 9235 } 9236 /* 9237 * publish the segment 9238 * if successful 9239 * segp->s_state = RSM_STATE_EXPORT; 9240 * else failed 9241 * segp->s_state = RSM_STATE_BIND; 9242 */ 9243 9244 /* check whether it is a local_memory_handle */ 9245 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) { 9246 if ((segp->s_acl[0].ae_node == my_nodeid) && 9247 (segp->s_acl[0].ae_permission == 0)) { 9248 segp->s_state = RSM_STATE_EXPORT; 9249 cv_broadcast(&segp->s_cv); 9250 rsmseglock_release(segp); 9251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9252 "%s done:exp_qscd\n", function)); 9253 return; 9254 } 9255 } 9256 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE, 9257 sdev, 0, NULL, DDI_UMEM_SLEEP); 9258 ASSERT(xbuf != NULL); 9259 9260 mem.ms_type = RSM_MEM_BUF; 
9261 mem.ms_bp = xbuf; 9262 9263 adapter = segp->s_adapter; 9264 9265 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 9266 create_flags = RSM_ALLOW_UNBIND_REBIND; 9267 } 9268 9269 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 9270 callback_flag = RSM_RESOURCE_DONTWAIT; 9271 } else { 9272 callback_flag = RSM_RESOURCE_SLEEP; 9273 } 9274 9275 ret = adapter->rsmpi_ops->rsm_seg_create( 9276 adapter->rsmpi_handle, &segp->s_handle.out, 9277 segp->s_len, create_flags, &mem, 9278 callback_flag, NULL); 9279 9280 if (ret != RSM_SUCCESS) { 9281 acl_len = segp->s_acl_len; 9282 acl = segp->s_acl; 9283 rsmpi_acl = segp->s_acl_in; 9284 segp->s_acl_len = 0; 9285 segp->s_acl = NULL; 9286 segp->s_acl_in = NULL; 9287 rsmseglock_release(segp); 9288 9289 rsmexport_rm(segp); 9290 rsmacl_free(acl, acl_len); 9291 rsmpiacl_free(rsmpi_acl, acl_len); 9292 9293 rsmseglock_acquire(segp); 9294 segp->s_state = RSM_STATE_BIND; 9295 cv_broadcast(&segp->s_cv); 9296 rsmseglock_release(segp); 9297 DBG_PRINTF((category, RSM_ERR, 9298 "%s done: exp_qscd create failed = %d\n", 9299 function, ret)); 9300 return; 9301 } 9302 9303 ret = adapter->rsmpi_ops->rsm_publish( 9304 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len, 9305 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL); 9306 9307 if (ret != RSM_SUCCESS) { 9308 acl_len = segp->s_acl_len; 9309 acl = segp->s_acl; 9310 rsmpi_acl = segp->s_acl_in; 9311 segp->s_acl_len = 0; 9312 segp->s_acl = NULL; 9313 segp->s_acl_in = NULL; 9314 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out); 9315 rsmseglock_release(segp); 9316 9317 rsmexport_rm(segp); 9318 rsmacl_free(acl, acl_len); 9319 rsmpiacl_free(rsmpi_acl, acl_len); 9320 9321 rsmseglock_acquire(segp); 9322 segp->s_state = RSM_STATE_BIND; 9323 cv_broadcast(&segp->s_cv); 9324 rsmseglock_release(segp); 9325 DBG_PRINTF((category, RSM_ERR, 9326 "%s done: exp_qscd publish failed = %d\n", 9327 function, ret)); 9328 return; 9329 } 9330 9331 segp->s_state = RSM_STATE_EXPORT; 9332 cv_broadcast(&segp->s_cv); 
9333 rsmseglock_release(segp); 9334 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n", 9335 function)); 9336 return; 9337 } 9338 9339 rsmseglock_release(segp); 9340 9341 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9342 } 9343 9344 static void 9345 rsm_quiesce_imp_seg(rsmresource_t *resp) 9346 { 9347 rsmseg_t *segp = (rsmseg_t *)resp; 9348 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9349 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg"); 9350 9351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9352 "%s enter: key=%u\n", function, segp->s_key)); 9353 9354 rsmseglock_acquire(segp); 9355 segp->s_flags |= RSM_DR_INPROGRESS; 9356 9357 while (segp->s_rdmacnt != 0) { 9358 /* wait for the RDMA to complete */ 9359 cv_wait(&segp->s_cv, &segp->s_lock); 9360 } 9361 9362 rsmseglock_release(segp); 9363 9364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9365 9366 } 9367 9368 static void 9369 rsm_unquiesce_imp_seg(rsmresource_t *resp) 9370 { 9371 rsmseg_t *segp = (rsmseg_t *)resp; 9372 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9373 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg"); 9374 9375 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9376 "%s enter: key=%u\n", function, segp->s_key)); 9377 9378 rsmseglock_acquire(segp); 9379 9380 segp->s_flags &= ~RSM_DR_INPROGRESS; 9381 /* wake up any waiting putv/getv ops */ 9382 cv_broadcast(&segp->s_cv); 9383 9384 rsmseglock_release(segp); 9385 9386 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9387 9388 9389 } 9390 9391 static void 9392 rsm_process_exp_seg(rsmresource_t *resp, int event) 9393 { 9394 if (event == RSM_DR_QUIESCE) 9395 rsm_quiesce_exp_seg(resp); 9396 else /* UNQUIESCE */ 9397 rsm_unquiesce_exp_seg(resp); 9398 } 9399 9400 static void 9401 rsm_process_imp_seg(rsmresource_t *resp, int event) 9402 { 9403 if (event == RSM_DR_QUIESCE) 9404 rsm_quiesce_imp_seg(resp); 9405 else /* UNQUIESCE */ 9406 rsm_unquiesce_imp_seg(resp); 9407 } 9408 9409 static 
void 9410 rsm_dr_process_local_segments(int event) 9411 { 9412 9413 int i, j; 9414 rsmresource_blk_t *blk; 9415 rsmresource_t *p; 9416 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9417 9418 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9419 "rsm_dr_process_local_segments enter\n")); 9420 9421 /* iterate through the resource structure */ 9422 9423 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 9424 9425 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 9426 blk = rsm_resource.rsmrc_root[i]; 9427 if (blk != NULL) { 9428 for (j = 0; j < RSMRC_BLKSZ; j++) { 9429 p = blk->rsmrcblk_blks[j]; 9430 if ((p != NULL) && (p != RSMRC_RESERVED)) { 9431 /* valid resource */ 9432 if (p->rsmrc_type == 9433 RSM_RESOURCE_EXPORT_SEGMENT) 9434 rsm_process_exp_seg(p, event); 9435 else if (p->rsmrc_type == 9436 RSM_RESOURCE_IMPORT_SEGMENT) 9437 rsm_process_imp_seg(p, event); 9438 } 9439 } 9440 } 9441 } 9442 9443 rw_exit(&rsm_resource.rsmrc_lock); 9444 9445 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9446 "rsm_dr_process_local_segments done\n")); 9447 } 9448 9449 /* *************** DR callback functions ************ */ 9450 static void 9451 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */) 9452 { 9453 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9454 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9455 "rsm_dr_callback_post_add is a no-op\n")); 9456 /* Noop */ 9457 } 9458 9459 static int 9460 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */) 9461 { 9462 int recheck_state = 0; 9463 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9464 9465 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9466 "rsm_dr_callback_pre_del enter\n")); 9467 9468 mutex_enter(&rsm_drv_data.drv_lock); 9469 9470 do { 9471 recheck_state = 0; 9472 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9473 "rsm_dr_callback_pre_del:state=%d\n", 9474 rsm_drv_data.drv_state)); 9475 9476 switch (rsm_drv_data.drv_state) { 9477 case RSM_DRV_NEW: 9478 /* 9479 * The state should usually never be RSM_DRV_NEW 
9480 * since in this state the callbacks have not yet 9481 * been registered. So, ASSERT. 9482 */ 9483 ASSERT(0); 9484 return (0); 9485 case RSM_DRV_REG_PROCESSING: 9486 /* 9487 * The driver is in the process of registering 9488 * with the DR framework. So, wait till the 9489 * registration process is complete. 9490 */ 9491 recheck_state = 1; 9492 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9493 break; 9494 case RSM_DRV_UNREG_PROCESSING: 9495 /* 9496 * If the state is RSM_DRV_UNREG_PROCESSING, the 9497 * module is in the process of detaching and 9498 * unregistering the callbacks from the DR 9499 * framework. So, simply return. 9500 */ 9501 mutex_exit(&rsm_drv_data.drv_lock); 9502 DBG_PRINTF((category, RSM_DEBUG, 9503 "rsm_dr_callback_pre_del:" 9504 "pre-del on NEW/UNREG\n")); 9505 return (0); 9506 case RSM_DRV_OK: 9507 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED; 9508 break; 9509 case RSM_DRV_PREDEL_STARTED: 9510 /* FALLTHRU */ 9511 case RSM_DRV_PREDEL_COMPLETED: 9512 /* FALLTHRU */ 9513 case RSM_DRV_POSTDEL_IN_PROGRESS: 9514 recheck_state = 1; 9515 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9516 break; 9517 case RSM_DRV_DR_IN_PROGRESS: 9518 rsm_drv_data.drv_memdel_cnt++; 9519 mutex_exit(&rsm_drv_data.drv_lock); 9520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9521 "rsm_dr_callback_pre_del done\n")); 9522 return (0); 9523 /* break; */ 9524 default: 9525 ASSERT(0); 9526 break; 9527 } 9528 9529 } while (recheck_state); 9530 9531 rsm_drv_data.drv_memdel_cnt++; 9532 9533 mutex_exit(&rsm_drv_data.drv_lock); 9534 9535 /* Do all the quiescing stuff here */ 9536 DBG_PRINTF((category, RSM_DEBUG, 9537 "rsm_dr_callback_pre_del: quiesce things now\n")); 9538 9539 rsm_dr_process_local_segments(RSM_DR_QUIESCE); 9540 9541 /* 9542 * now that all local segments have been quiesced lets inform 9543 * the importers 9544 */ 9545 rsm_send_suspend(); 9546 9547 /* 9548 * In response to the suspend message the remote node(s) will process 9549 * the segments and 
send a suspend_complete message. Till all 9550 * the nodes send the suspend_complete message we wait in the 9551 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce 9552 * function we transition to the RSM_DRV_PREDEL_COMPLETED state. 9553 */ 9554 mutex_enter(&rsm_drv_data.drv_lock); 9555 9556 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) { 9557 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9558 } 9559 9560 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED); 9561 9562 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS; 9563 cv_broadcast(&rsm_drv_data.drv_cv); 9564 9565 mutex_exit(&rsm_drv_data.drv_lock); 9566 9567 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9568 "rsm_dr_callback_pre_del done\n")); 9569 9570 return (0); 9571 } 9572 9573 static void 9574 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */) 9575 { 9576 int recheck_state = 0; 9577 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9578 9579 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9580 "rsm_dr_callback_post_del enter\n")); 9581 9582 mutex_enter(&rsm_drv_data.drv_lock); 9583 9584 do { 9585 recheck_state = 0; 9586 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9587 "rsm_dr_callback_post_del:state=%d\n", 9588 rsm_drv_data.drv_state)); 9589 9590 switch (rsm_drv_data.drv_state) { 9591 case RSM_DRV_NEW: 9592 /* 9593 * The driver state cannot not be RSM_DRV_NEW 9594 * since in this state the callbacks have not 9595 * yet been registered. 9596 */ 9597 ASSERT(0); 9598 return; 9599 case RSM_DRV_REG_PROCESSING: 9600 /* 9601 * The driver is in the process of registering with 9602 * the DR framework. Wait till the registration is 9603 * complete. 9604 */ 9605 recheck_state = 1; 9606 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9607 break; 9608 case RSM_DRV_UNREG_PROCESSING: 9609 /* 9610 * RSM_DRV_UNREG_PROCESSING state means the module 9611 * is detaching and unregistering the callbacks 9612 * from the DR framework. So simply return. 
9613 */ 9614 /* FALLTHRU */ 9615 case RSM_DRV_OK: 9616 /* 9617 * RSM_DRV_OK means we missed the pre-del 9618 * corresponding to this post-del coz we had not 9619 * registered yet, so simply return. 9620 */ 9621 mutex_exit(&rsm_drv_data.drv_lock); 9622 DBG_PRINTF((category, RSM_DEBUG, 9623 "rsm_dr_callback_post_del:" 9624 "post-del on OK/UNREG\n")); 9625 return; 9626 /* break; */ 9627 case RSM_DRV_PREDEL_STARTED: 9628 /* FALLTHRU */ 9629 case RSM_DRV_PREDEL_COMPLETED: 9630 /* FALLTHRU */ 9631 case RSM_DRV_POSTDEL_IN_PROGRESS: 9632 recheck_state = 1; 9633 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9634 break; 9635 case RSM_DRV_DR_IN_PROGRESS: 9636 rsm_drv_data.drv_memdel_cnt--; 9637 if (rsm_drv_data.drv_memdel_cnt > 0) { 9638 mutex_exit(&rsm_drv_data.drv_lock); 9639 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9640 "rsm_dr_callback_post_del done:\n")); 9641 return; 9642 } 9643 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS; 9644 break; 9645 default: 9646 ASSERT(0); 9647 return; 9648 /* break; */ 9649 } 9650 } while (recheck_state); 9651 9652 mutex_exit(&rsm_drv_data.drv_lock); 9653 9654 /* Do all the unquiescing stuff here */ 9655 DBG_PRINTF((category, RSM_DEBUG, 9656 "rsm_dr_callback_post_del: unquiesce things now\n")); 9657 9658 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE); 9659 9660 /* 9661 * now that all local segments have been unquiesced lets inform 9662 * the importers 9663 */ 9664 rsm_send_resume(); 9665 9666 mutex_enter(&rsm_drv_data.drv_lock); 9667 9668 rsm_drv_data.drv_state = RSM_DRV_OK; 9669 9670 cv_broadcast(&rsm_drv_data.drv_cv); 9671 9672 mutex_exit(&rsm_drv_data.drv_lock); 9673 9674 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9675 "rsm_dr_callback_post_del done\n")); 9676 9677 return; 9678 9679 } 9680