1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2012 Milan Jurik. All rights reserved. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 */ 27 28 29 /* 30 * Overview of the RSM Kernel Agent: 31 * --------------------------------- 32 * 33 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM 34 * kernel agent is a pseudo device driver which makes use of the RSMPI 35 * interface on behalf of the RSMAPI user library. 36 * 37 * The kernel agent functionality can be categorized into the following 38 * components: 39 * 1. Driver Infrastructure 40 * 2. Export/Import Segment Management 41 * 3. Internal resource allocation/deallocation 42 * 43 * The driver infrastructure includes the basic module loading entry points 44 * like _init, _info, _fini to load, unload and report information about 45 * the driver module. The driver infrastructure also includes the 46 * autoconfiguration entry points namely, attach, detach and getinfo for 47 * the device autoconfiguration. 48 * 49 * The kernel agent is a pseudo character device driver and exports 50 * a cb_ops structure which defines the driver entry points for character 51 * device access. This includes the open and close entry points. The 52 * other entry points provided include ioctl, devmap and segmap and chpoll. 53 * read and write entry points are not used since the device is memory 54 * mapped. Also ddi_prop_op is used for the prop_op entry point. 55 * 56 * The ioctl entry point supports a number of commands, which are used by 57 * the RSMAPI library in order to export and import segments. These 58 * commands include commands for binding and rebinding the physical pages 59 * allocated to the virtual address range, publishing the export segment, 60 * unpublishing and republishing an export segment, creating an 61 * import segment and a virtual connection from this import segment to 62 * an export segment, performing scatter-gather data transfer, barrier 63 * operations. 64 * 65 * 66 * Export and Import segments: 67 * --------------------------- 68 * 69 * In order to create an RSM export segment a process allocates a range in its 70 * virtual address space for the segment using standard Solaris interfaces. 71 * The process then calls RSMAPI, which in turn makes an ioctl call to the 72 * RSM kernel agent for an allocation of physical memory pages and for 73 * creation of the export segment by binding these pages to the virtual 74 * address range. These pages are locked in memory so that remote accesses 75 * are always applied to the correct page. 
Then the RSM segment is published, 76 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id 77 * is assigned to it. 78 * 79 * In order to import a published RSM segment, RSMAPI creates an import 80 * segment and forms a virtual connection across the interconnect to the 81 * export segment, via an ioctl into the kernel agent with the connect 82 * command. The import segment setup is completed by mapping the 83 * local device memory into the importer's virtual address space. The 84 * mapping of the import segment is handled by the segmap/devmap 85 * infrastructure described as follows. 86 * 87 * Segmap and Devmap interfaces: 88 * 89 * The RSM kernel agent allows device memory to be directly accessed by user 90 * threads via memory mapping. In order to do so, the RSM kernel agent 91 * supports the devmap and segmap entry points. 92 * 93 * The segmap entry point (rsm_segmap) is responsible for setting up a memory 94 * mapping as requested by mmap. The devmap entry point (rsm_devmap) is 95 * responsible for exporting the device memory to the user applications. 96 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Control is 97 * then transferred to the devmap_setup call, which calls rsm_devmap. 98 * 99 * rsm_devmap validates the user mapping to the device or kernel memory 100 * and passes the information to the system for setting up the mapping. The 101 * actual setting up of the mapping is done by devmap_devmem_setup (for 102 * device memory) or devmap_umem_setup (for kernel memory). Callbacks are 103 * registered for device context management via the devmap_devmem_setup 104 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap, 105 * rsmmap_access and rsmmap_dup. The callbacks are called when a new mapping 106 * is created, a mapping is freed, a mapping is accessed, or an existing 107 * mapping is duplicated, respectively. These callbacks allow the RSM kernel 108 * agent to maintain state information associated with the mappings. 109 * The state information is mainly in the form of a cookie list for the import 110 * segment for which mapping has been done. 111 * 112 * Forced disconnect of import segments: 113 * 114 * When an exported segment is unpublished, the exporter sends a forced 115 * disconnect message to all its importers. The importer segments are 116 * unloaded and disconnected. This involves unloading the original 117 * mappings and remapping to a preallocated kernel trash page. This is 118 * done by devmap_umem_remap. The trash/dummy page is a kernel page, 119 * preallocated by the kernel agent during attach using ddi_umem_alloc with 120 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application 121 * due to unloading of the original mappings. 122 * 123 * Additionally, every segment has a mapping generation number associated 124 * with it. This is an entry in the barrier generation page, created 125 * at attach time. This mapping generation number for the import 126 * segments is incremented on a forced disconnect to notify the application 127 * of the forced disconnect. On this notification, the application needs 128 * to reconnect the segment to establish a new legitimate mapping. 129 * 130 * 131 * Locks used in the kernel agent: 132 * ------------------------------- 133 * 134 * The kernel agent uses a variety of mutexes and condition variables for 135 * mutual exclusion of the shared data structures and for synchronization 136 * between the various threads. Some of the locks are described as follows.
137 * 138 * Each resource structure, which represents either an export/import segment 139 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock. 140 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the 141 * rsmseglock_acquire and rsmseglock_release macros. An additional 142 * lock called the rsmsi_lock is used for the shared import data structure 143 * that is relevant for resources representing import segments. There is 144 * also a condition variable associated with the resource called s_cv. This 145 * is used to wait for events like the segment state change etc. 146 * 147 * The resource structures are allocated from a pool of resource structures, 148 * called rsm_resource. This pool is protected via a reader-writer lock, 149 * called rsmrc_lock. 150 * 151 * There are two separate hash tables, one for the export segments and 152 * one for the import segments. The export segments are inserted into the 153 * export segment hash table only after they have been published and the 154 * import segments are inserted in the import segments list only after they 155 * have successfully connected to an exported segment. These tables are 156 * protected via reader-writer locks. 157 * 158 * Debug Support in the kernel agent: 159 * ---------------------------------- 160 * 161 * Debugging support in the kernel agent is provided by the following 162 * macros. 163 * 164 * DBG_PRINTF((category, level, message)) is a macro which logs a debug 165 * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer 166 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based 167 * on the definition of the category and level. All messages that belong to 168 * the specified category(rsmdbg_category) and are of an equal or greater 169 * severity than the specified level(rsmdbg_level) are logged. The message 170 * is a string which uses the same formatting rules as the strings used in 171 * printf. 172 * 173 * The category defines which component of the kernel agent has logged this 174 * message. There are a number of categories that have been defined such as 175 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro, 176 * DBG_ADDCATEGORY is used to add in another category to the currently 177 * specified category value so that the component using this new category 178 * can also effectively log debug messages. Thus, the category of a specific 179 * message is some combination of the available categories and we can define 180 * sub-categories if we want a finer level of granularity. 181 * 182 * The level defines the severity of the message. Different level values are 183 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being 184 * the least severe(debug level is 0). 185 * 186 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug 187 * variable or a string respectively. 188 * 189 * 190 * NOTES: 191 * 192 * Special Fork and Exec Handling: 193 * ------------------------------- 194 * 195 * The backing physical pages of an exported segment are always locked down. 196 * Thus, there are two cases in which a process having exported segments 197 * will cause a cpu to hang: (1) the process invokes exec; (2) a process 198 * forks and invokes exit before the duped file descriptors for the export 199 * segments are closed in the child process. 
The hang is caused because the 200 * address space release algorithm in the Solaris VM subsystem is based on a 201 * non-blocking loop which does not terminate while segments are locked 202 * down. In addition, the Solaris VM subsystem lacks a callback 203 * mechanism to the rsm kernel agent to allow unlocking these export 204 * segment pages. 205 * 206 * In order to circumvent this problem, the kernel agent does the following. 207 * The Solaris VM subsystem keeps memory segments in increasing order of 208 * virtual addresses. Thus, a special page (special_exit_offset) is allocated 209 * by the kernel agent and is mmapped into the heap area of the process address 210 * space (the mmap is done by the RSMAPI library). During the mmap processing 211 * of this special page by the devmap infrastructure, a callback (the same 212 * devmap context management callbacks discussed above) is registered for an 213 * unmap. 214 * 215 * As discussed above, this page is processed by the Solaris address space 216 * release code before any of the exported segments' pages (which are allocated 217 * from high memory). It is during this processing that the unmap callback gets 218 * called, and this callback is responsible for force destroying the exported 219 * segments, thus eliminating the problem of locked pages. 220 * 221 * Flow-control: 222 * ------------ 223 * 224 * A credit-based flow control algorithm is used for messages whose 225 * processing cannot be done in interrupt context because it might 226 * involve invoking rsmpi calls, might take a long time to complete, 227 * or might need to allocate resources. The algorithm operates on a per 228 * path basis. To send a message the pathend needs to have a credit, and 229 * it consumes one for every message that is flow controlled. On the 230 * receiving pathend the message is put on a msgbuf_queue and a task is 231 * dispatched on the worker thread, recv_taskq, where it is processed. 232 * After processing the message, the receiving pathend dequeues it, 233 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages, sends 234 * credits back to the sender pathend. 235 * 236 * RSM_DRTEST: 237 * ----------- 238 * 239 * This is used to enable DR testing using a test driver on test 240 * platforms which do not support DR.
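 *
 * RSMAPI call sequence (illustrative):
 * ------------------------------------
 *
 * As a rough, user-level sketch of how the pieces above fit together
 * (argument lists are deliberately abbreviated; the exact signatures are
 * those documented in the 3RSM man pages of the RSMAPI library), the
 * call sequence that ultimately drives the ioctl, segmap and devmap
 * entry points of this driver is:
 *
 *	exporting process			importing process
 *	-----------------			-----------------
 *	rsm_get_controller(...)			rsm_get_controller(...)
 *	rsm_memseg_export_create(...)
 *	rsm_memseg_export_publish(...)
 *						rsm_memseg_import_connect(...)
 *						rsm_memseg_import_map(...)
 *						... access the mapping ...
 *						rsm_memseg_import_unmap(...)
 *						rsm_memseg_import_disconnect(...)
 *	rsm_memseg_export_unpublish(...)
 *	rsm_memseg_export_destroy(...)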
241 * 242 */ 243 244 #include <sys/types.h> 245 #include <sys/param.h> 246 #include <sys/user.h> 247 #include <sys/buf.h> 248 #include <sys/systm.h> 249 #include <sys/cred.h> 250 #include <sys/vm.h> 251 #include <sys/uio.h> 252 #include <vm/seg.h> 253 #include <vm/page.h> 254 #include <sys/stat.h> 255 256 #include <sys/time.h> 257 #include <sys/errno.h> 258 259 #include <sys/file.h> 260 #include <sys/uio.h> 261 #include <sys/proc.h> 262 #include <sys/mman.h> 263 #include <sys/open.h> 264 #include <sys/atomic.h> 265 #include <sys/mem_config.h> 266 267 268 #include <sys/ddi.h> 269 #include <sys/devops.h> 270 #include <sys/ddidevmap.h> 271 #include <sys/sunddi.h> 272 #include <sys/esunddi.h> 273 #include <sys/ddi_impldefs.h> 274 275 #include <sys/kmem.h> 276 #include <sys/conf.h> 277 #include <sys/devops.h> 278 #include <sys/ddi_impldefs.h> 279 280 #include <sys/modctl.h> 281 282 #include <sys/policy.h> 283 #include <sys/types.h> 284 #include <sys/conf.h> 285 #include <sys/param.h> 286 287 #include <sys/taskq.h> 288 289 #include <sys/rsm/rsm_common.h> 290 #include <sys/rsm/rsmapi_common.h> 291 #include <sys/rsm/rsm.h> 292 #include <rsm_in.h> 293 #include <sys/rsm/rsmka_path_int.h> 294 #include <sys/rsm/rsmpi.h> 295 296 #include <sys/modctl.h> 297 #include <sys/debug.h> 298 299 #include <sys/tuneable.h> 300 301 #ifdef RSM_DRTEST 302 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec, 303 void *arg); 304 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, 305 void *arg); 306 #endif 307 308 extern void dbg_printf(int category, int level, char *fmt, ...); 309 extern void rsmka_pathmanager_init(); 310 extern void rsmka_pathmanager_cleanup(); 311 extern void rele_sendq_token(sendq_token_t *); 312 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t); 313 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t); 314 extern int rsmka_topology_ioctl(caddr_t, int, int); 315 316 extern pri_t maxclsyspri; 317 extern work_queue_t work_queue; 318 extern kmutex_t ipc_info_lock; 319 extern kmutex_t ipc_info_cvlock; 320 extern kcondvar_t ipc_info_cv; 321 extern kmutex_t path_hold_cvlock; 322 extern kcondvar_t path_hold_cv; 323 324 extern kmutex_t rsmka_buf_lock; 325 326 extern path_t *rsm_find_path(char *, int, rsm_addr_t); 327 extern adapter_t *rsmka_lookup_adapter(char *, int); 328 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *); 329 extern boolean_t rsmka_do_path_active(path_t *, int); 330 extern boolean_t rsmka_check_node_alive(rsm_node_id_t); 331 extern void rsmka_release_adapter(adapter_t *); 332 extern void rsmka_enqueue_msgbuf(path_t *path, void *data); 333 extern void rsmka_dequeue_msgbuf(path_t *path); 334 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path); 335 /* lint -w2 */ 336 337 static int rsm_open(dev_t *, int, int, cred_t *); 338 static int rsm_close(dev_t, int, int, cred_t *); 339 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 340 cred_t *credp, int *rvalp); 341 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *, 342 uint_t); 343 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t, 344 uint_t, uint_t, cred_t *); 345 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 346 struct pollhead **phpp); 347 348 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 349 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t); 350 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t); 351 352 static int 
rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *); 353 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t); 354 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t); 355 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int, 356 rsm_permission_t); 357 static void rsm_export_force_destroy(ddi_umem_cookie_t *); 358 static void rsmacl_free(rsmapi_access_entry_t *, int); 359 static void rsmpiacl_free(rsm_access_entry_t *, int); 360 361 static int rsm_inc_pgcnt(pgcnt_t); 362 static void rsm_dec_pgcnt(pgcnt_t); 363 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop); 364 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *, 365 size_t *); 366 static void exporter_quiesce(); 367 static void rsmseg_suspend(rsmseg_t *, int *); 368 static void rsmsegshare_suspend(rsmseg_t *); 369 static int rsmseg_resume(rsmseg_t *, void **); 370 static int rsmsegshare_resume(rsmseg_t *); 371 372 static struct cb_ops rsm_cb_ops = { 373 rsm_open, /* open */ 374 rsm_close, /* close */ 375 nodev, /* strategy */ 376 nodev, /* print */ 377 nodev, /* dump */ 378 nodev, /* read */ 379 nodev, /* write */ 380 rsm_ioctl, /* ioctl */ 381 rsm_devmap, /* devmap */ 382 NULL, /* mmap */ 383 rsm_segmap, /* segmap */ 384 rsm_chpoll, /* poll */ 385 ddi_prop_op, /* cb_prop_op */ 386 0, /* streamtab */ 387 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */ 388 0, 389 0, 390 0 391 }; 392 393 static struct dev_ops rsm_ops = { 394 DEVO_REV, /* devo_rev, */ 395 0, /* refcnt */ 396 rsm_info, /* get_dev_info */ 397 nulldev, /* identify */ 398 nulldev, /* probe */ 399 rsm_attach, /* attach */ 400 rsm_detach, /* detach */ 401 nodev, /* reset */ 402 &rsm_cb_ops, /* driver operations */ 403 (struct bus_ops *)0, /* bus operations */ 404 0, 405 ddi_quiesce_not_needed, /* quiesce */ 406 }; 407 408 /* 409 * Module linkage information for the kernel. 410 */ 411 412 static struct modldrv modldrv = { 413 &mod_driverops, /* Type of module. 
This one is a pseudo driver */ 414 "Remote Shared Memory Driver", 415 &rsm_ops, /* driver ops */ 416 }; 417 418 static struct modlinkage modlinkage = { 419 MODREV_1, 420 (void *)&modldrv, 421 0, 422 0, 423 0 424 }; 425 426 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta); 427 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta); 428 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled); 429 430 static kphysm_setup_vector_t rsm_dr_callback_vec = { 431 KPHYSM_SETUP_VECTOR_VERSION, 432 rsm_dr_callback_post_add, 433 rsm_dr_callback_pre_del, 434 rsm_dr_callback_post_del 435 }; 436 437 /* This flag can be changed to 0 to help with PIT testing */ 438 int rsmka_modunloadok = 1; 439 int no_reply_cnt = 0; 440 441 uint64_t rsm_ctrlmsg_errcnt = 0; 442 uint64_t rsm_ipcsend_errcnt = 0; 443 444 #define MAX_NODES 64 445 446 static struct rsm_driver_data rsm_drv_data; 447 static struct rsmresource_table rsm_resource; 448 449 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t); 450 static void rsmresource_destroy(void); 451 static int rsmresource_alloc(minor_t *); 452 static rsmresource_t *rsmresource_free(minor_t rnum); 453 static int rsm_closeconnection(rsmseg_t *seg, void **cookie); 454 static int rsm_unpublish(rsmseg_t *seg, int mode); 455 static int rsm_unbind(rsmseg_t *seg); 456 static uint_t rsmhash(rsm_memseg_id_t key); 457 static void rsmhash_alloc(rsmhash_table_t *rhash, int size); 458 static void rsmhash_free(rsmhash_table_t *rhash, int size); 459 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval); 460 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval); 461 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, 462 void *cookie); 463 int rsm_disconnect(rsmseg_t *seg); 464 void rsmseg_unload(rsmseg_t *); 465 void rsm_suspend_complete(rsm_node_id_t src_node, int flag); 466 467 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 468 rsm_intr_q_op_t opcode, rsm_addr_t src, 469 void *data, size_t size, rsm_intr_hand_arg_t arg); 470 471 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t); 472 473 rsm_node_id_t my_nodeid; 474 475 /* cookie, va, offsets and length for the barrier */ 476 static rsm_gnum_t *bar_va; 477 static ddi_umem_cookie_t bar_cookie; 478 static off_t barrier_offset; 479 static size_t barrier_size; 480 static int max_segs; 481 482 /* cookie for the trash memory */ 483 static ddi_umem_cookie_t remap_cookie; 484 485 static rsm_memseg_id_t rsm_nextavail_segmentid; 486 487 extern taskq_t *work_taskq; 488 extern char *taskq_name; 489 490 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */ 491 492 static rsmhash_table_t rsm_export_segs; /* list of exported segs */ 493 rsmhash_table_t rsm_import_segs; /* list of imported segs */ 494 static rsmhash_table_t rsm_event_queues; /* list of event queues */ 495 496 static rsm_ipc_t rsm_ipc; /* ipc info */ 497 498 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */ 499 static list_head_t rsm_suspend_list; 500 501 /* list of descriptors for remote importers */ 502 static importers_table_t importer_list; 503 504 kmutex_t rsm_suspend_cvlock; 505 kcondvar_t rsm_suspend_cv; 506 507 static kmutex_t rsm_lock; 508 509 adapter_t loopback_adapter; 510 rsm_controller_attr_t loopback_attr; 511 512 int rsmipc_send_controlmsg(path_t *path, int msgtype); 513 514 void rsmka_init_loopback(); 515 516 int rsmka_null_seg_create( 517 rsm_controller_handle_t, 518 rsm_memseg_export_handle_t *, 
519 size_t, 520 uint_t, 521 rsm_memory_local_t *, 522 rsm_resource_callback_t, 523 rsm_resource_callback_arg_t); 524 525 int rsmka_null_seg_destroy( 526 rsm_memseg_export_handle_t); 527 528 int rsmka_null_bind( 529 rsm_memseg_export_handle_t, 530 off_t, 531 rsm_memory_local_t *, 532 rsm_resource_callback_t, 533 rsm_resource_callback_arg_t); 534 535 int rsmka_null_unbind( 536 rsm_memseg_export_handle_t, 537 off_t, 538 size_t); 539 540 int rsmka_null_rebind( 541 rsm_memseg_export_handle_t, 542 off_t, 543 rsm_memory_local_t *, 544 rsm_resource_callback_t, 545 rsm_resource_callback_arg_t); 546 547 int rsmka_null_publish( 548 rsm_memseg_export_handle_t, 549 rsm_access_entry_t [], 550 uint_t, 551 rsm_memseg_id_t, 552 rsm_resource_callback_t, 553 rsm_resource_callback_arg_t); 554 555 556 int rsmka_null_republish( 557 rsm_memseg_export_handle_t, 558 rsm_access_entry_t [], 559 uint_t, 560 rsm_resource_callback_t, 561 rsm_resource_callback_arg_t); 562 563 int rsmka_null_unpublish( 564 rsm_memseg_export_handle_t); 565 566 rsm_ops_t null_rsmpi_ops; 567 568 /* 569 * data and locks to keep track of total amount of exported memory 570 */ 571 static pgcnt_t rsm_pgcnt; 572 static pgcnt_t rsm_pgcnt_max; /* max allowed */ 573 static kmutex_t rsm_pgcnt_lock; 574 575 static int rsm_enable_dr; 576 577 static char loopback_str[] = "loopback"; 578 579 int rsm_hash_size; 580 581 /* 582 * The locking model is as follows: 583 * 584 * Local operations: 585 * find resource - grab reader lock on resouce list 586 * insert rc - grab writer lock 587 * delete rc - grab writer lock and resource mutex 588 * read/write - no lock 589 * 590 * Remote invocations: 591 * find resource - grab read lock and resource mutex 592 * 593 * State: 594 * resource state - grab resource mutex 595 */ 596 597 int 598 _init(void) 599 { 600 int e; 601 602 e = mod_install(&modlinkage); 603 if (e != 0) { 604 return (e); 605 } 606 607 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL); 608 609 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL); 610 611 612 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL); 613 614 rsm_hash_size = RSM_HASHSZ; 615 616 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 617 618 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 619 620 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL); 621 622 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL); 623 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0); 624 625 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL); 626 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0); 627 628 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL); 629 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0); 630 631 rsm_ipc.count = RSMIPC_SZ; 632 rsm_ipc.wanted = 0; 633 rsm_ipc.sequence = 0; 634 635 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL); 636 637 for (e = 0; e < RSMIPC_SZ; e++) { 638 rsmipc_slot_t *slot = &rsm_ipc.slots[e]; 639 640 RSMIPC_SET(slot, RSMIPC_FREE); 641 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL); 642 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0); 643 } 644 645 /* 646 * Initialize the suspend message list 647 */ 648 rsm_suspend_list.list_head = NULL; 649 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL); 650 651 /* 652 * It is assumed here that configuration data is available 653 * during system boot since _init may be called at that time. 
654 */ 655 656 rsmka_pathmanager_init(); 657 658 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 659 "rsm: _init done\n")); 660 661 return (DDI_SUCCESS); 662 663 } 664 665 int 666 _info(struct modinfo *modinfop) 667 { 668 669 return (mod_info(&modlinkage, modinfop)); 670 } 671 672 int 673 _fini(void) 674 { 675 int e; 676 677 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 678 "rsm: _fini enter\n")); 679 680 /* 681 * The rsmka_modunloadok flag is simply used to help with 682 * the PIT testing. Make this flag 0 to disallow modunload. 683 */ 684 if (rsmka_modunloadok == 0) 685 return (EBUSY); 686 687 /* rsm_detach will be called as a result of mod_remove */ 688 e = mod_remove(&modlinkage); 689 if (e) { 690 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR, 691 "Unable to fini RSM %x\n", e)); 692 return (e); 693 } 694 695 rsmka_pathmanager_cleanup(); 696 697 rw_destroy(&rsm_resource.rsmrc_lock); 698 699 rw_destroy(&rsm_export_segs.rsmhash_rw); 700 rw_destroy(&rsm_import_segs.rsmhash_rw); 701 rw_destroy(&rsm_event_queues.rsmhash_rw); 702 703 mutex_destroy(&importer_list.lock); 704 705 mutex_destroy(&rsm_ipc.lock); 706 cv_destroy(&rsm_ipc.cv); 707 708 (void) mutex_destroy(&rsm_suspend_list.list_lock); 709 710 (void) mutex_destroy(&rsm_pgcnt_lock); 711 712 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n")); 713 714 return (DDI_SUCCESS); 715 716 } 717 718 /*ARGSUSED1*/ 719 static int 720 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 721 { 722 minor_t rnum; 723 int percent; 724 int ret; 725 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 726 727 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n")); 728 729 switch (cmd) { 730 case DDI_ATTACH: 731 break; 732 case DDI_RESUME: 733 default: 734 DBG_PRINTF((category, RSM_ERR, 735 "rsm:rsm_attach - cmd not supported\n")); 736 return (DDI_FAILURE); 737 } 738 739 if (rsm_dip != NULL) { 740 DBG_PRINTF((category, RSM_ERR, 741 "rsm:rsm_attach - supports only " 742 "one instance\n")); 743 return (DDI_FAILURE); 744 } 745 746 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 747 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 748 "enable-dynamic-reconfiguration", 1); 749 750 mutex_enter(&rsm_drv_data.drv_lock); 751 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING; 752 mutex_exit(&rsm_drv_data.drv_lock); 753 754 if (rsm_enable_dr) { 755 #ifdef RSM_DRTEST 756 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec, 757 (void *)NULL); 758 #else 759 ret = kphysm_setup_func_register(&rsm_dr_callback_vec, 760 (void *)NULL); 761 #endif 762 if (ret != 0) { 763 mutex_exit(&rsm_drv_data.drv_lock); 764 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic " 765 "reconfiguration setup failed\n"); 766 return (DDI_FAILURE); 767 } 768 } 769 770 mutex_enter(&rsm_drv_data.drv_lock); 771 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING); 772 rsm_drv_data.drv_state = RSM_DRV_OK; 773 cv_broadcast(&rsm_drv_data.drv_cv); 774 mutex_exit(&rsm_drv_data.drv_lock); 775 776 /* 777 * page_list_read_lock(); 778 * xx_setup(); 779 * page_list_read_unlock(); 780 */ 781 782 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 783 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 784 "segment-hashtable-size", RSM_HASHSZ); 785 if (rsm_hash_size == 0) { 786 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 787 "rsm: segment-hashtable-size in rsm.conf " 788 "must be greater than 0, defaulting to 128\n")); 789 rsm_hash_size = RSM_HASHSZ; 790 } 791 792 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n", 793 rsm_hash_size)); 794 795 rsm_pgcnt = 0; 796 797 percent = 
ddi_prop_get_int(DDI_DEV_T_ANY, devi, 798 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 799 "max-exported-memory", 0); 800 if (percent < 0) { 801 DBG_PRINTF((category, RSM_ERR, 802 "rsm:rsm_attach not enough memory available to " 803 "export, or max-exported-memory set incorrectly.\n")); 804 return (DDI_FAILURE); 805 } 806 /* 0 indicates no fixed upper limit. maxmem is the max */ 807 /* available pageable physical mem */ 808 rsm_pgcnt_max = (percent*maxmem)/100; 809 810 if (rsm_pgcnt_max > 0) { 811 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 812 "rsm: Available physical memory = %lu pages, " 813 "Max exportable memory = %lu pages", 814 maxmem, rsm_pgcnt_max)); 815 } 816 817 /* 818 * Create minor number 819 */ 820 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) { 821 DBG_PRINTF((category, RSM_ERR, 822 "rsm: rsm_attach - Unable to get " 823 "minor number\n")); 824 return (DDI_FAILURE); 825 } 826 827 ASSERT(rnum == RSM_DRIVER_MINOR); 828 829 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR, 830 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) { 831 DBG_PRINTF((category, RSM_ERR, 832 "rsm: rsm_attach - unable to allocate " 833 "minor #\n")); 834 return (DDI_FAILURE); 835 } 836 837 rsm_dip = devi; 838 /* 839 * Allocate the hashtables 840 */ 841 rsmhash_alloc(&rsm_export_segs, rsm_hash_size); 842 rsmhash_alloc(&rsm_import_segs, rsm_hash_size); 843 844 importer_list.bucket = (importing_token_t **) 845 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP); 846 847 /* 848 * Allocate a resource struct 849 */ 850 { 851 rsmresource_t *p; 852 853 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP); 854 855 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL); 856 857 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR); 858 } 859 860 /* 861 * Based on the rsm.conf property max-segments, determine the maximum 862 * number of segments that can be exported/imported. This is then used 863 * to determine the size for barrier failure pages. 864 */ 865 866 /* First get the max number of segments from the rsm.conf file */ 867 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 868 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 869 "max-segments", 0); 870 if (max_segs == 0) { 871 /* Use default number of segments */ 872 max_segs = RSM_MAX_NUM_SEG; 873 } 874 875 /* 876 * Based on the max number of segments allowed, determine the barrier 877 * page size. add 1 to max_segs since the barrier page itself uses 878 * a slot 879 */ 880 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t), 881 PAGESIZE); 882 883 /* 884 * allocation of the barrier failure page 885 */ 886 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size, 887 DDI_UMEM_SLEEP, &bar_cookie); 888 889 /* 890 * Set the barrier_offset 891 */ 892 barrier_offset = 0; 893 894 /* 895 * Allocate a trash memory and get a cookie for it. This will be used 896 * when remapping segments during force disconnects. Allocate the 897 * trash memory with a large size which is page aligned. 
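 * On a forced disconnect the import mappings are later replaced with
 * this trash page; the remap is roughly of the following form (a sketch
 * only, with dhp, offset, length and maxprot standing in for the devmap
 * handle and extent of the affected mapping; see devmap_umem_remap(9F)
 * for the exact signature):
 *
 *	(void) devmap_umem_remap(dhp, rsm_dip, remap_cookie,
 *	    offset, length, maxprot, 0, NULL);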
898 */ 899 (void) ddi_umem_alloc((size_t)TRASHSIZE, 900 DDI_UMEM_TRASH, &remap_cookie); 901 902 /* initialize user segment id allocation variable */ 903 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE; 904 905 /* 906 * initialize the null_rsmpi_ops vector and the loopback adapter 907 */ 908 rsmka_init_loopback(); 909 910 911 ddi_report_dev(devi); 912 913 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n")); 914 915 return (DDI_SUCCESS); 916 } 917 918 /* 919 * The call to mod_remove in the _fini routine will cause the system 920 * to call rsm_detach. 921 */ 922 /*ARGSUSED*/ 923 static int 924 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 925 { 926 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 927 928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n")); 929 930 switch (cmd) { 931 case DDI_DETACH: 932 break; 933 default: 934 DBG_PRINTF((category, RSM_ERR, 935 "rsm:rsm_detach - cmd %x not supported\n", 936 cmd)); 937 return (DDI_FAILURE); 938 } 939 940 mutex_enter(&rsm_drv_data.drv_lock); 941 while (rsm_drv_data.drv_state != RSM_DRV_OK) 942 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 943 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING; 944 mutex_exit(&rsm_drv_data.drv_lock); 945 946 /* 947 * Unregister the DR callback functions 948 */ 949 if (rsm_enable_dr) { 950 #ifdef RSM_DRTEST 951 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec, 952 (void *)NULL); 953 #else 954 kphysm_setup_func_unregister(&rsm_dr_callback_vec, 955 (void *)NULL); 956 #endif 957 } 958 959 mutex_enter(&rsm_drv_data.drv_lock); 960 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING); 961 rsm_drv_data.drv_state = RSM_DRV_NEW; 962 mutex_exit(&rsm_drv_data.drv_lock); 963 964 ASSERT(rsm_suspend_list.list_head == NULL); 965 966 /* 967 * Release all resources, seglist, controller, ...
968 */ 969 970 /* remove intersend queues */ 971 /* remove registered services */ 972 973 974 ddi_remove_minor_node(dip, DRIVER_NAME); 975 rsm_dip = NULL; 976 977 /* 978 * Free minor zero resource 979 */ 980 { 981 rsmresource_t *p; 982 983 p = rsmresource_free(RSM_DRIVER_MINOR); 984 if (p) { 985 mutex_destroy(&p->rsmrc_lock); 986 kmem_free((void *)p, sizeof (*p)); 987 } 988 } 989 990 /* 991 * Free resource table 992 */ 993 994 rsmresource_destroy(); 995 996 /* 997 * Free the hash tables 998 */ 999 rsmhash_free(&rsm_export_segs, rsm_hash_size); 1000 rsmhash_free(&rsm_import_segs, rsm_hash_size); 1001 1002 kmem_free((void *)importer_list.bucket, 1003 rsm_hash_size * sizeof (importing_token_t *)); 1004 importer_list.bucket = NULL; 1005 1006 1007 /* free barrier page */ 1008 if (bar_cookie != NULL) { 1009 ddi_umem_free(bar_cookie); 1010 } 1011 bar_va = NULL; 1012 bar_cookie = NULL; 1013 1014 /* 1015 * Free the memory allocated for the trash 1016 */ 1017 if (remap_cookie != NULL) { 1018 ddi_umem_free(remap_cookie); 1019 } 1020 remap_cookie = NULL; 1021 1022 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n")); 1023 1024 return (DDI_SUCCESS); 1025 } 1026 1027 /*ARGSUSED*/ 1028 static int 1029 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1030 { 1031 register int error; 1032 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 1033 1034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n")); 1035 1036 switch (infocmd) { 1037 case DDI_INFO_DEVT2DEVINFO: 1038 if (rsm_dip == NULL) 1039 error = DDI_FAILURE; 1040 else { 1041 *result = (void *)rsm_dip; 1042 error = DDI_SUCCESS; 1043 } 1044 break; 1045 case DDI_INFO_DEVT2INSTANCE: 1046 *result = (void *)0; 1047 error = DDI_SUCCESS; 1048 break; 1049 default: 1050 error = DDI_FAILURE; 1051 } 1052 1053 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n")); 1054 return (error); 1055 } 1056 1057 adapter_t * 1058 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode) 1059 { 1060 adapter_t *adapter; 1061 char adapter_devname[MAXNAMELEN]; 1062 int instance; 1063 DBG_DEFINE(category, 1064 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL); 1065 1066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n")); 1067 1068 instance = msg->cnum; 1069 1070 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) { 1071 return (NULL); 1072 } 1073 1074 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode)) 1075 return (NULL); 1076 1077 if (strcmp(adapter_devname, "loopback") == 0) 1078 return (&loopback_adapter); 1079 1080 adapter = rsmka_lookup_adapter(adapter_devname, instance); 1081 1082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n")); 1083 1084 return (adapter); 1085 } 1086 1087 1088 /* 1089 * *********************** Resource Number Management ******************** 1090 * All resources are stored in a simple hash table. The table is an array 1091 * of pointers to resource blks. Each blk contains: 1092 * base - base number of this blk 1093 * used - number of used slots in this blk. 1094 * blks - array of pointers to resource items. 1095 * An entry in a resource blk is empty if it's NULL. 1096 * 1097 * We start with no resource array. Each time we run out of slots, we 1098 * reallocate a new larger array and copy the pointer to the new array and 1099 * a new resource blk is allocated and added to the hash table. 1100 * 1101 * The resource control block contains: 1102 * root - array of pointer of resource blks 1103 * sz - current size of array. 
104 * len - last valid entry in array. 1105 * 1106 * A search operation based on a resource number is as follows: 1107 * index = rnum / RESOURCE_BLKSZ; 1108 * ASSERT(index < resource_block.len); 1109 * ASSERT(index < resource_block.sz); 1110 * offset = rnum % RESOURCE_BLKSZ; 1111 * ASSERT(offset >= resource_block.root[index]->base); 1112 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 1113 * return resource_block.root[index]->blks[offset]; 1114 * 1115 * A resource blk is freed when its used count reaches zero. 1116 */ 1117 static int 1118 rsmresource_alloc(minor_t *rnum) 1119 { 1120 1121 /* search for available resource slot */ 1122 int i, j, empty = -1; 1123 rsmresource_blk_t *blk; 1124 1125 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1126 "rsmresource_alloc enter\n")); 1127 1128 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1129 1130 /* Try to find an empty slot */ 1131 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1132 blk = rsm_resource.rsmrc_root[i]; 1133 if (blk != NULL && blk->rsmrcblk_avail > 0) { 1134 /* found an empty slot in this blk */ 1135 for (j = 0; j < RSMRC_BLKSZ; j++) { 1136 if (blk->rsmrcblk_blks[j] == NULL) { 1137 *rnum = (minor_t) 1138 (j + (i * RSMRC_BLKSZ)); 1139 /* 1140 * obey gen page limits 1141 */ 1142 if (*rnum >= max_segs + 1) { 1143 if (empty < 0) { 1144 rw_exit(&rsm_resource. 1145 rsmrc_lock); 1146 DBG_PRINTF(( 1147 RSM_KERNEL_ALL, 1148 RSM_ERR, 1149 "rsmresource" 1150 "_alloc failed:" 1151 "not enough res" 1152 "%d\n", *rnum)); 1153 return (RSMERR_INSUFFICIENT_RESOURCES); 1154 } else { 1155 /* use empty slot */ 1156 break; 1157 } 1158 1159 } 1160 1161 blk->rsmrcblk_blks[j] = RSMRC_RESERVED; 1162 blk->rsmrcblk_avail--; 1163 rw_exit(&rsm_resource.rsmrc_lock); 1164 DBG_PRINTF((RSM_KERNEL_ALL, 1165 RSM_DEBUG_VERBOSE, 1166 "rsmresource_alloc done\n")); 1167 return (RSM_SUCCESS); 1168 } 1169 } 1170 } else if (blk == NULL && empty < 0) { 1171 /* remember first empty slot */ 1172 empty = i; 1173 } 1174 } 1175 1176 /* Couldn't find anything, allocate a new blk */ 1177 /* 1178 * Do we need to reallocate the root array? 1179 */ 1180 if (empty < 0) { 1181 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) { 1182 /* 1183 * Allocate new array and copy current stuff into it 1184 */ 1185 rsmresource_blk_t **p; 1186 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz + 1187 RSMRC_BLKSZ; 1188 /* 1189 * Don't allocate more than the max valid rnum 1190 */ 1191 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >= 1192 max_segs + 1) { 1193 rw_exit(&rsm_resource.rsmrc_lock); 1194 return (RSMERR_INSUFFICIENT_RESOURCES); 1195 } 1196 1197 p = (rsmresource_blk_t **)kmem_zalloc( 1198 newsz * sizeof (*p), 1199 KM_SLEEP); 1200 1201 if (rsm_resource.rsmrc_root) { 1202 uint_t oldsz; 1203 1204 oldsz = (uint_t)(rsm_resource.rsmrc_sz * 1205 (int)sizeof (*p)); 1206 1207 /* 1208 * Copy old data into new space and 1209 * free old stuff 1210 */ 1211 bcopy(rsm_resource.rsmrc_root, p, oldsz); 1212 kmem_free(rsm_resource.rsmrc_root, oldsz); 1213 } 1214 1215 rsm_resource.rsmrc_root = p; 1216 rsm_resource.rsmrc_sz = (int)newsz; 1217 } 1218 1219 empty = rsm_resource.rsmrc_len; 1220 rsm_resource.rsmrc_len++; 1221 } 1222 1223 /* 1224 * Allocate a new blk 1225 */ 1226 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP); 1227 ASSERT(rsm_resource.rsmrc_root[empty] == NULL); 1228 rsm_resource.rsmrc_root[empty] = blk; 1229 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1; 1230 1231 /* 1232 * Allocate slot 1233 */ 1234 1235 *rnum = (minor_t)(empty * RSMRC_BLKSZ); 1236 1237 /* 1238 * watch out not to
exceed bounds of barrier page 1239 */ 1240 if (*rnum >= max_segs + 1) { 1241 rw_exit(&rsm_resource.rsmrc_lock); 1242 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR, 1243 "rsmresource_alloc failed %d\n", *rnum)); 1244 1245 return (RSMERR_INSUFFICIENT_RESOURCES); 1246 } 1247 blk->rsmrcblk_blks[0] = RSMRC_RESERVED; 1248 1249 1250 rw_exit(&rsm_resource.rsmrc_lock); 1251 1252 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1253 "rsmresource_alloc done\n")); 1254 1255 return (RSM_SUCCESS); 1256 } 1257 1258 static rsmresource_t * 1259 rsmresource_free(minor_t rnum) 1260 { 1261 1262 /* search for available resource slot */ 1263 int i, j; 1264 rsmresource_blk_t *blk; 1265 rsmresource_t *p; 1266 1267 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1268 "rsmresource_free enter\n")); 1269 1270 i = (int)(rnum / RSMRC_BLKSZ); 1271 j = (int)(rnum % RSMRC_BLKSZ); 1272 1273 if (i >= rsm_resource.rsmrc_len) { 1274 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1275 "rsmresource_free done\n")); 1276 return (NULL); 1277 } 1278 1279 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1280 1281 ASSERT(rsm_resource.rsmrc_root); 1282 ASSERT(i < rsm_resource.rsmrc_len); 1283 ASSERT(i < rsm_resource.rsmrc_sz); 1284 blk = rsm_resource.rsmrc_root[i]; 1285 if (blk == NULL) { 1286 rw_exit(&rsm_resource.rsmrc_lock); 1287 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1288 "rsmresource_free done\n")); 1289 return (NULL); 1290 } 1291 1292 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */ 1293 1294 p = blk->rsmrcblk_blks[j]; 1295 if (p == RSMRC_RESERVED) { 1296 p = NULL; 1297 } 1298 1299 blk->rsmrcblk_blks[j] = NULL; 1300 blk->rsmrcblk_avail++; 1301 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) { 1302 /* free this blk */ 1303 kmem_free(blk, sizeof (*blk)); 1304 rsm_resource.rsmrc_root[i] = NULL; 1305 } 1306 1307 rw_exit(&rsm_resource.rsmrc_lock); 1308 1309 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1310 "rsmresource_free done\n")); 1311 1312 return (p); 1313 } 1314 1315 static rsmresource_t * 1316 rsmresource_lookup(minor_t rnum, int lock) 1317 { 1318 int i, j; 1319 rsmresource_blk_t *blk; 1320 rsmresource_t *p; 1321 1322 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1323 "rsmresource_lookup enter\n")); 1324 1325 /* Find resource and lock it in READER mode */ 1326 /* search for available resource slot */ 1327 1328 i = (int)(rnum / RSMRC_BLKSZ); 1329 j = (int)(rnum % RSMRC_BLKSZ); 1330 1331 if (i >= rsm_resource.rsmrc_len) { 1332 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1333 "rsmresource_lookup done\n")); 1334 return (NULL); 1335 } 1336 1337 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1338 1339 blk = rsm_resource.rsmrc_root[i]; 1340 if (blk != NULL) { 1341 ASSERT(i < rsm_resource.rsmrc_len); 1342 ASSERT(i < rsm_resource.rsmrc_sz); 1343 1344 p = blk->rsmrcblk_blks[j]; 1345 if (lock == RSM_LOCK) { 1346 if (p != RSMRC_RESERVED) { 1347 mutex_enter(&p->rsmrc_lock); 1348 } else { 1349 p = NULL; 1350 } 1351 } 1352 } else { 1353 p = NULL; 1354 } 1355 rw_exit(&rsm_resource.rsmrc_lock); 1356 1357 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1358 "rsmresource_lookup done\n")); 1359 1360 return (p); 1361 } 1362 1363 static void 1364 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type) 1365 { 1366 /* Find resource and lock it in READER mode */ 1367 /* Caller can upgrade if need be */ 1368 /* search for available resource slot */ 1369 int i, j; 1370 rsmresource_blk_t *blk; 1371 1372 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1373 "rsmresource_insert enter\n")); 1374 1375 i = (int)(rnum / RSMRC_BLKSZ); 1376 j = 
(int)(rnum % RSMRC_BLKSZ); 1377 1378 p->rsmrc_type = type; 1379 p->rsmrc_num = rnum; 1380 1381 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1382 1383 ASSERT(rsm_resource.rsmrc_root); 1384 ASSERT(i < rsm_resource.rsmrc_len); 1385 ASSERT(i < rsm_resource.rsmrc_sz); 1386 1387 blk = rsm_resource.rsmrc_root[i]; 1388 ASSERT(blk); 1389 1390 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED); 1391 1392 blk->rsmrcblk_blks[j] = p; 1393 1394 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1395 "rsmresource_insert done\n")); 1396 1397 rw_exit(&rsm_resource.rsmrc_lock); 1398 } 1399 1400 static void 1401 rsmresource_destroy() 1402 { 1403 int i, j; 1404 1405 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1406 "rsmresource_destroy enter\n")); 1407 1408 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1409 1410 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1411 rsmresource_blk_t *blk; 1412 1413 blk = rsm_resource.rsmrc_root[i]; 1414 if (blk == NULL) { 1415 continue; 1416 } 1417 for (j = 0; j < RSMRC_BLKSZ; j++) { 1418 if (blk->rsmrcblk_blks[j] != NULL) { 1419 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1420 "Not null slot %d, %lx\n", j, 1421 (size_t)blk->rsmrcblk_blks[j])); 1422 } 1423 } 1424 kmem_free(blk, sizeof (*blk)); 1425 rsm_resource.rsmrc_root[i] = NULL; 1426 } 1427 if (rsm_resource.rsmrc_root) { 1428 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *); 1429 kmem_free(rsm_resource.rsmrc_root, (uint_t)i); 1430 rsm_resource.rsmrc_root = NULL; 1431 rsm_resource.rsmrc_len = 0; 1432 rsm_resource.rsmrc_sz = 0; 1433 } 1434 1435 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1436 "rsmresource_destroy done\n")); 1437 1438 rw_exit(&rsm_resource.rsmrc_lock); 1439 } 1440 1441 1442 /* ******************** Generic Key Hash Table Management ********* */ 1443 static rsmresource_t * 1444 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key, 1445 rsm_resource_state_t state) 1446 { 1447 rsmresource_t *p; 1448 uint_t hashval; 1449 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1450 1451 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n")); 1452 1453 hashval = rsmhash(key); 1454 1455 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n", 1456 key, hashval)); 1457 1458 rw_enter(&rhash->rsmhash_rw, RW_READER); 1459 1460 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1461 1462 for (; p; p = p->rsmrc_next) { 1463 if (p->rsmrc_key == key) { 1464 /* acquire resource lock */ 1465 RSMRC_LOCK(p); 1466 break; 1467 } 1468 } 1469 1470 rw_exit(&rhash->rsmhash_rw); 1471 1472 if (p != NULL && p->rsmrc_state != state) { 1473 /* state changed, release lock and return null */ 1474 RSMRC_UNLOCK(p); 1475 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1476 "rsmhash_lookup done: state changed\n")); 1477 return (NULL); 1478 } 1479 1480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n")); 1481 1482 return (p); 1483 } 1484 1485 static void 1486 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm) 1487 { 1488 rsmresource_t *p, **back; 1489 uint_t hashval; 1490 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1491 1492 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n")); 1493 1494 hashval = rsmhash(rcelm->rsmrc_key); 1495 1496 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n", 1497 rcelm->rsmrc_key, hashval)); 1498 1499 /* 1500 * It's ok not to find the segment. 
1501 */ 1502 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1503 1504 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1505 1506 for (; (p = *back) != NULL; back = &p->rsmrc_next) { 1507 if (p == rcelm) { 1508 *back = rcelm->rsmrc_next; 1509 break; 1510 } 1511 } 1512 1513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n")); 1514 1515 rw_exit(&rhash->rsmhash_rw); 1516 } 1517 1518 static int 1519 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key, 1520 int dup_check, rsm_resource_state_t state) 1521 { 1522 rsmresource_t *p = NULL, **bktp; 1523 uint_t hashval; 1524 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1525 1526 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n")); 1527 1528 /* lock table */ 1529 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1530 1531 /* 1532 * If the current resource state is other than the state passed in 1533 * then the resource is (probably) already on the list. eg. for an 1534 * import segment if the state is not RSM_STATE_NEW then it's on the 1535 * list already. 1536 */ 1537 RSMRC_LOCK(new); 1538 if (new->rsmrc_state != state) { 1539 RSMRC_UNLOCK(new); 1540 rw_exit(&rhash->rsmhash_rw); 1541 return (RSMERR_BAD_SEG_HNDL); 1542 } 1543 1544 hashval = rsmhash(key); 1545 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval)); 1546 1547 if (dup_check) { 1548 /* 1549 * Used for checking export segments; don't want to have 1550 * the same key used for multiple segments. 1551 */ 1552 1553 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1554 1555 for (; p; p = p->rsmrc_next) { 1556 if (p->rsmrc_key == key) { 1557 RSMRC_UNLOCK(new); 1558 break; 1559 } 1560 } 1561 } 1562 1563 if (p == NULL) { 1564 /* Key doesn't exist, add it */ 1565 1566 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1567 1568 new->rsmrc_key = key; 1569 new->rsmrc_next = *bktp; 1570 *bktp = new; 1571 } 1572 1573 rw_exit(&rhash->rsmhash_rw); 1574 1575 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n")); 1576 1577 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE); 1578 } 1579 1580 /* 1581 * XOR each byte of the key. 
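 * As a worked illustration: key 0x12345678 folds to
 * 0x12345678 ^ 0x00123456 ^ 0x00001234 ^ 0x00000012 = 0x12267008,
 * which with the default 128-bucket table selects bucket
 * 0x12267008 % 128 = 8.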
1582 */ 1583 static uint_t 1584 rsmhash(rsm_memseg_id_t key) 1585 { 1586 uint_t hash = key; 1587 1588 hash ^= (key >> 8); 1589 hash ^= (key >> 16); 1590 hash ^= (key >> 24); 1591 1592 return (hash % rsm_hash_size); 1593 1594 } 1595 1596 /* 1597 * generic function to get a specific bucket 1598 */ 1599 static void * 1600 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval) 1601 { 1602 1603 if (rhash->bucket == NULL) 1604 return (NULL); 1605 else 1606 return ((void *)rhash->bucket[hashval]); 1607 } 1608 1609 /* 1610 * generic function to get a specific bucket's address 1611 */ 1612 static void ** 1613 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval) 1614 { 1615 if (rhash->bucket == NULL) 1616 return (NULL); 1617 else 1618 return ((void **)&(rhash->bucket[hashval])); 1619 } 1620 1621 /* 1622 * generic function to alloc a hash table 1623 */ 1624 static void 1625 rsmhash_alloc(rsmhash_table_t *rhash, int size) 1626 { 1627 rhash->bucket = (rsmresource_t **) 1628 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP); 1629 } 1630 1631 /* 1632 * generic function to free a hash table 1633 */ 1634 static void 1635 rsmhash_free(rsmhash_table_t *rhash, int size) 1636 { 1637 1638 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t)); 1639 rhash->bucket = NULL; 1640 1641 } 1642 /* *********************** Exported Segment Key Management ************ */ 1643 1644 #define rsmexport_add(new, key) \ 1645 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \ 1646 RSM_STATE_BIND) 1647 1648 #define rsmexport_rm(arg) \ 1649 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)) 1650 1651 #define rsmexport_lookup(key) \ 1652 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT) 1653 1654 /* ************************** Import Segment List Management ********** */ 1655 1656 /* 1657 * Add segment to import list. This will be useful for paging and loopback 1658 * segment unloading. 1659 */ 1660 #define rsmimport_add(arg, key) \ 1661 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \ 1662 RSM_STATE_NEW) 1663 1664 #define rsmimport_rm(arg) \ 1665 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)) 1666 1667 /* 1668 * #define rsmimport_lookup(key) \ 1669 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT) 1670 */ 1671 1672 /* 1673 * increase the ref count and make the import segment point to the 1674 * shared data structure. Return a pointer to the share data struct 1675 * and the shared data struct is locked upon return 1676 */ 1677 static rsm_import_share_t * 1678 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter, 1679 rsmseg_t *segp) 1680 { 1681 uint_t hash; 1682 rsmresource_t *p; 1683 rsm_import_share_t *shdatap; 1684 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1685 1686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n")); 1687 1688 hash = rsmhash(key); 1689 /* lock table */ 1690 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER); 1691 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n", 1692 key, hash)); 1693 1694 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash); 1695 1696 for (; p; p = p->rsmrc_next) { 1697 /* 1698 * Look for an entry that is importing the same exporter 1699 * with the share data structure allocated. 
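 * An entry matches only if it refers to the same remote export, i.e.
 * the same segment id, exporting node and adapter; all local importers
 * of that one export then share a single rsm_import_share_t, with
 * rsmsi_refcnt counting them.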
1700 */ 1701 if ((p->rsmrc_key == key) && 1702 (p->rsmrc_node == node) && 1703 (p->rsmrc_adapter == adapter) && 1704 (((rsmseg_t *)p)->s_share != NULL)) { 1705 shdatap = ((rsmseg_t *)p)->s_share; 1706 break; 1707 } 1708 } 1709 1710 if (p == NULL) { 1711 /* we are the first importer, create the shared data struct */ 1712 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP); 1713 shdatap->rsmsi_state = RSMSI_STATE_NEW; 1714 shdatap->rsmsi_segid = key; 1715 shdatap->rsmsi_node = node; 1716 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL); 1717 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0); 1718 } 1719 1720 rsmseglock_acquire(segp); 1721 1722 /* we grab the shared lock before returning from this function */ 1723 mutex_enter(&shdatap->rsmsi_lock); 1724 1725 shdatap->rsmsi_refcnt++; 1726 segp->s_share = shdatap; 1727 1728 rsmseglock_release(segp); 1729 1730 rw_exit(&rsm_import_segs.rsmhash_rw); 1731 1732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n")); 1733 1734 return (shdatap); 1735 } 1736 1737 /* 1738 * the shared data structure should be locked before calling 1739 * rsmsharecv_signal(). 1740 * Change the state and signal any waiting segments. 1741 */ 1742 void 1743 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate) 1744 { 1745 ASSERT(rsmsharelock_held(seg)); 1746 1747 if (seg->s_share->rsmsi_state == oldstate) { 1748 seg->s_share->rsmsi_state = newstate; 1749 cv_broadcast(&seg->s_share->rsmsi_cv); 1750 } 1751 } 1752 1753 /* 1754 * Add to the hash table 1755 */ 1756 static void 1757 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr, 1758 void *cookie) 1759 { 1760 1761 importing_token_t *head; 1762 importing_token_t *new_token; 1763 int index; 1764 1765 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1766 1767 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n")); 1768 1769 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP); 1770 new_token->importing_node = node; 1771 new_token->key = key; 1772 new_token->import_segment_cookie = cookie; 1773 new_token->importing_adapter_hwaddr = hwaddr; 1774 1775 index = rsmhash(key); 1776 1777 mutex_enter(&importer_list.lock); 1778 1779 head = importer_list.bucket[index]; 1780 importer_list.bucket[index] = new_token; 1781 new_token->next = head; 1782 mutex_exit(&importer_list.lock); 1783 1784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n")); 1785 } 1786 1787 static void 1788 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie) 1789 { 1790 1791 importing_token_t *prev, *token = NULL; 1792 int index; 1793 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1794 1795 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n")); 1796 1797 index = rsmhash(key); 1798 1799 mutex_enter(&importer_list.lock); 1800 1801 token = importer_list.bucket[index]; 1802 1803 prev = token; 1804 while (token != NULL) { 1805 if (token->importing_node == node && 1806 token->import_segment_cookie == cookie) { 1807 if (prev == token) 1808 importer_list.bucket[index] = token->next; 1809 else 1810 prev->next = token->next; 1811 kmem_free((void *)token, sizeof (*token)); 1812 break; 1813 } else { 1814 prev = token; 1815 token = token->next; 1816 } 1817 } 1818 1819 mutex_exit(&importer_list.lock); 1820 1821 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n")); 1822 1823 1824 } 1825 1826 /* **************************Segment Structure Management ************* */ 1827 1828 /* 1829 * Free segment structure 1830 */ 1831 
static void 1832 rsmseg_free(rsmseg_t *seg) 1833 { 1834 1835 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1836 1837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n")); 1838 1839 /* need to take seglock here to avoid race with rsmmap_unmap() */ 1840 rsmseglock_acquire(seg); 1841 if (seg->s_ckl != NULL) { 1842 /* Segment is still busy */ 1843 seg->s_state = RSM_STATE_END; 1844 rsmseglock_release(seg); 1845 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1846 "rsmseg_free done\n")); 1847 return; 1848 } 1849 1850 rsmseglock_release(seg); 1851 1852 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW); 1853 1854 /* 1855 * If it's an importer decrement the refcount 1856 * and if its down to zero free the shared data structure. 1857 * This is where failures during rsm_connect() are unrefcounted 1858 */ 1859 if (seg->s_share != NULL) { 1860 1861 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT); 1862 1863 rsmsharelock_acquire(seg); 1864 1865 ASSERT(seg->s_share->rsmsi_refcnt > 0); 1866 1867 seg->s_share->rsmsi_refcnt--; 1868 1869 if (seg->s_share->rsmsi_refcnt == 0) { 1870 rsmsharelock_release(seg); 1871 mutex_destroy(&seg->s_share->rsmsi_lock); 1872 cv_destroy(&seg->s_share->rsmsi_cv); 1873 kmem_free((void *)(seg->s_share), 1874 sizeof (rsm_import_share_t)); 1875 } else { 1876 rsmsharelock_release(seg); 1877 } 1878 /* 1879 * The following needs to be done after any 1880 * rsmsharelock calls which use seg->s_share. 1881 */ 1882 seg->s_share = NULL; 1883 } 1884 1885 cv_destroy(&seg->s_cv); 1886 mutex_destroy(&seg->s_lock); 1887 rsmacl_free(seg->s_acl, seg->s_acl_len); 1888 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len); 1889 if (seg->s_adapter) 1890 rsmka_release_adapter(seg->s_adapter); 1891 1892 kmem_free((void *)seg, sizeof (*seg)); 1893 1894 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n")); 1895 1896 } 1897 1898 1899 static rsmseg_t * 1900 rsmseg_alloc(minor_t num, struct cred *cred) 1901 { 1902 rsmseg_t *new; 1903 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1904 1905 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n")); 1906 /* 1907 * allocate memory for new segment. This should be a segkmem cache. 1908 */ 1909 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); 1910 1911 new->s_state = RSM_STATE_NEW; 1912 new->s_minor = num; 1913 new->s_acl_len = 0; 1914 new->s_cookie = NULL; 1915 new->s_adapter = NULL; 1916 1917 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask; 1918 /* we don't have a key yet, will set at export/connect */ 1919 new->s_uid = crgetuid(cred); 1920 new->s_gid = crgetgid(cred); 1921 1922 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL); 1923 cv_init(&new->s_cv, NULL, CV_DRIVER, 0); 1924 1925 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n")); 1926 1927 return (new); 1928 } 1929 1930 /* ******************************** Driver Open/Close/Poll *************** */ 1931 1932 /*ARGSUSED1*/ 1933 static int 1934 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred) 1935 { 1936 minor_t rnum; 1937 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 1938 1939 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n")); 1940 /* 1941 * Char only 1942 */ 1943 if (otyp != OTYP_CHR) { 1944 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n")); 1945 return (EINVAL); 1946 } 1947 1948 /* 1949 * Only zero can be opened, clones are used for resources. 
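 * For an FWRITE open, the code below allocates a fresh minor number
 * with rsmresource_alloc() and rewrites *devp via makedevice(), so each
 * RSMAPI segment handle gets its own clone minor; that minor is bound
 * to an actual export/import resource by the first ioctl issued on the
 * descriptor.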
1950 */ 1951 if (getminor(*devp) != RSM_DRIVER_MINOR) { 1952 DBG_PRINTF((category, RSM_ERR, 1953 "rsm_open: bad minor %d\n", getminor(*devp))); 1954 return (ENODEV); 1955 } 1956 1957 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) { 1958 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n")); 1959 return (EPERM); 1960 } 1961 1962 if (!(flag & FWRITE)) { 1963 /* 1964 * The library function _rsm_librsm_init calls open for 1965 * /dev/rsm with flag set to O_RDONLY. We want a valid 1966 * file descriptor to be returned for minor device zero. 1967 */ 1968 1969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1970 "rsm_open RDONLY done\n")); 1971 return (DDI_SUCCESS); 1972 } 1973 1974 /* 1975 * - allocate new minor number and segment. 1976 * - add segment to list of all segments. 1977 * - set minordev data to segment 1978 * - update devp argument to new device 1979 * - update s_cred to cred; make sure you do crhold(cred); 1980 */ 1981 1982 /* allocate a new resource number */ 1983 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) { 1984 /* 1985 * We will bind this minor to a specific resource in first 1986 * ioctl 1987 */ 1988 *devp = makedevice(getmajor(*devp), rnum); 1989 } else { 1990 return (EAGAIN); 1991 } 1992 1993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n")); 1994 return (DDI_SUCCESS); 1995 } 1996 1997 static void 1998 rsmseg_close(rsmseg_t *seg, int force_flag) 1999 { 2000 int e = RSM_SUCCESS; 2001 2002 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2003 2004 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n")); 2005 2006 rsmseglock_acquire(seg); 2007 if (!force_flag && (seg->s_hdr.rsmrc_type == 2008 RSM_RESOURCE_EXPORT_SEGMENT)) { 2009 /* 2010 * If we are processing rsm_close wait for force_destroy 2011 * processing to complete since force_destroy processing 2012 * needs to finish first before we can free the segment. 2013 * force_destroy is only for export segments 2014 */ 2015 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) { 2016 cv_wait(&seg->s_cv, &seg->s_lock); 2017 } 2018 } 2019 rsmseglock_release(seg); 2020 2021 /* It's ok to read the state without a lock */ 2022 switch (seg->s_state) { 2023 case RSM_STATE_EXPORT: 2024 case RSM_STATE_EXPORT_QUIESCING: 2025 case RSM_STATE_EXPORT_QUIESCED: 2026 e = rsm_unpublish(seg, 1); 2027 /* FALLTHRU */ 2028 case RSM_STATE_BIND_QUIESCED: 2029 /* FALLTHRU */ 2030 case RSM_STATE_BIND: 2031 e = rsm_unbind(seg); 2032 if (e != RSM_SUCCESS && force_flag == 1) 2033 return; 2034 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT); 2035 /* FALLTHRU */ 2036 case RSM_STATE_NEW_QUIESCED: 2037 rsmseglock_acquire(seg); 2038 seg->s_state = RSM_STATE_NEW; 2039 cv_broadcast(&seg->s_cv); 2040 rsmseglock_release(seg); 2041 break; 2042 case RSM_STATE_NEW: 2043 break; 2044 case RSM_STATE_ZOMBIE: 2045 /* 2046 * Segments in this state have been removed off the 2047 * exported segments list and have been unpublished 2048 * and unbind. These segments have been removed during 2049 * a callback to the rsm_export_force_destroy, which 2050 * is called for the purpose of unlocking these 2051 * exported memory segments when a process exits but 2052 * leaves the segments locked down since rsm_close is 2053 * is not called for the segments. This can happen 2054 * when a process calls fork or exec and then exits. 2055 * Once the segments are in the ZOMBIE state, all that 2056 * remains is to destroy them when rsm_close is called. 2057 * This is done here. 
Thus, for such segments the 2058 * the state is changed to new so that later in this 2059 * function rsmseg_free is called. 2060 */ 2061 rsmseglock_acquire(seg); 2062 seg->s_state = RSM_STATE_NEW; 2063 rsmseglock_release(seg); 2064 break; 2065 case RSM_STATE_MAP_QUIESCE: 2066 case RSM_STATE_ACTIVE: 2067 /* Disconnect will handle the unmap */ 2068 case RSM_STATE_CONN_QUIESCE: 2069 case RSM_STATE_CONNECT: 2070 case RSM_STATE_DISCONNECT: 2071 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 2072 (void) rsm_disconnect(seg); 2073 break; 2074 case RSM_STATE_MAPPING: 2075 /*FALLTHRU*/ 2076 case RSM_STATE_END: 2077 DBG_PRINTF((category, RSM_ERR, 2078 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2079 break; 2080 default: 2081 DBG_PRINTF((category, RSM_ERR, 2082 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2083 break; 2084 } 2085 2086 /* 2087 * check state. 2088 * - make sure you do crfree(s_cred); 2089 * release segment and minor number 2090 */ 2091 ASSERT(seg->s_state == RSM_STATE_NEW); 2092 2093 /* 2094 * The export_force_destroy callback is created to unlock 2095 * the exported segments of a process 2096 * when the process does a fork or exec and then exits calls this 2097 * function with the force flag set to 1 which indicates that the 2098 * segment state must be converted to ZOMBIE. This state means that the 2099 * segments still exist and have been unlocked and most importantly the 2100 * only operation allowed is to destroy them on an rsm_close. 2101 */ 2102 if (force_flag) { 2103 rsmseglock_acquire(seg); 2104 seg->s_state = RSM_STATE_ZOMBIE; 2105 rsmseglock_release(seg); 2106 } else { 2107 rsmseg_free(seg); 2108 } 2109 2110 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n")); 2111 } 2112 2113 static int 2114 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred) 2115 { 2116 minor_t rnum = getminor(dev); 2117 rsmresource_t *res; 2118 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2119 2120 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n")); 2121 2122 flag = flag; cred = cred; 2123 2124 if (otyp != OTYP_CHR) 2125 return (EINVAL); 2126 2127 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum)); 2128 2129 /* 2130 * At this point we are the last reference to the resource. 2131 * Free resource number from resource table. 2132 * It's ok to remove number before we free the segment. 2133 * We need to lock the resource to protect against remote calls. 2134 */ 2135 if (rnum == RSM_DRIVER_MINOR || 2136 (res = rsmresource_free(rnum)) == NULL) { 2137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2138 return (DDI_SUCCESS); 2139 } 2140 2141 switch (res->rsmrc_type) { 2142 case RSM_RESOURCE_EXPORT_SEGMENT: 2143 case RSM_RESOURCE_IMPORT_SEGMENT: 2144 rsmseg_close((rsmseg_t *)res, 0); 2145 break; 2146 case RSM_RESOURCE_BAR: 2147 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n")); 2148 break; 2149 default: 2150 break; 2151 } 2152 2153 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2154 2155 return (DDI_SUCCESS); 2156 } 2157 2158 /* 2159 * rsm_inc_pgcnt 2160 * 2161 * Description: increment rsm page counter. 2162 * 2163 * Parameters: pgcnt_t pnum; number of pages to be used 2164 * 2165 * Returns: RSM_SUCCESS if memory limit not exceeded 2166 * ENOSPC if memory limit exceeded. In this case, the 2167 * page counter remains unchanged. 
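 *		(in the current code this case is reported as
 *		RSMERR_INSUFFICIENT_MEM rather than as a raw ENOSPC)
 *
 * Callers pair this with rsm_dec_pgcnt() once the pages are released.
 * An illustrative sketch of the pairing, as done by rsm_bind_pages() and
 * rsm_unbind_pages() below:
 *
 *	pnum = btopr(len);
 *	if (rsm_inc_pgcnt(pnum) != RSM_SUCCESS)
 *		return (RSMERR_INSUFFICIENT_MEM);
 *	if (umem_lockmemory(vaddr, len, lockflags, &cookie,
 *	    callbackops, procp) != 0) {
 *		rsm_dec_pgcnt(pnum);	- undo the accounting on failure
 *		return (error);
 *	}
 *
 * and later, when the pages are unbound:
 *
 *	ddi_umem_unlock(cookie);
 *	rsm_dec_pgcnt(btopr(len));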
2168 * 2169 */ 2170 static int 2171 rsm_inc_pgcnt(pgcnt_t pnum) 2172 { 2173 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2174 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2175 return (RSM_SUCCESS); 2176 } 2177 2178 mutex_enter(&rsm_pgcnt_lock); 2179 2180 if (rsm_pgcnt + pnum > rsm_pgcnt_max) { 2181 /* ensure that limits have not been exceeded */ 2182 mutex_exit(&rsm_pgcnt_lock); 2183 return (RSMERR_INSUFFICIENT_MEM); 2184 } 2185 2186 rsm_pgcnt += pnum; 2187 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n", 2188 rsm_pgcnt)); 2189 mutex_exit(&rsm_pgcnt_lock); 2190 2191 return (RSM_SUCCESS); 2192 } 2193 2194 /* 2195 * rsm_dec_pgcnt 2196 * 2197 * Description: decrement rsm page counter. 2198 * 2199 * Parameters: pgcnt_t pnum; number of pages freed 2200 * 2201 */ 2202 static void 2203 rsm_dec_pgcnt(pgcnt_t pnum) 2204 { 2205 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2206 2207 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2208 return; 2209 } 2210 2211 mutex_enter(&rsm_pgcnt_lock); 2212 ASSERT(rsm_pgcnt >= pnum); 2213 rsm_pgcnt -= pnum; 2214 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n", 2215 rsm_pgcnt)); 2216 mutex_exit(&rsm_pgcnt_lock); 2217 } 2218 2219 static struct umem_callback_ops rsm_as_ops = { 2220 UMEM_CALLBACK_VERSION, /* version number */ 2221 rsm_export_force_destroy, 2222 }; 2223 2224 static int 2225 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len, 2226 proc_t *procp) 2227 { 2228 int error = RSM_SUCCESS; 2229 ulong_t pnum; 2230 struct umem_callback_ops *callbackops = &rsm_as_ops; 2231 2232 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2233 2234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n")); 2235 2236 /* 2237 * Make sure vaddr and len are aligned on a page boundary 2238 */ 2239 if ((uintptr_t)vaddr & (PAGESIZE - 1)) { 2240 return (RSMERR_BAD_ADDR); 2241 } 2242 2243 if (len & (PAGESIZE - 1)) { 2244 return (RSMERR_BAD_LENGTH); 2245 } 2246 2247 /* 2248 * Find number of pages 2249 */ 2250 pnum = btopr(len); 2251 error = rsm_inc_pgcnt(pnum); 2252 if (error != RSM_SUCCESS) { 2253 DBG_PRINTF((category, RSM_ERR, 2254 "rsm_bind_pages:mem limit exceeded\n")); 2255 return (RSMERR_INSUFFICIENT_MEM); 2256 } 2257 2258 error = umem_lockmemory(vaddr, len, 2259 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM, 2260 cookie, 2261 callbackops, procp); 2262 2263 if (error) { 2264 rsm_dec_pgcnt(pnum); 2265 DBG_PRINTF((category, RSM_ERR, 2266 "rsm_bind_pages:ddi_umem_lock failed\n")); 2267 /* 2268 * ddi_umem_lock, in the case of failure, returns one of 2269 * the following three errors. These are translated into 2270 * the RSMERR namespace and returned. 
2271 */ 2272 if (error == EFAULT) 2273 return (RSMERR_BAD_ADDR); 2274 else if (error == EACCES) 2275 return (RSMERR_PERM_DENIED); 2276 else 2277 return (RSMERR_INSUFFICIENT_MEM); 2278 } 2279 2280 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); 2281 2282 return (error); 2283 2284 } 2285 2286 static int 2287 rsm_unbind_pages(rsmseg_t *seg) 2288 { 2289 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2290 2291 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); 2292 2293 ASSERT(rsmseglock_held(seg)); 2294 2295 if (seg->s_cookie != NULL) { 2296 /* unlock address range */ 2297 ddi_umem_unlock(seg->s_cookie); 2298 rsm_dec_pgcnt(btopr(seg->s_len)); 2299 seg->s_cookie = NULL; 2300 } 2301 2302 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); 2303 2304 return (RSM_SUCCESS); 2305 } 2306 2307 2308 static int 2309 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2310 { 2311 int e; 2312 adapter_t *adapter; 2313 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2314 2315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); 2316 2317 adapter = rsm_getadapter(msg, mode); 2318 if (adapter == NULL) { 2319 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2320 "rsm_bind done:no adapter\n")); 2321 return (RSMERR_CTLR_NOT_PRESENT); 2322 } 2323 2324 /* lock address range */ 2325 if (msg->vaddr == NULL) { 2326 rsmka_release_adapter(adapter); 2327 DBG_PRINTF((category, RSM_ERR, 2328 "rsm: rsm_bind done: invalid vaddr\n")); 2329 return (RSMERR_BAD_ADDR); 2330 } 2331 if (msg->len <= 0) { 2332 rsmka_release_adapter(adapter); 2333 DBG_PRINTF((category, RSM_ERR, 2334 "rsm_bind: invalid length\n")); 2335 return (RSMERR_BAD_LENGTH); 2336 } 2337 2338 /* Lock segment */ 2339 rsmseglock_acquire(seg); 2340 2341 while (seg->s_state == RSM_STATE_NEW_QUIESCED) { 2342 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2343 DBG_PRINTF((category, RSM_DEBUG, 2344 "rsm_bind done: cv_wait INTERRUPTED")); 2345 rsmka_release_adapter(adapter); 2346 rsmseglock_release(seg); 2347 return (RSMERR_INTERRUPTED); 2348 } 2349 } 2350 2351 ASSERT(seg->s_state == RSM_STATE_NEW); 2352 2353 ASSERT(seg->s_cookie == NULL); 2354 2355 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); 2356 if (e == RSM_SUCCESS) { 2357 seg->s_flags |= RSM_USER_MEMORY; 2358 if (msg->perm & RSM_ALLOW_REBIND) { 2359 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; 2360 } 2361 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { 2362 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; 2363 } 2364 seg->s_region.r_vaddr = msg->vaddr; 2365 /* 2366 * Set the s_pid value in the segment structure. This is used 2367 * to identify exported segments belonging to a particular 2368 * process so that when the process exits, these segments can 2369 * be unlocked forcefully even if rsm_close is not called on 2370 * process exit since there maybe other processes referencing 2371 * them (for example on a fork or exec). 2372 * The s_pid value is also used to authenticate the process 2373 * doing a publish or unpublish on the export segment. Only 2374 * the creator of the export segment has a right to do a 2375 * publish or unpublish and unbind on the segment. 
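 *
 * The check used on those paths (see rsm_publish, rsm_unpublish and
 * rsm_rebind) is simply a comparison against the stored pid:
 *
 *	if (seg->s_pid != ddi_get_pid() && ddi_get_pid() != 0)
 *		return (RSMERR_NOT_CREATOR);
 *
 * where a current pid of 0, i.e. kernel context, is always allowed.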
2376 */ 2377 seg->s_pid = ddi_get_pid(); 2378 seg->s_len = msg->len; 2379 seg->s_state = RSM_STATE_BIND; 2380 seg->s_adapter = adapter; 2381 seg->s_proc = curproc; 2382 } else { 2383 rsmka_release_adapter(adapter); 2384 DBG_PRINTF((category, RSM_WARNING, 2385 "unable to lock down pages\n")); 2386 } 2387 2388 msg->rnum = seg->s_minor; 2389 /* Unlock segment */ 2390 rsmseglock_release(seg); 2391 2392 if (e == RSM_SUCCESS) { 2393 /* copyout the resource number */ 2394 #ifdef _MULTI_DATAMODEL 2395 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2396 rsm_ioctlmsg32_t msg32; 2397 2398 msg32.rnum = msg->rnum; 2399 if (ddi_copyout((caddr_t)&msg32.rnum, 2400 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum, 2401 sizeof (minor_t), mode)) { 2402 rsmka_release_adapter(adapter); 2403 e = RSMERR_BAD_ADDR; 2404 } 2405 } 2406 #endif 2407 if (ddi_copyout((caddr_t)&msg->rnum, 2408 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum, 2409 sizeof (minor_t), mode)) { 2410 rsmka_release_adapter(adapter); 2411 e = RSMERR_BAD_ADDR; 2412 } 2413 } 2414 2415 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n")); 2416 2417 return (e); 2418 } 2419 2420 static void 2421 rsm_remap_local_importers(rsm_node_id_t src_nodeid, 2422 rsm_memseg_id_t ex_segid, 2423 ddi_umem_cookie_t cookie) 2424 2425 { 2426 rsmresource_t *p = NULL; 2427 rsmhash_table_t *rhash = &rsm_import_segs; 2428 uint_t index; 2429 2430 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2431 "rsm_remap_local_importers enter\n")); 2432 2433 index = rsmhash(ex_segid); 2434 2435 rw_enter(&rhash->rsmhash_rw, RW_READER); 2436 2437 p = rsmhash_getbkt(rhash, index); 2438 2439 for (; p; p = p->rsmrc_next) { 2440 rsmseg_t *seg = (rsmseg_t *)p; 2441 rsmseglock_acquire(seg); 2442 /* 2443 * Change the s_cookie value of only the local importers 2444 * which have been mapped (in state RSM_STATE_ACTIVE). 2445 * Note that there is no need to change the s_cookie value 2446 * if the imported segment is in RSM_STATE_MAPPING since 2447 * eventually the s_cookie will be updated via the mapping 2448 * functionality. 
2449 */ 2450 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) && 2451 (seg->s_state == RSM_STATE_ACTIVE)) { 2452 seg->s_cookie = cookie; 2453 } 2454 rsmseglock_release(seg); 2455 } 2456 rw_exit(&rhash->rsmhash_rw); 2457 2458 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2459 "rsm_remap_local_importers done\n")); 2460 } 2461 2462 static int 2463 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg) 2464 { 2465 int e; 2466 adapter_t *adapter; 2467 ddi_umem_cookie_t cookie; 2468 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2469 2470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n")); 2471 2472 /* Check for permissions to rebind */ 2473 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) { 2474 return (RSMERR_REBIND_NOT_ALLOWED); 2475 } 2476 2477 if (seg->s_pid != ddi_get_pid() && 2478 ddi_get_pid() != 0) { 2479 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n")); 2480 return (RSMERR_NOT_CREATOR); 2481 } 2482 2483 /* 2484 * We will not be allowing partial rebind and hence length passed 2485 * in must be same as segment length 2486 */ 2487 if (msg->vaddr == NULL) { 2488 DBG_PRINTF((category, RSM_ERR, 2489 "rsm_rebind done: null msg->vaddr\n")); 2490 return (RSMERR_BAD_ADDR); 2491 } 2492 if (msg->len != seg->s_len) { 2493 DBG_PRINTF((category, RSM_ERR, 2494 "rsm_rebind: invalid length\n")); 2495 return (RSMERR_BAD_LENGTH); 2496 } 2497 2498 /* Lock segment */ 2499 rsmseglock_acquire(seg); 2500 2501 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) || 2502 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 2503 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) { 2504 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2505 rsmseglock_release(seg); 2506 DBG_PRINTF((category, RSM_DEBUG, 2507 "rsm_rebind done: cv_wait INTERRUPTED")); 2508 return (RSMERR_INTERRUPTED); 2509 } 2510 } 2511 2512 /* verify segment state */ 2513 if ((seg->s_state != RSM_STATE_BIND) && 2514 (seg->s_state != RSM_STATE_EXPORT)) { 2515 /* Unlock segment */ 2516 rsmseglock_release(seg); 2517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2518 "rsm_rebind done: invalid state\n")); 2519 return (RSMERR_BAD_SEG_HNDL); 2520 } 2521 2522 ASSERT(seg->s_cookie != NULL); 2523 2524 if (msg->vaddr == seg->s_region.r_vaddr) { 2525 rsmseglock_release(seg); 2526 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2527 return (RSM_SUCCESS); 2528 } 2529 2530 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc); 2531 if (e == RSM_SUCCESS) { 2532 struct buf *xbuf; 2533 dev_t sdev = 0; 2534 rsm_memory_local_t mem; 2535 2536 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE, 2537 sdev, 0, NULL, DDI_UMEM_SLEEP); 2538 ASSERT(xbuf != NULL); 2539 2540 mem.ms_type = RSM_MEM_BUF; 2541 mem.ms_bp = xbuf; 2542 2543 adapter = seg->s_adapter; 2544 e = adapter->rsmpi_ops->rsm_rebind( 2545 seg->s_handle.out, 0, &mem, 2546 RSM_RESOURCE_DONTWAIT, NULL); 2547 2548 if (e == RSM_SUCCESS) { 2549 /* 2550 * unbind the older pages, and unload local importers; 2551 * but don't disconnect importers 2552 */ 2553 (void) rsm_unbind_pages(seg); 2554 seg->s_cookie = cookie; 2555 seg->s_region.r_vaddr = msg->vaddr; 2556 rsm_remap_local_importers(my_nodeid, seg->s_segid, 2557 cookie); 2558 } else { 2559 /* 2560 * Unbind the pages associated with "cookie" by the 2561 * rsm_bind_pages calls prior to this. This is 2562 * similar to what is done in the rsm_unbind_pages 2563 * routine for the seg->s_cookie. 
2564 */ 2565 ddi_umem_unlock(cookie); 2566 rsm_dec_pgcnt(btopr(msg->len)); 2567 DBG_PRINTF((category, RSM_ERR, 2568 "rsm_rebind failed with %d\n", e)); 2569 } 2570 /* 2571 * At present there is no dependency on the existence of xbuf. 2572 * So we can free it here. If in the future this changes, it can 2573 * be freed sometime during the segment destroy. 2574 */ 2575 freerbuf(xbuf); 2576 } 2577 2578 /* Unlock segment */ 2579 rsmseglock_release(seg); 2580 2581 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2582 2583 return (e); 2584 } 2585 2586 static int 2587 rsm_unbind(rsmseg_t *seg) 2588 { 2589 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2590 2591 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n")); 2592 2593 rsmseglock_acquire(seg); 2594 2595 /* verify segment state */ 2596 if ((seg->s_state != RSM_STATE_BIND) && 2597 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2598 rsmseglock_release(seg); 2599 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2600 "rsm_unbind: invalid state\n")); 2601 return (RSMERR_BAD_SEG_HNDL); 2602 } 2603 2604 /* unlock current range */ 2605 (void) rsm_unbind_pages(seg); 2606 2607 if (seg->s_state == RSM_STATE_BIND) { 2608 seg->s_state = RSM_STATE_NEW; 2609 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 2610 seg->s_state = RSM_STATE_NEW_QUIESCED; 2611 } 2612 2613 rsmseglock_release(seg); 2614 2615 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n")); 2616 2617 return (RSM_SUCCESS); 2618 } 2619 2620 /* **************************** Exporter Access List Management ******* */ 2621 static void 2622 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len) 2623 { 2624 int acl_sz; 2625 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2626 2627 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n")); 2628 2629 /* acl could be NULL */ 2630 2631 if (acl != NULL && acl_len > 0) { 2632 acl_sz = acl_len * sizeof (rsmapi_access_entry_t); 2633 kmem_free((void *)acl, acl_sz); 2634 } 2635 2636 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n")); 2637 } 2638 2639 static void 2640 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len) 2641 { 2642 int acl_sz; 2643 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2644 2645 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n")); 2646 2647 if (acl != NULL && acl_len > 0) { 2648 acl_sz = acl_len * sizeof (rsm_access_entry_t); 2649 kmem_free((void *)acl, acl_sz); 2650 } 2651 2652 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); 2653 2654 } 2655 2656 static int 2657 rsmacl_build(rsm_ioctlmsg_t *msg, int mode, 2658 rsmapi_access_entry_t **list, int *len, int loopback) 2659 { 2660 rsmapi_access_entry_t *acl; 2661 int acl_len; 2662 int i; 2663 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2664 2665 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); 2666 2667 *len = 0; 2668 *list = NULL; 2669 2670 acl_len = msg->acl_len; 2671 if ((loopback && acl_len > 1) || (acl_len < 0) || 2672 (acl_len > MAX_NODES)) { 2673 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2674 "rsmacl_build done: acl invalid\n")); 2675 return (RSMERR_BAD_ACL); 2676 } 2677 2678 if (acl_len > 0 && acl_len <= MAX_NODES) { 2679 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); 2680 2681 acl = kmem_alloc(acl_size, KM_SLEEP); 2682 2683 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, 2684 acl_size, mode)) { 2685 kmem_free((void *) acl, acl_size); 2686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2687 "rsmacl_build done: BAD_ADDR\n")); 2688 return (RSMERR_BAD_ADDR); 
2689 } 2690 2691 /* 2692 * Verify access list 2693 */ 2694 for (i = 0; i < acl_len; i++) { 2695 if (acl[i].ae_node > MAX_NODES || 2696 (loopback && (acl[i].ae_node != my_nodeid)) || 2697 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2698 /* invalid entry */ 2699 kmem_free((void *) acl, acl_size); 2700 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2701 "rsmacl_build done: EINVAL\n")); 2702 return (RSMERR_BAD_ACL); 2703 } 2704 } 2705 2706 *len = acl_len; 2707 *list = acl; 2708 } 2709 2710 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2711 2712 return (DDI_SUCCESS); 2713 } 2714 2715 static int 2716 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2717 int acl_len, adapter_t *adapter) 2718 { 2719 rsm_access_entry_t *acl; 2720 rsm_addr_t hwaddr; 2721 int i; 2722 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2723 2724 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2725 2726 if (src != NULL) { 2727 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2728 acl = kmem_alloc(acl_size, KM_SLEEP); 2729 2730 /* 2731 * translate access list 2732 */ 2733 for (i = 0; i < acl_len; i++) { 2734 if (src[i].ae_node == my_nodeid) { 2735 acl[i].ae_addr = adapter->hwaddr; 2736 } else { 2737 hwaddr = get_remote_hwaddr(adapter, 2738 src[i].ae_node); 2739 if ((int64_t)hwaddr < 0) { 2740 /* invalid hwaddr */ 2741 kmem_free((void *) acl, acl_size); 2742 DBG_PRINTF((category, 2743 RSM_DEBUG_VERBOSE, 2744 "rsmpiacl_create done:" 2745 "EINVAL hwaddr\n")); 2746 return (RSMERR_INTERNAL_ERROR); 2747 } 2748 acl[i].ae_addr = hwaddr; 2749 } 2750 /* rsmpi understands only RSM_PERM_XXXX */ 2751 acl[i].ae_permission = 2752 src[i].ae_permission & RSM_PERM_RDWR; 2753 } 2754 *dest = acl; 2755 } else { 2756 *dest = NULL; 2757 } 2758 2759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2760 2761 return (RSM_SUCCESS); 2762 } 2763 2764 static int 2765 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2766 rsmipc_reply_t *reply) 2767 { 2768 2769 int i; 2770 rsmseg_t *seg; 2771 rsm_memseg_id_t key = req->rsmipc_key; 2772 rsm_permission_t perm = req->rsmipc_perm; 2773 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2774 2775 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2776 "rsmsegacl_validate enter\n")); 2777 2778 /* 2779 * Find segment and grab its lock. The reason why we grab the segment 2780 * lock in side the search is to avoid the race when the segment is 2781 * being deleted and we already have a pointer to it. 2782 */ 2783 seg = rsmexport_lookup(key); 2784 if (!seg) { 2785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2786 "rsmsegacl_validate done: %u ENXIO\n", key)); 2787 return (RSMERR_SEG_NOT_PUBLISHED); 2788 } 2789 2790 ASSERT(rsmseglock_held(seg)); 2791 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2792 2793 /* 2794 * We implement a 2-level protection scheme. 2795 * First, we check if local/remote host has access rights. 2796 * Second, we check if the user has access rights. 
2797 * 2798 * This routine only validates the rnode access_list 2799 */ 2800 if (seg->s_acl_len > 0) { 2801 /* 2802 * Check host access list 2803 */ 2804 ASSERT(seg->s_acl != NULL); 2805 for (i = 0; i < seg->s_acl_len; i++) { 2806 if (seg->s_acl[i].ae_node == rnode) { 2807 perm &= seg->s_acl[i].ae_permission; 2808 goto found; 2809 } 2810 } 2811 /* rnode is not found in the list */ 2812 rsmseglock_release(seg); 2813 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2814 "rsmsegacl_validate done: EPERM\n")); 2815 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2816 } else { 2817 /* use default owner creation umask */ 2818 perm &= seg->s_mode; 2819 } 2820 2821 found: 2822 /* update perm for this node */ 2823 reply->rsmipc_mode = perm; 2824 reply->rsmipc_uid = seg->s_uid; 2825 reply->rsmipc_gid = seg->s_gid; 2826 reply->rsmipc_segid = seg->s_segid; 2827 reply->rsmipc_seglen = seg->s_len; 2828 2829 /* 2830 * Perm of requesting node is valid; source will validate user 2831 */ 2832 rsmseglock_release(seg); 2833 2834 /* 2835 * Add the importer to the list right away, if connect fails 2836 * the importer will ask the exporter to remove it. 2837 */ 2838 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2839 req->rsmipc_segment_cookie); 2840 2841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2842 2843 return (RSM_SUCCESS); 2844 } 2845 2846 2847 /* ************************** Exporter Calls ************************* */ 2848 2849 static int 2850 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2851 { 2852 int e; 2853 int acl_len; 2854 rsmapi_access_entry_t *acl; 2855 rsm_access_entry_t *rsmpi_acl; 2856 rsm_memory_local_t mem; 2857 struct buf *xbuf; 2858 dev_t sdev = 0; 2859 adapter_t *adapter; 2860 rsm_memseg_id_t segment_id = 0; 2861 int loopback_flag = 0; 2862 int create_flags = 0; 2863 rsm_resource_callback_t callback_flag; 2864 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2865 2866 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2867 2868 if (seg->s_adapter == &loopback_adapter) 2869 loopback_flag = 1; 2870 2871 if (seg->s_pid != ddi_get_pid() && 2872 ddi_get_pid() != 0) { 2873 DBG_PRINTF((category, RSM_ERR, 2874 "rsm_publish: Not creator\n")); 2875 return (RSMERR_NOT_CREATOR); 2876 } 2877 2878 /* 2879 * Get per node access list 2880 */ 2881 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2882 if (e != DDI_SUCCESS) { 2883 DBG_PRINTF((category, RSM_ERR, 2884 "rsm_publish done: rsmacl_build failed\n")); 2885 return (e); 2886 } 2887 2888 /* 2889 * The application provided msg->key is used for resolving a 2890 * segment id according to the following: 2891 * key = 0 Kernel Agent selects the segment id 2892 * key <= RSM_DLPI_ID_END Reserved for system usage except 2893 * RSMLIB range 2894 * key < RSM_USER_APP_ID_BASE segment id = key 2895 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2896 * 2897 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2898 * overflows to zero after 0x80000000 allocations. 2899 * An algorithm is needed which allows reinitialization and provides 2900 * for reallocation after overflow. For now, ENOMEM is returned 2901 * once the overflow condition has occurred. 
2902 */ 2903 if (msg->key == 0) { 2904 mutex_enter(&rsm_lock); 2905 segment_id = rsm_nextavail_segmentid; 2906 if (segment_id != 0) { 2907 rsm_nextavail_segmentid++; 2908 mutex_exit(&rsm_lock); 2909 } else { 2910 mutex_exit(&rsm_lock); 2911 DBG_PRINTF((category, RSM_ERR, 2912 "rsm_publish done: no more keys avlbl\n")); 2913 return (RSMERR_INSUFFICIENT_RESOURCES); 2914 } 2915 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END) 2916 /* range reserved for internal use by base/ndi libraries */ 2917 segment_id = msg->key; 2918 else if (msg->key <= RSM_DLPI_ID_END) 2919 return (RSMERR_RESERVED_SEGID); 2920 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1) 2921 segment_id = msg->key; 2922 else { 2923 DBG_PRINTF((category, RSM_ERR, 2924 "rsm_publish done: invalid key %u\n", msg->key)); 2925 return (RSMERR_RESERVED_SEGID); 2926 } 2927 2928 /* Add key to exportlist; The segment lock is held on success */ 2929 e = rsmexport_add(seg, segment_id); 2930 if (e) { 2931 rsmacl_free(acl, acl_len); 2932 DBG_PRINTF((category, RSM_ERR, 2933 "rsm_publish done: export_add failed: %d\n", e)); 2934 return (e); 2935 } 2936 2937 seg->s_segid = segment_id; 2938 2939 if ((seg->s_state != RSM_STATE_BIND) && 2940 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2941 /* state changed since then, free acl and return */ 2942 rsmseglock_release(seg); 2943 rsmexport_rm(seg); 2944 rsmacl_free(acl, acl_len); 2945 DBG_PRINTF((category, RSM_ERR, 2946 "rsm_publish done: segment in wrong state: %d\n", 2947 seg->s_state)); 2948 return (RSMERR_BAD_SEG_HNDL); 2949 } 2950 2951 /* 2952 * If this is for a local memory handle and permissions are zero, 2953 * then the surrogate segment is very large and we want to skip 2954 * allocation of DVMA space. 2955 * 2956 * Careful! If the user didn't use an ACL list, acl will be a NULL 2957 * pointer. Check that before dereferencing it. 2958 */ 2959 if (acl != (rsmapi_access_entry_t *)NULL) { 2960 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 2961 goto skipdriver; 2962 } 2963 2964 /* create segment */ 2965 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE, 2966 sdev, 0, NULL, DDI_UMEM_SLEEP); 2967 ASSERT(xbuf != NULL); 2968 2969 mem.ms_type = RSM_MEM_BUF; 2970 mem.ms_bp = xbuf; 2971 2972 /* This call includes a bind operations */ 2973 2974 adapter = seg->s_adapter; 2975 /* 2976 * create a acl list with hwaddr for RSMPI publish 2977 */ 2978 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter); 2979 2980 if (e != RSM_SUCCESS) { 2981 rsmseglock_release(seg); 2982 rsmexport_rm(seg); 2983 rsmacl_free(acl, acl_len); 2984 freerbuf(xbuf); 2985 DBG_PRINTF((category, RSM_ERR, 2986 "rsm_publish done: rsmpiacl_create failed: %d\n", e)); 2987 return (e); 2988 } 2989 2990 if (seg->s_state == RSM_STATE_BIND) { 2991 /* create segment */ 2992 2993 /* This call includes a bind operations */ 2994 2995 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 2996 create_flags = RSM_ALLOW_UNBIND_REBIND; 2997 } 2998 2999 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 3000 callback_flag = RSM_RESOURCE_DONTWAIT; 3001 } else { 3002 callback_flag = RSM_RESOURCE_SLEEP; 3003 } 3004 3005 e = adapter->rsmpi_ops->rsm_seg_create( 3006 adapter->rsmpi_handle, 3007 &seg->s_handle.out, seg->s_len, 3008 create_flags, &mem, 3009 callback_flag, NULL); 3010 /* 3011 * At present there is no dependency on the existence of xbuf. 3012 * So we can free it here. If in the future this changes, it can 3013 * be freed sometime during the segment destroy. 
3014 */ 3015 freerbuf(xbuf); 3016 3017 if (e != RSM_SUCCESS) { 3018 rsmseglock_release(seg); 3019 rsmexport_rm(seg); 3020 rsmacl_free(acl, acl_len); 3021 rsmpiacl_free(rsmpi_acl, acl_len); 3022 DBG_PRINTF((category, RSM_ERR, 3023 "rsm_publish done: export_create failed: %d\n", e)); 3024 /* 3025 * The following assertion ensures that the two errors 3026 * related to the length and its alignment do not occur 3027 * since they have been checked during export_create 3028 */ 3029 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT && 3030 e != RSMERR_BAD_LENGTH); 3031 if (e == RSMERR_NOT_MEM) 3032 e = RSMERR_INSUFFICIENT_MEM; 3033 3034 return (e); 3035 } 3036 /* export segment, this should create an IMMU mapping */ 3037 e = adapter->rsmpi_ops->rsm_publish( 3038 seg->s_handle.out, 3039 rsmpi_acl, acl_len, 3040 seg->s_segid, 3041 RSM_RESOURCE_DONTWAIT, NULL); 3042 3043 if (e != RSM_SUCCESS) { 3044 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3045 rsmseglock_release(seg); 3046 rsmexport_rm(seg); 3047 rsmacl_free(acl, acl_len); 3048 rsmpiacl_free(rsmpi_acl, acl_len); 3049 DBG_PRINTF((category, RSM_ERR, 3050 "rsm_publish done: export_publish failed: %d\n", 3051 e)); 3052 return (e); 3053 } 3054 } 3055 3056 seg->s_acl_in = rsmpi_acl; 3057 3058 skipdriver: 3059 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */ 3060 seg->s_acl_len = acl_len; 3061 seg->s_acl = acl; 3062 3063 if (seg->s_state == RSM_STATE_BIND) { 3064 seg->s_state = RSM_STATE_EXPORT; 3065 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 3066 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 3067 cv_broadcast(&seg->s_cv); 3068 } 3069 3070 rsmseglock_release(seg); 3071 3072 /* 3073 * If the segment id was solicited, then return it in 3074 * the original incoming message. 3075 */ 3076 if (msg->key == 0) { 3077 msg->key = segment_id; 3078 #ifdef _MULTI_DATAMODEL 3079 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 3080 rsm_ioctlmsg32_t msg32; 3081 3082 msg32.key = msg->key; 3083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3084 "rsm_publish done\n")); 3085 return (ddi_copyout((caddr_t)&msg32, 3086 (caddr_t)dataptr, sizeof (msg32), mode)); 3087 } 3088 #endif 3089 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3090 "rsm_publish done\n")); 3091 return (ddi_copyout((caddr_t)msg, 3092 (caddr_t)dataptr, sizeof (*msg), mode)); 3093 } 3094 3095 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n")); 3096 return (DDI_SUCCESS); 3097 } 3098 3099 /* 3100 * This function modifies the access control list of an already published 3101 * segment. There is no effect on import segments which are already 3102 * connected. 
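 *
 * The sequence implemented below is, roughly:
 *	1. build and translate the new ACL (rsmacl_build, rsmpiacl_create),
 *	2. swap it into the segment under the segment lock,
 *	3. republish through the RSMPI layer, restoring the old ACL on
 *	   failure,
 *	4. notify the importing nodes of their new permissions via
 *	   rsm_send_republish() and free the old ACLs.
 * Nodes dropped from the ACL get a permission of 0; a NULL ACL means any
 * importer may connect with the owner's creation mode as the default.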
3103 */ 3104 static int 3105 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode) 3106 { 3107 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl; 3108 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl; 3109 int new_acl_len, old_acl_len, tmp_acl_len; 3110 int e, i; 3111 adapter_t *adapter; 3112 int loopback_flag = 0; 3113 rsm_memseg_id_t key; 3114 rsm_permission_t permission; 3115 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3116 3117 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n")); 3118 3119 if ((seg->s_state != RSM_STATE_EXPORT) && 3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) && 3121 (seg->s_state != RSM_STATE_EXPORT_QUIESCING)) 3122 return (RSMERR_SEG_NOT_PUBLISHED); 3123 3124 if (seg->s_pid != ddi_get_pid() && 3125 ddi_get_pid() != 0) { 3126 DBG_PRINTF((category, RSM_ERR, 3127 "rsm_republish: Not owner\n")); 3128 return (RSMERR_NOT_CREATOR); 3129 } 3130 3131 if (seg->s_adapter == &loopback_adapter) 3132 loopback_flag = 1; 3133 3134 /* 3135 * Build new list first 3136 */ 3137 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag); 3138 if (e) { 3139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3140 "rsm_republish done: rsmacl_build failed %d", e)); 3141 return (e); 3142 } 3143 3144 /* Lock segment */ 3145 rsmseglock_acquire(seg); 3146 /* 3147 * a republish is in progress - REPUBLISH message is being 3148 * sent to the importers so wait for it to complete OR 3149 * wait till DR completes 3150 */ 3151 while (((seg->s_state == RSM_STATE_EXPORT) && 3152 (seg->s_flags & RSM_REPUBLISH_WAIT)) || 3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) || 3154 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) { 3155 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3156 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3157 "rsm_republish done: cv_wait INTERRUPTED")); 3158 rsmseglock_release(seg); 3159 rsmacl_free(new_acl, new_acl_len); 3160 return (RSMERR_INTERRUPTED); 3161 } 3162 } 3163 3164 /* recheck if state is valid */ 3165 if (seg->s_state != RSM_STATE_EXPORT) { 3166 rsmseglock_release(seg); 3167 rsmacl_free(new_acl, new_acl_len); 3168 return (RSMERR_SEG_NOT_PUBLISHED); 3169 } 3170 3171 key = seg->s_key; 3172 old_acl = seg->s_acl; 3173 old_acl_len = seg->s_acl_len; 3174 3175 seg->s_acl = new_acl; 3176 seg->s_acl_len = new_acl_len; 3177 3178 /* 3179 * This call will only be meaningful if and when the interconnect 3180 * layer makes use of the access list 3181 */ 3182 adapter = seg->s_adapter; 3183 /* 3184 * create a acl list with hwaddr for RSMPI publish 3185 */ 3186 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter); 3187 3188 if (e != RSM_SUCCESS) { 3189 seg->s_acl = old_acl; 3190 seg->s_acl_len = old_acl_len; 3191 rsmseglock_release(seg); 3192 rsmacl_free(new_acl, new_acl_len); 3193 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3194 "rsm_republish done: rsmpiacl_create failed %d", e)); 3195 return (e); 3196 } 3197 rsmpi_old_acl = seg->s_acl_in; 3198 seg->s_acl_in = rsmpi_new_acl; 3199 3200 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out, 3201 seg->s_acl_in, seg->s_acl_len, 3202 RSM_RESOURCE_DONTWAIT, NULL); 3203 3204 if (e != RSM_SUCCESS) { 3205 seg->s_acl = old_acl; 3206 seg->s_acl_in = rsmpi_old_acl; 3207 seg->s_acl_len = old_acl_len; 3208 rsmseglock_release(seg); 3209 rsmacl_free(new_acl, new_acl_len); 3210 rsmpiacl_free(rsmpi_new_acl, new_acl_len); 3211 3212 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3213 "rsm_republish done: rsmpi republish failed %d\n", e)); 3214 return (e); 3215 } 3216 3217 /* create a tmp copy of the new acl */ 3218 
tmp_acl_len = new_acl_len; 3219 if (tmp_acl_len > 0) { 3220 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP); 3221 for (i = 0; i < tmp_acl_len; i++) { 3222 tmp_acl[i].ae_node = new_acl[i].ae_node; 3223 tmp_acl[i].ae_permission = new_acl[i].ae_permission; 3224 } 3225 /* 3226 * The default permission of a node which was in the old 3227 * ACL but not in the new ACL is 0 ie no access. 3228 */ 3229 permission = 0; 3230 } else { 3231 /* 3232 * NULL acl means all importers can connect and 3233 * default permission will be owner creation umask 3234 */ 3235 tmp_acl = NULL; 3236 permission = seg->s_mode; 3237 } 3238 3239 /* make other republishers to wait for republish to complete */ 3240 seg->s_flags |= RSM_REPUBLISH_WAIT; 3241 3242 rsmseglock_release(seg); 3243 3244 /* send the new perms to the importing nodes */ 3245 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission); 3246 3247 rsmseglock_acquire(seg); 3248 seg->s_flags &= ~RSM_REPUBLISH_WAIT; 3249 /* wake up any one waiting for republish to complete */ 3250 cv_broadcast(&seg->s_cv); 3251 rsmseglock_release(seg); 3252 3253 rsmacl_free(tmp_acl, tmp_acl_len); 3254 rsmacl_free(old_acl, old_acl_len); 3255 rsmpiacl_free(rsmpi_old_acl, old_acl_len); 3256 3257 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n")); 3258 return (DDI_SUCCESS); 3259 } 3260 3261 static int 3262 rsm_unpublish(rsmseg_t *seg, int mode) 3263 { 3264 rsmapi_access_entry_t *acl; 3265 rsm_access_entry_t *rsmpi_acl; 3266 int acl_len; 3267 int e; 3268 adapter_t *adapter; 3269 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3270 3271 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n")); 3272 3273 if (seg->s_pid != ddi_get_pid() && 3274 ddi_get_pid() != 0) { 3275 DBG_PRINTF((category, RSM_ERR, 3276 "rsm_unpublish: Not creator\n")); 3277 return (RSMERR_NOT_CREATOR); 3278 } 3279 3280 rsmseglock_acquire(seg); 3281 /* 3282 * wait for QUIESCING to complete here before rsmexport_rm 3283 * is called because the SUSPEND_COMPLETE mesg which changes 3284 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and 3285 * signals the cv_wait needs to find it in the hashtable. 
3286 */ 3287 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3288 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3289 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3290 rsmseglock_release(seg); 3291 DBG_PRINTF((category, RSM_ERR, 3292 "rsm_unpublish done: cv_wait INTR qscing" 3293 "getv/putv in progress")); 3294 return (RSMERR_INTERRUPTED); 3295 } 3296 } 3297 3298 /* verify segment state */ 3299 if ((seg->s_state != RSM_STATE_EXPORT) && 3300 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3301 rsmseglock_release(seg); 3302 DBG_PRINTF((category, RSM_ERR, 3303 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3304 return (RSMERR_SEG_NOT_PUBLISHED); 3305 } 3306 3307 rsmseglock_release(seg); 3308 3309 rsmexport_rm(seg); 3310 3311 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3312 3313 rsmseglock_acquire(seg); 3314 /* 3315 * wait for republish to complete 3316 */ 3317 while ((seg->s_state == RSM_STATE_EXPORT) && 3318 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3319 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3320 DBG_PRINTF((category, RSM_ERR, 3321 "rsm_unpublish done: cv_wait INTR repubing")); 3322 rsmseglock_release(seg); 3323 return (RSMERR_INTERRUPTED); 3324 } 3325 } 3326 3327 if ((seg->s_state != RSM_STATE_EXPORT) && 3328 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3329 DBG_PRINTF((category, RSM_ERR, 3330 "rsm_unpublish done: invalid state")); 3331 rsmseglock_release(seg); 3332 return (RSMERR_SEG_NOT_PUBLISHED); 3333 } 3334 3335 /* 3336 * check for putv/get surrogate segment which was not published 3337 * to the driver. 3338 * 3339 * Be certain to see if there is an ACL first! If this segment was 3340 * not published with an ACL, acl will be a null pointer. Check 3341 * that before dereferencing it. 3342 */ 3343 acl = seg->s_acl; 3344 if (acl != (rsmapi_access_entry_t *)NULL) { 3345 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3346 goto bypass; 3347 } 3348 3349 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3350 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3351 goto bypass; 3352 3353 adapter = seg->s_adapter; 3354 for (;;) { 3355 if (seg->s_state != RSM_STATE_EXPORT) { 3356 rsmseglock_release(seg); 3357 DBG_PRINTF((category, RSM_ERR, 3358 "rsm_unpublish done: bad state %x\n", 3359 seg->s_state)); 3360 return (RSMERR_SEG_NOT_PUBLISHED); 3361 } 3362 3363 /* unpublish from adapter */ 3364 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3365 3366 if (e == RSM_SUCCESS) { 3367 break; 3368 } 3369 3370 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3371 /* 3372 * wait for unpublish to succeed, it's busy. 
3373 */ 3374 seg->s_flags |= RSM_EXPORT_WAIT; 3375 3376 /* wait for a max of 1 ms - this is an empirical */ 3377 /* value that was found by some minimal testing */ 3378 /* can be fine tuned when we have better numbers */ 3379 /* A long term fix would be to send cv_signal */ 3380 /* from the intr callback routine */ 3381 /* currently nobody signals this wait */ 3382 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock, 3383 drv_usectohz(1000), TR_CLOCK_TICK); 3384 3385 DBG_PRINTF((category, RSM_ERR, 3386 "rsm_unpublish: SEG_IN_USE\n")); 3387 3388 seg->s_flags &= ~RSM_EXPORT_WAIT; 3389 } else { 3390 if (mode == 1) { 3391 DBG_PRINTF((category, RSM_ERR, 3392 "rsm:rsmpi unpublish err %x\n", e)); 3393 seg->s_state = RSM_STATE_BIND; 3394 } 3395 rsmseglock_release(seg); 3396 return (e); 3397 } 3398 } 3399 3400 /* Free segment */ 3401 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3402 3403 if (e != RSM_SUCCESS) { 3404 DBG_PRINTF((category, RSM_ERR, 3405 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3406 seg->s_key, e)); 3407 } 3408 3409 bypass: 3410 acl = seg->s_acl; 3411 rsmpi_acl = seg->s_acl_in; 3412 acl_len = seg->s_acl_len; 3413 3414 seg->s_acl = NULL; 3415 seg->s_acl_in = NULL; 3416 seg->s_acl_len = 0; 3417 3418 if (seg->s_state == RSM_STATE_EXPORT) { 3419 seg->s_state = RSM_STATE_BIND; 3420 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3421 seg->s_state = RSM_STATE_BIND_QUIESCED; 3422 cv_broadcast(&seg->s_cv); 3423 } 3424 3425 rsmseglock_release(seg); 3426 3427 rsmacl_free(acl, acl_len); 3428 rsmpiacl_free(rsmpi_acl, acl_len); 3429 3430 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3431 3432 return (DDI_SUCCESS); 3433 } 3434 3435 /* 3436 * Called from rsm_unpublish to force an unload and disconnection of all 3437 * importers of the unpublished segment. 3438 * 3439 * First build the list of segments requiring a force disconnect, then 3440 * send a request for each. 3441 */ 3442 static void 3443 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3444 rsm_node_id_t ex_nodeid) 3445 { 3446 rsmipc_request_t request; 3447 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3448 importing_token_t *force_disconnect_list = NULL; 3449 int index; 3450 3451 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3452 "rsm_send_importer_disconnects enter\n")); 3453 3454 index = rsmhash(ex_segid); 3455 3456 mutex_enter(&importer_list.lock); 3457 3458 prev_token = NULL; 3459 token = importer_list.bucket[index]; 3460 3461 while (token != NULL) { 3462 if (token->key == ex_segid) { 3463 /* 3464 * take it off the importer list and add it 3465 * to the force disconnect list. 3466 */ 3467 if (prev_token == NULL) 3468 importer_list.bucket[index] = token->next; 3469 else 3470 prev_token->next = token->next; 3471 tmp_token = token; 3472 token = token->next; 3473 if (force_disconnect_list == NULL) { 3474 force_disconnect_list = tmp_token; 3475 tmp_token->next = NULL; 3476 } else { 3477 tokp = force_disconnect_list; 3478 /* 3479 * make sure that the tmp_token's node 3480 * is not already on the force disconnect 3481 * list. 
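 *
 * For example (illustrative): if node 1 imported this segment twice
 * and node 2 imported it once, the bucket holds three matching tokens,
 * but the force disconnect list ends up with one token per node, so
 * each importing node is sent a single RSMIPC_MSG_DISCONNECT (or, for
 * the local node, gets a single rsm_force_unload() call); the receiving
 * node then unloads every matching import segment itself.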
3482 */ 3483 while (tokp != NULL) { 3484 if (tokp->importing_node == 3485 tmp_token->importing_node) { 3486 break; 3487 } 3488 tokp = tokp->next; 3489 } 3490 if (tokp == NULL) { 3491 tmp_token->next = 3492 force_disconnect_list; 3493 force_disconnect_list = tmp_token; 3494 } else { 3495 kmem_free((void *)tmp_token, 3496 sizeof (*token)); 3497 } 3498 } 3499 3500 } else { 3501 prev_token = token; 3502 token = token->next; 3503 } 3504 } 3505 mutex_exit(&importer_list.lock); 3506 3507 token = force_disconnect_list; 3508 while (token != NULL) { 3509 if (token->importing_node == my_nodeid) { 3510 rsm_force_unload(ex_nodeid, ex_segid, 3511 DISCONNECT); 3512 } else { 3513 request.rsmipc_hdr.rsmipc_type = 3514 RSMIPC_MSG_DISCONNECT; 3515 request.rsmipc_key = token->key; 3516 for (;;) { 3517 if (rsmipc_send(token->importing_node, 3518 &request, 3519 RSM_NO_REPLY) == RSM_SUCCESS) { 3520 break; 3521 } else { 3522 delay(drv_usectohz(10000)); 3523 } 3524 } 3525 } 3526 tmp_token = token; 3527 token = token->next; 3528 kmem_free((void *)tmp_token, sizeof (*token)); 3529 } 3530 3531 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3532 "rsm_send_importer_disconnects done\n")); 3533 } 3534 3535 /* 3536 * This function is used as a callback for unlocking the pages locked 3537 * down by a process which then does a fork or an exec. 3538 * It marks the export segments corresponding to umem cookie given by 3539 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3540 * destroyed later when an rsm_close occurs). 3541 */ 3542 static void 3543 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3544 { 3545 rsmresource_blk_t *blk; 3546 rsmresource_t *p; 3547 rsmseg_t *eseg = NULL; 3548 int i, j; 3549 int found = 0; 3550 3551 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3552 "rsm_export_force_destroy enter\n")); 3553 3554 /* 3555 * Walk the resource list and locate the export segment (either 3556 * in the BIND or the EXPORT state) which corresponds to the 3557 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 3558 * Change the state to ZOMBIE by calling rsmseg_close with the 3559 * force_flag argument (the second argument) set to 1. Also, 3560 * unpublish and unbind the segment, but don't free it. Free it 3561 * only on a rsm_close call for the segment. 3562 */ 3563 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3564 3565 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3566 blk = rsm_resource.rsmrc_root[i]; 3567 if (blk == NULL) { 3568 continue; 3569 } 3570 3571 for (j = 0; j < RSMRC_BLKSZ; j++) { 3572 p = blk->rsmrcblk_blks[j]; 3573 if ((p != NULL) && (p != RSMRC_RESERVED) && 3574 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3575 eseg = (rsmseg_t *)p; 3576 if (eseg->s_cookie != ck) 3577 continue; /* continue searching */ 3578 /* 3579 * Found the segment, set flag to indicate 3580 * force destroy processing is in progress 3581 */ 3582 rsmseglock_acquire(eseg); 3583 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3584 rsmseglock_release(eseg); 3585 found = 1; 3586 break; 3587 } 3588 } 3589 3590 if (found) 3591 break; 3592 } 3593 3594 rw_exit(&rsm_resource.rsmrc_lock); 3595 3596 if (found) { 3597 ASSERT(eseg != NULL); 3598 /* call rsmseg_close with force flag set to 1 */ 3599 rsmseg_close(eseg, 1); 3600 /* 3601 * force destroy processing done, clear flag and signal any 3602 * thread waiting in rsmseg_close. 
3603 */ 3604 rsmseglock_acquire(eseg); 3605 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3606 cv_broadcast(&eseg->s_cv); 3607 rsmseglock_release(eseg); 3608 } 3609 3610 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3611 "rsm_export_force_destroy done\n")); 3612 } 3613 3614 /* ******************************* Remote Calls *********************** */ 3615 static void 3616 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3617 { 3618 rsmipc_reply_t reply; 3619 DBG_DEFINE(category, 3620 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3621 3622 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3623 "rsm_intr_segconnect enter\n")); 3624 3625 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3626 3627 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3628 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3629 3630 (void) rsmipc_send(src, NULL, &reply); 3631 3632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3633 "rsm_intr_segconnect done\n")); 3634 } 3635 3636 3637 /* 3638 * When an exported segment is unpublished the exporter sends an ipc 3639 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3640 * calls this function. The import list is scanned; segments which match the 3641 * exported segment id are unloaded and disconnected. 3642 * 3643 * Will also be called from rsm_rebind with disconnect_flag FALSE. 3644 * 3645 */ 3646 static void 3647 rsm_force_unload(rsm_node_id_t src_nodeid, 3648 rsm_memseg_id_t ex_segid, 3649 boolean_t disconnect_flag) 3650 3651 { 3652 rsmresource_t *p = NULL; 3653 rsmhash_table_t *rhash = &rsm_import_segs; 3654 uint_t index; 3655 DBG_DEFINE(category, 3656 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3657 3658 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3659 3660 index = rsmhash(ex_segid); 3661 3662 rw_enter(&rhash->rsmhash_rw, RW_READER); 3663 3664 p = rsmhash_getbkt(rhash, index); 3665 3666 for (; p; p = p->rsmrc_next) { 3667 rsmseg_t *seg = (rsmseg_t *)p; 3668 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3669 /* 3670 * In order to make rsmseg_unload and rsm_force_unload 3671 * thread safe, acquire the segment lock here. 3672 * rsmseg_unload is responsible for releasing the lock. 3673 * rsmseg_unload releases the lock just before a call 3674 * to rsmipc_send or in case of an early exit which 3675 * occurs if the segment was in the state 3676 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3677 */ 3678 rsmseglock_acquire(seg); 3679 if (disconnect_flag) 3680 seg->s_flags |= RSM_FORCE_DISCONNECT; 3681 rsmseg_unload(seg); 3682 } 3683 } 3684 rw_exit(&rhash->rsmhash_rw); 3685 3686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3687 } 3688 3689 static void 3690 rsm_intr_reply(rsmipc_msghdr_t *msg) 3691 { 3692 /* 3693 * Find slot for cookie in reply. 
3694 * Match sequence with sequence in cookie 3695 * If no match; return 3696 * Try to grap lock of slot, if locked return 3697 * copy data into reply slot area 3698 * signal waiter 3699 */ 3700 rsmipc_slot_t *slot; 3701 rsmipc_cookie_t *cookie; 3702 void *data = (void *) msg; 3703 size_t size = sizeof (rsmipc_reply_t); 3704 DBG_DEFINE(category, 3705 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3706 3707 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n")); 3708 3709 cookie = &msg->rsmipc_cookie; 3710 if (cookie->ic.index >= RSMIPC_SZ) { 3711 DBG_PRINTF((category, RSM_ERR, 3712 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index)); 3713 return; 3714 } 3715 3716 ASSERT(cookie->ic.index < RSMIPC_SZ); 3717 slot = &rsm_ipc.slots[cookie->ic.index]; 3718 mutex_enter(&slot->rsmipc_lock); 3719 if (slot->rsmipc_cookie.value == cookie->value) { 3720 /* found a match */ 3721 if (RSMIPC_GET(slot, RSMIPC_PENDING)) { 3722 bcopy(data, slot->rsmipc_data, size); 3723 RSMIPC_CLEAR(slot, RSMIPC_PENDING); 3724 cv_signal(&slot->rsmipc_cv); 3725 } 3726 } else { 3727 DBG_PRINTF((category, RSM_DEBUG, 3728 "rsm: rsm_intr_reply mismatched reply %d\n", 3729 cookie->ic.index)); 3730 } 3731 mutex_exit(&slot->rsmipc_lock); 3732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n")); 3733 } 3734 3735 /* 3736 * This function gets dispatched on the worker thread when we receive 3737 * the SQREADY message. This function sends the SQREADY_ACK message. 3738 */ 3739 static void 3740 rsm_sqready_ack_deferred(void *arg) 3741 { 3742 path_t *path = (path_t *)arg; 3743 DBG_DEFINE(category, 3744 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3745 3746 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3747 "rsm_sqready_ack_deferred enter\n")); 3748 3749 mutex_enter(&path->mutex); 3750 3751 /* 3752 * If path is not active no point in sending the ACK 3753 * because the whole SQREADY protocol will again start 3754 * when the path becomes active. 
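 *
 * Informal summary of the send queue flow control handshake (A has just
 * created or recreated its send queue to B):
 *
 *	A: SQREADY(A's local_incn)	---> B
 *	B: drains its recv taskq, sets msgbuf_avail = 0, procmsg_cnt = 0
 *	   and dispatches this routine from the taskq
 *	B: SQREADY_ACK			---> A
 *	B: msgbuf_avail = RSMIPC_MAX_MESSAGES (credits for B's sends to A)
 *	A: on the ACK, msgbuf_avail = RSMIPC_MAX_MESSAGES (A's sends to B)
 *	later: RSMIPC_MSG_CREDIT messages replenish msgbuf_avail, which
 *	   never exceeds RSMIPC_MAX_MESSAGES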
3755 */ 3756 if (path->state != RSMKA_PATH_ACTIVE) { 3757 /* 3758 * decrement the path refcnt incremented in rsm_proc_sqready 3759 */ 3760 PATH_RELE_NOLOCK(path); 3761 mutex_exit(&path->mutex); 3762 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3763 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3764 return; 3765 } 3766 3767 /* send an SQREADY_ACK message */ 3768 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3769 3770 /* initialize credits to the max level */ 3771 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3772 3773 /* wake up any send that is waiting for credits */ 3774 cv_broadcast(&path->sendq_token.sendq_cv); 3775 3776 /* 3777 * decrement the path refcnt since we incremented it in 3778 * rsm_proc_sqready 3779 */ 3780 PATH_RELE_NOLOCK(path); 3781 3782 mutex_exit(&path->mutex); 3783 3784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3785 "rsm_sqready_ack_deferred done\n")); 3786 } 3787 3788 /* 3789 * Process the SQREADY message 3790 */ 3791 static void 3792 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3793 rsm_intr_hand_arg_t arg) 3794 { 3795 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3796 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3797 path_t *path; 3798 DBG_DEFINE(category, 3799 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3800 3801 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3802 3803 /* look up the path - incr the path refcnt */ 3804 path = rsm_find_path(hdlr_argp->adapter_name, 3805 hdlr_argp->adapter_instance, src_hwaddr); 3806 3807 /* 3808 * No path exists or path is not active - drop the message 3809 */ 3810 if (path == NULL) { 3811 DBG_PRINTF((category, RSM_DEBUG, 3812 "rsm_proc_sqready done: msg dropped no path\n")); 3813 return; 3814 } 3815 3816 mutex_exit(&path->mutex); 3817 3818 /* drain any tasks from the previous incarnation */ 3819 taskq_wait(path->recv_taskq); 3820 3821 mutex_enter(&path->mutex); 3822 /* 3823 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK 3824 * in the meanwhile we received an SQREADY message, blindly reset 3825 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3826 * and forget about the SQREADY that we sent. 3827 */ 3828 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3829 3830 if (path->state != RSMKA_PATH_ACTIVE) { 3831 /* decr refcnt and drop the mutex */ 3832 PATH_RELE_NOLOCK(path); 3833 mutex_exit(&path->mutex); 3834 DBG_PRINTF((category, RSM_DEBUG, 3835 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3836 return; 3837 } 3838 3839 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3840 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3841 3842 /* 3843 * The sender's local incarnation number is our remote incarnation 3844 * number save it in the path data structure 3845 */ 3846 path->remote_incn = msg->rsmipc_local_incn; 3847 path->sendq_token.msgbuf_avail = 0; 3848 path->procmsg_cnt = 0; 3849 3850 /* 3851 * path is active - dispatch task to send SQREADY_ACK - remember 3852 * RSMPI calls can't be done in interrupt context 3853 * 3854 * We can use the recv_taskq to send because the remote endpoint 3855 * cannot start sending messages till it receives SQREADY_ACK hence 3856 * at this point there are no tasks on recv_taskq. 3857 * 3858 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3859 */ 3860 (void) taskq_dispatch(path->recv_taskq, 3861 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3862 3863 mutex_exit(&path->mutex); 3864 3865 3866 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3867 } 3868 3869 /* 3870 * Process the SQREADY_ACK message 3871 */ 3872 static void 3873 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3874 rsm_intr_hand_arg_t arg) 3875 { 3876 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3877 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3878 path_t *path; 3879 DBG_DEFINE(category, 3880 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3881 3882 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3883 "rsm_proc_sqready_ack enter\n")); 3884 3885 /* look up the path - incr the path refcnt */ 3886 path = rsm_find_path(hdlr_argp->adapter_name, 3887 hdlr_argp->adapter_instance, src_hwaddr); 3888 3889 /* 3890 * drop the message if - no path exists or path is not active 3891 * or if its not waiting for SQREADY_ACK message 3892 */ 3893 if (path == NULL) { 3894 DBG_PRINTF((category, RSM_DEBUG, 3895 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3896 return; 3897 } 3898 3899 if ((path->state != RSMKA_PATH_ACTIVE) || 3900 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3901 /* decrement the refcnt */ 3902 PATH_RELE_NOLOCK(path); 3903 mutex_exit(&path->mutex); 3904 DBG_PRINTF((category, RSM_DEBUG, 3905 "rsm_proc_sqready_ack done: msg dropped\n")); 3906 return; 3907 } 3908 3909 /* 3910 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3911 * sent, if not drop it. 3912 */ 3913 if (path->local_incn != msghdr->rsmipc_incn) { 3914 /* decrement the refcnt */ 3915 PATH_RELE_NOLOCK(path); 3916 mutex_exit(&path->mutex); 3917 DBG_PRINTF((category, RSM_DEBUG, 3918 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3919 msghdr->rsmipc_incn)); 3920 return; 3921 } 3922 3923 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3924 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3925 3926 /* 3927 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3928 */ 3929 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3930 3931 /* save the remote sendq incn number */ 3932 path->remote_incn = msg->rsmipc_local_incn; 3933 3934 /* initialize credits to the max level */ 3935 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3936 3937 /* wake up any send that is waiting for credits */ 3938 cv_broadcast(&path->sendq_token.sendq_cv); 3939 3940 /* decrement the refcnt */ 3941 PATH_RELE_NOLOCK(path); 3942 3943 mutex_exit(&path->mutex); 3944 3945 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3946 "rsm_proc_sqready_ack done\n")); 3947 } 3948 3949 /* 3950 * process the RSMIPC_MSG_CREDIT message 3951 */ 3952 static void 3953 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3954 rsm_intr_hand_arg_t arg) 3955 { 3956 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3957 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3958 path_t *path; 3959 DBG_DEFINE(category, 3960 RSM_KERNEL_AGENT | RSM_FUNC_ALL | 3961 RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3962 3963 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3964 3965 /* look up the path - incr the path refcnt */ 3966 path = rsm_find_path(hdlr_argp->adapter_name, 3967 hdlr_argp->adapter_instance, src_hwaddr); 3968 3969 if (path == NULL) { 3970 DBG_PRINTF((category, RSM_DEBUG, 3971 "rsm_add_credits enter: path not found\n")); 3972 return; 3973 } 3974 3975 /* the path is not active - discard credits */ 3976 if (path->state != 
RSMKA_PATH_ACTIVE) { 3977 PATH_RELE_NOLOCK(path); 3978 mutex_exit(&path->mutex); 3979 DBG_PRINTF((category, RSM_DEBUG, 3980 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3981 return; 3982 } 3983 3984 /* 3985 * Check if these credits are for current incarnation of the path. 3986 */ 3987 if (path->local_incn != msghdr->rsmipc_incn) { 3988 /* decrement the refcnt */ 3989 PATH_RELE_NOLOCK(path); 3990 mutex_exit(&path->mutex); 3991 DBG_PRINTF((category, RSM_DEBUG, 3992 "rsm_add_credits enter: old incn %lld\n", 3993 msghdr->rsmipc_incn)); 3994 return; 3995 } 3996 3997 DBG_PRINTF((category, RSM_DEBUG, 3998 "rsm_add_credits:path=%lx new-creds=%d " 3999 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 4000 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 4001 src_hwaddr)); 4002 4003 4004 /* add credits to the path's sendq */ 4005 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4006 4007 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4008 4009 /* wake up any send that is waiting for credits */ 4010 cv_broadcast(&path->sendq_token.sendq_cv); 4011 4012 /* decrement the refcnt */ 4013 PATH_RELE_NOLOCK(path); 4014 4015 mutex_exit(&path->mutex); 4016 4017 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4018 } 4019 4020 static void 4021 rsm_intr_event(rsmipc_request_t *msg) 4022 { 4023 rsmseg_t *seg; 4024 rsmresource_t *p; 4025 rsm_node_id_t src_node; 4026 DBG_DEFINE(category, 4027 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4028 4029 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4030 4031 src_node = msg->rsmipc_hdr.rsmipc_src; 4032 4033 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4034 /* This is for an import segment */ 4035 uint_t hashval = rsmhash(msg->rsmipc_key); 4036 4037 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4038 4039 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4040 4041 for (; p; p = p->rsmrc_next) { 4042 if ((p->rsmrc_key == msg->rsmipc_key) && 4043 (p->rsmrc_node == src_node)) { 4044 seg = (rsmseg_t *)p; 4045 rsmseglock_acquire(seg); 4046 4047 atomic_inc_32(&seg->s_pollevent); 4048 4049 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4050 pollwakeup(&seg->s_poll, POLLRDNORM); 4051 4052 rsmseglock_release(seg); 4053 } 4054 } 4055 4056 rw_exit(&rsm_import_segs.rsmhash_rw); 4057 } else { 4058 /* This is for an export segment */ 4059 seg = rsmexport_lookup(msg->rsmipc_key); 4060 if (!seg) { 4061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4062 "rsm_intr_event done: exp seg not found\n")); 4063 return; 4064 } 4065 4066 ASSERT(rsmseglock_held(seg)); 4067 4068 atomic_inc_32(&seg->s_pollevent); 4069 4070 /* 4071 * We must hold the segment lock here, or else the segment 4072 * can be freed while pollwakeup is using it. This implies 4073 * that we MUST NOT grab the segment lock during rsm_chpoll, 4074 * as outlined in the chpoll(2) man page. 4075 */ 4076 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4077 pollwakeup(&seg->s_poll, POLLRDNORM); 4078 4079 rsmseglock_release(seg); 4080 } 4081 4082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4083 } 4084 4085 /* 4086 * The exporter did a republish and changed the ACL - this change is only 4087 * visible to new importers. 
4088 */ 4089 static void 4090 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4091 rsm_permission_t perm) 4092 { 4093 4094 rsmresource_t *p; 4095 rsmseg_t *seg; 4096 uint_t hashval = rsmhash(key); 4097 DBG_DEFINE(category, 4098 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4099 4100 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4101 4102 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4103 4104 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4105 4106 for (; p; p = p->rsmrc_next) { 4107 /* 4108 * find the importer and update the permission in the shared 4109 * data structure. Any new importers will use the new perms 4110 */ 4111 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4112 seg = (rsmseg_t *)p; 4113 4114 rsmseglock_acquire(seg); 4115 rsmsharelock_acquire(seg); 4116 seg->s_share->rsmsi_mode = perm; 4117 rsmsharelock_release(seg); 4118 rsmseglock_release(seg); 4119 4120 break; 4121 } 4122 } 4123 4124 rw_exit(&rsm_import_segs.rsmhash_rw); 4125 4126 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4127 } 4128 4129 void 4130 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4131 { 4132 int done = 1; /* indicate all SUSPENDS have been acked */ 4133 list_element_t *elem; 4134 DBG_DEFINE(category, 4135 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4136 4137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4138 "rsm_suspend_complete enter\n")); 4139 4140 mutex_enter(&rsm_suspend_list.list_lock); 4141 4142 if (rsm_suspend_list.list_head == NULL) { 4143 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4144 "rsm_suspend_complete done: suspend_list is empty\n")); 4145 mutex_exit(&rsm_suspend_list.list_lock); 4146 return; 4147 } 4148 4149 elem = rsm_suspend_list.list_head; 4150 while (elem != NULL) { 4151 if (elem->nodeid == src_node) { 4152 /* clear the pending flag for the node */ 4153 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4154 elem->flags |= flag; 4155 } 4156 4157 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4158 done = 0; /* still some nodes have not yet ACKED */ 4159 4160 elem = elem->next; 4161 } 4162 4163 mutex_exit(&rsm_suspend_list.list_lock); 4164 4165 if (!done) { 4166 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4167 "rsm_suspend_complete done: acks pending\n")); 4168 return; 4169 } 4170 /* 4171 * Now that we are done with suspending all the remote importers 4172 * time to quiesce the local exporters 4173 */ 4174 exporter_quiesce(); 4175 4176 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4177 "rsm_suspend_complete done\n")); 4178 } 4179 4180 static void 4181 exporter_quiesce() 4182 { 4183 int i, e; 4184 rsmresource_t *current; 4185 rsmseg_t *seg; 4186 adapter_t *adapter; 4187 DBG_DEFINE(category, 4188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4189 4190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4191 /* 4192 * The importers send a SUSPEND_COMPLETE to the exporter node 4193 * Unpublish, unbind the export segment and 4194 * move the segments to the EXPORT_QUIESCED state 4195 */ 4196 4197 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4198 4199 for (i = 0; i < rsm_hash_size; i++) { 4200 current = rsm_export_segs.bucket[i]; 4201 while (current != NULL) { 4202 seg = (rsmseg_t *)current; 4203 rsmseglock_acquire(seg); 4204 if (current->rsmrc_state == 4205 RSM_STATE_EXPORT_QUIESCING) { 4206 adapter = seg->s_adapter; 4207 /* 4208 * some local memory handles are not published 4209 * check if it was published 4210 */ 4211 if ((seg->s_acl == NULL) || 4212 
(seg->s_acl[0].ae_node != my_nodeid) || 4213 (seg->s_acl[0].ae_permission != 0)) { 4214 4215 e = adapter->rsmpi_ops->rsm_unpublish( 4216 seg->s_handle.out); 4217 DBG_PRINTF((category, RSM_DEBUG, 4218 "exporter_quiesce:unpub %d\n", e)); 4219 4220 e = adapter->rsmpi_ops->rsm_seg_destroy( 4221 seg->s_handle.out); 4222 4223 DBG_PRINTF((category, RSM_DEBUG, 4224 "exporter_quiesce:destroy %d\n", 4225 e)); 4226 } 4227 4228 (void) rsm_unbind_pages(seg); 4229 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4230 cv_broadcast(&seg->s_cv); 4231 } 4232 rsmseglock_release(seg); 4233 current = current->rsmrc_next; 4234 } 4235 } 4236 rw_exit(&rsm_export_segs.rsmhash_rw); 4237 4238 /* 4239 * All the local segments we are done with the pre-del processing 4240 * - time to move to PREDEL_COMPLETED. 4241 */ 4242 4243 mutex_enter(&rsm_drv_data.drv_lock); 4244 4245 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4246 4247 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4248 4249 cv_broadcast(&rsm_drv_data.drv_cv); 4250 4251 mutex_exit(&rsm_drv_data.drv_lock); 4252 4253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4254 } 4255 4256 static void 4257 importer_suspend(rsm_node_id_t src_node) 4258 { 4259 int i; 4260 int susp_flg; /* true means already suspended */ 4261 int num_importers; 4262 rsmresource_t *p = NULL, *curp; 4263 rsmhash_table_t *rhash = &rsm_import_segs; 4264 rsmseg_t *seg; 4265 rsmipc_request_t request; 4266 DBG_DEFINE(category, 4267 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4268 4269 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4270 4271 rw_enter(&rhash->rsmhash_rw, RW_READER); 4272 for (i = 0; i < rsm_hash_size; i++) { 4273 p = rhash->bucket[i]; 4274 4275 /* 4276 * Suspend all importers with same <node, key> pair. 4277 * After the last one of the shared importers has been 4278 * suspended - suspend the shared mappings/connection. 4279 */ 4280 for (; p; p = p->rsmrc_next) { 4281 rsmseg_t *first = (rsmseg_t *)p; 4282 if ((first->s_node != src_node) || 4283 (first->s_state == RSM_STATE_DISCONNECT)) 4284 continue; /* go to next entry */ 4285 /* 4286 * search the rest of the bucket for 4287 * other siblings (imprtrs with the same key) 4288 * of "first" and suspend them. 4289 * All importers with same key fall in 4290 * the same bucket. 4291 */ 4292 num_importers = 0; 4293 for (curp = p; curp; curp = curp->rsmrc_next) { 4294 seg = (rsmseg_t *)curp; 4295 4296 rsmseglock_acquire(seg); 4297 4298 if ((seg->s_node != first->s_node) || 4299 (seg->s_key != first->s_key) || 4300 (seg->s_state == RSM_STATE_DISCONNECT)) { 4301 /* 4302 * either not a peer segment or its a 4303 * disconnected segment - skip it 4304 */ 4305 rsmseglock_release(seg); 4306 continue; 4307 } 4308 4309 rsmseg_suspend(seg, &susp_flg); 4310 4311 if (susp_flg) { /* seg already suspended */ 4312 rsmseglock_release(seg); 4313 break; /* the inner for loop */ 4314 } 4315 4316 num_importers++; 4317 rsmsharelock_acquire(seg); 4318 /* 4319 * we've processed all importers that are 4320 * siblings of "first" 4321 */ 4322 if (num_importers == 4323 seg->s_share->rsmsi_refcnt) { 4324 rsmsharelock_release(seg); 4325 rsmseglock_release(seg); 4326 break; 4327 } 4328 rsmsharelock_release(seg); 4329 rsmseglock_release(seg); 4330 } 4331 4332 /* 4333 * All the importers with the same key and 4334 * nodeid as "first" have been suspended. 4335 * Now suspend the shared connect/mapping. 4336 * This is done only once. 
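 *
 * A condensed sketch of the walk above (illustrative only; the locking
 * and error details are in the code):
 *
 *	num_importers = 0;
 *	for each seg in this bucket with the same (s_node, s_key) as "first":
 *	    rsmseg_suspend(seg, &susp_flg);
 *	    if (susp_flg)
 *	        break;      already suspended by an earlier pass
 *	    if (++num_importers == seg->s_share->rsmsi_refcnt)
 *	        break;      every sibling has now been suspended
 *	if (!susp_flg)
 *	    rsmsegshare_suspend(seg);   quiesce the shared state exactly once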
4337 */ 4338 if (!susp_flg) { 4339 rsmsegshare_suspend(seg); 4340 } 4341 } 4342 } 4343 4344 rw_exit(&rhash->rsmhash_rw); 4345 4346 /* send an ACK for SUSPEND message */ 4347 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4348 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4349 4350 4351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4352 4353 } 4354 4355 static void 4356 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4357 { 4358 int recheck_state; 4359 rsmcookie_t *hdl; 4360 DBG_DEFINE(category, 4361 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4362 4363 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4364 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4365 4366 *susp_flg = 0; 4367 4368 ASSERT(rsmseglock_held(seg)); 4369 /* wait if putv/getv is in progress */ 4370 while (seg->s_rdmacnt > 0) 4371 cv_wait(&seg->s_cv, &seg->s_lock); 4372 4373 do { 4374 recheck_state = 0; 4375 4376 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4377 "rsmseg_suspend:segment %x state=%d\n", 4378 seg->s_key, seg->s_state)); 4379 4380 switch (seg->s_state) { 4381 case RSM_STATE_NEW: 4382 /* not a valid state */ 4383 break; 4384 case RSM_STATE_CONNECTING: 4385 seg->s_state = RSM_STATE_ABORT_CONNECT; 4386 break; 4387 case RSM_STATE_ABORT_CONNECT: 4388 break; 4389 case RSM_STATE_CONNECT: 4390 seg->s_handle.in = NULL; 4391 seg->s_state = RSM_STATE_CONN_QUIESCE; 4392 break; 4393 case RSM_STATE_MAPPING: 4394 /* wait until segment leaves the mapping state */ 4395 while (seg->s_state == RSM_STATE_MAPPING) 4396 cv_wait(&seg->s_cv, &seg->s_lock); 4397 recheck_state = 1; 4398 break; 4399 case RSM_STATE_ACTIVE: 4400 /* unload the mappings */ 4401 if (seg->s_ckl != NULL) { 4402 hdl = seg->s_ckl; 4403 for (; hdl != NULL; hdl = hdl->c_next) { 4404 (void) devmap_unload(hdl->c_dhp, 4405 hdl->c_off, hdl->c_len); 4406 } 4407 } 4408 seg->s_mapinfo = NULL; 4409 seg->s_state = RSM_STATE_MAP_QUIESCE; 4410 break; 4411 case RSM_STATE_CONN_QUIESCE: 4412 /* FALLTHRU */ 4413 case RSM_STATE_MAP_QUIESCE: 4414 /* rsmseg_suspend already done for seg */ 4415 *susp_flg = 1; 4416 break; 4417 case RSM_STATE_DISCONNECT: 4418 break; 4419 default: 4420 ASSERT(0); /* invalid state */ 4421 } 4422 } while (recheck_state); 4423 4424 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4425 } 4426 4427 static void 4428 rsmsegshare_suspend(rsmseg_t *seg) 4429 { 4430 int e; 4431 adapter_t *adapter; 4432 rsm_import_share_t *sharedp; 4433 DBG_DEFINE(category, 4434 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4435 4436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4437 "rsmsegshare_suspend enter\n")); 4438 4439 rsmseglock_acquire(seg); 4440 rsmsharelock_acquire(seg); 4441 4442 sharedp = seg->s_share; 4443 adapter = seg->s_adapter; 4444 switch (sharedp->rsmsi_state) { 4445 case RSMSI_STATE_NEW: 4446 break; 4447 case RSMSI_STATE_CONNECTING: 4448 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4449 break; 4450 case RSMSI_STATE_ABORT_CONNECT: 4451 break; 4452 case RSMSI_STATE_CONNECTED: 4453 /* do the rsmpi disconnect */ 4454 if (sharedp->rsmsi_node != my_nodeid) { 4455 e = adapter->rsmpi_ops-> 4456 rsm_disconnect(sharedp->rsmsi_handle); 4457 4458 DBG_PRINTF((category, RSM_DEBUG, 4459 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4460 sharedp->rsmsi_segid, e)); 4461 } 4462 4463 sharedp->rsmsi_handle = NULL; 4464 4465 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4466 break; 4467 case RSMSI_STATE_CONN_QUIESCE: 4468 break; 4469 case RSMSI_STATE_MAPPED: 4470 /* do the rsmpi unmap and disconnect */ 4471 if 
(sharedp->rsmsi_node != my_nodeid) { 4472 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4473 4474 DBG_PRINTF((category, RSM_DEBUG, 4475 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4476 4477 e = adapter->rsmpi_ops-> 4478 rsm_disconnect(sharedp->rsmsi_handle); 4479 DBG_PRINTF((category, RSM_DEBUG, 4480 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4481 sharedp->rsmsi_segid, e)); 4482 } 4483 4484 sharedp->rsmsi_handle = NULL; 4485 4486 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4487 break; 4488 case RSMSI_STATE_MAP_QUIESCE: 4489 break; 4490 case RSMSI_STATE_DISCONNECTED: 4491 break; 4492 default: 4493 ASSERT(0); /* invalid state */ 4494 } 4495 4496 rsmsharelock_release(seg); 4497 rsmseglock_release(seg); 4498 4499 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4500 "rsmsegshare_suspend done\n")); 4501 } 4502 4503 /* 4504 * This should get called on receiving a RESUME message or from 4505 * the pathmanger if the node undergoing DR dies. 4506 */ 4507 static void 4508 importer_resume(rsm_node_id_t src_node) 4509 { 4510 int i; 4511 rsmresource_t *p = NULL; 4512 rsmhash_table_t *rhash = &rsm_import_segs; 4513 void *cookie; 4514 DBG_DEFINE(category, 4515 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4516 4517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4518 4519 rw_enter(&rhash->rsmhash_rw, RW_READER); 4520 4521 for (i = 0; i < rsm_hash_size; i++) { 4522 p = rhash->bucket[i]; 4523 4524 for (; p; p = p->rsmrc_next) { 4525 rsmseg_t *seg = (rsmseg_t *)p; 4526 4527 rsmseglock_acquire(seg); 4528 4529 /* process only importers of node undergoing DR */ 4530 if (seg->s_node != src_node) { 4531 rsmseglock_release(seg); 4532 continue; 4533 } 4534 4535 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4536 rsmipc_request_t request; 4537 /* 4538 * rsmpi map/connect failed 4539 * inform the exporter so that it can 4540 * remove the importer. 
4541 */ 4542 request.rsmipc_hdr.rsmipc_type = 4543 RSMIPC_MSG_NOTIMPORTING; 4544 request.rsmipc_key = seg->s_segid; 4545 request.rsmipc_segment_cookie = cookie; 4546 rsmseglock_release(seg); 4547 (void) rsmipc_send(seg->s_node, &request, 4548 RSM_NO_REPLY); 4549 } else { 4550 rsmseglock_release(seg); 4551 } 4552 } 4553 } 4554 4555 rw_exit(&rhash->rsmhash_rw); 4556 4557 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4558 } 4559 4560 static int 4561 rsmseg_resume(rsmseg_t *seg, void **cookie) 4562 { 4563 int e; 4564 int retc; 4565 off_t dev_offset; 4566 size_t maplen; 4567 uint_t maxprot; 4568 rsm_mapinfo_t *p; 4569 rsmcookie_t *hdl; 4570 rsm_import_share_t *sharedp; 4571 DBG_DEFINE(category, 4572 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4573 4574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4575 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4576 4577 *cookie = NULL; 4578 4579 ASSERT(rsmseglock_held(seg)); 4580 4581 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4582 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4583 return (RSM_SUCCESS); 4584 } 4585 4586 sharedp = seg->s_share; 4587 4588 rsmsharelock_acquire(seg); 4589 4590 /* resume the shared connection and/or mapping */ 4591 retc = rsmsegshare_resume(seg); 4592 4593 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4594 /* shared state can either be connected or mapped */ 4595 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4596 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4597 ASSERT(retc == RSM_SUCCESS); 4598 seg->s_handle.in = sharedp->rsmsi_handle; 4599 rsmsharelock_release(seg); 4600 seg->s_state = RSM_STATE_CONNECT; 4601 4602 } else { /* error in rsmpi connect during resume */ 4603 seg->s_handle.in = NULL; 4604 seg->s_state = RSM_STATE_DISCONNECT; 4605 4606 sharedp->rsmsi_refcnt--; 4607 cookie = (void *)sharedp->rsmsi_cookie; 4608 4609 if (sharedp->rsmsi_refcnt == 0) { 4610 ASSERT(sharedp->rsmsi_mapcnt == 0); 4611 rsmsharelock_release(seg); 4612 4613 /* clean up the shared data structure */ 4614 mutex_destroy(&sharedp->rsmsi_lock); 4615 cv_destroy(&sharedp->rsmsi_cv); 4616 kmem_free((void *)(sharedp), 4617 sizeof (rsm_import_share_t)); 4618 4619 } else { 4620 rsmsharelock_release(seg); 4621 } 4622 /* 4623 * The following needs to be done after any 4624 * rsmsharelock calls which use seg->s_share. 
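 *
 * In sketch form, the teardown ordering used in this error path is
 * (an illustration of the code above, not additional logic):
 *
 *	sharedp->rsmsi_refcnt--;
 *	if (sharedp->rsmsi_refcnt == 0) {
 *	    rsmsharelock_release(seg);      last user, safe to destroy
 *	    mutex_destroy(&sharedp->rsmsi_lock);
 *	    cv_destroy(&sharedp->rsmsi_cv);
 *	    kmem_free(sharedp, sizeof (rsm_import_share_t));
 *	} else {
 *	    rsmsharelock_release(seg);
 *	}
 *	seg->s_share = NULL;                only after the last lock use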
4625 */ 4626 seg->s_share = NULL; 4627 } 4628 4629 /* signal any waiting segment */ 4630 cv_broadcast(&seg->s_cv); 4631 4632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4633 "rsmseg_resume done:state=%d\n", seg->s_state)); 4634 return (retc); 4635 } 4636 4637 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4638 4639 /* Setup protections for remap */ 4640 maxprot = PROT_USER; 4641 if (seg->s_mode & RSM_PERM_READ) { 4642 maxprot |= PROT_READ; 4643 } 4644 if (seg->s_mode & RSM_PERM_WRITE) { 4645 maxprot |= PROT_WRITE; 4646 } 4647 4648 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4649 /* error in rsmpi connect or map during resume */ 4650 4651 /* remap to trash page */ 4652 ASSERT(seg->s_ckl != NULL); 4653 4654 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4655 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4656 remap_cookie, hdl->c_off, hdl->c_len, 4657 maxprot, 0, NULL); 4658 4659 DBG_PRINTF((category, RSM_ERR, 4660 "rsmseg_resume:remap=%d\n", e)); 4661 } 4662 4663 seg->s_handle.in = NULL; 4664 seg->s_state = RSM_STATE_DISCONNECT; 4665 4666 sharedp->rsmsi_refcnt--; 4667 4668 sharedp->rsmsi_mapcnt--; 4669 seg->s_mapinfo = NULL; 4670 4671 if (sharedp->rsmsi_refcnt == 0) { 4672 ASSERT(sharedp->rsmsi_mapcnt == 0); 4673 rsmsharelock_release(seg); 4674 4675 /* clean up the shared data structure */ 4676 mutex_destroy(&sharedp->rsmsi_lock); 4677 cv_destroy(&sharedp->rsmsi_cv); 4678 kmem_free((void *)(sharedp), 4679 sizeof (rsm_import_share_t)); 4680 4681 } else { 4682 rsmsharelock_release(seg); 4683 } 4684 /* 4685 * The following needs to be done after any 4686 * rsmsharelock calls which use seg->s_share. 4687 */ 4688 seg->s_share = NULL; 4689 4690 /* signal any waiting segment */ 4691 cv_broadcast(&seg->s_cv); 4692 4693 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4694 "rsmseg_resume done:seg=%x,err=%d\n", 4695 seg->s_key, retc)); 4696 return (retc); 4697 4698 } 4699 4700 seg->s_handle.in = sharedp->rsmsi_handle; 4701 4702 if (seg->s_node == my_nodeid) { /* loopback */ 4703 ASSERT(seg->s_mapinfo == NULL); 4704 4705 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4706 e = devmap_umem_remap(hdl->c_dhp, 4707 rsm_dip, seg->s_cookie, 4708 hdl->c_off, hdl->c_len, 4709 maxprot, 0, NULL); 4710 4711 DBG_PRINTF((category, RSM_ERR, 4712 "rsmseg_resume:remap=%d\n", e)); 4713 } 4714 } else { /* remote exporter */ 4715 /* remap to the new rsmpi maps */ 4716 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4717 4718 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4719 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4720 &dev_offset, &maplen); 4721 e = devmap_devmem_remap(hdl->c_dhp, 4722 p->dip, p->dev_register, dev_offset, 4723 maplen, maxprot, 0, NULL); 4724 4725 DBG_PRINTF((category, RSM_ERR, 4726 "rsmseg_resume:remap=%d\n", e)); 4727 } 4728 } 4729 4730 rsmsharelock_release(seg); 4731 4732 seg->s_state = RSM_STATE_ACTIVE; 4733 cv_broadcast(&seg->s_cv); 4734 4735 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4736 4737 return (retc); 4738 } 4739 4740 static int 4741 rsmsegshare_resume(rsmseg_t *seg) 4742 { 4743 int e = RSM_SUCCESS; 4744 adapter_t *adapter; 4745 rsm_import_share_t *sharedp; 4746 DBG_DEFINE(category, 4747 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4748 4749 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4750 4751 ASSERT(rsmseglock_held(seg)); 4752 ASSERT(rsmsharelock_held(seg)); 4753 4754 sharedp = seg->s_share; 4755 4756 /* 4757 * If we are not in a xxxx_QUIESCE state that means shared 4758 * connect/mapping processing has 
been already been done 4759 * so return success. 4760 */ 4761 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4762 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4763 return (RSM_SUCCESS); 4764 } 4765 4766 adapter = seg->s_adapter; 4767 4768 if (sharedp->rsmsi_node != my_nodeid) { 4769 rsm_addr_t hwaddr; 4770 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4771 4772 e = adapter->rsmpi_ops->rsm_connect( 4773 adapter->rsmpi_handle, hwaddr, 4774 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4775 4776 DBG_PRINTF((category, RSM_DEBUG, 4777 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4778 sharedp->rsmsi_segid, e)); 4779 4780 if (e != RSM_SUCCESS) { 4781 /* when do we send the NOT_IMPORTING message */ 4782 sharedp->rsmsi_handle = NULL; 4783 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4784 /* signal any waiting segment */ 4785 cv_broadcast(&sharedp->rsmsi_cv); 4786 return (e); 4787 } 4788 } 4789 4790 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4791 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4792 /* signal any waiting segment */ 4793 cv_broadcast(&sharedp->rsmsi_cv); 4794 return (e); 4795 } 4796 4797 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4798 4799 /* do the rsmpi map of the whole segment here */ 4800 if (sharedp->rsmsi_node != my_nodeid) { 4801 size_t mapped_len; 4802 rsm_mapinfo_t *p; 4803 4804 /* 4805 * We need to do rsmpi maps with <off, lens> identical to 4806 * the old mapinfo list because the segment mapping handles 4807 * dhp and such need the fragmentation of rsmpi maps to be 4808 * identical to what it was during the mmap of the segment 4809 */ 4810 p = sharedp->rsmsi_mapinfo; 4811 4812 while (p != NULL) { 4813 mapped_len = 0; 4814 4815 e = adapter->rsmpi_ops->rsm_map( 4816 sharedp->rsmsi_handle, p->start_offset, 4817 p->individual_len, &mapped_len, 4818 &p->dip, &p->dev_register, &p->dev_offset, 4819 NULL, NULL); 4820 4821 if (e != 0) { 4822 DBG_PRINTF((category, RSM_ERR, 4823 "rsmsegshare_resume: rsmpi map err=%d\n", 4824 e)); 4825 break; 4826 } 4827 4828 if (mapped_len != p->individual_len) { 4829 DBG_PRINTF((category, RSM_ERR, 4830 "rsmsegshare_resume: rsmpi maplen" 4831 "< reqlen=%lx\n", mapped_len)); 4832 e = RSMERR_BAD_LENGTH; 4833 break; 4834 } 4835 4836 p = p->next; 4837 4838 } 4839 4840 4841 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4842 int err; 4843 /* Check if this is the first rsm_map */ 4844 if (p != sharedp->rsmsi_mapinfo) { 4845 /* 4846 * A single rsm_unmap undoes multiple rsm_maps. 
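 *
 * Rough sketch of the rollback relied upon here (rsm_map and rsm_unmap
 * abbreviate the adapter->rsmpi_ops calls; illustration only):
 *
 *	for (p = sharedp->rsmsi_mapinfo; p != NULL; p = p->next) {
 *	    e = rsm_map(sharedp->rsmsi_handle, p->start_offset,
 *	        p->individual_len, ...);
 *	    if (e != RSM_SUCCESS)
 *	        break;
 *	}
 *	if (e != RSM_SUCCESS) {
 *	    if (p != sharedp->rsmsi_mapinfo)
 *	        rsm_unmap(sharedp->rsmsi_handle);   undoes all earlier maps
 *	    rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
 *	    rsm_disconnect(sharedp->rsmsi_handle);
 *	}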
4847 */ 4848 (void) seg->s_adapter->rsmpi_ops-> 4849 rsm_unmap(sharedp->rsmsi_handle); 4850 } 4851 4852 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4853 sharedp->rsmsi_mapinfo = NULL; 4854 4855 err = adapter->rsmpi_ops-> 4856 rsm_disconnect(sharedp->rsmsi_handle); 4857 4858 DBG_PRINTF((category, RSM_DEBUG, 4859 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4860 sharedp->rsmsi_segid, err)); 4861 4862 sharedp->rsmsi_handle = NULL; 4863 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4864 4865 /* signal the waiting segments */ 4866 cv_broadcast(&sharedp->rsmsi_cv); 4867 DBG_PRINTF((category, RSM_DEBUG, 4868 "rsmsegshare_resume done: rsmpi map err\n")); 4869 return (e); 4870 } 4871 } 4872 4873 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4874 4875 /* signal any waiting segment */ 4876 cv_broadcast(&sharedp->rsmsi_cv); 4877 4878 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4879 4880 return (e); 4881 } 4882 4883 /* 4884 * this is the routine that gets called by recv_taskq which is the 4885 * thread that processes messages that are flow-controlled. 4886 */ 4887 static void 4888 rsm_intr_proc_deferred(void *arg) 4889 { 4890 path_t *path = (path_t *)arg; 4891 rsmipc_request_t *msg; 4892 rsmipc_msghdr_t *msghdr; 4893 rsm_node_id_t src_node; 4894 msgbuf_elem_t *head; 4895 int e; 4896 DBG_DEFINE(category, 4897 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4898 4899 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4900 "rsm_intr_proc_deferred enter\n")); 4901 4902 mutex_enter(&path->mutex); 4903 4904 /* use the head of the msgbuf_queue */ 4905 head = rsmka_gethead_msgbuf(path); 4906 4907 mutex_exit(&path->mutex); 4908 4909 msg = (rsmipc_request_t *)&(head->msg); 4910 msghdr = (rsmipc_msghdr_t *)msg; 4911 4912 src_node = msghdr->rsmipc_src; 4913 4914 /* 4915 * messages that need to send a reply should check the message version 4916 * before processing the message. And all messages that need to 4917 * send a reply should be processed here by the worker thread. 
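 *
 * Condensed flow of this worker, for orientation (illustration only;
 * locking and error handling are in the code below):
 *
 *	head = rsmka_gethead_msgbuf(path);
 *	... process head->msg according to its rsmipc_type ...
 *	rsmka_dequeue_msgbuf(path);
 *	if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
 *	    path->procmsg_cnt++;
 *	if (path->state == RSMKA_PATH_ACTIVE &&
 *	    path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS &&
 *	    rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT) == 0)
 *	    path->procmsg_cnt = 0;
 *	PATH_RELE_NOLOCK(path);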
4918 */ 4919 switch (msghdr->rsmipc_type) { 4920 case RSMIPC_MSG_SEGCONNECT: 4921 if (msghdr->rsmipc_version != RSM_VERSION) { 4922 rsmipc_reply_t reply; 4923 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4924 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4925 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4926 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4927 } else { 4928 rsm_intr_segconnect(src_node, msg); 4929 } 4930 break; 4931 case RSMIPC_MSG_DISCONNECT: 4932 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4933 break; 4934 case RSMIPC_MSG_SUSPEND: 4935 importer_suspend(src_node); 4936 break; 4937 case RSMIPC_MSG_SUSPEND_DONE: 4938 rsm_suspend_complete(src_node, 0); 4939 break; 4940 case RSMIPC_MSG_RESUME: 4941 importer_resume(src_node); 4942 break; 4943 default: 4944 ASSERT(0); 4945 } 4946 4947 mutex_enter(&path->mutex); 4948 4949 rsmka_dequeue_msgbuf(path); 4950 4951 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4952 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4953 path->procmsg_cnt++; 4954 4955 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4956 4957 /* No need to send credits if path is going down */ 4958 if ((path->state == RSMKA_PATH_ACTIVE) && 4959 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4960 /* 4961 * send credits and reset procmsg_cnt if success otherwise 4962 * credits will be sent after processing the next message 4963 */ 4964 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4965 if (e == 0) 4966 path->procmsg_cnt = 0; 4967 else 4968 DBG_PRINTF((category, RSM_ERR, 4969 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4970 } 4971 4972 /* 4973 * decrement the path refcnt since we incremented it in 4974 * rsm_intr_callback_dispatch 4975 */ 4976 PATH_RELE_NOLOCK(path); 4977 4978 mutex_exit(&path->mutex); 4979 4980 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4981 "rsm_intr_proc_deferred done\n")); 4982 } 4983 4984 /* 4985 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4986 */ 4987 static void 4988 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4989 rsm_intr_hand_arg_t arg) 4990 { 4991 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4992 path_t *path; 4993 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4994 DBG_DEFINE(category, 4995 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4996 4997 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4998 "rsm_intr_callback_dispatch enter\n")); 4999 ASSERT(data && hdlr_argp); 5000 5001 /* look up the path - incr the path refcnt */ 5002 path = rsm_find_path(hdlr_argp->adapter_name, 5003 hdlr_argp->adapter_instance, src_hwaddr); 5004 5005 /* the path has been removed - drop this message */ 5006 if (path == NULL) { 5007 DBG_PRINTF((category, RSM_DEBUG, 5008 "rsm_intr_callback_dispatch done: msg dropped\n")); 5009 return; 5010 } 5011 /* the path is not active - don't accept new messages */ 5012 if (path->state != RSMKA_PATH_ACTIVE) { 5013 PATH_RELE_NOLOCK(path); 5014 mutex_exit(&path->mutex); 5015 DBG_PRINTF((category, RSM_DEBUG, 5016 "rsm_intr_callback_dispatch done: msg dropped" 5017 " path=%lx !ACTIVE\n", path)); 5018 return; 5019 } 5020 5021 /* 5022 * Check if this message was sent to an older incarnation 5023 * of the path/sendq. 
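 *
 * In sketch form (illustrative only), the staleness check below is:
 *
 *	if (path->local_incn != msghdr->rsmipc_incn)
 *	    drop the message;
 *
 * The peer stamps each message with the incarnation number it learned
 * from our SQREADY (see rsm_proc_sqready above), so a mismatch means the
 * message was aimed at a previous incarnation of this path/sendq.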
5024 */ 5025 if (path->local_incn != msghdr->rsmipc_incn) { 5026 /* decrement the refcnt */ 5027 PATH_RELE_NOLOCK(path); 5028 mutex_exit(&path->mutex); 5029 DBG_PRINTF((category, RSM_DEBUG, 5030 "rsm_intr_callback_dispatch done: old incn %lld\n", 5031 msghdr->rsmipc_incn)); 5032 return; 5033 } 5034 5035 /* copy and enqueue msg on the path's msgbuf queue */ 5036 rsmka_enqueue_msgbuf(path, data); 5037 5038 /* 5039 * schedule task to process messages - ignore retval from 5040 * task_dispatch because we sender cannot send more than 5041 * what receiver can handle. 5042 */ 5043 (void) taskq_dispatch(path->recv_taskq, 5044 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5045 5046 mutex_exit(&path->mutex); 5047 5048 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5049 "rsm_intr_callback_dispatch done\n")); 5050 } 5051 5052 /* 5053 * This procedure is called from rsm_srv_func when a remote node creates a 5054 * a send queue. This event is used as a hint that an earlier failed 5055 * attempt to create a send queue to that remote node may now succeed and 5056 * should be retried. Indication of an earlier failed attempt is provided 5057 * by the RSMKA_SQCREATE_PENDING flag. 5058 */ 5059 static void 5060 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5061 { 5062 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5063 path_t *path; 5064 DBG_DEFINE(category, 5065 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5066 5067 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5068 "rsm_sqcreateop_callback enter\n")); 5069 5070 /* look up the path - incr the path refcnt */ 5071 path = rsm_find_path(hdlr_argp->adapter_name, 5072 hdlr_argp->adapter_instance, src_hwaddr); 5073 5074 if (path == NULL) { 5075 DBG_PRINTF((category, RSM_DEBUG, 5076 "rsm_sqcreateop_callback done: no path\n")); 5077 return; 5078 } 5079 5080 if ((path->state == RSMKA_PATH_UP) && 5081 (path->flags & RSMKA_SQCREATE_PENDING)) { 5082 /* 5083 * previous attempt to create sendq had failed, retry 5084 * it and move to RSMKA_PATH_ACTIVE state if successful. 5085 * the refcnt will be decremented in the do_deferred_work 5086 */ 5087 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5088 } else { 5089 /* decrement the refcnt */ 5090 PATH_RELE_NOLOCK(path); 5091 } 5092 mutex_exit(&path->mutex); 5093 5094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5095 "rsm_sqcreateop_callback done\n")); 5096 } 5097 5098 static void 5099 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5100 { 5101 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5102 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5103 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5104 rsm_node_id_t src_node; 5105 DBG_DEFINE(category, 5106 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5107 5108 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5109 "src=%d, type=%d\n", msghdr->rsmipc_src, 5110 msghdr->rsmipc_type)); 5111 5112 /* 5113 * Check for the version number in the msg header. If it is not 5114 * RSM_VERSION, drop the message. In the future, we need to manage 5115 * incompatible version numbers in some way 5116 */ 5117 if (msghdr->rsmipc_version != RSM_VERSION) { 5118 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5119 /* 5120 * Drop requests that don't have a reply right here 5121 * Request with reply will send a BAD_VERSION reply 5122 * when they get processed by the worker thread. 
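 *
 * Sketch of the version gate (illustrative only):
 *
 *	if (msghdr->rsmipc_version != RSM_VERSION) {
 *	    if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT)
 *	        return;     the request expects no reply, drop it here
 *	    otherwise fall through: the worker thread replies with
 *	    RSMERR_BAD_DRIVER_VERSION (see rsm_intr_proc_deferred above)
 *	}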
5123 */ 5124 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5125 return; 5126 } 5127 5128 } 5129 5130 src_node = msghdr->rsmipc_src; 5131 5132 switch (msghdr->rsmipc_type) { 5133 case RSMIPC_MSG_SEGCONNECT: 5134 case RSMIPC_MSG_DISCONNECT: 5135 case RSMIPC_MSG_SUSPEND: 5136 case RSMIPC_MSG_SUSPEND_DONE: 5137 case RSMIPC_MSG_RESUME: 5138 /* 5139 * These message types are handled by a worker thread using 5140 * the flow-control algorithm. 5141 * Any message processing that does one or more of the 5142 * following should be handled in a worker thread. 5143 * - allocates resources and might sleep 5144 * - makes RSMPI calls down to the interconnect driver 5145 * this by defn include requests with reply. 5146 * - takes a long duration of time 5147 */ 5148 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5149 break; 5150 case RSMIPC_MSG_NOTIMPORTING: 5151 importer_list_rm(src_node, msg->rsmipc_key, 5152 msg->rsmipc_segment_cookie); 5153 break; 5154 case RSMIPC_MSG_SQREADY: 5155 rsm_proc_sqready(data, src_hwaddr, arg); 5156 break; 5157 case RSMIPC_MSG_SQREADY_ACK: 5158 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5159 break; 5160 case RSMIPC_MSG_CREDIT: 5161 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5162 break; 5163 case RSMIPC_MSG_REPLY: 5164 rsm_intr_reply(msghdr); 5165 break; 5166 case RSMIPC_MSG_BELL: 5167 rsm_intr_event(msg); 5168 break; 5169 case RSMIPC_MSG_IMPORTING: 5170 importer_list_add(src_node, msg->rsmipc_key, 5171 msg->rsmipc_adapter_hwaddr, 5172 msg->rsmipc_segment_cookie); 5173 break; 5174 case RSMIPC_MSG_REPUBLISH: 5175 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5176 break; 5177 default: 5178 DBG_PRINTF((category, RSM_DEBUG, 5179 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5180 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5181 } 5182 5183 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5184 5185 } 5186 5187 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5188 rsm_intr_q_op_t opcode, rsm_addr_t src, 5189 void *data, size_t size, rsm_intr_hand_arg_t arg) 5190 { 5191 DBG_DEFINE(category, 5192 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5193 5194 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5195 5196 switch (opcode) { 5197 case RSM_INTR_Q_OP_CREATE: 5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5199 rsm_sqcreateop_callback(src, arg); 5200 break; 5201 case RSM_INTR_Q_OP_DESTROY: 5202 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5203 break; 5204 case RSM_INTR_Q_OP_RECEIVE: 5205 rsm_intr_callback(data, src, arg); 5206 break; 5207 default: 5208 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5209 "rsm_srv_func: unknown opcode = %x\n", opcode)); 5210 } 5211 5212 chd = chd; 5213 size = size; 5214 5215 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5216 5217 return (RSM_INTR_HAND_CLAIMED); 5218 } 5219 5220 /* *************************** IPC slots ************************* */ 5221 static rsmipc_slot_t * 5222 rsmipc_alloc() 5223 { 5224 int i; 5225 rsmipc_slot_t *slot; 5226 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5227 5228 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5229 5230 /* try to find a free slot, if not wait */ 5231 mutex_enter(&rsm_ipc.lock); 5232 5233 while (rsm_ipc.count == 0) { 5234 rsm_ipc.wanted = 1; 5235 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5236 } 5237 5238 /* An empty slot is available, find it */ 5239 slot = &rsm_ipc.slots[0]; 5240 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 
5241 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5242 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5243 break; 5244 } 5245 } 5246 5247 ASSERT(i < RSMIPC_SZ); 5248 rsm_ipc.count--; /* one less is available */ 5249 rsm_ipc.sequence++; /* new sequence */ 5250 5251 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5252 slot->rsmipc_cookie.ic.index = (uint_t)i; 5253 5254 mutex_exit(&rsm_ipc.lock); 5255 5256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5257 5258 return (slot); 5259 } 5260 5261 static void 5262 rsmipc_free(rsmipc_slot_t *slot) 5263 { 5264 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5265 5266 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5267 5268 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5269 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5270 5271 mutex_enter(&rsm_ipc.lock); 5272 5273 RSMIPC_SET(slot, RSMIPC_FREE); 5274 5275 slot->rsmipc_cookie.ic.sequence = 0; 5276 5277 mutex_exit(&slot->rsmipc_lock); 5278 rsm_ipc.count++; 5279 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5280 if (rsm_ipc.wanted) { 5281 rsm_ipc.wanted = 0; 5282 cv_broadcast(&rsm_ipc.cv); 5283 } 5284 5285 mutex_exit(&rsm_ipc.lock); 5286 5287 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n")); 5288 } 5289 5290 static int 5291 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply) 5292 { 5293 int e = 0; 5294 int credit_check = 0; 5295 int retry_cnt = 0; 5296 int min_retry_cnt = 10; 5297 rsm_send_t is; 5298 rsmipc_slot_t *rslot; 5299 adapter_t *adapter; 5300 path_t *path; 5301 sendq_token_t *sendq_token; 5302 sendq_token_t *used_sendq_token = NULL; 5303 rsm_send_q_handle_t ipc_handle; 5304 DBG_DEFINE(category, 5305 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5306 5307 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d", 5308 dest)); 5309 5310 /* 5311 * Check if this is a local case 5312 */ 5313 if (dest == my_nodeid) { 5314 switch (req->rsmipc_hdr.rsmipc_type) { 5315 case RSMIPC_MSG_SEGCONNECT: 5316 reply->rsmipc_status = (short)rsmsegacl_validate( 5317 req, dest, reply); 5318 break; 5319 case RSMIPC_MSG_BELL: 5320 req->rsmipc_hdr.rsmipc_src = dest; 5321 rsm_intr_event(req); 5322 break; 5323 case RSMIPC_MSG_IMPORTING: 5324 importer_list_add(dest, req->rsmipc_key, 5325 req->rsmipc_adapter_hwaddr, 5326 req->rsmipc_segment_cookie); 5327 break; 5328 case RSMIPC_MSG_NOTIMPORTING: 5329 importer_list_rm(dest, req->rsmipc_key, 5330 req->rsmipc_segment_cookie); 5331 break; 5332 case RSMIPC_MSG_REPUBLISH: 5333 importer_update(dest, req->rsmipc_key, 5334 req->rsmipc_perm); 5335 break; 5336 case RSMIPC_MSG_SUSPEND: 5337 importer_suspend(dest); 5338 break; 5339 case RSMIPC_MSG_SUSPEND_DONE: 5340 rsm_suspend_complete(dest, 0); 5341 break; 5342 case RSMIPC_MSG_RESUME: 5343 importer_resume(dest); 5344 break; 5345 default: 5346 ASSERT(0); 5347 } 5348 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5349 "rsmipc_send done\n")); 5350 return (0); 5351 } 5352 5353 if (dest >= MAX_NODES) { 5354 DBG_PRINTF((category, RSM_ERR, 5355 "rsm: rsmipc_send bad node number %x\n", dest)); 5356 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5357 } 5358 5359 /* 5360 * Oh boy! we are going remote. 
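 *
 * For the flow-controlled message types, the remote send below follows
 * roughly this pattern (illustration only; rsm_send abbreviates
 * adapter->rsmpi_ops->rsm_send, and the retry and cleanup paths are in
 * the code):
 *
 *	mutex_enter(&path->mutex);
 *	while (sendq_token->msgbuf_avail == 0 &&
 *	    path->state == RSMKA_PATH_ACTIVE)
 *	    cv_wait_sig(&sendq_token->sendq_cv, &path->mutex);
 *	sendq_token->msgbuf_avail--;        reserve a message buffer
 *	mutex_exit(&path->mutex);
 *	e = rsm_send(ipc_handle, &is, NULL);
 *	if (e != RSM_SUCCESS) {
 *	    mutex_enter(&path->mutex);
 *	    sendq_token->msgbuf_avail++;    give the buffer back
 *	    cv_broadcast(&sendq_token->sendq_cv);
 *	    mutex_exit(&path->mutex);
 *	}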
5361 */ 5362 5363 /* 5364 * identify if we need to have credits to send this message 5365 * - only selected requests are flow controlled 5366 */ 5367 if (req != NULL) { 5368 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5369 "rsmipc_send:request type=%d\n", 5370 req->rsmipc_hdr.rsmipc_type)); 5371 5372 switch (req->rsmipc_hdr.rsmipc_type) { 5373 case RSMIPC_MSG_SEGCONNECT: 5374 case RSMIPC_MSG_DISCONNECT: 5375 case RSMIPC_MSG_IMPORTING: 5376 case RSMIPC_MSG_SUSPEND: 5377 case RSMIPC_MSG_SUSPEND_DONE: 5378 case RSMIPC_MSG_RESUME: 5379 credit_check = 1; 5380 break; 5381 default: 5382 credit_check = 0; 5383 } 5384 } 5385 5386 again: 5387 if (retry_cnt++ == min_retry_cnt) { 5388 /* backoff before further retries for 10ms */ 5389 delay(drv_usectohz(10000)); 5390 retry_cnt = 0; /* reset retry_cnt */ 5391 } 5392 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token); 5393 if (sendq_token == NULL) { 5394 DBG_PRINTF((category, RSM_ERR, 5395 "rsm: rsmipc_send no device to reach node %d\n", dest)); 5396 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5397 } 5398 5399 if ((sendq_token == used_sendq_token) && 5400 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) || 5401 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) { 5402 rele_sendq_token(sendq_token); 5403 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e)); 5404 return (RSMERR_CONN_ABORTED); 5405 } else 5406 used_sendq_token = sendq_token; 5407 5408 /* lint -save -e413 */ 5409 path = SQ_TOKEN_TO_PATH(sendq_token); 5410 adapter = path->local_adapter; 5411 /* lint -restore */ 5412 ipc_handle = sendq_token->rsmpi_sendq_handle; 5413 5414 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5415 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle)); 5416 5417 if (reply == NULL) { 5418 /* Send request without ack */ 5419 /* 5420 * Set the rsmipc_version number in the msghdr for KA 5421 * communication versioning 5422 */ 5423 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5424 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5425 /* 5426 * remote endpoints incn should match the value in our 5427 * path's remote_incn field. No need to grab any lock 5428 * since we have refcnted the path in rsmka_get_sendq_token 5429 */ 5430 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5431 5432 is.is_data = (void *)req; 5433 is.is_size = sizeof (*req); 5434 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5435 is.is_wait = 0; 5436 5437 if (credit_check) { 5438 mutex_enter(&path->mutex); 5439 /* 5440 * wait till we recv credits or path goes down. If path 5441 * goes down rsm_send will fail and we handle the error 5442 * then 5443 */ 5444 while ((sendq_token->msgbuf_avail == 0) && 5445 (path->state == RSMKA_PATH_ACTIVE)) { 5446 e = cv_wait_sig(&sendq_token->sendq_cv, 5447 &path->mutex); 5448 if (e == 0) { 5449 mutex_exit(&path->mutex); 5450 no_reply_cnt++; 5451 rele_sendq_token(sendq_token); 5452 DBG_PRINTF((category, RSM_DEBUG, 5453 "rsmipc_send done: " 5454 "cv_wait INTERRUPTED")); 5455 return (RSMERR_INTERRUPTED); 5456 } 5457 } 5458 5459 /* 5460 * path is not active retry on another path. 
5461 */ 5462 if (path->state != RSMKA_PATH_ACTIVE) { 5463 mutex_exit(&path->mutex); 5464 rele_sendq_token(sendq_token); 5465 e = RSMERR_CONN_ABORTED; 5466 DBG_PRINTF((category, RSM_ERR, 5467 "rsm: rsmipc_send: path !ACTIVE")); 5468 goto again; 5469 } 5470 5471 ASSERT(sendq_token->msgbuf_avail > 0); 5472 5473 /* 5474 * reserve a msgbuf 5475 */ 5476 sendq_token->msgbuf_avail--; 5477 5478 mutex_exit(&path->mutex); 5479 5480 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5481 NULL); 5482 5483 if (e != RSM_SUCCESS) { 5484 mutex_enter(&path->mutex); 5485 /* 5486 * release the reserved msgbuf since 5487 * the send failed 5488 */ 5489 sendq_token->msgbuf_avail++; 5490 cv_broadcast(&sendq_token->sendq_cv); 5491 mutex_exit(&path->mutex); 5492 } 5493 } else 5494 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5495 NULL); 5496 5497 no_reply_cnt++; 5498 rele_sendq_token(sendq_token); 5499 if (e != RSM_SUCCESS) { 5500 DBG_PRINTF((category, RSM_ERR, 5501 "rsm: rsmipc_send no reply send" 5502 " err = %d no reply count = %d\n", 5503 e, no_reply_cnt)); 5504 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5505 e != RSMERR_BAD_BARRIER_HNDL); 5506 atomic_inc_64(&rsm_ipcsend_errcnt); 5507 goto again; 5508 } else { 5509 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5510 "rsmipc_send done\n")); 5511 return (e); 5512 } 5513 5514 } 5515 5516 if (req == NULL) { 5517 /* Send reply - No flow control is done for reply */ 5518 /* 5519 * Set the version in the msg header for KA communication 5520 * versioning 5521 */ 5522 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5523 reply->rsmipc_hdr.rsmipc_src = my_nodeid; 5524 /* incn number is not used for reply msgs currently */ 5525 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5526 5527 is.is_data = (void *)reply; 5528 is.is_size = sizeof (*reply); 5529 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5530 is.is_wait = 0; 5531 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5532 rele_sendq_token(sendq_token); 5533 if (e != RSM_SUCCESS) { 5534 DBG_PRINTF((category, RSM_ERR, 5535 "rsm: rsmipc_send reply send" 5536 " err = %d\n", e)); 5537 atomic_inc_64(&rsm_ipcsend_errcnt); 5538 goto again; 5539 } else { 5540 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5541 "rsmipc_send done\n")); 5542 return (e); 5543 } 5544 } 5545 5546 /* Reply needed */ 5547 rslot = rsmipc_alloc(); /* allocate a new ipc slot */ 5548 5549 mutex_enter(&rslot->rsmipc_lock); 5550 5551 rslot->rsmipc_data = (void *)reply; 5552 RSMIPC_SET(rslot, RSMIPC_PENDING); 5553 5554 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) { 5555 /* 5556 * Set the rsmipc_version number in the msghdr for KA 5557 * communication versioning 5558 */ 5559 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5560 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5561 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie; 5562 /* 5563 * remote endpoints incn should match the value in our 5564 * path's remote_incn field. No need to grab any lock 5565 * since we have refcnted the path in rsmka_get_sendq_token 5566 */ 5567 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5568 5569 is.is_data = (void *)req; 5570 is.is_size = sizeof (*req); 5571 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5572 is.is_wait = 0; 5573 if (credit_check) { 5574 5575 mutex_enter(&path->mutex); 5576 /* 5577 * wait till we recv credits or path goes down. If path 5578 * goes down rsm_send will fail and we handle the error 5579 * then. 
5580 */ 5581 while ((sendq_token->msgbuf_avail == 0) && 5582 (path->state == RSMKA_PATH_ACTIVE)) { 5583 e = cv_wait_sig(&sendq_token->sendq_cv, 5584 &path->mutex); 5585 if (e == 0) { 5586 mutex_exit(&path->mutex); 5587 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5588 rsmipc_free(rslot); 5589 rele_sendq_token(sendq_token); 5590 DBG_PRINTF((category, RSM_DEBUG, 5591 "rsmipc_send done: " 5592 "cv_wait INTERRUPTED")); 5593 return (RSMERR_INTERRUPTED); 5594 } 5595 } 5596 5597 /* 5598 * path is not active retry on another path. 5599 */ 5600 if (path->state != RSMKA_PATH_ACTIVE) { 5601 mutex_exit(&path->mutex); 5602 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5603 rsmipc_free(rslot); 5604 rele_sendq_token(sendq_token); 5605 e = RSMERR_CONN_ABORTED; 5606 DBG_PRINTF((category, RSM_ERR, 5607 "rsm: rsmipc_send: path !ACTIVE")); 5608 goto again; 5609 } 5610 5611 ASSERT(sendq_token->msgbuf_avail > 0); 5612 5613 /* 5614 * reserve a msgbuf 5615 */ 5616 sendq_token->msgbuf_avail--; 5617 5618 mutex_exit(&path->mutex); 5619 5620 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5621 NULL); 5622 5623 if (e != RSM_SUCCESS) { 5624 mutex_enter(&path->mutex); 5625 /* 5626 * release the reserved msgbuf since 5627 * the send failed 5628 */ 5629 sendq_token->msgbuf_avail++; 5630 cv_broadcast(&sendq_token->sendq_cv); 5631 mutex_exit(&path->mutex); 5632 } 5633 } else 5634 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5635 NULL); 5636 5637 if (e != RSM_SUCCESS) { 5638 DBG_PRINTF((category, RSM_ERR, 5639 "rsm: rsmipc_send rsmpi send err = %d\n", e)); 5640 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5641 rsmipc_free(rslot); 5642 rele_sendq_token(sendq_token); 5643 atomic_inc_64(&rsm_ipcsend_errcnt); 5644 goto again; 5645 } 5646 5647 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */ 5648 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock, 5649 drv_usectohz(5000000), TR_CLOCK_TICK); 5650 if (e < 0) { 5651 /* timed out - retry */ 5652 e = RSMERR_TIMEOUT; 5653 } else if (e == 0) { 5654 /* signalled - return error */ 5655 e = RSMERR_INTERRUPTED; 5656 break; 5657 } else { 5658 e = RSM_SUCCESS; 5659 } 5660 } 5661 5662 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5663 rsmipc_free(rslot); 5664 rele_sendq_token(sendq_token); 5665 5666 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e)); 5667 return (e); 5668 } 5669 5670 static int 5671 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie) 5672 { 5673 rsmipc_request_t request; 5674 5675 /* 5676 * inform the exporter to delete this importer 5677 */ 5678 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 5679 request.rsmipc_key = segid; 5680 request.rsmipc_segment_cookie = cookie; 5681 return (rsmipc_send(dest, &request, RSM_NO_REPLY)); 5682 } 5683 5684 static void 5685 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl, 5686 int acl_len, rsm_permission_t default_permission) 5687 { 5688 int i; 5689 importing_token_t *token; 5690 rsmipc_request_t request; 5691 republish_token_t *republish_list = NULL; 5692 republish_token_t *rp; 5693 rsm_permission_t permission; 5694 int index; 5695 5696 /* 5697 * send the new access mode to all the nodes that have imported 5698 * this segment. 5699 * If the new acl does not have a node that was present in 5700 * the old acl a access permission of 0 is sent. 
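 *
 * Worked example (hypothetical node numbers, for illustration only): if
 * nodes 3 and 5 currently import the segment and the new ACL lists only
 * node 3 with RSM_PERM_READ, the loop below queues a REPUBLISH carrying
 * RSM_PERM_READ for node 3 and default_permission for node 5; a
 * default_permission of 0 is what tells node 5 it no longer has access.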
5701 */ 5702 5703 index = rsmhash(segid); 5704 5705 /* 5706 * create a list of node/permissions to send the republish message 5707 */ 5708 mutex_enter(&importer_list.lock); 5709 5710 token = importer_list.bucket[index]; 5711 while (token != NULL) { 5712 if (segid == token->key) { 5713 permission = default_permission; 5714 5715 for (i = 0; i < acl_len; i++) { 5716 if (token->importing_node == acl[i].ae_node) { 5717 permission = acl[i].ae_permission; 5718 break; 5719 } 5720 } 5721 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP); 5722 5723 rp->key = segid; 5724 rp->importing_node = token->importing_node; 5725 rp->permission = permission; 5726 rp->next = republish_list; 5727 republish_list = rp; 5728 } 5729 token = token->next; 5730 } 5731 5732 mutex_exit(&importer_list.lock); 5733 5734 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH; 5735 request.rsmipc_key = segid; 5736 5737 while (republish_list != NULL) { 5738 request.rsmipc_perm = republish_list->permission; 5739 (void) rsmipc_send(republish_list->importing_node, 5740 &request, RSM_NO_REPLY); 5741 rp = republish_list; 5742 republish_list = republish_list->next; 5743 kmem_free(rp, sizeof (republish_token_t)); 5744 } 5745 } 5746 5747 static void 5748 rsm_send_suspend() 5749 { 5750 int i, e; 5751 rsmipc_request_t request; 5752 list_element_t *tokp; 5753 list_element_t *head = NULL; 5754 importing_token_t *token; 5755 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5756 "rsm_send_suspend enter\n")); 5757 5758 /* 5759 * create a list of node to send the suspend message 5760 * 5761 * Currently the whole importer list is scanned and we obtain 5762 * all the nodes - this basically gets all nodes that at least 5763 * import one segment from the local node. 5764 * 5765 * no need to grab the rsm_suspend_list lock here since we are 5766 * single threaded when suspend is called. 5767 */ 5768 5769 mutex_enter(&importer_list.lock); 5770 for (i = 0; i < rsm_hash_size; i++) { 5771 5772 token = importer_list.bucket[i]; 5773 5774 while (token != NULL) { 5775 5776 tokp = head; 5777 5778 /* 5779 * make sure that the token's node 5780 * is not already on the suspend list 5781 */ 5782 while (tokp != NULL) { 5783 if (tokp->nodeid == token->importing_node) { 5784 break; 5785 } 5786 tokp = tokp->next; 5787 } 5788 5789 if (tokp == NULL) { /* not in suspend list */ 5790 tokp = kmem_zalloc(sizeof (list_element_t), 5791 KM_SLEEP); 5792 tokp->nodeid = token->importing_node; 5793 tokp->next = head; 5794 head = tokp; 5795 } 5796 5797 token = token->next; 5798 } 5799 } 5800 mutex_exit(&importer_list.lock); 5801 5802 if (head == NULL) { /* no importers so go ahead and quiesce segments */ 5803 exporter_quiesce(); 5804 return; 5805 } 5806 5807 mutex_enter(&rsm_suspend_list.list_lock); 5808 ASSERT(rsm_suspend_list.list_head == NULL); 5809 /* 5810 * update the suspend list righaway so that if a node dies the 5811 * pathmanager can set the NODE dead flag 5812 */ 5813 rsm_suspend_list.list_head = head; 5814 mutex_exit(&rsm_suspend_list.list_lock); 5815 5816 tokp = head; 5817 5818 while (tokp != NULL) { 5819 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND; 5820 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY); 5821 /* 5822 * Error in rsmipc_send currently happens due to inaccessibility 5823 * of the remote node. 
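 *
 * Sketch of the suspend/ack bookkeeping around this send (illustration
 * only; see rsm_suspend_complete earlier in this file):
 *
 *	rsm_send_suspend():
 *	    for each node on the suspend list
 *	        if (rsmipc_send(node, SUSPEND) == RSM_SUCCESS)
 *	            elem->flags |= RSM_SUSPEND_ACKPENDING;
 *	rsm_suspend_complete(src_node, flag):
 *	    clear RSM_SUSPEND_ACKPENDING on src_node's element;
 *	    if no element still has RSM_SUSPEND_ACKPENDING set
 *	        exporter_quiesce();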
5824 */ 5825 if (e == RSM_SUCCESS) { /* send succeeded - wait for the ack */ 5826 tokp->flags |= RSM_SUSPEND_ACKPENDING; 5827 } 5828 5829 tokp = tokp->next; 5830 } 5831 5832 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5833 "rsm_send_suspend done\n")); 5834 5835 } 5836 5837 static void 5838 rsm_send_resume() 5839 { 5840 rsmipc_request_t request; 5841 list_element_t *elem, *head; 5842 5843 /* 5844 * Save the suspend list so that we know where to send 5845 * the resume messages, and make the suspend list head 5846 * NULL. 5847 */ 5848 mutex_enter(&rsm_suspend_list.list_lock); 5849 head = rsm_suspend_list.list_head; 5850 rsm_suspend_list.list_head = NULL; 5851 mutex_exit(&rsm_suspend_list.list_lock); 5852 5853 while (head != NULL) { 5854 elem = head; 5855 head = head->next; 5856 5857 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME; 5858 5859 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY); 5860 5861 kmem_free((void *)elem, sizeof (list_element_t)); 5862 5863 } 5864 5865 } 5866 5867 /* 5868 * This function takes a path and sends a message using the sendq 5869 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK 5870 * and RSMIPC_MSG_CREDIT messages are sent using this function. 5871 */ 5872 int 5873 rsmipc_send_controlmsg(path_t *path, int msgtype) 5874 { 5875 int e; 5876 int retry_cnt = 0; 5877 int min_retry_cnt = 10; 5878 adapter_t *adapter; 5879 rsm_send_t is; 5880 rsm_send_q_handle_t ipc_handle; 5881 rsmipc_controlmsg_t msg; 5882 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL); 5883 5884 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5885 "rsmipc_send_controlmsg enter\n")); 5886 5887 ASSERT(MUTEX_HELD(&path->mutex)); 5888 5889 adapter = path->local_adapter; 5890 5891 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx " 5892 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype, 5893 my_nodeid, adapter->hwaddr, path->remote_node, 5894 path->remote_hwaddr, path->procmsg_cnt)); 5895 5896 if (path->state != RSMKA_PATH_ACTIVE) { 5897 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5898 "rsmipc_send_controlmsg done: !
RSMKA_PATH_ACTIVE")); 5899 return (1); 5900 } 5901 5902 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5903 5904 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5905 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5906 msg.rsmipc_hdr.rsmipc_type = msgtype; 5907 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5908 5909 if (msgtype == RSMIPC_MSG_CREDIT) 5910 msg.rsmipc_credits = path->procmsg_cnt; 5911 5912 msg.rsmipc_local_incn = path->local_incn; 5913 5914 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5915 /* incr the sendq, path refcnt */ 5916 PATH_HOLD_NOLOCK(path); 5917 SENDQ_TOKEN_HOLD(path); 5918 5919 do { 5920 /* drop the path lock before doing the rsm_send */ 5921 mutex_exit(&path->mutex); 5922 5923 is.is_data = (void *)&msg; 5924 is.is_size = sizeof (msg); 5925 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5926 is.is_wait = 0; 5927 5928 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5929 5930 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5931 e != RSMERR_BAD_BARRIER_HNDL); 5932 5933 mutex_enter(&path->mutex); 5934 5935 if (e == RSM_SUCCESS) { 5936 break; 5937 } 5938 /* error counter for statistics */ 5939 atomic_inc_64(&rsm_ctrlmsg_errcnt); 5940 5941 DBG_PRINTF((category, RSM_ERR, 5942 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5943 5944 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5945 (void) cv_reltimedwait(&path->sendq_token.sendq_cv, 5946 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK); 5947 retry_cnt = 0; 5948 } 5949 } while (path->state == RSMKA_PATH_ACTIVE); 5950 5951 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5952 SENDQ_TOKEN_RELE(path); 5953 PATH_RELE_NOLOCK(path); 5954 5955 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5956 "rsmipc_send_controlmsg done=%d", e)); 5957 return (e); 5958 } 5959 5960 /* 5961 * Called from rsm_force_unload and path_importer_disconnect. The memory 5962 * mapping for the imported segment is removed and the segment is 5963 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5964 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5965 * and FALSE from rsm_rebind. 5966 * 5967 * When subsequent accesses cause page faulting, the dummy page is mapped 5968 * to resolve the fault, and the mapping generation number is incremented 5969 * so that the application can be notified on a close barrier operation. 5970 * 5971 * It is important to note that the caller of rsmseg_unload is responsible for 5972 * acquiring the segment lock before making a call to rsmseg_unload. This is 5973 * required to make the caller and rsmseg_unload thread safe. The segment lock 5974 * will be released by the rsmseg_unload function. 5975 */ 5976 void 5977 rsmseg_unload(rsmseg_t *im_seg) 5978 { 5979 rsmcookie_t *hdl; 5980 void *shared_cookie; 5981 rsmipc_request_t request; 5982 uint_t maxprot; 5983 5984 DBG_DEFINE(category, 5985 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5986 5987 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5988 5989 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5990 5991 /* wait until segment leaves the mapping state */ 5992 while (im_seg->s_state == RSM_STATE_MAPPING) 5993 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 5994 /* 5995 * An unload is only necessary if the segment is connected. However, 5996 * if the segment was on the import list in state RSM_STATE_CONNECTING 5997 * then a connection was in progress. Change to RSM_STATE_NEW 5998 * here to cause an early exit from the connection process. 
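 * In terms of the code that follows: a segment already in RSM_STATE_NEW
 * needs no work and the segment lock is simply dropped; a segment still
 * in RSM_STATE_CONNECTING is moved, along with its shared structure, to
 * the ABORT_CONNECT state so that the thread performing the connect
 * notices the abort, cleans up and exits early; only segments past these
 * states fall through to the remap and close-connection handling below.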
5999 */ 6000 if (im_seg->s_state == RSM_STATE_NEW) { 6001 rsmseglock_release(im_seg); 6002 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6003 "rsmseg_unload done: RSM_STATE_NEW\n")); 6004 return; 6005 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6006 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6007 rsmsharelock_acquire(im_seg); 6008 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6009 rsmsharelock_release(im_seg); 6010 rsmseglock_release(im_seg); 6011 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6012 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6013 return; 6014 } 6015 6016 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6017 if (im_seg->s_ckl != NULL) { 6018 int e; 6019 /* Setup protections for remap */ 6020 maxprot = PROT_USER; 6021 if (im_seg->s_mode & RSM_PERM_READ) { 6022 maxprot |= PROT_READ; 6023 } 6024 if (im_seg->s_mode & RSM_PERM_WRITE) { 6025 maxprot |= PROT_WRITE; 6026 } 6027 hdl = im_seg->s_ckl; 6028 for (; hdl != NULL; hdl = hdl->c_next) { 6029 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6030 remap_cookie, 6031 hdl->c_off, hdl->c_len, 6032 maxprot, 0, NULL); 6033 6034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6035 "remap returns %d\n", e)); 6036 } 6037 } 6038 6039 (void) rsm_closeconnection(im_seg, &shared_cookie); 6040 6041 if (shared_cookie != NULL) { 6042 /* 6043 * inform the exporting node so this import 6044 * can be deleted from the list of importers. 6045 */ 6046 request.rsmipc_hdr.rsmipc_type = 6047 RSMIPC_MSG_NOTIMPORTING; 6048 request.rsmipc_key = im_seg->s_segid; 6049 request.rsmipc_segment_cookie = shared_cookie; 6050 rsmseglock_release(im_seg); 6051 (void) rsmipc_send(im_seg->s_node, &request, 6052 RSM_NO_REPLY); 6053 } else { 6054 rsmseglock_release(im_seg); 6055 } 6056 } 6057 else 6058 rsmseglock_release(im_seg); 6059 6060 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6061 6062 } 6063 6064 /* ****************************** Importer Calls ************************ */ 6065 6066 static int 6067 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6068 { 6069 int shifts = 0; 6070 6071 if (crgetuid(cr) != owner) { 6072 shifts += 3; 6073 if (!groupmember(group, cr)) 6074 shifts += 3; 6075 } 6076 6077 mode &= ~(perm << shifts); 6078 6079 if (mode == 0) 6080 return (0); 6081 6082 return (secpolicy_rsm_access(cr, owner, mode)); 6083 } 6084 6085 6086 static int 6087 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6088 intptr_t dataptr, int mode) 6089 { 6090 int e; 6091 int recheck_state = 0; 6092 void *shared_cookie; 6093 rsmipc_request_t request; 6094 rsmipc_reply_t reply; 6095 rsm_permission_t access; 6096 adapter_t *adapter; 6097 rsm_addr_t addr = 0; 6098 rsm_import_share_t *sharedp; 6099 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6100 6101 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6102 6103 adapter = rsm_getadapter(msg, mode); 6104 if (adapter == NULL) { 6105 DBG_PRINTF((category, RSM_ERR, 6106 "rsm_connect done:ENODEV adapter=NULL\n")); 6107 return (RSMERR_CTLR_NOT_PRESENT); 6108 } 6109 6110 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6111 rsmka_release_adapter(adapter); 6112 DBG_PRINTF((category, RSM_ERR, 6113 "rsm_connect done:ENODEV loopback\n")); 6114 return (RSMERR_CTLR_NOT_PRESENT); 6115 } 6116 6117 6118 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6119 ASSERT(seg->s_state == RSM_STATE_NEW); 6120 6121 /* 6122 * Translate perm to access 6123 */ 6124 if (msg->perm & ~RSM_PERM_RDWR) { 6125 rsmka_release_adapter(adapter); 
6126 DBG_PRINTF((category, RSM_ERR, 6127 "rsm_connect done:EINVAL invalid perms\n")); 6128 return (RSMERR_BAD_PERMS); 6129 } 6130 access = 0; 6131 if (msg->perm & RSM_PERM_READ) 6132 access |= RSM_ACCESS_READ; 6133 if (msg->perm & RSM_PERM_WRITE) 6134 access |= RSM_ACCESS_WRITE; 6135 6136 seg->s_node = msg->nodeid; 6137 6138 /* 6139 * Adding to the import list locks the segment; release the segment 6140 * lock so we can get the reply for the send. 6141 */ 6142 e = rsmimport_add(seg, msg->key); 6143 if (e) { 6144 rsmka_release_adapter(adapter); 6145 DBG_PRINTF((category, RSM_ERR, 6146 "rsm_connect done:rsmimport_add failed %d\n", e)); 6147 return (e); 6148 } 6149 seg->s_state = RSM_STATE_CONNECTING; 6150 6151 /* 6152 * Set the s_adapter field here so as to have a valid comparison of 6153 * the adapter and the s_adapter value during rsmshare_get. For 6154 * any error, set s_adapter to NULL before doing a release_adapter 6155 */ 6156 seg->s_adapter = adapter; 6157 6158 rsmseglock_release(seg); 6159 6160 /* 6161 * get the pointer to the shared data structure; the 6162 * shared data is locked and refcount has been incremented 6163 */ 6164 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6165 6166 ASSERT(rsmsharelock_held(seg)); 6167 6168 do { 6169 /* flag indicates whether we need to recheck the state */ 6170 recheck_state = 0; 6171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6172 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6173 switch (sharedp->rsmsi_state) { 6174 case RSMSI_STATE_NEW: 6175 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6176 break; 6177 case RSMSI_STATE_CONNECTING: 6178 /* FALLTHRU */ 6179 case RSMSI_STATE_CONN_QUIESCE: 6180 /* FALLTHRU */ 6181 case RSMSI_STATE_MAP_QUIESCE: 6182 /* wait for the state to change */ 6183 while ((sharedp->rsmsi_state == 6184 RSMSI_STATE_CONNECTING) || 6185 (sharedp->rsmsi_state == 6186 RSMSI_STATE_CONN_QUIESCE) || 6187 (sharedp->rsmsi_state == 6188 RSMSI_STATE_MAP_QUIESCE)) { 6189 if (cv_wait_sig(&sharedp->rsmsi_cv, 6190 &sharedp->rsmsi_lock) == 0) { 6191 /* signalled - clean up and return */ 6192 rsmsharelock_release(seg); 6193 rsmimport_rm(seg); 6194 seg->s_adapter = NULL; 6195 rsmka_release_adapter(adapter); 6196 seg->s_state = RSM_STATE_NEW; 6197 DBG_PRINTF((category, RSM_ERR, 6198 "rsm_connect done: INTERRUPTED\n")); 6199 return (RSMERR_INTERRUPTED); 6200 } 6201 } 6202 /* 6203 * the state changed, loop back and check what it is 6204 */ 6205 recheck_state = 1; 6206 break; 6207 case RSMSI_STATE_ABORT_CONNECT: 6208 /* exit the loop and clean up further down */ 6209 break; 6210 case RSMSI_STATE_CONNECTED: 6211 /* already connected, good - fall through */ 6212 case RSMSI_STATE_MAPPED: 6213 /* already mapped, wow - fall through */ 6214 /* access validation etc is done further down */ 6215 break; 6216 case RSMSI_STATE_DISCONNECTED: 6217 /* disconnected - so reconnect now */ 6218 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6219 break; 6220 default: 6221 ASSERT(0); /* Invalid State */ 6222 } 6223 } while (recheck_state); 6224 6225 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6226 /* we are the first to connect */ 6227 rsmsharelock_release(seg); 6228 6229 if (msg->nodeid != my_nodeid) { 6230 addr = get_remote_hwaddr(adapter, msg->nodeid); 6231 6232 if ((int64_t)addr < 0) { 6233 rsmsharelock_acquire(seg); 6234 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6235 RSMSI_STATE_NEW); 6236 rsmsharelock_release(seg); 6237 rsmimport_rm(seg); 6238 seg->s_adapter = NULL; 6239 rsmka_release_adapter(adapter); 6240 seg->s_state = 
RSM_STATE_NEW; 6241 DBG_PRINTF((category, RSM_ERR, 6242 "rsm_connect done: hwaddr<0\n")); 6243 return (RSMERR_INTERNAL_ERROR); 6244 } 6245 } else { 6246 addr = adapter->hwaddr; 6247 } 6248 6249 /* 6250 * send request to node [src, dest, key, msgid] and get back 6251 * [status, msgid, cookie] 6252 */ 6253 request.rsmipc_key = msg->key; 6254 /* 6255 * we need the s_mode of the exporter so pass 6256 * RSM_ACCESS_TRUSTED 6257 */ 6258 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6259 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6260 request.rsmipc_adapter_hwaddr = addr; 6261 request.rsmipc_segment_cookie = sharedp; 6262 6263 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6264 if (e) { 6265 rsmsharelock_acquire(seg); 6266 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6267 RSMSI_STATE_NEW); 6268 rsmsharelock_release(seg); 6269 rsmimport_rm(seg); 6270 seg->s_adapter = NULL; 6271 rsmka_release_adapter(adapter); 6272 seg->s_state = RSM_STATE_NEW; 6273 DBG_PRINTF((category, RSM_ERR, 6274 "rsm_connect done:rsmipc_send failed %d\n", e)); 6275 return (e); 6276 } 6277 6278 if (reply.rsmipc_status != RSM_SUCCESS) { 6279 rsmsharelock_acquire(seg); 6280 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6281 RSMSI_STATE_NEW); 6282 rsmsharelock_release(seg); 6283 rsmimport_rm(seg); 6284 seg->s_adapter = NULL; 6285 rsmka_release_adapter(adapter); 6286 seg->s_state = RSM_STATE_NEW; 6287 DBG_PRINTF((category, RSM_ERR, 6288 "rsm_connect done:rsmipc_send reply err %d\n", 6289 reply.rsmipc_status)); 6290 return (reply.rsmipc_status); 6291 } 6292 6293 rsmsharelock_acquire(seg); 6294 /* store the information recvd into the shared data struct */ 6295 sharedp->rsmsi_mode = reply.rsmipc_mode; 6296 sharedp->rsmsi_uid = reply.rsmipc_uid; 6297 sharedp->rsmsi_gid = reply.rsmipc_gid; 6298 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6299 sharedp->rsmsi_cookie = sharedp; 6300 } 6301 6302 rsmsharelock_release(seg); 6303 6304 /* 6305 * Get the segment lock and check for a force disconnect 6306 * from the export side which would have changed the state 6307 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6308 * force disconnect will be held off until the connection 6309 * has completed. 
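 * Note the lock ordering used from here on: the segment lock is taken
 * before the shared-data (rsmsi) lock, and the two are dropped in the
 * reverse order, i.e.
 *
 *	rsmseglock_acquire(seg);
 *	rsmsharelock_acquire(seg);
 *	...
 *	rsmsharelock_release(seg);
 *	rsmseglock_release(seg);
 *
 * The other import-side paths above and below follow the same ordering.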
6310 */ 6311 rsmseglock_acquire(seg); 6312 rsmsharelock_acquire(seg); 6313 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6314 seg->s_state == RSM_STATE_ABORT_CONNECT); 6315 6316 shared_cookie = sharedp->rsmsi_cookie; 6317 6318 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6319 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6320 seg->s_state = RSM_STATE_NEW; 6321 seg->s_adapter = NULL; 6322 rsmsharelock_release(seg); 6323 rsmseglock_release(seg); 6324 rsmimport_rm(seg); 6325 rsmka_release_adapter(adapter); 6326 6327 rsmsharelock_acquire(seg); 6328 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6329 /* 6330 * set a flag indicating abort handling has been 6331 * done 6332 */ 6333 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6334 rsmsharelock_release(seg); 6335 /* send a message to exporter - only once */ 6336 (void) rsm_send_notimporting(msg->nodeid, 6337 msg->key, shared_cookie); 6338 rsmsharelock_acquire(seg); 6339 /* 6340 * wake up any waiting importers and inform that 6341 * connection has been aborted 6342 */ 6343 cv_broadcast(&sharedp->rsmsi_cv); 6344 } 6345 rsmsharelock_release(seg); 6346 6347 DBG_PRINTF((category, RSM_ERR, 6348 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6349 return (RSMERR_INTERRUPTED); 6350 } 6351 6352 6353 /* 6354 * We need to verify that this process has access 6355 */ 6356 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6357 access & sharedp->rsmsi_mode, 6358 (int)(msg->perm & RSM_PERM_RDWR), cred); 6359 if (e) { 6360 rsmsharelock_release(seg); 6361 seg->s_state = RSM_STATE_NEW; 6362 seg->s_adapter = NULL; 6363 rsmseglock_release(seg); 6364 rsmimport_rm(seg); 6365 rsmka_release_adapter(adapter); 6366 /* 6367 * No need to lock segment it has been removed 6368 * from the hash table 6369 */ 6370 rsmsharelock_acquire(seg); 6371 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6372 rsmsharelock_release(seg); 6373 /* this is the first importer */ 6374 6375 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6376 shared_cookie); 6377 rsmsharelock_acquire(seg); 6378 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6379 cv_broadcast(&sharedp->rsmsi_cv); 6380 } 6381 rsmsharelock_release(seg); 6382 6383 DBG_PRINTF((category, RSM_ERR, 6384 "rsm_connect done: ipcaccess failed\n")); 6385 return (RSMERR_PERM_DENIED); 6386 } 6387 6388 /* update state and cookie */ 6389 seg->s_segid = sharedp->rsmsi_segid; 6390 seg->s_len = sharedp->rsmsi_seglen; 6391 seg->s_mode = access & sharedp->rsmsi_mode; 6392 seg->s_pid = ddi_get_pid(); 6393 seg->s_mapinfo = NULL; 6394 6395 if (seg->s_node != my_nodeid) { 6396 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6397 e = adapter->rsmpi_ops->rsm_connect( 6398 adapter->rsmpi_handle, 6399 addr, seg->s_segid, &sharedp->rsmsi_handle); 6400 6401 if (e != RSM_SUCCESS) { 6402 seg->s_state = RSM_STATE_NEW; 6403 seg->s_adapter = NULL; 6404 rsmsharelock_release(seg); 6405 rsmseglock_release(seg); 6406 rsmimport_rm(seg); 6407 rsmka_release_adapter(adapter); 6408 /* 6409 * inform the exporter to delete this importer 6410 */ 6411 (void) rsm_send_notimporting(msg->nodeid, 6412 msg->key, shared_cookie); 6413 6414 /* 6415 * Now inform any waiting importers to 6416 * retry connect. 
This needs to be done 6417 * after sending notimporting so that 6418 * the notimporting is sent before a waiting 6419 * importer sends a segconnect while retrying 6420 * 6421 * No need to lock segment it has been removed 6422 * from the hash table 6423 */ 6424 6425 rsmsharelock_acquire(seg); 6426 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6427 cv_broadcast(&sharedp->rsmsi_cv); 6428 rsmsharelock_release(seg); 6429 6430 DBG_PRINTF((category, RSM_ERR, 6431 "rsm_connect error %d\n", e)); 6432 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6433 return ( 6434 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6435 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6436 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6437 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6438 else 6439 return (e); 6440 } 6441 6442 } 6443 seg->s_handle.in = sharedp->rsmsi_handle; 6444 6445 } 6446 6447 seg->s_state = RSM_STATE_CONNECT; 6448 6449 6450 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6451 if (bar_va) { 6452 /* increment generation number on barrier page */ 6453 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6454 /* return user off into barrier page where status will be */ 6455 msg->off = (int)seg->s_hdr.rsmrc_num; 6456 msg->gnum = bar_va[msg->off]; /* gnum race */ 6457 } else { 6458 msg->off = 0; 6459 msg->gnum = 0; /* gnum race */ 6460 } 6461 6462 msg->len = (int)sharedp->rsmsi_seglen; 6463 msg->rnum = seg->s_minor; 6464 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6465 rsmsharelock_release(seg); 6466 rsmseglock_release(seg); 6467 6468 /* Return back to user the segment size & perm in case it's needed */ 6469 6470 #ifdef _MULTI_DATAMODEL 6471 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6472 rsm_ioctlmsg32_t msg32; 6473 6474 if (msg->len > UINT_MAX) 6475 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6476 else 6477 msg32.len = msg->len; 6478 msg32.off = msg->off; 6479 msg32.perm = msg->perm; 6480 msg32.gnum = msg->gnum; 6481 msg32.rnum = msg->rnum; 6482 6483 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6484 "rsm_connect done\n")); 6485 6486 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6487 sizeof (msg32), mode)) 6488 return (RSMERR_BAD_ADDR); 6489 else 6490 return (RSM_SUCCESS); 6491 } 6492 #endif 6493 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6494 6495 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6496 mode)) 6497 return (RSMERR_BAD_ADDR); 6498 else 6499 return (RSM_SUCCESS); 6500 } 6501 6502 static int 6503 rsm_unmap(rsmseg_t *seg) 6504 { 6505 int err; 6506 adapter_t *adapter; 6507 rsm_import_share_t *sharedp; 6508 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6509 6510 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6511 "rsm_unmap enter %u\n", seg->s_segid)); 6512 6513 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6514 6515 /* assert seg is locked */ 6516 ASSERT(rsmseglock_held(seg)); 6517 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6518 6519 if ((seg->s_state != RSM_STATE_ACTIVE) && 6520 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6521 /* segment unmap has already been done */ 6522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6523 return (RSM_SUCCESS); 6524 } 6525 6526 sharedp = seg->s_share; 6527 6528 rsmsharelock_acquire(seg); 6529 6530 /* 6531 * - shared data struct is in MAPPED or MAP_QUIESCE state 6532 */ 6533 6534 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6535 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6536 6537 /* 6538 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6539 * the segment cookie 
list was NULL; but it is always NULL when 6540 * called from rsmmap_unmap and won't be NULL when called for 6541 * a force disconnect - so the check for NULL cookie list was removed 6542 */ 6543 6544 ASSERT(sharedp->rsmsi_mapcnt > 0); 6545 6546 sharedp->rsmsi_mapcnt--; 6547 6548 if (sharedp->rsmsi_mapcnt == 0) { 6549 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6550 /* unmap the shared RSMPI mapping */ 6551 adapter = seg->s_adapter; 6552 if (seg->s_node != my_nodeid) { 6553 ASSERT(sharedp->rsmsi_handle != NULL); 6554 err = adapter->rsmpi_ops-> 6555 rsm_unmap(sharedp->rsmsi_handle); 6556 DBG_PRINTF((category, RSM_DEBUG, 6557 "rsm_unmap: rsmpi unmap %d\n", err)); 6558 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6559 sharedp->rsmsi_mapinfo = NULL; 6560 } 6561 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6562 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6563 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6564 } 6565 } 6566 6567 rsmsharelock_release(seg); 6568 6569 /* 6570 * The s_cookie field is used to store the cookie returned from the 6571 * ddi_umem_lock when binding the pages for an export segment. This 6572 * is the primary use of the s_cookie field and does not normally 6573 * pertain to any importing segment except in the loopback case. 6574 * For the loopback case, the import segment and export segment are 6575 * on the same node, the s_cookie field of the segment structure for 6576 * the importer is initialized to the s_cookie field in the exported 6577 * segment during the map operation and is used during the call to 6578 * devmap_umem_setup for the import mapping. 6579 * Thus, during unmap, we simply need to set s_cookie to NULL to 6580 * indicate that the mapping no longer exists. 6581 */ 6582 seg->s_cookie = NULL; 6583 6584 seg->s_mapinfo = NULL; 6585 6586 if (seg->s_state == RSM_STATE_ACTIVE) 6587 seg->s_state = RSM_STATE_CONNECT; 6588 else 6589 seg->s_state = RSM_STATE_CONN_QUIESCE; 6590 6591 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6592 6593 return (RSM_SUCCESS); 6594 } 6595 6596 /* 6597 * cookie returned here if not null indicates that it is 6598 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6599 * message. 6600 */ 6601 static int 6602 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6603 { 6604 int e; 6605 adapter_t *adapter; 6606 rsm_import_share_t *sharedp; 6607 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6608 6609 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6610 "rsm_closeconnection enter\n")); 6611 6612 *cookie = (void *)NULL; 6613 6614 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6615 6616 /* assert seg is locked */ 6617 ASSERT(rsmseglock_held(seg)); 6618 6619 if (seg->s_state == RSM_STATE_DISCONNECT) { 6620 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6621 "rsm_closeconnection done: already disconnected\n")); 6622 return (RSM_SUCCESS); 6623 } 6624 6625 /* wait for all putv/getv ops to get done */ 6626 while (seg->s_rdmacnt > 0) { 6627 cv_wait(&seg->s_cv, &seg->s_lock); 6628 } 6629 6630 (void) rsm_unmap(seg); 6631 6632 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6633 seg->s_state == RSM_STATE_CONN_QUIESCE); 6634 6635 adapter = seg->s_adapter; 6636 sharedp = seg->s_share; 6637 6638 ASSERT(sharedp != NULL); 6639 6640 rsmsharelock_acquire(seg); 6641 6642 /* 6643 * Disconnect on adapter 6644 * 6645 * The current algorithm is stateless, I don't have to contact 6646 * server when I go away. It only gives me permissions. Of course, 6647 * the adapters will talk to terminate the connect. 
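 * In practice the RSMPI rsm_disconnect below is issued only by the last
 * importer of this (segment, node, adapter) connection: rsmsi_refcnt
 * counts the importers sharing the connection, rsmsi_mapcnt must already
 * have dropped to zero, and once the refcount itself reaches zero the
 * shared structure is torn down and freed.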
6648 * 6649 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6650 */ 6651 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6652 (sharedp->rsmsi_node != my_nodeid)) { 6653 6654 if (sharedp->rsmsi_refcnt == 1) { 6655 /* this is the last importer */ 6656 ASSERT(sharedp->rsmsi_mapcnt == 0); 6657 6658 e = adapter->rsmpi_ops-> 6659 rsm_disconnect(sharedp->rsmsi_handle); 6660 if (e != RSM_SUCCESS) { 6661 DBG_PRINTF((category, RSM_DEBUG, 6662 "rsm:disconnect failed seg=%x:err=%d\n", 6663 seg->s_key, e)); 6664 } 6665 } 6666 } 6667 6668 seg->s_handle.in = NULL; 6669 6670 sharedp->rsmsi_refcnt--; 6671 6672 if (sharedp->rsmsi_refcnt == 0) { 6673 *cookie = (void *)sharedp->rsmsi_cookie; 6674 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6675 sharedp->rsmsi_handle = NULL; 6676 rsmsharelock_release(seg); 6677 6678 /* clean up the shared data structure */ 6679 mutex_destroy(&sharedp->rsmsi_lock); 6680 cv_destroy(&sharedp->rsmsi_cv); 6681 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6682 6683 } else { 6684 rsmsharelock_release(seg); 6685 } 6686 6687 /* increment generation number on barrier page */ 6688 if (bar_va) { 6689 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6690 } 6691 6692 /* 6693 * The following needs to be done after any 6694 * rsmsharelock calls which use seg->s_share. 6695 */ 6696 seg->s_share = NULL; 6697 6698 seg->s_state = RSM_STATE_DISCONNECT; 6699 /* signal anyone waiting in the CONN_QUIESCE state */ 6700 cv_broadcast(&seg->s_cv); 6701 6702 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6703 "rsm_closeconnection done\n")); 6704 6705 return (RSM_SUCCESS); 6706 } 6707 6708 int 6709 rsm_disconnect(rsmseg_t *seg) 6710 { 6711 rsmipc_request_t request; 6712 void *shared_cookie; 6713 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6714 6715 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6716 6717 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6718 6719 /* assert seg isn't locked */ 6720 ASSERT(!rsmseglock_held(seg)); 6721 6722 6723 /* Remove segment from imported list */ 6724 rsmimport_rm(seg); 6725 6726 /* acquire the segment */ 6727 rsmseglock_acquire(seg); 6728 6729 /* wait until segment leaves the mapping state */ 6730 while (seg->s_state == RSM_STATE_MAPPING) 6731 cv_wait(&seg->s_cv, &seg->s_lock); 6732 6733 if (seg->s_state == RSM_STATE_DISCONNECT) { 6734 seg->s_state = RSM_STATE_NEW; 6735 rsmseglock_release(seg); 6736 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6737 "rsm_disconnect done: already disconnected\n")); 6738 return (RSM_SUCCESS); 6739 } 6740 6741 (void) rsm_closeconnection(seg, &shared_cookie); 6742 6743 /* update state */ 6744 seg->s_state = RSM_STATE_NEW; 6745 6746 if (shared_cookie != NULL) { 6747 /* 6748 * This is the last importer so inform the exporting node 6749 * so this import can be deleted from the list of importers. 
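 * rsm_closeconnection() returns a non-NULL shared_cookie only in this
 * last-importer case, and the segment lock is released before the send
 * so that the RSMIPC_MSG_NOTIMPORTING message is not sent while the
 * lock is held.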
6750 */ 6751 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6752 request.rsmipc_key = seg->s_segid; 6753 request.rsmipc_segment_cookie = shared_cookie; 6754 rsmseglock_release(seg); 6755 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6756 } else { 6757 rsmseglock_release(seg); 6758 } 6759 6760 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6761 6762 return (DDI_SUCCESS); 6763 } 6764 6765 /*ARGSUSED*/ 6766 static int 6767 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6768 struct pollhead **phpp) 6769 { 6770 minor_t rnum; 6771 rsmresource_t *res; 6772 rsmseg_t *seg; 6773 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6774 6775 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6776 6777 /* find minor, no lock */ 6778 rnum = getminor(dev); 6779 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6780 6781 /* poll is supported only for export/import segments */ 6782 if ((res == NULL) || (res == RSMRC_RESERVED) || 6783 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6784 return (ENXIO); 6785 } 6786 6787 *reventsp = 0; 6788 6789 /* 6790 * An exported segment must be in state RSM_STATE_EXPORT; an 6791 * imported segment must be in state RSM_STATE_ACTIVE. 6792 */ 6793 seg = (rsmseg_t *)res; 6794 6795 if (seg->s_pollevent) { 6796 *reventsp = POLLRDNORM; 6797 } else if (!anyyet) { 6798 /* cannot take segment lock here */ 6799 *phpp = &seg->s_poll; 6800 seg->s_pollflag |= RSM_SEGMENT_POLL; 6801 } 6802 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6803 return (0); 6804 } 6805 6806 6807 6808 /* ************************* IOCTL Commands ********************* */ 6809 6810 static rsmseg_t * 6811 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6812 rsm_resource_type_t type) 6813 { 6814 /* get segment from resource handle */ 6815 rsmseg_t *seg; 6816 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6817 6818 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6819 6820 6821 if (res != RSMRC_RESERVED) { 6822 seg = (rsmseg_t *)res; 6823 } else { 6824 /* Allocate segment now and bind it */ 6825 seg = rsmseg_alloc(rnum, credp); 6826 6827 /* 6828 * if DR pre-processing is going on or DR is in progress 6829 * then the new export segments should be in the NEW_QSCD state 6830 */ 6831 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6832 mutex_enter(&rsm_drv_data.drv_lock); 6833 if ((rsm_drv_data.drv_state == 6834 RSM_DRV_PREDEL_STARTED) || 6835 (rsm_drv_data.drv_state == 6836 RSM_DRV_PREDEL_COMPLETED) || 6837 (rsm_drv_data.drv_state == 6838 RSM_DRV_DR_IN_PROGRESS)) { 6839 seg->s_state = RSM_STATE_NEW_QUIESCED; 6840 } 6841 mutex_exit(&rsm_drv_data.drv_lock); 6842 } 6843 6844 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6845 } 6846 6847 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6848 6849 return (seg); 6850 } 6851 6852 static int 6853 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6854 int mode, cred_t *credp) 6855 { 6856 int error; 6857 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6858 6859 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6860 6861 arg = arg; 6862 credp = credp; 6863 6864 ASSERT(seg != NULL); 6865 6866 switch (cmd) { 6867 case RSM_IOCTL_BIND: 6868 error = rsm_bind(seg, msg, arg, mode); 6869 break; 6870 case RSM_IOCTL_REBIND: 6871 error = rsm_rebind(seg, msg); 6872 break; 6873 case RSM_IOCTL_UNBIND: 6874 error = ENOTSUP; 6875 break; 6876 case 
RSM_IOCTL_PUBLISH: 6877 error = rsm_publish(seg, msg, arg, mode); 6878 break; 6879 case RSM_IOCTL_REPUBLISH: 6880 error = rsm_republish(seg, msg, mode); 6881 break; 6882 case RSM_IOCTL_UNPUBLISH: 6883 error = rsm_unpublish(seg, 1); 6884 break; 6885 default: 6886 error = EINVAL; 6887 break; 6888 } 6889 6890 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6891 error)); 6892 6893 return (error); 6894 } 6895 static int 6896 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6897 int mode, cred_t *credp) 6898 { 6899 int error; 6900 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6901 6902 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6903 6904 ASSERT(seg); 6905 6906 switch (cmd) { 6907 case RSM_IOCTL_CONNECT: 6908 error = rsm_connect(seg, msg, credp, arg, mode); 6909 break; 6910 default: 6911 error = EINVAL; 6912 break; 6913 } 6914 6915 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6916 error)); 6917 return (error); 6918 } 6919 6920 static int 6921 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6922 int mode) 6923 { 6924 int e; 6925 adapter_t *adapter; 6926 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6927 6928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6929 6930 6931 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6932 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6933 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6934 return (RSMERR_CONN_ABORTED); 6935 } else if (seg->s_node == my_nodeid) { 6936 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6937 "rsmbar_ioctl done: loopback\n")); 6938 return (RSM_SUCCESS); 6939 } 6940 6941 adapter = seg->s_adapter; 6942 6943 switch (cmd) { 6944 case RSM_IOCTL_BAR_CHECK: 6945 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6946 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6947 return (bar_va ? RSM_SUCCESS : EINVAL); 6948 case RSM_IOCTL_BAR_OPEN: 6949 e = adapter->rsmpi_ops-> 6950 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6951 break; 6952 case RSM_IOCTL_BAR_ORDER: 6953 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6954 break; 6955 case RSM_IOCTL_BAR_CLOSE: 6956 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6957 break; 6958 default: 6959 e = EINVAL; 6960 break; 6961 } 6962 6963 if (e == RSM_SUCCESS) { 6964 #ifdef _MULTI_DATAMODEL 6965 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6966 rsm_ioctlmsg32_t msg32; 6967 int i; 6968 6969 for (i = 0; i < 4; i++) { 6970 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6971 } 6972 6973 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6974 "rsmbar_ioctl done\n")); 6975 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6976 sizeof (msg32), mode)) 6977 return (RSMERR_BAD_ADDR); 6978 else 6979 return (RSM_SUCCESS); 6980 } 6981 #endif 6982 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6983 "rsmbar_ioctl done\n")); 6984 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6985 sizeof (*msg), mode)) 6986 return (RSMERR_BAD_ADDR); 6987 else 6988 return (RSM_SUCCESS); 6989 } 6990 6991 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6992 "rsmbar_ioctl done: error=%d\n", e)); 6993 6994 return (e); 6995 } 6996 6997 /* 6998 * Ring the doorbell of the export segment to which this segment is 6999 * connected. 
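 * This is the kernel half of a signal post: the RSMIPC_MSG_BELL message
 * built below is delivered to the remote kernel agent, whose message
 * handler (not shown in this excerpt) is expected to bump the target
 * segment's s_pollevent count; rsm_chpoll() reports POLLRDNORM while
 * that count is non-zero and RSM_IOCTL_CONSUMEEVENT decrements it (see
 * rsm_consumeevent_ioctl() further down).  Purely as an illustration of
 * that post/consume counter - a user-level sketch, not driver code, and
 * every name in it is hypothetical:
 *
 *	#include <stdatomic.h>
 *	#include <stdio.h>
 *
 *	static atomic_uint pollevent;	// pending doorbell count
 *
 *	static void
 *	bell_post(void)			// sender rings the doorbell
 *	{
 *		atomic_fetch_add(&pollevent, 1);
 *	}
 *
 *	static int
 *	bell_consume(void)		// poller consumes one event
 *	{
 *		if (atomic_load(&pollevent) == 0)
 *			return (0);
 *		atomic_fetch_sub(&pollevent, 1);
 *		return (1);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		bell_post();
 *		printf("event ready: %d\n", bell_consume());
 *		return (0);
 *	}
 *
 * The driver performs the equivalent check-and-decrement with the
 * segment lock held, so the sketch's separate load and subtract are
 * collapsed into one critical section there.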
7000 */ 7001 static int 7002 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7003 { 7004 int e = 0; 7005 rsmipc_request_t request; 7006 7007 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7008 7009 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7010 7011 request.rsmipc_key = seg->s_segid; 7012 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7013 request.rsmipc_segment_cookie = NULL; 7014 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7015 7016 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7017 "exportbell_ioctl done: %d\n", e)); 7018 7019 return (e); 7020 } 7021 7022 /* 7023 * Ring the doorbells of all segments importing this segment 7024 */ 7025 static int 7026 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7027 { 7028 importing_token_t *token = NULL; 7029 rsmipc_request_t request; 7030 int index; 7031 7032 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7033 7034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7035 7036 ASSERT(seg->s_state != RSM_STATE_NEW && 7037 seg->s_state != RSM_STATE_NEW_QUIESCED); 7038 7039 request.rsmipc_key = seg->s_segid; 7040 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7041 7042 index = rsmhash(seg->s_segid); 7043 7044 token = importer_list.bucket[index]; 7045 7046 while (token != NULL) { 7047 if (seg->s_key == token->key) { 7048 request.rsmipc_segment_cookie = 7049 token->import_segment_cookie; 7050 (void) rsmipc_send(token->importing_node, 7051 &request, RSM_NO_REPLY); 7052 } 7053 token = token->next; 7054 } 7055 7056 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7057 "importbell_ioctl done\n")); 7058 return (RSM_SUCCESS); 7059 } 7060 7061 static int 7062 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7063 rsm_poll_event_t **eventspp, int mode) 7064 { 7065 rsm_poll_event_t *evlist = NULL; 7066 size_t evlistsz; 7067 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7068 7069 #ifdef _MULTI_DATAMODEL 7070 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7071 int i; 7072 rsm_consume_event_msg32_t cemsg32 = {0}; 7073 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7074 rsm_poll_event32_t *evlist32; 7075 size_t evlistsz32; 7076 7077 /* copyin the ioctl message */ 7078 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7079 sizeof (rsm_consume_event_msg32_t), mode)) { 7080 DBG_PRINTF((category, RSM_ERR, 7081 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7082 return (RSMERR_BAD_ADDR); 7083 } 7084 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7085 msgp->numents = (int)cemsg32.numents; 7086 7087 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7088 /* 7089 * If numents is large alloc events list on heap otherwise 7090 * use the address of array that was passed in. 
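 * In this ILP32 path the caller's array holds rsm_poll_event32_t
 * entries, so they are first copied into a temporary 32-bit array
 * (sized on the stack or on the heap by the same RSM_MAX_POLLFDS test)
 * and then widened field by field into the rsm_poll_event_t list that
 * the rest of the code operates on.  numents is validated against
 * max_segs before anything is allocated.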
7091 */ 7092 if (msgp->numents > RSM_MAX_POLLFDS) { 7093 if (msgp->numents > max_segs) { /* validate numents */ 7094 DBG_PRINTF((category, RSM_ERR, 7095 "consumeevent_copyin: " 7096 "RSMERR_BAD_ARGS_ERRORS\n")); 7097 return (RSMERR_BAD_ARGS_ERRORS); 7098 } 7099 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7100 } else { 7101 evlist32 = event32; 7102 } 7103 7104 /* copyin the seglist into the rsm_poll_event32_t array */ 7105 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7106 evlistsz32, mode)) { 7107 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7108 kmem_free(evlist32, evlistsz32); 7109 } 7110 DBG_PRINTF((category, RSM_ERR, 7111 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7112 return (RSMERR_BAD_ADDR); 7113 } 7114 7115 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7116 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7117 7118 if (msgp->numents > RSM_MAX_POLLFDS) { 7119 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7120 *eventspp = evlist; 7121 } else { 7122 evlist = *eventspp; 7123 } 7124 /* 7125 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7126 * array 7127 */ 7128 for (i = 0; i < msgp->numents; i++) { 7129 evlist[i].rnum = evlist32[i].rnum; 7130 evlist[i].fdsidx = evlist32[i].fdsidx; 7131 evlist[i].revent = evlist32[i].revent; 7132 } 7133 /* free the temp 32-bit event list */ 7134 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7135 kmem_free(evlist32, evlistsz32); 7136 } 7137 7138 return (RSM_SUCCESS); 7139 } 7140 #endif 7141 /* copyin the ioctl message */ 7142 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7143 mode)) { 7144 DBG_PRINTF((category, RSM_ERR, 7145 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7146 return (RSMERR_BAD_ADDR); 7147 } 7148 /* 7149 * If numents is large alloc events list on heap otherwise 7150 * use the address of array that was passed in. 
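 * The small-request/large-request split keeps the common case (at most
 * RSM_MAX_POLLFDS entries) on the caller's stack array and pays for a
 * kmem_zalloc/kmem_free pair only for larger requests.  For illustration
 * only, the same pattern as a self-contained user-level C sketch (the
 * names below are hypothetical, not driver symbols):
 *
 *	#include <stdlib.h>
 *	#include <string.h>
 *
 *	#define	SMALL_MAX	16
 *
 *	int
 *	process_events(const int *src, size_t n)
 *	{
 *		int small[SMALL_MAX];
 *		int *buf = small;
 *
 *		if (n > SMALL_MAX) {
 *			buf = calloc(n, sizeof (int));
 *			if (buf == NULL)
 *				return (-1);
 *		}
 *		memcpy(buf, src, n * sizeof (int));
 *		// ... consume the events in buf ...
 *		if (buf != small)
 *			free(buf);
 *		return (0);
 *	}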
7151 */ 7152 if (msgp->numents > RSM_MAX_POLLFDS) { 7153 if (msgp->numents > max_segs) { /* validate numents */ 7154 DBG_PRINTF((category, RSM_ERR, 7155 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7156 return (RSMERR_BAD_ARGS_ERRORS); 7157 } 7158 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7159 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7160 *eventspp = evlist; 7161 } 7162 7163 /* copyin the seglist */ 7164 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7165 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7166 if (evlist) { 7167 kmem_free(evlist, evlistsz); 7168 *eventspp = NULL; 7169 } 7170 DBG_PRINTF((category, RSM_ERR, 7171 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7172 return (RSMERR_BAD_ADDR); 7173 } 7174 7175 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7176 "consumeevent_copyin done\n")); 7177 return (RSM_SUCCESS); 7178 } 7179 7180 static int 7181 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7182 rsm_poll_event_t *eventsp, int mode) 7183 { 7184 size_t evlistsz; 7185 int err = RSM_SUCCESS; 7186 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7187 7188 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7189 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7190 msgp->numents, eventsp)); 7191 7192 #ifdef _MULTI_DATAMODEL 7193 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7194 int i; 7195 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7196 rsm_poll_event32_t *evlist32; 7197 size_t evlistsz32; 7198 7199 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7200 if (msgp->numents > RSM_MAX_POLLFDS) { 7201 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7202 } else { 7203 evlist32 = event32; 7204 } 7205 7206 /* 7207 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7208 * array 7209 */ 7210 for (i = 0; i < msgp->numents; i++) { 7211 evlist32[i].rnum = eventsp[i].rnum; 7212 evlist32[i].fdsidx = eventsp[i].fdsidx; 7213 evlist32[i].revent = eventsp[i].revent; 7214 } 7215 7216 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7217 evlistsz32, mode)) { 7218 err = RSMERR_BAD_ADDR; 7219 } 7220 7221 if (msgp->numents > RSM_MAX_POLLFDS) { 7222 if (evlist32) { /* free the temp 32-bit event list */ 7223 kmem_free(evlist32, evlistsz32); 7224 } 7225 /* 7226 * eventsp and evlistsz are based on rsm_poll_event_t 7227 * type 7228 */ 7229 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7230 /* event list on the heap and needs to be freed here */ 7231 if (eventsp) { 7232 kmem_free(eventsp, evlistsz); 7233 } 7234 } 7235 7236 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7237 "consumeevent_copyout done: err=%d\n", err)); 7238 return (err); 7239 } 7240 #endif 7241 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7242 7243 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7244 mode)) { 7245 err = RSMERR_BAD_ADDR; 7246 } 7247 7248 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7249 /* event list on the heap and needs to be freed here */ 7250 kmem_free(eventsp, evlistsz); 7251 } 7252 7253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7254 "consumeevent_copyout done: err=%d\n", err)); 7255 return (err); 7256 } 7257 7258 static int 7259 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7260 { 7261 int rc; 7262 int i; 7263 minor_t rnum; 7264 rsm_consume_event_msg_t msg = {0}; 7265 rsmseg_t *seg; 7266 rsm_poll_event_t *event_list; 7267 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7268 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7269 7270 event_list = events; 7271 7272 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, 
mode)) != 7273 RSM_SUCCESS) { 7274 return (rc); 7275 } 7276 7277 for (i = 0; i < msg.numents; i++) { 7278 rnum = event_list[i].rnum; 7279 event_list[i].revent = 0; 7280 /* get the segment structure */ 7281 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7282 if (seg) { 7283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7284 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7285 seg)); 7286 if (seg->s_pollevent) { 7287 /* consume the event */ 7288 atomic_dec_32(&seg->s_pollevent); 7289 event_list[i].revent = POLLRDNORM; 7290 } 7291 rsmseglock_release(seg); 7292 } 7293 } 7294 7295 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7296 RSM_SUCCESS) { 7297 return (rc); 7298 } 7299 7300 return (RSM_SUCCESS); 7301 } 7302 7303 static int 7304 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7305 { 7306 int size; 7307 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7308 7309 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7310 7311 #ifdef _MULTI_DATAMODEL 7312 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7313 rsmka_iovec32_t *iovec32, *iovec32_base; 7314 int i; 7315 7316 size = count * sizeof (rsmka_iovec32_t); 7317 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7318 if (ddi_copyin((caddr_t)user_vec, 7319 (caddr_t)iovec32, size, mode)) { 7320 kmem_free(iovec32, size); 7321 DBG_PRINTF((category, RSM_DEBUG, 7322 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7323 return (RSMERR_BAD_ADDR); 7324 } 7325 7326 for (i = 0; i < count; i++, iovec++, iovec32++) { 7327 iovec->io_type = (int)iovec32->io_type; 7328 if (iovec->io_type == RSM_HANDLE_TYPE) 7329 iovec->local.segid = (rsm_memseg_id_t) 7330 iovec32->local; 7331 else 7332 iovec->local.vaddr = 7333 (caddr_t)(uintptr_t)iovec32->local; 7334 iovec->local_offset = (size_t)iovec32->local_offset; 7335 iovec->remote_offset = (size_t)iovec32->remote_offset; 7336 iovec->transfer_len = (size_t)iovec32->transfer_len; 7337 7338 } 7339 kmem_free(iovec32_base, size); 7340 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7341 "iovec_copyin done\n")); 7342 return (DDI_SUCCESS); 7343 } 7344 #endif 7345 7346 size = count * sizeof (rsmka_iovec_t); 7347 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7348 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7349 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7350 return (RSMERR_BAD_ADDR); 7351 } 7352 7353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7354 7355 return (DDI_SUCCESS); 7356 } 7357 7358 7359 static int 7360 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7361 { 7362 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7363 7364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7365 7366 #ifdef _MULTI_DATAMODEL 7367 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7368 rsmka_scat_gath32_t sg_io32; 7369 7370 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7371 mode)) { 7372 DBG_PRINTF((category, RSM_DEBUG, 7373 "sgio_copyin done: returning EFAULT\n")); 7374 return (RSMERR_BAD_ADDR); 7375 } 7376 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7377 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7378 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7379 sg_io->flags = (size_t)sg_io32.flags; 7380 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7381 (uintptr_t)sg_io32.remote_handle; 7382 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7383 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7384 "sgio_copyin done\n")); 7385 return 
(DDI_SUCCESS); 7386 } 7387 #endif 7388 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7389 mode)) { 7390 DBG_PRINTF((category, RSM_DEBUG, 7391 "sgio_copyin done: returning EFAULT\n")); 7392 return (RSMERR_BAD_ADDR); 7393 } 7394 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7395 return (DDI_SUCCESS); 7396 } 7397 7398 static int 7399 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7400 { 7401 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7402 7403 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7404 "sgio_resid_copyout enter\n")); 7405 7406 #ifdef _MULTI_DATAMODEL 7407 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7408 rsmka_scat_gath32_t sg_io32; 7409 7410 sg_io32.io_residual_count = sg_io->io_residual_count; 7411 sg_io32.flags = sg_io->flags; 7412 7413 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7414 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7415 sizeof (uint32_t), mode)) { 7416 7417 DBG_PRINTF((category, RSM_ERR, 7418 "sgio_resid_copyout error: rescnt\n")); 7419 return (RSMERR_BAD_ADDR); 7420 } 7421 7422 if (ddi_copyout((caddr_t)&sg_io32.flags, 7423 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7424 sizeof (uint32_t), mode)) { 7425 7426 DBG_PRINTF((category, RSM_ERR, 7427 "sgio_resid_copyout error: flags\n")); 7428 return (RSMERR_BAD_ADDR); 7429 } 7430 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7431 "sgio_resid_copyout done\n")); 7432 return (DDI_SUCCESS); 7433 } 7434 #endif 7435 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7436 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7437 sizeof (ulong_t), mode)) { 7438 7439 DBG_PRINTF((category, RSM_ERR, 7440 "sgio_resid_copyout error:rescnt\n")); 7441 return (RSMERR_BAD_ADDR); 7442 } 7443 7444 if (ddi_copyout((caddr_t)&sg_io->flags, 7445 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7446 sizeof (uint_t), mode)) { 7447 7448 DBG_PRINTF((category, RSM_ERR, 7449 "sgio_resid_copyout error:flags\n")); 7450 return (RSMERR_BAD_ADDR); 7451 } 7452 7453 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7454 return (DDI_SUCCESS); 7455 } 7456 7457 7458 static int 7459 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7460 { 7461 rsmka_scat_gath_t sg_io; 7462 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7463 rsmka_iovec_t *ka_iovec; 7464 rsmka_iovec_t *ka_iovec_start; 7465 rsmpi_scat_gath_t rsmpi_sg_io; 7466 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7467 rsmpi_iovec_t *iovec; 7468 rsmpi_iovec_t *iovec_start = NULL; 7469 rsmapi_access_entry_t *acl; 7470 rsmresource_t *res; 7471 minor_t rnum; 7472 rsmseg_t *im_seg, *ex_seg; 7473 int e; 7474 int error = 0; 7475 uint_t i; 7476 uint_t iov_proc = 0; /* num of iovecs processed */ 7477 size_t size = 0; 7478 size_t ka_size; 7479 7480 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7481 7482 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7483 7484 credp = credp; 7485 7486 /* 7487 * Copyin the scatter/gather structure and build new structure 7488 * for rsmpi. 
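 * In outline the putv/getv path below is:
 *	1. copy in the rsmka_scat_gath_t header (sgio_copyin) and reject
 *	   requests with more than RSM_MAX_SGIOREQS entries,
 *	2. copy in the iovec array, using the on-stack array for up to
 *	   RSM_MAX_IOVLEN entries and a heap allocation otherwise,
 *	3. look up and lock the import segment for this minor and wait
 *	   out any DR quiesce,
 *	4. translate each rsmka_iovec_t into an rsmpi_iovec_t (export
 *	   segment handle, buf set up via ddi_umem_iosetup, or plain
 *	   virtual address),
 *	5. hand the vector to the RSMPI putv/getv routine and copy the
 *	   residual count and updated flags back out.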
7489 */ 7490 e = sgio_copyin(arg, &sg_io, mode); 7491 if (e != DDI_SUCCESS) { 7492 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7493 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7494 return (e); 7495 } 7496 7497 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7498 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7499 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7500 sg_io.io_request_count)); 7501 return (RSMERR_BAD_SGIO); 7502 } 7503 7504 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7505 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7506 rsmpi_sg_io.io_segflg = 0; 7507 7508 /* Allocate memory and copyin io vector array */ 7509 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7510 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7511 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7512 } else { 7513 ka_iovec_start = ka_iovec = ka_iovec_arr; 7514 } 7515 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7516 sg_io.io_request_count, mode); 7517 if (e != DDI_SUCCESS) { 7518 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7519 kmem_free(ka_iovec, ka_size); 7520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7521 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7522 return (e); 7523 } 7524 7525 /* get the import segment descriptor */ 7526 rnum = getminor(dev); 7527 res = rsmresource_lookup(rnum, RSM_LOCK); 7528 7529 /* 7530 * The following sequence of locking may (or MAY NOT) cause a 7531 * deadlock but this is currently not addressed here since the 7532 * implementation will be changed to incorporate the use of 7533 * reference counting for both the import and the export segments. 7534 */ 7535 7536 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7537 7538 im_seg = (rsmseg_t *)res; 7539 7540 if (im_seg == NULL) { 7541 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7542 kmem_free(ka_iovec, ka_size); 7543 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7544 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7545 return (EINVAL); 7546 } 7547 /* putv/getv is supported only on import segments */ 7548 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7549 rsmseglock_release(im_seg); 7550 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7551 kmem_free(ka_iovec, ka_size); 7552 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7553 "rsm_iovec_ioctl done: not an import segment\n")); 7554 return (EINVAL); 7555 } 7556 7557 /* 7558 * wait for a remote DR to complete, i.e. for segments to get UNQUIESCED, 7559 * as well as wait for a local DR to complete. 
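 * The loop below re-checks its predicate after every wakeup and treats a
 * zero return from cv_wait_sig() (the thread was interrupted by a
 * signal) as a reason to drop the lock and fail with RSMERR_INTERRUPTED.
 * For illustration only, the same waiting discipline in a self-contained
 * POSIX-threads sketch; quiesced, lock and cond are hypothetical names
 * and, unlike cv_wait_sig(), pthread_cond_wait() is not interrupted by
 * signals:
 *
 *	#include <pthread.h>
 *
 *	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
 *	static int quiesced;		// nonzero while "DR" is in progress
 *
 *	void
 *	wait_until_unquiesced(void)
 *	{
 *		pthread_mutex_lock(&lock);
 *		while (quiesced)	// always re-check after a wakeup
 *			pthread_cond_wait(&cond, &lock);
 *		pthread_mutex_unlock(&lock);
 *	}
 *
 *	void
 *	quiesce_done(void)
 *	{
 *		pthread_mutex_lock(&lock);
 *		quiesced = 0;
 *		pthread_cond_broadcast(&cond);	// like cv_broadcast()
 *		pthread_mutex_unlock(&lock);
 *	}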
7560 */ 7561 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7562 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7563 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7564 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7565 DBG_PRINTF((category, RSM_DEBUG, 7566 "rsm_iovec_ioctl done: cv_wait INTR")); 7567 rsmseglock_release(im_seg); 7568 return (RSMERR_INTERRUPTED); 7569 } 7570 } 7571 7572 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7573 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7574 7575 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7576 im_seg->s_state == RSM_STATE_NEW); 7577 7578 DBG_PRINTF((category, RSM_DEBUG, 7579 "rsm_iovec_ioctl done: im_seg not conn/map")); 7580 rsmseglock_release(im_seg); 7581 e = RSMERR_BAD_SGIO; 7582 goto out; 7583 } 7584 7585 im_seg->s_rdmacnt++; 7586 rsmseglock_release(im_seg); 7587 7588 /* 7589 * Allocate and set up the io vector for rsmpi 7590 */ 7591 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7592 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7593 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7594 } else { 7595 iovec_start = iovec = iovec_arr; 7596 } 7597 7598 rsmpi_sg_io.iovec = iovec; 7599 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7600 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7601 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7602 7603 if (ex_seg == NULL) { 7604 e = RSMERR_BAD_SGIO; 7605 break; 7606 } 7607 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7608 7609 acl = ex_seg->s_acl; 7610 if (acl[0].ae_permission == 0) { 7611 struct buf *xbuf; 7612 dev_t sdev = 0; 7613 7614 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7615 0, ex_seg->s_len, B_WRITE, 7616 sdev, 0, NULL, DDI_UMEM_SLEEP); 7617 7618 ASSERT(xbuf != NULL); 7619 7620 iovec->local_mem.ms_type = RSM_MEM_BUF; 7621 iovec->local_mem.ms_memory.bp = xbuf; 7622 } else { 7623 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7624 iovec->local_mem.ms_memory.handle = 7625 ex_seg->s_handle.out; 7626 } 7627 ex_seg->s_rdmacnt++; /* refcnt the handle */ 7628 rsmseglock_release(ex_seg); 7629 } else { 7630 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7631 iovec->local_mem.ms_memory.vr.vaddr = 7632 ka_iovec->local.vaddr; 7633 } 7634 7635 iovec->local_offset = ka_iovec->local_offset; 7636 iovec->remote_handle = im_seg->s_handle.in; 7637 iovec->remote_offset = ka_iovec->remote_offset; 7638 iovec->transfer_length = ka_iovec->transfer_len; 7639 iovec++; 7640 ka_iovec++; 7641 } 7642 7643 if (iov_proc < sg_io.io_request_count) { 7644 /* error while processing handle */ 7645 rsmseglock_acquire(im_seg); 7646 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7647 if (im_seg->s_rdmacnt == 0) { 7648 cv_broadcast(&im_seg->s_cv); 7649 } 7650 rsmseglock_release(im_seg); 7651 goto out; 7652 } 7653 7654 /* call rsmpi */ 7655 if (cmd == RSM_IOCTL_PUTV) 7656 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7657 im_seg->s_adapter->rsmpi_handle, 7658 &rsmpi_sg_io); 7659 else if (cmd == RSM_IOCTL_GETV) 7660 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7661 im_seg->s_adapter->rsmpi_handle, 7662 &rsmpi_sg_io); 7663 else { 7664 e = EINVAL; 7665 DBG_PRINTF((category, RSM_DEBUG, 7666 "iovec_ioctl: bad command = %x\n", cmd)); 7667 } 7668 7669 7670 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7671 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7672 7673 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7674 7675 /* 7676 * Check for implicit signal post flag and do the signal 7677 * post if needed 7678 */ 7679 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7680 e 
== RSM_SUCCESS) { 7681 rsmipc_request_t request; 7682 7683 request.rsmipc_key = im_seg->s_segid; 7684 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7685 request.rsmipc_segment_cookie = NULL; 7686 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7687 /* 7688 * Reset the implicit signal post flag to 0 to indicate 7689 * that the signal post has been done and need not be 7690 * done in the RSMAPI library 7691 */ 7692 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST; 7693 } 7694 7695 rsmseglock_acquire(im_seg); 7696 im_seg->s_rdmacnt--; 7697 if (im_seg->s_rdmacnt == 0) { 7698 cv_broadcast(&im_seg->s_cv); 7699 } 7700 rsmseglock_release(im_seg); 7701 error = sgio_resid_copyout(arg, &sg_io, mode); 7702 out: 7703 iovec = iovec_start; 7704 ka_iovec = ka_iovec_start; 7705 for (i = 0; i < iov_proc; i++) { 7706 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7707 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7708 7709 ASSERT(ex_seg != NULL); 7710 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7711 7712 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7713 if (ex_seg->s_rdmacnt == 0) { 7714 cv_broadcast(&ex_seg->s_cv); 7715 } 7716 rsmseglock_release(ex_seg); 7717 } 7718 7719 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7720 7721 /* 7722 * At present there is no dependency on the existence of xbufs 7723 * created by ddi_umem_iosetup for each of the iovecs. So we 7724 * can free these xbufs here. 7725 */ 7726 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7727 freerbuf(iovec->local_mem.ms_memory.bp); 7728 } 7729 7730 iovec++; 7731 ka_iovec++; 7732 } 7733 7734 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7735 if (iovec_start) 7736 kmem_free(iovec_start, size); 7737 kmem_free(ka_iovec_start, ka_size); 7738 } 7739 7740 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7741 "rsm_iovec_ioctl done %d\n", e)); 7742 /* if RSMPI call fails return that else return copyout's retval */ 7743 return ((e != RSM_SUCCESS) ? 
e : error); 7744 7745 } 7746 7747 7748 static int 7749 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7750 { 7751 adapter_t *adapter; 7752 rsm_addr_t addr; 7753 rsm_node_id_t node; 7754 int rval = DDI_SUCCESS; 7755 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7756 7757 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7758 7759 adapter = rsm_getadapter(msg, mode); 7760 if (adapter == NULL) { 7761 DBG_PRINTF((category, RSM_DEBUG, 7762 "rsmaddr_ioctl done: adapter not found\n")); 7763 return (RSMERR_CTLR_NOT_PRESENT); 7764 } 7765 7766 switch (cmd) { 7767 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7768 /* returns the hwaddr in msg->hwaddr */ 7769 if (msg->nodeid == my_nodeid) { 7770 msg->hwaddr = adapter->hwaddr; 7771 } else { 7772 addr = get_remote_hwaddr(adapter, msg->nodeid); 7773 if ((int64_t)addr < 0) { 7774 rval = RSMERR_INTERNAL_ERROR; 7775 } else { 7776 msg->hwaddr = addr; 7777 } 7778 } 7779 break; 7780 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7781 /* returns the nodeid in msg->nodeid */ 7782 if (msg->hwaddr == adapter->hwaddr) { 7783 msg->nodeid = my_nodeid; 7784 } else { 7785 node = get_remote_nodeid(adapter, msg->hwaddr); 7786 if ((int)node < 0) { 7787 rval = RSMERR_INTERNAL_ERROR; 7788 } else { 7789 msg->nodeid = (rsm_node_id_t)node; 7790 } 7791 } 7792 break; 7793 default: 7794 rval = EINVAL; 7795 break; 7796 } 7797 7798 rsmka_release_adapter(adapter); 7799 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7800 "rsmaddr_ioctl done: %d\n", rval)); 7801 return (rval); 7802 } 7803 7804 static int 7805 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7806 { 7807 DBG_DEFINE(category, 7808 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7809 7810 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7811 7812 #ifdef _MULTI_DATAMODEL 7813 7814 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7815 rsm_ioctlmsg32_t msg32; 7816 int i; 7817 7818 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7819 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7820 "rsm_ddi_copyin done: EFAULT\n")); 7821 return (RSMERR_BAD_ADDR); 7822 } 7823 msg->len = msg32.len; 7824 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7825 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7826 msg->key = msg32.key; 7827 msg->acl_len = msg32.acl_len; 7828 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7829 msg->cnum = msg32.cnum; 7830 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7831 msg->cname_len = msg32.cname_len; 7832 msg->nodeid = msg32.nodeid; 7833 msg->hwaddr = msg32.hwaddr; 7834 msg->perm = msg32.perm; 7835 for (i = 0; i < 4; i++) { 7836 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7837 } 7838 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7839 "rsm_ddi_copyin done\n")); 7840 return (RSM_SUCCESS); 7841 } 7842 #endif 7843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7844 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7845 return (RSMERR_BAD_ADDR); 7846 else 7847 return (RSM_SUCCESS); 7848 } 7849 7850 static int 7851 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7852 { 7853 rsmka_int_controller_attr_t rsm_cattr; 7854 DBG_DEFINE(category, 7855 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7856 7857 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7858 "rsmattr_ddi_copyout enter\n")); 7859 /* 7860 * need to copy appropriate data from rsm_controller_attr_t 7861 * to rsmka_int_controller_attr_t 7862 */ 7863 #ifdef _MULTI_DATAMODEL 7864 if ((mode & DATAMODEL_MASK) == 
DATAMODEL_ILP32) { 7865 rsmka_int_controller_attr32_t rsm_cattr32; 7866 7867 rsm_cattr32.attr_direct_access_sizes = 7868 adapter->rsm_attr.attr_direct_access_sizes; 7869 rsm_cattr32.attr_atomic_sizes = 7870 adapter->rsm_attr.attr_atomic_sizes; 7871 rsm_cattr32.attr_page_size = 7872 adapter->rsm_attr.attr_page_size; 7873 if (adapter->rsm_attr.attr_max_export_segment_size > 7874 UINT_MAX) 7875 rsm_cattr32.attr_max_export_segment_size = 7876 RSM_MAXSZ_PAGE_ALIGNED; 7877 else 7878 rsm_cattr32.attr_max_export_segment_size = 7879 adapter->rsm_attr.attr_max_export_segment_size; 7880 if (adapter->rsm_attr.attr_tot_export_segment_size > 7881 UINT_MAX) 7882 rsm_cattr32.attr_tot_export_segment_size = 7883 RSM_MAXSZ_PAGE_ALIGNED; 7884 else 7885 rsm_cattr32.attr_tot_export_segment_size = 7886 adapter->rsm_attr.attr_tot_export_segment_size; 7887 if (adapter->rsm_attr.attr_max_export_segments > 7888 UINT_MAX) 7889 rsm_cattr32.attr_max_export_segments = 7890 UINT_MAX; 7891 else 7892 rsm_cattr32.attr_max_export_segments = 7893 adapter->rsm_attr.attr_max_export_segments; 7894 if (adapter->rsm_attr.attr_max_import_map_size > 7895 UINT_MAX) 7896 rsm_cattr32.attr_max_import_map_size = 7897 RSM_MAXSZ_PAGE_ALIGNED; 7898 else 7899 rsm_cattr32.attr_max_import_map_size = 7900 adapter->rsm_attr.attr_max_import_map_size; 7901 if (adapter->rsm_attr.attr_tot_import_map_size > 7902 UINT_MAX) 7903 rsm_cattr32.attr_tot_import_map_size = 7904 RSM_MAXSZ_PAGE_ALIGNED; 7905 else 7906 rsm_cattr32.attr_tot_import_map_size = 7907 adapter->rsm_attr.attr_tot_import_map_size; 7908 if (adapter->rsm_attr.attr_max_import_segments > 7909 UINT_MAX) 7910 rsm_cattr32.attr_max_import_segments = 7911 UINT_MAX; 7912 else 7913 rsm_cattr32.attr_max_import_segments = 7914 adapter->rsm_attr.attr_max_import_segments; 7915 rsm_cattr32.attr_controller_addr = 7916 adapter->rsm_attr.attr_controller_addr; 7917 7918 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7919 "rsmattr_ddi_copyout done\n")); 7920 if (ddi_copyout((caddr_t)&rsm_cattr32, arg, 7921 sizeof (rsmka_int_controller_attr32_t), mode)) { 7922 return (RSMERR_BAD_ADDR); 7923 } 7924 else 7925 return (RSM_SUCCESS); 7926 } 7927 #endif 7928 rsm_cattr.attr_direct_access_sizes = 7929 adapter->rsm_attr.attr_direct_access_sizes; 7930 rsm_cattr.attr_atomic_sizes = 7931 adapter->rsm_attr.attr_atomic_sizes; 7932 rsm_cattr.attr_page_size = 7933 adapter->rsm_attr.attr_page_size; 7934 rsm_cattr.attr_max_export_segment_size = 7935 adapter->rsm_attr.attr_max_export_segment_size; 7936 rsm_cattr.attr_tot_export_segment_size = 7937 adapter->rsm_attr.attr_tot_export_segment_size; 7938 rsm_cattr.attr_max_export_segments = 7939 adapter->rsm_attr.attr_max_export_segments; 7940 rsm_cattr.attr_max_import_map_size = 7941 adapter->rsm_attr.attr_max_import_map_size; 7942 rsm_cattr.attr_tot_import_map_size = 7943 adapter->rsm_attr.attr_tot_import_map_size; 7944 rsm_cattr.attr_max_import_segments = 7945 adapter->rsm_attr.attr_max_import_segments; 7946 rsm_cattr.attr_controller_addr = 7947 adapter->rsm_attr.attr_controller_addr; 7948 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7949 "rsmattr_ddi_copyout done\n")); 7950 if (ddi_copyout((caddr_t)&rsm_cattr, arg, 7951 sizeof (rsmka_int_controller_attr_t), mode)) { 7952 return (RSMERR_BAD_ADDR); 7953 } 7954 else 7955 return (RSM_SUCCESS); 7956 } 7957 7958 /*ARGSUSED*/ 7959 static int 7960 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 7961 int *rvalp) 7962 { 7963 rsmseg_t *seg; 7964 rsmresource_t *res; 7965 minor_t rnum; 7966 rsm_ioctlmsg_t msg = {0}; 7967 int 
error; 7968 adapter_t *adapter; 7969 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7970 7971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n")); 7972 7973 if (cmd == RSM_IOCTL_CONSUMEEVENT) { 7974 error = rsm_consumeevent_ioctl((caddr_t)arg, mode); 7975 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7976 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error)); 7977 return (error); 7978 } 7979 7980 /* topology cmd does not use the arg common to other cmds */ 7981 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) { 7982 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode); 7983 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7984 "rsm_ioctl done: %d\n", error)); 7985 return (error); 7986 } 7987 7988 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) { 7989 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp); 7990 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7991 "rsm_ioctl done: %d\n", error)); 7992 return (error); 7993 } 7994 7995 /* 7996 * try to load arguments 7997 */ 7998 if (cmd != RSM_IOCTL_RING_BELL && 7999 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) { 8000 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8001 "rsm_ioctl done: EFAULT\n")); 8002 return (RSMERR_BAD_ADDR); 8003 } 8004 8005 if (cmd == RSM_IOCTL_ATTR) { 8006 adapter = rsm_getadapter(&msg, mode); 8007 if (adapter == NULL) { 8008 DBG_PRINTF((category, RSM_DEBUG, 8009 "rsm_ioctl done: ENODEV\n")); 8010 return (RSMERR_CTLR_NOT_PRESENT); 8011 } 8012 error = rsmattr_ddi_copyout(adapter, msg.arg, mode); 8013 rsmka_release_adapter(adapter); 8014 DBG_PRINTF((category, RSM_DEBUG, 8015 "rsm_ioctl:after copyout %d\n", error)); 8016 return (error); 8017 } 8018 8019 if (cmd == RSM_IOCTL_BAR_INFO) { 8020 /* Return library off,len of barrier page */ 8021 msg.off = barrier_offset; 8022 msg.len = (int)barrier_size; 8023 #ifdef _MULTI_DATAMODEL 8024 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8025 rsm_ioctlmsg32_t msg32; 8026 8027 if (msg.len > UINT_MAX) 8028 msg.len = RSM_MAXSZ_PAGE_ALIGNED; 8029 else 8030 msg32.len = (int32_t)msg.len; 8031 msg32.off = (int32_t)msg.off; 8032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8033 "rsm_ioctl done\n")); 8034 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8035 sizeof (msg32), mode)) 8036 return (RSMERR_BAD_ADDR); 8037 else 8038 return (RSM_SUCCESS); 8039 } 8040 #endif 8041 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8042 "rsm_ioctl done\n")); 8043 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8044 sizeof (msg), mode)) 8045 return (RSMERR_BAD_ADDR); 8046 else 8047 return (RSM_SUCCESS); 8048 } 8049 8050 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) { 8051 /* map the nodeid or hwaddr */ 8052 error = rsmaddr_ioctl(cmd, &msg, mode); 8053 if (error == RSM_SUCCESS) { 8054 #ifdef _MULTI_DATAMODEL 8055 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8056 rsm_ioctlmsg32_t msg32; 8057 8058 msg32.hwaddr = (uint64_t)msg.hwaddr; 8059 msg32.nodeid = (uint32_t)msg.nodeid; 8060 8061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8062 "rsm_ioctl done\n")); 8063 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8064 sizeof (msg32), mode)) 8065 return (RSMERR_BAD_ADDR); 8066 else 8067 return (RSM_SUCCESS); 8068 } 8069 #endif 8070 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8071 "rsm_ioctl done\n")); 8072 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8073 sizeof (msg), mode)) 8074 return (RSMERR_BAD_ADDR); 8075 else 8076 return (RSM_SUCCESS); 8077 } 8078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8079 "rsm_ioctl done: %d\n", error)); 8080 return (error); 8081 } 8082 8083 /* Find resource and look it in read mode */ 8084 
rnum = getminor(dev); 8085 res = rsmresource_lookup(rnum, RSM_NOLOCK); 8086 ASSERT(res != NULL); 8087 8088 /* 8089 * Find command group 8090 */ 8091 switch (RSM_IOCTL_CMDGRP(cmd)) { 8092 case RSM_IOCTL_EXPORT_SEG: 8093 /* 8094 * Export list is searched during publish, loopback and 8095 * remote lookup call. 8096 */ 8097 seg = rsmresource_seg(res, rnum, credp, 8098 RSM_RESOURCE_EXPORT_SEGMENT); 8099 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) { 8100 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode, 8101 credp); 8102 } else { /* export ioctl on an import/barrier resource */ 8103 error = RSMERR_BAD_SEG_HNDL; 8104 } 8105 break; 8106 case RSM_IOCTL_IMPORT_SEG: 8107 /* Import list is searched during remote unmap call. */ 8108 seg = rsmresource_seg(res, rnum, credp, 8109 RSM_RESOURCE_IMPORT_SEGMENT); 8110 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8111 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode, 8112 credp); 8113 } else { /* import ioctl on an export/barrier resource */ 8114 error = RSMERR_BAD_SEG_HNDL; 8115 } 8116 break; 8117 case RSM_IOCTL_BAR: 8118 if (res != RSMRC_RESERVED && 8119 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8120 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg, 8121 mode); 8122 } else { /* invalid res value */ 8123 error = RSMERR_BAD_SEG_HNDL; 8124 } 8125 break; 8126 case RSM_IOCTL_BELL: 8127 if (res != RSMRC_RESERVED) { 8128 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) 8129 error = exportbell_ioctl((rsmseg_t *)res, cmd); 8130 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT) 8131 error = importbell_ioctl((rsmseg_t *)res, cmd); 8132 else /* RSM_RESOURCE_BAR */ 8133 error = RSMERR_BAD_SEG_HNDL; 8134 } else { /* invalid res value */ 8135 error = RSMERR_BAD_SEG_HNDL; 8136 } 8137 break; 8138 default: 8139 error = EINVAL; 8140 } 8141 8142 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n", 8143 error)); 8144 return (error); 8145 } 8146 8147 8148 /* **************************** Segment Mapping Operations ********* */ 8149 static rsm_mapinfo_t * 8150 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset, 8151 size_t *map_len) 8152 { 8153 rsm_mapinfo_t *p; 8154 /* 8155 * Find the correct mapinfo structure to use during the mapping 8156 * from the seg->s_mapinfo list. 8157 * The seg->s_mapinfo list contains in reverse order the mappings 8158 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to 8159 * access the correct entry within this list for the mapping 8160 * requested. 8161 * 8162 * The algorithm for selecting a list entry is as follows: 8163 * 8164 * When start_offset of an entry <= off we have found the entry 8165 * we were looking for. Adjust the dev_offset and map_len (needs 8166 * to be PAGESIZE aligned). 8167 */ 8168 p = seg->s_mapinfo; 8169 for (; p; p = p->next) { 8170 if (p->start_offset <= off) { 8171 *dev_offset = p->dev_offset + off - p->start_offset; 8172 *map_len = (len > p->individual_len) ? 
8173 p->individual_len : ptob(btopr(len)); 8174 return (p); 8175 } 8176 p = p->next; 8177 } 8178 8179 return (NULL); 8180 } 8181 8182 static void 8183 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo) 8184 { 8185 rsm_mapinfo_t *p; 8186 8187 while (mapinfo != NULL) { 8188 p = mapinfo; 8189 mapinfo = mapinfo->next; 8190 kmem_free(p, sizeof (*p)); 8191 } 8192 } 8193 8194 static int 8195 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off, 8196 size_t len, void **pvtp) 8197 { 8198 rsmcookie_t *p; 8199 rsmresource_t *res; 8200 rsmseg_t *seg; 8201 minor_t rnum; 8202 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8203 8204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n")); 8205 8206 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8207 "rsmmap_map: dhp = %x\n", dhp)); 8208 8209 flags = flags; 8210 8211 rnum = getminor(dev); 8212 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8213 ASSERT(res != NULL); 8214 8215 seg = (rsmseg_t *)res; 8216 8217 rsmseglock_acquire(seg); 8218 8219 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8220 8221 /* 8222 * Allocate structure and add cookie to segment list 8223 */ 8224 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8225 8226 p->c_dhp = dhp; 8227 p->c_off = off; 8228 p->c_len = len; 8229 p->c_next = seg->s_ckl; 8230 seg->s_ckl = p; 8231 8232 *pvtp = (void *)seg; 8233 8234 rsmseglock_release(seg); 8235 8236 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n")); 8237 return (DDI_SUCCESS); 8238 } 8239 8240 /* 8241 * Page fault handling is done here. The prerequisite mapping setup 8242 * has been done in rsm_devmap with calls to ddi_devmem_setup or 8243 * ddi_umem_setup 8244 */ 8245 static int 8246 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len, 8247 uint_t type, uint_t rw) 8248 { 8249 int e; 8250 rsmseg_t *seg = (rsmseg_t *)pvt; 8251 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8252 8253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n")); 8254 8255 rsmseglock_acquire(seg); 8256 8257 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8258 8259 while (seg->s_state == RSM_STATE_MAP_QUIESCE) { 8260 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8261 DBG_PRINTF((category, RSM_DEBUG, 8262 "rsmmap_access done: cv_wait INTR")); 8263 rsmseglock_release(seg); 8264 return (RSMERR_INTERRUPTED); 8265 } 8266 } 8267 8268 ASSERT(seg->s_state == RSM_STATE_DISCONNECT || 8269 seg->s_state == RSM_STATE_ACTIVE); 8270 8271 if (seg->s_state == RSM_STATE_DISCONNECT) 8272 seg->s_flags |= RSM_IMPORT_DUMMY; 8273 8274 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8275 "rsmmap_access: dhp = %x\n", dhp)); 8276 8277 rsmseglock_release(seg); 8278 8279 if (e = devmap_load(dhp, offset, len, type, rw)) { 8280 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n")); 8281 } 8282 8283 8284 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n")); 8285 8286 return (e); 8287 } 8288 8289 static int 8290 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp, 8291 void **newpvt) 8292 { 8293 rsmseg_t *seg = (rsmseg_t *)oldpvt; 8294 rsmcookie_t *p, *old; 8295 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8296 8297 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n")); 8298 8299 /* 8300 * Same as map, create an entry to hold cookie and add it to 8301 * connect segment list. The oldpvt is a pointer to segment. 8302 * Return segment pointer in newpvt. 
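 *
 * The devmap framework invokes this dup callback when an existing
 * mapping is duplicated, for example when the address space is
 * duplicated by fork(2).  The new cookie created below inherits the
 * offset and length of the original, so that a later rsmmap_unmap of
 * either dhp finds its own entry on the s_ckl list.  If the original
 * dhp is not found on the list, the request is rejected with EINVAL.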
8303 */ 8304 rsmseglock_acquire(seg); 8305 8306 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8307 8308 /* 8309 * Find old cookie 8310 */ 8311 for (old = seg->s_ckl; old != NULL; old = old->c_next) { 8312 if (old->c_dhp == dhp) { 8313 break; 8314 } 8315 } 8316 if (old == NULL) { 8317 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8318 "rsmmap_dup done: EINVAL\n")); 8319 rsmseglock_release(seg); 8320 return (EINVAL); 8321 } 8322 8323 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8324 8325 p->c_dhp = new_dhp; 8326 p->c_off = old->c_off; 8327 p->c_len = old->c_len; 8328 p->c_next = seg->s_ckl; 8329 seg->s_ckl = p; 8330 8331 *newpvt = (void *)seg; 8332 8333 rsmseglock_release(seg); 8334 8335 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n")); 8336 8337 return (DDI_SUCCESS); 8338 } 8339 8340 static void 8341 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len, 8342 devmap_cookie_t new_dhp1, void **pvtp1, 8343 devmap_cookie_t new_dhp2, void **pvtp2) 8344 { 8345 /* 8346 * Remove pvtp structure from segment list. 8347 */ 8348 rsmseg_t *seg = (rsmseg_t *)pvtp; 8349 int freeflag; 8350 8351 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8352 8353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n")); 8354 8355 off = off; len = len; 8356 pvtp1 = pvtp1; pvtp2 = pvtp2; 8357 8358 rsmseglock_acquire(seg); 8359 8360 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8361 8362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8363 "rsmmap_unmap: dhp = %x\n", dhp)); 8364 /* 8365 * We can go ahead and remove the dhps even if we are in 8366 * the MAPPING state because the dhps being removed here 8367 * belong to a different mmap and we are holding the segment 8368 * lock. 8369 */ 8370 if (new_dhp1 == NULL && new_dhp2 == NULL) { 8371 /* find and remove dhp handle */ 8372 rsmcookie_t *tmp, **back = &seg->s_ckl; 8373 8374 while (*back != NULL) { 8375 tmp = *back; 8376 if (tmp->c_dhp == dhp) { 8377 *back = tmp->c_next; 8378 kmem_free(tmp, sizeof (*tmp)); 8379 break; 8380 } 8381 back = &tmp->c_next; 8382 } 8383 } else { 8384 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8385 "rsmmap_unmap:parital unmap" 8386 "new_dhp1 %lx, new_dhp2 %lx\n", 8387 (size_t)new_dhp1, (size_t)new_dhp2)); 8388 } 8389 8390 /* 8391 * rsmmap_unmap is called for each mapping cookie on the list. 8392 * When the list becomes empty and we are not in the MAPPING 8393 * state then unmap in the rsmpi driver. 
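 *
 * The RSMPI unmap is skipped while the segment is still in
 * RSM_STATE_MAPPING: as noted above, a concurrent rsm_segmap may be
 * inside devmap_setup() for a different mmap of this segment, so the
 * RSMPI mapping must stay intact until that mapping is torn down as
 * well.  The segment structure itself is freed below only once the
 * segment has reached RSM_STATE_END and the cookie list is empty.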
8394 */ 8395 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING)) 8396 (void) rsm_unmap(seg); 8397 8398 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) { 8399 freeflag = 1; 8400 } else { 8401 freeflag = 0; 8402 } 8403 8404 rsmseglock_release(seg); 8405 8406 if (freeflag) { 8407 /* Free the segment structure */ 8408 rsmseg_free(seg); 8409 } 8410 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n")); 8411 8412 } 8413 8414 static struct devmap_callback_ctl rsmmap_ops = { 8415 DEVMAP_OPS_REV, /* devmap_ops version number */ 8416 rsmmap_map, /* devmap_ops map routine */ 8417 rsmmap_access, /* devmap_ops access routine */ 8418 rsmmap_dup, /* devmap_ops dup routine */ 8419 rsmmap_unmap, /* devmap_ops unmap routine */ 8420 }; 8421 8422 static int 8423 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len, 8424 size_t *maplen, uint_t model /*ARGSUSED*/) 8425 { 8426 struct devmap_callback_ctl *callbackops = &rsmmap_ops; 8427 int err; 8428 uint_t maxprot; 8429 minor_t rnum; 8430 rsmseg_t *seg; 8431 off_t dev_offset; 8432 size_t cur_len; 8433 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8434 8435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n")); 8436 8437 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8438 "rsm_devmap: off = %lx, len = %lx\n", off, len)); 8439 rnum = getminor(dev); 8440 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8441 ASSERT(seg != NULL); 8442 8443 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8444 if ((off == barrier_offset) && 8445 (len == barrier_size)) { 8446 8447 ASSERT(bar_va != NULL && bar_cookie != NULL); 8448 8449 /* 8450 * The offset argument in devmap_umem_setup represents 8451 * the offset within the kernel memory defined by the 8452 * cookie. We use this offset as barrier_offset. 8453 */ 8454 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie, 8455 barrier_offset, len, PROT_USER|PROT_READ, 8456 DEVMAP_DEFAULTS, 0); 8457 8458 if (err != 0) { 8459 DBG_PRINTF((category, RSM_ERR, 8460 "rsm_devmap done: %d\n", err)); 8461 return (RSMERR_MAP_FAILED); 8462 } 8463 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8464 "rsm_devmap done: %d\n", err)); 8465 8466 *maplen = barrier_size; 8467 8468 return (err); 8469 } else { 8470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8471 "rsm_devmap done: %d\n", err)); 8472 return (RSMERR_MAP_FAILED); 8473 } 8474 } 8475 8476 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8477 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8478 8479 /* 8480 * Make sure we still have permission for the map operation. 8481 */ 8482 maxprot = PROT_USER; 8483 if (seg->s_mode & RSM_PERM_READ) { 8484 maxprot |= PROT_READ; 8485 } 8486 8487 if (seg->s_mode & RSM_PERM_WRITE) { 8488 maxprot |= PROT_WRITE; 8489 } 8490 8491 /* 8492 * For each devmap call, rsmmap_map is called. This maintains driver 8493 * private information for the mapping. Thus, if there are multiple 8494 * devmap calls there will be multiple rsmmap_map calls and for each 8495 * call, the mapping information will be stored. 8496 * In case of an error during the processing of the devmap call, error 8497 * will be returned. This error return causes the caller of rsm_devmap 8498 * to undo all the mappings by calling rsmmap_unmap for each one. 8499 * rsmmap_unmap will free up the private information for the requested 8500 * mapping. 
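 *
 * Note that *maplen may be smaller than the requested len; for a
 * remote segment it covers only the portion described by a single
 * mapinfo entry.  In that case the devmap framework calls rsm_devmap
 * again for the remainder of the range, and each such call results in
 * its own rsmmap_map callback and its own cookie on the segment's
 * cookie list.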
8501 */ 8502 if (seg->s_node != my_nodeid) { 8503 rsm_mapinfo_t *p; 8504 8505 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len); 8506 if (p == NULL) { 8507 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8508 "rsm_devmap: incorrect mapping info\n")); 8509 return (RSMERR_MAP_FAILED); 8510 } 8511 err = devmap_devmem_setup(dhc, p->dip, 8512 callbackops, p->dev_register, 8513 dev_offset, cur_len, maxprot, 8514 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0); 8515 8516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8517 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx," 8518 "off=%lx,len=%lx\n", 8519 p->dip, p->dev_register, dev_offset, off, cur_len)); 8520 8521 if (err != 0) { 8522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8523 "rsm_devmap: devmap_devmem_setup failed %d\n", 8524 err)); 8525 return (RSMERR_MAP_FAILED); 8526 } 8527 /* cur_len is always an integral multiple pagesize */ 8528 ASSERT((cur_len & (PAGESIZE-1)) == 0); 8529 *maplen = cur_len; 8530 return (err); 8531 8532 } else { 8533 err = devmap_umem_setup(dhc, rsm_dip, callbackops, 8534 seg->s_cookie, off, len, maxprot, 8535 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0); 8536 if (err != 0) { 8537 DBG_PRINTF((category, RSM_DEBUG, 8538 "rsm_devmap: devmap_umem_setup failed %d\n", 8539 err)); 8540 return (RSMERR_MAP_FAILED); 8541 } 8542 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8543 "rsm_devmap: loopback done\n")); 8544 8545 *maplen = ptob(btopr(len)); 8546 8547 return (err); 8548 } 8549 } 8550 8551 /* 8552 * We can use the devmap framework for mapping device memory to user space by 8553 * specifying this routine in the rsm_cb_ops structure. The kernel mmap 8554 * processing calls this entry point and devmap_setup is called within this 8555 * function, which eventually calls rsm_devmap 8556 */ 8557 static int 8558 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 8559 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 8560 { 8561 int error = 0; 8562 int old_state; 8563 minor_t rnum; 8564 rsmseg_t *seg, *eseg; 8565 adapter_t *adapter; 8566 rsm_import_share_t *sharedp; 8567 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8568 8569 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n")); 8570 8571 /* 8572 * find segment 8573 */ 8574 rnum = getminor(dev); 8575 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 8576 8577 if (seg == NULL) { 8578 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8579 "rsm_segmap done: invalid segment\n")); 8580 return (EINVAL); 8581 } 8582 8583 /* 8584 * the user is trying to map a resource that has not been 8585 * defined yet. The library uses this to map in the 8586 * barrier page. 
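 *
 * The library is expected to obtain the offset and length of the
 * barrier page via the RSM_IOCTL_BAR_INFO ioctl handled above and then
 * map that range; the user-level sequence is roughly as follows (the
 * exact calls made by the library may differ):
 *
 *	ioctl(fd, RSM_IOCTL_BAR_INFO, &msg);	returns msg.off, msg.len
 *	mmap(NULL, msg.len, PROT_READ, MAP_SHARED, fd, msg.off);
 *
 * where fd is a descriptor for this device.  Such a request arrives
 * here with off == barrier_offset and len == barrier_size.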
8587 */ 8588 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8589 rsmseglock_release(seg); 8590 8591 /* 8592 * The mapping for the barrier page is identified 8593 * by the special offset barrier_offset 8594 */ 8595 8596 if (off == (off_t)barrier_offset || 8597 len == (off_t)barrier_size) { 8598 if (bar_cookie == NULL || bar_va == NULL) { 8599 DBG_PRINTF((category, RSM_DEBUG, 8600 "rsm_segmap: bar cookie/va is NULL\n")); 8601 return (EINVAL); 8602 } 8603 8604 error = devmap_setup(dev, (offset_t)off, as, addrp, 8605 (size_t)len, prot, maxprot, flags, cred); 8606 8607 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8608 "rsm_segmap done: %d\n", error)); 8609 return (error); 8610 } else { 8611 DBG_PRINTF((category, RSM_DEBUG, 8612 "rsm_segmap: bad offset/length\n")); 8613 return (EINVAL); 8614 } 8615 } 8616 8617 /* Make sure you can only map imported segments */ 8618 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) { 8619 rsmseglock_release(seg); 8620 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8621 "rsm_segmap done: not an import segment\n")); 8622 return (EINVAL); 8623 } 8624 /* check means library is broken */ 8625 ASSERT(seg->s_hdr.rsmrc_num == rnum); 8626 8627 /* wait for the segment to become unquiesced */ 8628 while (seg->s_state == RSM_STATE_CONN_QUIESCE) { 8629 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8630 rsmseglock_release(seg); 8631 DBG_PRINTF((category, RSM_DEBUG, 8632 "rsm_segmap done: cv_wait INTR")); 8633 return (ENODEV); 8634 } 8635 } 8636 8637 /* wait until segment leaves the mapping state */ 8638 while (seg->s_state == RSM_STATE_MAPPING) 8639 cv_wait(&seg->s_cv, &seg->s_lock); 8640 8641 /* 8642 * we allow multiple maps of the same segment in the KA 8643 * and it works because we do an rsmpi map of the whole 8644 * segment during the first map and all the device mapping 8645 * information needed in rsm_devmap is in the mapinfo list. 8646 */ 8647 if ((seg->s_state != RSM_STATE_CONNECT) && 8648 (seg->s_state != RSM_STATE_ACTIVE)) { 8649 rsmseglock_release(seg); 8650 DBG_PRINTF((category, RSM_DEBUG, 8651 "rsm_segmap done: segment not connected\n")); 8652 return (ENODEV); 8653 } 8654 8655 /* 8656 * Make sure we are not mapping a larger segment than what's 8657 * exported 8658 */ 8659 if ((size_t)off + ptob(btopr(len)) > seg->s_len) { 8660 rsmseglock_release(seg); 8661 DBG_PRINTF((category, RSM_DEBUG, 8662 "rsm_segmap done: off+len>seg size\n")); 8663 return (ENXIO); 8664 } 8665 8666 /* 8667 * Make sure we still have permission for the map operation. 
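 *
 * maxprot is rebuilt below from the segment's s_mode rather than taken
 * from the caller, so it reflects only the permissions recorded for
 * this import segment; an mmap whose prot bits exceed that set is
 * rejected with EACCES further down.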
8668 */ 8669 maxprot = PROT_USER; 8670 if (seg->s_mode & RSM_PERM_READ) { 8671 maxprot |= PROT_READ; 8672 } 8673 8674 if (seg->s_mode & RSM_PERM_WRITE) { 8675 maxprot |= PROT_WRITE; 8676 } 8677 8678 if ((prot & maxprot) != prot) { 8679 /* No permission */ 8680 rsmseglock_release(seg); 8681 DBG_PRINTF((category, RSM_DEBUG, 8682 "rsm_segmap done: no permission\n")); 8683 return (EACCES); 8684 } 8685 8686 old_state = seg->s_state; 8687 8688 ASSERT(seg->s_share != NULL); 8689 8690 rsmsharelock_acquire(seg); 8691 8692 sharedp = seg->s_share; 8693 8694 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8695 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 8696 8697 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) && 8698 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) { 8699 rsmsharelock_release(seg); 8700 rsmseglock_release(seg); 8701 DBG_PRINTF((category, RSM_DEBUG, 8702 "rsm_segmap done:RSMSI_STATE %d invalid\n", 8703 sharedp->rsmsi_state)); 8704 return (ENODEV); 8705 } 8706 8707 /* 8708 * Do the map - since we want importers to share mappings 8709 * we do the rsmpi map for the whole segment 8710 */ 8711 if (seg->s_node != my_nodeid) { 8712 uint_t dev_register; 8713 off_t dev_offset; 8714 dev_info_t *dip; 8715 size_t tmp_len; 8716 size_t total_length_mapped = 0; 8717 size_t length_to_map = seg->s_len; 8718 off_t tmp_off = 0; 8719 rsm_mapinfo_t *p; 8720 8721 /* 8722 * length_to_map = seg->s_len is always an integral 8723 * multiple of PAGESIZE. Length mapped in each entry in mapinfo 8724 * list is a multiple of PAGESIZE - RSMPI map ensures this 8725 */ 8726 8727 adapter = seg->s_adapter; 8728 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8729 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8730 8731 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) { 8732 error = 0; 8733 /* map the whole segment */ 8734 while (total_length_mapped < seg->s_len) { 8735 tmp_len = 0; 8736 8737 error = adapter->rsmpi_ops->rsm_map( 8738 seg->s_handle.in, tmp_off, 8739 length_to_map, &tmp_len, 8740 &dip, &dev_register, &dev_offset, 8741 NULL, NULL); 8742 8743 if (error != 0) 8744 break; 8745 8746 /* 8747 * Store the mapping info obtained from rsm_map 8748 */ 8749 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8750 p->dev_register = dev_register; 8751 p->dev_offset = dev_offset; 8752 p->dip = dip; 8753 p->individual_len = tmp_len; 8754 p->start_offset = tmp_off; 8755 p->next = sharedp->rsmsi_mapinfo; 8756 sharedp->rsmsi_mapinfo = p; 8757 8758 total_length_mapped += tmp_len; 8759 length_to_map -= tmp_len; 8760 tmp_off += tmp_len; 8761 } 8762 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8763 8764 if (error != RSM_SUCCESS) { 8765 /* Check if this is the the first rsm_map */ 8766 if (sharedp->rsmsi_mapinfo != NULL) { 8767 /* 8768 * A single rsm_unmap undoes 8769 * multiple rsm_maps. 
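 *
 * The partially built rsmsi_mapinfo list is torn down with a single
 * RSMPI rsm_unmap on the shared handle followed by rsm_free_mapinfo(),
 * and the shared state is reset to RSMSI_STATE_CONNECTED so that a
 * later mmap attempt can redo the RSMPI map from scratch.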
8770 */ 8771 (void) seg->s_adapter->rsmpi_ops-> 8772 rsm_unmap(sharedp->rsmsi_handle); 8773 rsm_free_mapinfo(sharedp-> 8774 rsmsi_mapinfo); 8775 } 8776 sharedp->rsmsi_mapinfo = NULL; 8777 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8778 rsmsharelock_release(seg); 8779 rsmseglock_release(seg); 8780 DBG_PRINTF((category, RSM_DEBUG, 8781 "rsm_segmap done: rsmpi map err %d\n", 8782 error)); 8783 ASSERT(error != RSMERR_BAD_LENGTH && 8784 error != RSMERR_BAD_MEM_ALIGNMENT && 8785 error != RSMERR_BAD_SEG_HNDL); 8786 if (error == RSMERR_UNSUPPORTED_OPERATION) 8787 return (ENOTSUP); 8788 else if (error == RSMERR_INSUFFICIENT_RESOURCES) 8789 return (EAGAIN); 8790 else if (error == RSMERR_CONN_ABORTED) 8791 return (ENODEV); 8792 else 8793 return (error); 8794 } else { 8795 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8796 } 8797 } else { 8798 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8799 } 8800 8801 sharedp->rsmsi_mapcnt++; 8802 8803 rsmsharelock_release(seg); 8804 8805 /* move to an intermediate mapping state */ 8806 seg->s_state = RSM_STATE_MAPPING; 8807 rsmseglock_release(seg); 8808 8809 error = devmap_setup(dev, (offset_t)off, as, addrp, 8810 len, prot, maxprot, flags, cred); 8811 8812 rsmseglock_acquire(seg); 8813 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8814 8815 if (error == DDI_SUCCESS) { 8816 seg->s_state = RSM_STATE_ACTIVE; 8817 } else { 8818 rsmsharelock_acquire(seg); 8819 8820 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8821 8822 sharedp->rsmsi_mapcnt--; 8823 if (sharedp->rsmsi_mapcnt == 0) { 8824 /* unmap the shared RSMPI mapping */ 8825 ASSERT(sharedp->rsmsi_handle != NULL); 8826 (void) adapter->rsmpi_ops-> 8827 rsm_unmap(sharedp->rsmsi_handle); 8828 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 8829 sharedp->rsmsi_mapinfo = NULL; 8830 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8831 } 8832 8833 rsmsharelock_release(seg); 8834 seg->s_state = old_state; 8835 DBG_PRINTF((category, RSM_ERR, 8836 "rsm: devmap_setup failed %d\n", error)); 8837 } 8838 cv_broadcast(&seg->s_cv); 8839 rsmseglock_release(seg); 8840 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n", 8841 error)); 8842 return (error); 8843 } else { 8844 /* 8845 * For loopback, the export segment mapping cookie (s_cookie) 8846 * is also used as the s_cookie value for its import segments 8847 * during mapping. 8848 * Note that reference counting for s_cookie of the export 8849 * segment is not required due to the following: 8850 * We never have a case of the export segment being destroyed, 8851 * leaving the import segments with a stale value for the 8852 * s_cookie field, since a force disconnect is done prior to a 8853 * destroy of an export segment. The force disconnect causes 8854 * the s_cookie value to be reset to NULL. Also for the 8855 * rsm_rebind operation, we change the s_cookie value of the 8856 * export segment as well as of all its local (loopback) 8857 * importers. 8858 */ 8859 DBG_ADDCATEGORY(category, RSM_LOOPBACK); 8860 8861 rsmsharelock_release(seg); 8862 /* 8863 * In order to maintain the lock ordering between the export 8864 * and import segment locks, we need to acquire the export 8865 * segment lock first and only then acquire the import 8866 * segment lock. 8867 * The above is necessary to avoid any deadlock scenarios 8868 * with rsm_rebind which also acquires both the export 8869 * and import segment locks in the above mentioned order. 
8870 * Based on code inspection, there seem to be no other 8871 * situations in which both the export and import segment 8872 * locks are acquired either in the same or opposite order 8873 * as mentioned above. 8874 * Thus in order to conform to the above lock order, we 8875 * need to change the state of the import segment to 8876 * RSM_STATE_MAPPING, release the lock. Once this is done we 8877 * can now safely acquire the export segment lock first 8878 * followed by the import segment lock which is as per 8879 * the lock order mentioned above. 8880 */ 8881 /* move to an intermediate mapping state */ 8882 seg->s_state = RSM_STATE_MAPPING; 8883 rsmseglock_release(seg); 8884 8885 eseg = rsmexport_lookup(seg->s_key); 8886 8887 if (eseg == NULL) { 8888 rsmseglock_acquire(seg); 8889 /* 8890 * Revert to old_state and signal any waiters 8891 * The shared state is not changed 8892 */ 8893 8894 seg->s_state = old_state; 8895 cv_broadcast(&seg->s_cv); 8896 rsmseglock_release(seg); 8897 DBG_PRINTF((category, RSM_DEBUG, 8898 "rsm_segmap done: key %d not found\n", seg->s_key)); 8899 return (ENODEV); 8900 } 8901 8902 rsmsharelock_acquire(seg); 8903 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8904 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8905 8906 sharedp->rsmsi_mapcnt++; 8907 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8908 rsmsharelock_release(seg); 8909 8910 ASSERT(eseg->s_cookie != NULL); 8911 8912 /* 8913 * It is not required or necessary to acquire the import 8914 * segment lock here to change the value of s_cookie since 8915 * no one will touch the import segment as long as it is 8916 * in the RSM_STATE_MAPPING state. 8917 */ 8918 seg->s_cookie = eseg->s_cookie; 8919 8920 rsmseglock_release(eseg); 8921 8922 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len, 8923 prot, maxprot, flags, cred); 8924 8925 rsmseglock_acquire(seg); 8926 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8927 if (error == 0) { 8928 seg->s_state = RSM_STATE_ACTIVE; 8929 } else { 8930 rsmsharelock_acquire(seg); 8931 8932 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8933 8934 sharedp->rsmsi_mapcnt--; 8935 if (sharedp->rsmsi_mapcnt == 0) { 8936 sharedp->rsmsi_mapinfo = NULL; 8937 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8938 } 8939 rsmsharelock_release(seg); 8940 seg->s_state = old_state; 8941 seg->s_cookie = NULL; 8942 } 8943 cv_broadcast(&seg->s_cv); 8944 rsmseglock_release(seg); 8945 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8946 "rsm_segmap done: %d\n", error)); 8947 return (error); 8948 } 8949 } 8950 8951 int 8952 rsmka_null_seg_create( 8953 rsm_controller_handle_t argcp, 8954 rsm_memseg_export_handle_t *handle, 8955 size_t size, 8956 uint_t flags, 8957 rsm_memory_local_t *memory, 8958 rsm_resource_callback_t callback, 8959 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8960 { 8961 return (RSM_SUCCESS); 8962 } 8963 8964 8965 int 8966 rsmka_null_seg_destroy( 8967 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 8968 { 8969 return (RSM_SUCCESS); 8970 } 8971 8972 8973 int 8974 rsmka_null_bind( 8975 rsm_memseg_export_handle_t argmemseg, 8976 off_t offset, 8977 rsm_memory_local_t *argmemory, 8978 rsm_resource_callback_t callback, 8979 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8980 { 8981 return (RSM_SUCCESS); 8982 } 8983 8984 8985 int 8986 rsmka_null_unbind( 8987 rsm_memseg_export_handle_t argmemseg, 8988 off_t offset, 8989 size_t length /*ARGSUSED*/) 8990 { 8991 return (DDI_SUCCESS); 8992 } 8993 8994 int 8995 rsmka_null_rebind( 8996 rsm_memseg_export_handle_t argmemseg, 
8997 off_t offset, 8998 rsm_memory_local_t *memory, 8999 rsm_resource_callback_t callback, 9000 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9001 { 9002 return (RSM_SUCCESS); 9003 } 9004 9005 int 9006 rsmka_null_publish( 9007 rsm_memseg_export_handle_t argmemseg, 9008 rsm_access_entry_t access_list[], 9009 uint_t access_list_length, 9010 rsm_memseg_id_t segment_id, 9011 rsm_resource_callback_t callback, 9012 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9013 { 9014 return (RSM_SUCCESS); 9015 } 9016 9017 9018 int 9019 rsmka_null_republish( 9020 rsm_memseg_export_handle_t memseg, 9021 rsm_access_entry_t access_list[], 9022 uint_t access_list_length, 9023 rsm_resource_callback_t callback, 9024 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9025 { 9026 return (RSM_SUCCESS); 9027 } 9028 9029 int 9030 rsmka_null_unpublish( 9031 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 9032 { 9033 return (RSM_SUCCESS); 9034 } 9035 9036 9037 void 9038 rsmka_init_loopback() 9039 { 9040 rsm_ops_t *ops = &null_rsmpi_ops; 9041 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK); 9042 9043 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9044 "rsmka_init_loopback enter\n")); 9045 9046 /* initialize null ops vector */ 9047 ops->rsm_seg_create = rsmka_null_seg_create; 9048 ops->rsm_seg_destroy = rsmka_null_seg_destroy; 9049 ops->rsm_bind = rsmka_null_bind; 9050 ops->rsm_unbind = rsmka_null_unbind; 9051 ops->rsm_rebind = rsmka_null_rebind; 9052 ops->rsm_publish = rsmka_null_publish; 9053 ops->rsm_unpublish = rsmka_null_unpublish; 9054 ops->rsm_republish = rsmka_null_republish; 9055 9056 /* initialize attributes for loopback adapter */ 9057 loopback_attr.attr_name = loopback_str; 9058 loopback_attr.attr_page_size = 0x8; /* 8K */ 9059 9060 /* initialize loopback adapter */ 9061 loopback_adapter.rsm_attr = loopback_attr; 9062 loopback_adapter.rsmpi_ops = &null_rsmpi_ops; 9063 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9064 "rsmka_init_loopback done\n")); 9065 } 9066 9067 /* ************** DR functions ********************************** */ 9068 static void 9069 rsm_quiesce_exp_seg(rsmresource_t *resp) 9070 { 9071 int recheck_state; 9072 rsmseg_t *segp = (rsmseg_t *)resp; 9073 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9074 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9075 9076 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9077 "%s enter: key=%u\n", function, segp->s_key)); 9078 9079 rsmseglock_acquire(segp); 9080 do { 9081 recheck_state = 0; 9082 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) || 9083 (segp->s_state == RSM_STATE_BIND_QUIESCED) || 9084 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) || 9085 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) { 9086 rsmseglock_release(segp); 9087 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9088 "%s done:state =%d\n", function, 9089 segp->s_state)); 9090 return; 9091 } 9092 9093 if (segp->s_state == RSM_STATE_NEW) { 9094 segp->s_state = RSM_STATE_NEW_QUIESCED; 9095 rsmseglock_release(segp); 9096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9097 "%s done:state =%d\n", function, 9098 segp->s_state)); 9099 return; 9100 } 9101 9102 if (segp->s_state == RSM_STATE_BIND) { 9103 /* unbind */ 9104 (void) rsm_unbind_pages(segp); 9105 segp->s_state = RSM_STATE_BIND_QUIESCED; 9106 rsmseglock_release(segp); 9107 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9108 "%s done:state =%d\n", function, 9109 segp->s_state)); 9110 return; 9111 } 9112 9113 if (segp->s_state == RSM_STATE_EXPORT) { 9114 /* 9115 * wait for putv/getv to complete if the segp is 9116 * a 
local memory handle 9117 */ 9118 while ((segp->s_state == RSM_STATE_EXPORT) && 9119 (segp->s_rdmacnt != 0)) { 9120 cv_wait(&segp->s_cv, &segp->s_lock); 9121 } 9122 9123 if (segp->s_state != RSM_STATE_EXPORT) { 9124 /* 9125 * state changed need to see what it 9126 * should be changed to. 9127 */ 9128 recheck_state = 1; 9129 continue; 9130 } 9131 9132 segp->s_state = RSM_STATE_EXPORT_QUIESCING; 9133 rsmseglock_release(segp); 9134 /* 9135 * send SUSPEND messages - currently it will be 9136 * done at the end 9137 */ 9138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9139 "%s done:state =%d\n", function, 9140 segp->s_state)); 9141 return; 9142 } 9143 } while (recheck_state); 9144 9145 rsmseglock_release(segp); 9146 } 9147 9148 static void 9149 rsm_unquiesce_exp_seg(rsmresource_t *resp) 9150 { 9151 int ret; 9152 rsmseg_t *segp = (rsmseg_t *)resp; 9153 rsmapi_access_entry_t *acl; 9154 rsm_access_entry_t *rsmpi_acl; 9155 int acl_len; 9156 int create_flags = 0; 9157 struct buf *xbuf; 9158 rsm_memory_local_t mem; 9159 adapter_t *adapter; 9160 dev_t sdev = 0; 9161 rsm_resource_callback_t callback_flag; 9162 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9163 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9164 9165 rsmseglock_acquire(segp); 9166 9167 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9168 "%s enter: key=%u, state=%d\n", function, segp->s_key, 9169 segp->s_state)); 9170 9171 if ((segp->s_state == RSM_STATE_NEW) || 9172 (segp->s_state == RSM_STATE_BIND) || 9173 (segp->s_state == RSM_STATE_EXPORT)) { 9174 rsmseglock_release(segp); 9175 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9176 function, segp->s_state)); 9177 return; 9178 } 9179 9180 if (segp->s_state == RSM_STATE_NEW_QUIESCED) { 9181 segp->s_state = RSM_STATE_NEW; 9182 cv_broadcast(&segp->s_cv); 9183 rsmseglock_release(segp); 9184 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9185 function, segp->s_state)); 9186 return; 9187 } 9188 9189 if (segp->s_state == RSM_STATE_BIND_QUIESCED) { 9190 /* bind the segment */ 9191 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9192 segp->s_len, segp->s_proc); 9193 if (ret == RSM_SUCCESS) { /* bind successful */ 9194 segp->s_state = RSM_STATE_BIND; 9195 } else { /* bind failed - resource unavailable */ 9196 segp->s_state = RSM_STATE_NEW; 9197 } 9198 cv_broadcast(&segp->s_cv); 9199 rsmseglock_release(segp); 9200 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9201 "%s done: bind_qscd bind = %d\n", function, ret)); 9202 return; 9203 } 9204 9205 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) { 9206 /* wait for the segment to move to EXPORT_QUIESCED state */ 9207 cv_wait(&segp->s_cv, &segp->s_lock); 9208 } 9209 9210 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) { 9211 /* bind the segment */ 9212 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9213 segp->s_len, segp->s_proc); 9214 9215 if (ret != RSM_SUCCESS) { 9216 /* bind failed - resource unavailable */ 9217 acl_len = segp->s_acl_len; 9218 acl = segp->s_acl; 9219 rsmpi_acl = segp->s_acl_in; 9220 segp->s_acl_len = 0; 9221 segp->s_acl = NULL; 9222 segp->s_acl_in = NULL; 9223 rsmseglock_release(segp); 9224 9225 rsmexport_rm(segp); 9226 rsmacl_free(acl, acl_len); 9227 rsmpiacl_free(rsmpi_acl, acl_len); 9228 9229 rsmseglock_acquire(segp); 9230 segp->s_state = RSM_STATE_NEW; 9231 cv_broadcast(&segp->s_cv); 9232 rsmseglock_release(segp); 9233 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9234 "%s done: exp_qscd bind failed = %d\n", 9235 function, ret)); 9236 return; 9237 } 9238 /* 9239 * publish the 
segment 9240 * if successful 9241 * segp->s_state = RSM_STATE_EXPORT; 9242 * else failed 9243 * segp->s_state = RSM_STATE_BIND; 9244 */ 9245 9246 /* check whether it is a local_memory_handle */ 9247 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) { 9248 if ((segp->s_acl[0].ae_node == my_nodeid) && 9249 (segp->s_acl[0].ae_permission == 0)) { 9250 segp->s_state = RSM_STATE_EXPORT; 9251 cv_broadcast(&segp->s_cv); 9252 rsmseglock_release(segp); 9253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9254 "%s done:exp_qscd\n", function)); 9255 return; 9256 } 9257 } 9258 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE, 9259 sdev, 0, NULL, DDI_UMEM_SLEEP); 9260 ASSERT(xbuf != NULL); 9261 9262 mem.ms_type = RSM_MEM_BUF; 9263 mem.ms_bp = xbuf; 9264 9265 adapter = segp->s_adapter; 9266 9267 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 9268 create_flags = RSM_ALLOW_UNBIND_REBIND; 9269 } 9270 9271 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 9272 callback_flag = RSM_RESOURCE_DONTWAIT; 9273 } else { 9274 callback_flag = RSM_RESOURCE_SLEEP; 9275 } 9276 9277 ret = adapter->rsmpi_ops->rsm_seg_create( 9278 adapter->rsmpi_handle, &segp->s_handle.out, 9279 segp->s_len, create_flags, &mem, 9280 callback_flag, NULL); 9281 9282 if (ret != RSM_SUCCESS) { 9283 acl_len = segp->s_acl_len; 9284 acl = segp->s_acl; 9285 rsmpi_acl = segp->s_acl_in; 9286 segp->s_acl_len = 0; 9287 segp->s_acl = NULL; 9288 segp->s_acl_in = NULL; 9289 rsmseglock_release(segp); 9290 9291 rsmexport_rm(segp); 9292 rsmacl_free(acl, acl_len); 9293 rsmpiacl_free(rsmpi_acl, acl_len); 9294 9295 rsmseglock_acquire(segp); 9296 segp->s_state = RSM_STATE_BIND; 9297 cv_broadcast(&segp->s_cv); 9298 rsmseglock_release(segp); 9299 DBG_PRINTF((category, RSM_ERR, 9300 "%s done: exp_qscd create failed = %d\n", 9301 function, ret)); 9302 return; 9303 } 9304 9305 ret = adapter->rsmpi_ops->rsm_publish( 9306 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len, 9307 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL); 9308 9309 if (ret != RSM_SUCCESS) { 9310 acl_len = segp->s_acl_len; 9311 acl = segp->s_acl; 9312 rsmpi_acl = segp->s_acl_in; 9313 segp->s_acl_len = 0; 9314 segp->s_acl = NULL; 9315 segp->s_acl_in = NULL; 9316 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out); 9317 rsmseglock_release(segp); 9318 9319 rsmexport_rm(segp); 9320 rsmacl_free(acl, acl_len); 9321 rsmpiacl_free(rsmpi_acl, acl_len); 9322 9323 rsmseglock_acquire(segp); 9324 segp->s_state = RSM_STATE_BIND; 9325 cv_broadcast(&segp->s_cv); 9326 rsmseglock_release(segp); 9327 DBG_PRINTF((category, RSM_ERR, 9328 "%s done: exp_qscd publish failed = %d\n", 9329 function, ret)); 9330 return; 9331 } 9332 9333 segp->s_state = RSM_STATE_EXPORT; 9334 cv_broadcast(&segp->s_cv); 9335 rsmseglock_release(segp); 9336 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n", 9337 function)); 9338 return; 9339 } 9340 9341 rsmseglock_release(segp); 9342 9343 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9344 } 9345 9346 static void 9347 rsm_quiesce_imp_seg(rsmresource_t *resp) 9348 { 9349 rsmseg_t *segp = (rsmseg_t *)resp; 9350 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9351 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg"); 9352 9353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9354 "%s enter: key=%u\n", function, segp->s_key)); 9355 9356 rsmseglock_acquire(segp); 9357 segp->s_flags |= RSM_DR_INPROGRESS; 9358 9359 while (segp->s_rdmacnt != 0) { 9360 /* wait for the RDMA to complete */ 9361 cv_wait(&segp->s_cv, &segp->s_lock); 9362 } 9363 9364 
rsmseglock_release(segp); 9365 9366 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9367 9368 } 9369 9370 static void 9371 rsm_unquiesce_imp_seg(rsmresource_t *resp) 9372 { 9373 rsmseg_t *segp = (rsmseg_t *)resp; 9374 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9375 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg"); 9376 9377 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9378 "%s enter: key=%u\n", function, segp->s_key)); 9379 9380 rsmseglock_acquire(segp); 9381 9382 segp->s_flags &= ~RSM_DR_INPROGRESS; 9383 /* wake up any waiting putv/getv ops */ 9384 cv_broadcast(&segp->s_cv); 9385 9386 rsmseglock_release(segp); 9387 9388 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9389 9390 9391 } 9392 9393 static void 9394 rsm_process_exp_seg(rsmresource_t *resp, int event) 9395 { 9396 if (event == RSM_DR_QUIESCE) 9397 rsm_quiesce_exp_seg(resp); 9398 else /* UNQUIESCE */ 9399 rsm_unquiesce_exp_seg(resp); 9400 } 9401 9402 static void 9403 rsm_process_imp_seg(rsmresource_t *resp, int event) 9404 { 9405 if (event == RSM_DR_QUIESCE) 9406 rsm_quiesce_imp_seg(resp); 9407 else /* UNQUIESCE */ 9408 rsm_unquiesce_imp_seg(resp); 9409 } 9410 9411 static void 9412 rsm_dr_process_local_segments(int event) 9413 { 9414 9415 int i, j; 9416 rsmresource_blk_t *blk; 9417 rsmresource_t *p; 9418 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9419 9420 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9421 "rsm_dr_process_local_segments enter\n")); 9422 9423 /* iterate through the resource structure */ 9424 9425 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 9426 9427 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 9428 blk = rsm_resource.rsmrc_root[i]; 9429 if (blk != NULL) { 9430 for (j = 0; j < RSMRC_BLKSZ; j++) { 9431 p = blk->rsmrcblk_blks[j]; 9432 if ((p != NULL) && (p != RSMRC_RESERVED)) { 9433 /* valid resource */ 9434 if (p->rsmrc_type == 9435 RSM_RESOURCE_EXPORT_SEGMENT) 9436 rsm_process_exp_seg(p, event); 9437 else if (p->rsmrc_type == 9438 RSM_RESOURCE_IMPORT_SEGMENT) 9439 rsm_process_imp_seg(p, event); 9440 } 9441 } 9442 } 9443 } 9444 9445 rw_exit(&rsm_resource.rsmrc_lock); 9446 9447 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9448 "rsm_dr_process_local_segments done\n")); 9449 } 9450 9451 /* *************** DR callback functions ************ */ 9452 static void 9453 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */) 9454 { 9455 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9456 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9457 "rsm_dr_callback_post_add is a no-op\n")); 9458 /* Noop */ 9459 } 9460 9461 static int 9462 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */) 9463 { 9464 int recheck_state = 0; 9465 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9466 9467 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9468 "rsm_dr_callback_pre_del enter\n")); 9469 9470 mutex_enter(&rsm_drv_data.drv_lock); 9471 9472 do { 9473 recheck_state = 0; 9474 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9475 "rsm_dr_callback_pre_del:state=%d\n", 9476 rsm_drv_data.drv_state)); 9477 9478 switch (rsm_drv_data.drv_state) { 9479 case RSM_DRV_NEW: 9480 /* 9481 * The state should usually never be RSM_DRV_NEW 9482 * since in this state the callbacks have not yet 9483 * been registered. So, ASSERT. 9484 */ 9485 ASSERT(0); 9486 return (0); 9487 case RSM_DRV_REG_PROCESSING: 9488 /* 9489 * The driver is in the process of registering 9490 * with the DR framework. So, wait till the 9491 * registration process is complete. 
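 *
 * Since cv_wait drops drv_lock while sleeping, the driver state
 * may have changed again by the time this thread wakes up;
 * recheck_state is therefore set below so that the switch is
 * re-evaluated under the lock.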
9492 */ 9493 recheck_state = 1; 9494 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9495 break; 9496 case RSM_DRV_UNREG_PROCESSING: 9497 /* 9498 * If the state is RSM_DRV_UNREG_PROCESSING, the 9499 * module is in the process of detaching and 9500 * unregistering the callbacks from the DR 9501 * framework. So, simply return. 9502 */ 9503 mutex_exit(&rsm_drv_data.drv_lock); 9504 DBG_PRINTF((category, RSM_DEBUG, 9505 "rsm_dr_callback_pre_del:" 9506 "pre-del on NEW/UNREG\n")); 9507 return (0); 9508 case RSM_DRV_OK: 9509 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED; 9510 break; 9511 case RSM_DRV_PREDEL_STARTED: 9512 /* FALLTHRU */ 9513 case RSM_DRV_PREDEL_COMPLETED: 9514 /* FALLTHRU */ 9515 case RSM_DRV_POSTDEL_IN_PROGRESS: 9516 recheck_state = 1; 9517 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9518 break; 9519 case RSM_DRV_DR_IN_PROGRESS: 9520 rsm_drv_data.drv_memdel_cnt++; 9521 mutex_exit(&rsm_drv_data.drv_lock); 9522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9523 "rsm_dr_callback_pre_del done\n")); 9524 return (0); 9525 /* break; */ 9526 default: 9527 ASSERT(0); 9528 break; 9529 } 9530 9531 } while (recheck_state); 9532 9533 rsm_drv_data.drv_memdel_cnt++; 9534 9535 mutex_exit(&rsm_drv_data.drv_lock); 9536 9537 /* Do all the quiescing stuff here */ 9538 DBG_PRINTF((category, RSM_DEBUG, 9539 "rsm_dr_callback_pre_del: quiesce things now\n")); 9540 9541 rsm_dr_process_local_segments(RSM_DR_QUIESCE); 9542 9543 /* 9544 * now that all local segments have been quiesced lets inform 9545 * the importers 9546 */ 9547 rsm_send_suspend(); 9548 9549 /* 9550 * In response to the suspend message the remote node(s) will process 9551 * the segments and send a suspend_complete message. Till all 9552 * the nodes send the suspend_complete message we wait in the 9553 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce 9554 * function we transition to the RSM_DRV_PREDEL_COMPLETED state. 9555 */ 9556 mutex_enter(&rsm_drv_data.drv_lock); 9557 9558 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) { 9559 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9560 } 9561 9562 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED); 9563 9564 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS; 9565 cv_broadcast(&rsm_drv_data.drv_cv); 9566 9567 mutex_exit(&rsm_drv_data.drv_lock); 9568 9569 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9570 "rsm_dr_callback_pre_del done\n")); 9571 9572 return (0); 9573 } 9574 9575 static void 9576 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */) 9577 { 9578 int recheck_state = 0; 9579 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9580 9581 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9582 "rsm_dr_callback_post_del enter\n")); 9583 9584 mutex_enter(&rsm_drv_data.drv_lock); 9585 9586 do { 9587 recheck_state = 0; 9588 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9589 "rsm_dr_callback_post_del:state=%d\n", 9590 rsm_drv_data.drv_state)); 9591 9592 switch (rsm_drv_data.drv_state) { 9593 case RSM_DRV_NEW: 9594 /* 9595 * The driver state cannot not be RSM_DRV_NEW 9596 * since in this state the callbacks have not 9597 * yet been registered. 9598 */ 9599 ASSERT(0); 9600 return; 9601 case RSM_DRV_REG_PROCESSING: 9602 /* 9603 * The driver is in the process of registering with 9604 * the DR framework. Wait till the registration is 9605 * complete. 
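 *
 * As in the pre-del callback above, drv_lock is released inside
 * cv_wait, so recheck_state forces the switch to be re-evaluated
 * after the wakeup.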
9606 */ 9607 recheck_state = 1; 9608 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9609 break; 9610 case RSM_DRV_UNREG_PROCESSING: 9611 /* 9612 * RSM_DRV_UNREG_PROCESSING state means the module 9613 * is detaching and unregistering the callbacks 9614 * from the DR framework. So simply return. 9615 */ 9616 /* FALLTHRU */ 9617 case RSM_DRV_OK: 9618 /* 9619 * RSM_DRV_OK means we missed the pre-del 9620 * corresponding to this post-del coz we had not 9621 * registered yet, so simply return. 9622 */ 9623 mutex_exit(&rsm_drv_data.drv_lock); 9624 DBG_PRINTF((category, RSM_DEBUG, 9625 "rsm_dr_callback_post_del:" 9626 "post-del on OK/UNREG\n")); 9627 return; 9628 /* break; */ 9629 case RSM_DRV_PREDEL_STARTED: 9630 /* FALLTHRU */ 9631 case RSM_DRV_PREDEL_COMPLETED: 9632 /* FALLTHRU */ 9633 case RSM_DRV_POSTDEL_IN_PROGRESS: 9634 recheck_state = 1; 9635 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9636 break; 9637 case RSM_DRV_DR_IN_PROGRESS: 9638 rsm_drv_data.drv_memdel_cnt--; 9639 if (rsm_drv_data.drv_memdel_cnt > 0) { 9640 mutex_exit(&rsm_drv_data.drv_lock); 9641 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9642 "rsm_dr_callback_post_del done:\n")); 9643 return; 9644 } 9645 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS; 9646 break; 9647 default: 9648 ASSERT(0); 9649 return; 9650 /* break; */ 9651 } 9652 } while (recheck_state); 9653 9654 mutex_exit(&rsm_drv_data.drv_lock); 9655 9656 /* Do all the unquiescing stuff here */ 9657 DBG_PRINTF((category, RSM_DEBUG, 9658 "rsm_dr_callback_post_del: unquiesce things now\n")); 9659 9660 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE); 9661 9662 /* 9663 * now that all local segments have been unquiesced lets inform 9664 * the importers 9665 */ 9666 rsm_send_resume(); 9667 9668 mutex_enter(&rsm_drv_data.drv_lock); 9669 9670 rsm_drv_data.drv_state = RSM_DRV_OK; 9671 9672 cv_broadcast(&rsm_drv_data.drv_cv); 9673 9674 mutex_exit(&rsm_drv_data.drv_lock); 9675 9676 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9677 "rsm_dr_callback_post_del done\n")); 9678 9679 return; 9680 9681 } 9682
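/*
 * Summary of the memory DR callback state machine implemented above:
 *
 *	RSM_DRV_OK --(pre-del)--> RSM_DRV_PREDEL_STARTED
 *	    --(all importers acknowledge the suspend)-->
 *	    RSM_DRV_PREDEL_COMPLETED --> RSM_DRV_DR_IN_PROGRESS
 *	    --(last post-del)--> RSM_DRV_POSTDEL_IN_PROGRESS
 *	    --(resume sent)--> RSM_DRV_OK
 *
 * drv_memdel_cnt counts overlapping delete operations: an additional
 * pre-del that arrives while the driver is already in
 * RSM_DRV_DR_IN_PROGRESS only increments the count, and the
 * unquiesce/resume work in the post-del callback runs only when the
 * count drops back to zero.
 *
 * A minimal sketch of how such callbacks are typically registered with
 * the kernel memory DR framework is shown below, assuming the standard
 * <sys/mem_config.h> interface; the registration for this driver is
 * performed elsewhere (during attach) and may differ in detail:
 *
 *	static kphysm_setup_vector_t rsm_dr_callback_vec = {
 *		KPHYSM_SETUP_VECTOR_VERSION,
 *		rsm_dr_callback_post_add,
 *		rsm_dr_callback_pre_del,
 *		rsm_dr_callback_post_del
 *	};
 *
 *	(void) kphysm_setup_func_register(&rsm_dr_callback_vec, NULL);
 */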