xref: /illumos-gate/usr/src/uts/common/io/rsm/rsm.c (revision 3d393ee6c37fa10ac512ed6d36109ad616dc7c1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * Overview of the RSM Kernel Agent:
29  * ---------------------------------
30  *
31  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
32  * kernel agent is a pseudo device driver which makes use of the RSMPI
33  * interface on behalf of the RSMAPI user library.
34  *
35  * The kernel agent functionality can be categorized into the following
36  * components:
37  * 1. Driver Infrastructure
38  * 2. Export/Import Segment Management
39  * 3. Internal resource allocation/deallocation
40  *
41  * The driver infrastructure includes the basic module loading entry points
42  * like _init, _info, _fini to load, unload and report information about
43  * the driver module. The driver infrastructure also includes the
44  * autoconfiguration entry points namely, attach, detach and getinfo for
45  * the device autoconfiguration.
46  *
47  * The kernel agent is a pseudo character device driver and exports
48  * a cb_ops structure which defines the driver entry points for character
49  * device access. This includes the open and close entry points. The
50  * other entry points provided include ioctl, devmap and segmap and chpoll.
51  * read and write entry points are not used since the device is memory
52  * mapped. Also ddi_prop_op is used for the prop_op entry point.
53  *
54  * The ioctl entry point supports a number of commands, which are used by
55  * the RSMAPI library in order to export and import segments. These
56  * commands include commands for binding and rebinding the physical pages
57  * allocated to the virtual address range, publishing the export segment,
58  * unpublishing and republishing an export segment, creating an
59  * import segment and a virtual connection from this import segment to
60  * an export segment, performing scatter-gather data transfer, barrier
61  * operations.
62  *
63  *
64  * Export and Import segments:
65  * ---------------------------
66  *
67  * In order to create an RSM export segment a process allocates a range in its
68  * virtual address space for the segment using standard Solaris interfaces.
69  * The process then calls RSMAPI, which in turn makes an ioctl call to the
70  * RSM kernel agent for an allocation of physical memory pages and for
71  * creation of the export segment by binding these pages to the virtual
72  * address range. These pages are locked in memory so that remote accesses
73  * are always applied to the correct page. Then the RSM segment is published,
74  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
75  * is assigned to it.
76  *
77  * In order to import a published RSM segment, RSMAPI creates an import
78  * segment and forms a virtual connection across the interconnect to the
79  * export segment, via an ioctl into the kernel agent with the connect
80  * command. The import segment setup is completed by mapping the
81  * local device memory into the importers virtual address space. The
82  * mapping of the import segment is handled by the segmap/devmap
83  * infrastructure described as follows.
84  *
85  * Segmap and Devmap interfaces:
86  *
87  * The RSM kernel agent allows device memory to be directly accessed by user
88  * threads via memory mapping. In order to do so, the RSM kernel agent
89  * supports the devmap and segmap entry points.
90  *
91  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
92  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
93  * responsible for exporting the device memory to the user applications.
94  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
95  * control is transferred to the devmap_setup call which calls rsm_devmap.
96  *
97  * rsm_devmap validates the user mapping to the device or kernel memory
98  * and passes the information to the system for setting up the mapping. The
99  * actual setting up of the mapping is done by devmap_devmem_setup(for
100  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
101  * registered for device context management via the devmap_devmem_setup
102  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
103  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
104  * is created, a mapping is freed, a mapping is accessed or an existing
105  * mapping is duplicated respectively. These callbacks allow the RSM kernel
106  * agent to maintain state information associated with the mappings.
107  * The state information is mainly in the form of a cookie list for the import
108  * segment for which mapping has been done.
109  *
110  * Forced disconnect of import segments:
111  *
112  * When an exported segment is unpublished, the exporter sends a forced
113  * disconnect message to all its importers. The importer segments are
114  * unloaded and disconnected. This involves unloading the original
115  * mappings and remapping to a preallocated kernel trash page. This is
116  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
117  * preallocated by the kernel agent during attach using ddi_umem_alloc with
118  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
119  * due to unloading of the original mappings.
120  *
121  * Additionally every segment has a mapping generation number associated
122  * with it. This is an entry in the barrier generation page, created
123  * during attach time. This mapping generation number for the import
124  * segments is incremented on a force disconnect to notify the application
125  * of the force disconnect. On this notification, the application needs
126  * to reconnect the segment to establish a new legitimate mapping.
127  *
128  *
129  * Locks used in the kernel agent:
130  * -------------------------------
131  *
132  * The kernel agent uses a variety of mutexes and condition variables for
133  * mutual exclusion of the shared data structures and for synchronization
134  * between the various threads. Some of the locks are described as follows.
135  *
136  * Each resource structure, which represents either an export/import segment
137  * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
138  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
139  * rsmseglock_acquire and rsmseglock_release macros. An additional
140  * lock called the rsmsi_lock is used for the shared import data structure
141  * that is relevant for resources representing import segments. There is
142  * also a condition variable associated with the resource called s_cv. This
143  * is used to wait for events like the segment state change etc.
144  *
145  * The resource structures are allocated from a pool of resource structures,
146  * called rsm_resource. This pool is protected via a reader-writer lock,
147  * called rsmrc_lock.
148  *
149  * There are two separate hash tables, one for the export segments and
150  * one for the import segments. The export segments are inserted into the
151  * export segment hash table only after they have been published and the
152  * import segments are inserted in the import segments list only after they
153  * have successfully connected to an exported segment. These tables are
154  * protected via reader-writer locks.
155  *
156  * Debug Support in the kernel agent:
157  * ----------------------------------
158  *
159  * Debugging support in the kernel agent is provided by the following
160  * macros.
161  *
162  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
163  * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer
164  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
165  * on the definition of the category and level. All messages that belong to
166  * the specified category(rsmdbg_category) and are of an equal or greater
167  * severity than the specified level(rsmdbg_level) are logged. The message
168  * is a string which uses the same formatting rules as the strings used in
169  * printf.
170  *
171  * The category defines which component of the kernel agent has logged this
172  * message. There are a number of categories that have been defined such as
173  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
174  * DBG_ADDCATEGORY is used to add in another category to the currently
175  * specified category value so that the component using this new category
176  * can also effectively log debug messages. Thus, the category of a specific
177  * message is some combination of the available categories and we can define
178  * sub-categories if we want a finer level of granularity.
179  *
180  * The level defines the severity of the message. Different level values are
181  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
182  * the least severe(debug level is 0).
183  *
184  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
185  * variable or a string respectively.
186  *
187  *
188  * NOTES:
189  *
190  * Special Fork and Exec Handling:
191  * -------------------------------
192  *
193  * The backing physical pages of an exported segment are always locked down.
194  * Thus, there are two cases in which a process having exported segments
195  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
196  * forks and invokes exit before the duped file descriptors for the export
197  * segments are closed in the child process. The hang is caused because the
198  * address space release algorithm in Solaris VM subsystem is based on a
199  * non-blocking loop which does not terminate while segments are locked
200  * down. In addition to this, Solaris VM subsystem lacks a callback
201  * mechanism to the rsm kernel agent to allow unlocking these export
202  * segment pages.
203  *
204  * In order to circumvent this problem, the kernel agent does the following.
205  * The Solaris VM subsystem keeps memory segments in increasing order of
206  * virtual addresses. Thus a special page(special_exit_offset) is allocated
207  * by the kernel agent and is mmapped into the heap area of the process address
208  * space(the mmap is done by the RSMAPI library). During the mmap processing
209  * of this special page by the devmap infrastructure, a callback(the same
210  * devmap context management callbacks discussed above) is registered for an
211  * unmap.
212  *
213  * As discussed above, this page is processed by the Solaris address space
214  * release code before any of the exported segments pages(which are allocated
215  * from high memory). It is during this processing that the unmap callback gets
216  * called and this callback is responsible for force destroying the exported
217  * segments and thus eliminating the problem of locked pages.
218  *
219  * Flow-control:
220  * ------------
221  *
222  * A credit based flow control algorithm is used for messages whose
223  * processing cannot be done in the interrupt context because it might
224  * involve invoking rsmpi calls, or might take a long time to complete
225  * or might need to allocate resources. The algorithm operates on a per
226  * path basis. To send a message the pathend needs to have a credit and
227  * it consumes one for every message that is flow controlled. On the
228  * receiving pathend the message is put on a msgbuf_queue and a task is
229  * dispatched on the worker thread - recv_taskq where it is processed.
230  * After processing the message, the receiving pathend dequeues the message,
231  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
232  * credits to the sender pathend.
233  *
234  * RSM_DRTEST:
235  * -----------
236  *
237  * This is used to enable the DR testing using a test driver on test
238  * platforms which do not support DR.
239  *
240  */
241 
242 #include <sys/types.h>
243 #include <sys/param.h>
244 #include <sys/user.h>
245 #include <sys/buf.h>
246 #include <sys/systm.h>
247 #include <sys/cred.h>
248 #include <sys/vm.h>
249 #include <sys/uio.h>
250 #include <vm/seg.h>
251 #include <vm/page.h>
252 #include <sys/stat.h>
253 
254 #include <sys/time.h>
255 #include <sys/errno.h>
256 
257 #include <sys/file.h>
258 #include <sys/uio.h>
259 #include <sys/proc.h>
260 #include <sys/mman.h>
261 #include <sys/open.h>
262 #include <sys/atomic.h>
263 #include <sys/mem_config.h>
264 
265 
266 #include <sys/ddi.h>
267 #include <sys/devops.h>
268 #include <sys/ddidevmap.h>
269 #include <sys/sunddi.h>
270 #include <sys/esunddi.h>
271 #include <sys/ddi_impldefs.h>
272 
273 #include <sys/kmem.h>
274 #include <sys/conf.h>
275 #include <sys/devops.h>
276 #include <sys/ddi_impldefs.h>
277 
278 #include <sys/modctl.h>
279 
280 #include <sys/policy.h>
281 #include <sys/types.h>
282 #include <sys/conf.h>
283 #include <sys/param.h>
284 
285 #include <sys/taskq.h>
286 
287 #include <sys/rsm/rsm_common.h>
288 #include <sys/rsm/rsmapi_common.h>
289 #include <sys/rsm/rsm.h>
290 #include <rsm_in.h>
291 #include <sys/rsm/rsmka_path_int.h>
292 #include <sys/rsm/rsmpi.h>
293 
294 #include <sys/modctl.h>
295 #include <sys/debug.h>
296 
297 #include <sys/tuneable.h>
298 
299 #ifdef	RSM_DRTEST
300 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
301 		void *arg);
302 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
303 		void *arg);
304 #endif
305 
306 extern void dbg_printf(int category, int level, char *fmt, ...);
307 extern void rsmka_pathmanager_init();
308 extern void rsmka_pathmanager_cleanup();
309 extern void rele_sendq_token();
310 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
311 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
312 extern int rsmka_topology_ioctl(caddr_t, int, int);
313 
314 extern pri_t maxclsyspri;
315 extern work_queue_t work_queue;
316 extern kmutex_t ipc_info_lock;
317 extern kmutex_t ipc_info_cvlock;
318 extern kcondvar_t ipc_info_cv;
319 extern kmutex_t path_hold_cvlock;
320 extern kcondvar_t path_hold_cv;
321 
322 extern kmutex_t rsmka_buf_lock;
323 
324 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
325 extern adapter_t *rsmka_lookup_adapter(char *, int);
326 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
327 extern boolean_t rsmka_do_path_active(path_t *, int);
328 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
329 extern void rsmka_release_adapter(adapter_t *);
330 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
331 extern void rsmka_dequeue_msgbuf(path_t *path);
332 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
333 /* lint -w2 */
334 
335 static int rsm_open(dev_t *, int, int, cred_t *);
336 static int rsm_close(dev_t, int, int, cred_t *);
337 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
338     cred_t *credp, int *rvalp);
339 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
340     uint_t);
341 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
342     uint_t, uint_t, cred_t *);
343 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
344     struct pollhead **phpp);
345 
346 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
347 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
348 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
349 
350 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
351 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
352 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
353 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
354 				rsm_permission_t);
355 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
356 static void rsmacl_free(rsmapi_access_entry_t *, int);
357 static void rsmpiacl_free(rsm_access_entry_t *, int);
358 
359 static int rsm_inc_pgcnt(pgcnt_t);
360 static void rsm_dec_pgcnt(pgcnt_t);
361 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
362 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
363 					size_t *);
364 static void exporter_quiesce();
365 static void rsmseg_suspend(rsmseg_t *, int *);
366 static void rsmsegshare_suspend(rsmseg_t *);
367 static int rsmseg_resume(rsmseg_t *, void **);
368 static int rsmsegshare_resume(rsmseg_t *);
369 
370 static struct cb_ops rsm_cb_ops = {
371 	rsm_open,		/* open */
372 	rsm_close,		/* close */
373 	nodev,			/* strategy */
374 	nodev,			/* print */
375 	nodev,			/* dump */
376 	nodev,			/* read */
377 	nodev,			/* write */
378 	rsm_ioctl,		/* ioctl */
379 	rsm_devmap,		/* devmap */
380 	NULL,			/* mmap */
381 	rsm_segmap,		/* segmap */
382 	rsm_chpoll,		/* poll */
383 	ddi_prop_op,		/* cb_prop_op */
384 	0,			/* streamtab  */
385 	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
386 	0,
387 	0,
388 	0
389 };
390 
391 static struct dev_ops rsm_ops = {
392 	DEVO_REV,		/* devo_rev, */
393 	0,			/* refcnt  */
394 	rsm_info,		/* get_dev_info */
395 	nulldev,		/* identify */
396 	nulldev,		/* probe */
397 	rsm_attach,		/* attach */
398 	rsm_detach,		/* detach */
399 	nodev,			/* reset */
400 	&rsm_cb_ops,		/* driver operations */
401 	(struct bus_ops *)0,	/* bus operations */
402 	0,
403 	ddi_quiesce_not_needed,		/* quiesce */
404 };
405 
406 /*
407  * Module linkage information for the kernel.
408  */
409 
410 static struct modldrv modldrv = {
411 	&mod_driverops, /* Type of module.  This one is a pseudo driver */
412 	"Remote Shared Memory Driver",
413 	&rsm_ops,	/* driver ops */
414 };
415 
416 static struct modlinkage modlinkage = {
417 	MODREV_1,
418 	(void *)&modldrv,
419 	0,
420 	0,
421 	0
422 };
423 
424 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
425 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
426 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
427 
428 static kphysm_setup_vector_t rsm_dr_callback_vec = {
429 	KPHYSM_SETUP_VECTOR_VERSION,
430 	rsm_dr_callback_post_add,
431 	rsm_dr_callback_pre_del,
432 	rsm_dr_callback_post_del
433 };
434 
435 /* This flag can be changed to 0 to help with PIT testing */
436 int rsmka_modunloadok = 1;
437 int no_reply_cnt = 0;
438 
439 uint64_t rsm_ctrlmsg_errcnt = 0;
440 uint64_t rsm_ipcsend_errcnt = 0;
441 
442 #define	MAX_NODES 64
443 
444 static struct rsm_driver_data rsm_drv_data;
445 static struct rsmresource_table rsm_resource;
446 
447 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
448 static void rsmresource_destroy(void);
449 static int rsmresource_alloc(minor_t *);
450 static rsmresource_t *rsmresource_free(minor_t rnum);
451 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
452 static int rsm_unpublish(rsmseg_t *seg, int mode);
453 static int rsm_unbind(rsmseg_t *seg);
454 static uint_t rsmhash(rsm_memseg_id_t key);
455 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
456 static void rsmhash_free(rsmhash_table_t *rhash, int size);
457 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
458 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
459 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
460 					void *cookie);
461 int rsm_disconnect(rsmseg_t *seg);
462 void rsmseg_unload(rsmseg_t *);
463 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
464 
465 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
466     rsm_intr_q_op_t opcode, rsm_addr_t src,
467     void *data, size_t size, rsm_intr_hand_arg_t arg);
468 
469 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
470 
471 rsm_node_id_t my_nodeid;
472 
473 /* cookie, va, offsets and length for the barrier */
474 static rsm_gnum_t		*bar_va;
475 static ddi_umem_cookie_t	bar_cookie;
476 static off_t			barrier_offset;
477 static size_t			barrier_size;
478 static int			max_segs;
479 
480 /* cookie for the trash memory */
481 static ddi_umem_cookie_t	remap_cookie;
482 
483 static rsm_memseg_id_t	rsm_nextavail_segmentid;
484 
485 extern taskq_t *work_taskq;
486 extern char *taskq_name;
487 
488 static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */
489 
490 static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
491 rsmhash_table_t rsm_import_segs;		/* list of imported segs */
492 static rsmhash_table_t rsm_event_queues;	/* list of event queues */
493 
494 static	rsm_ipc_t	rsm_ipc;		/* ipc info */
495 
496 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
497 static list_head_t	rsm_suspend_list;
498 
499 /* list of descriptors for remote importers */
500 static importers_table_t importer_list;
501 
502 kmutex_t rsm_suspend_cvlock;
503 kcondvar_t rsm_suspend_cv;
504 
505 static kmutex_t rsm_lock;
506 
507 adapter_t loopback_adapter;
508 rsm_controller_attr_t loopback_attr;
509 
510 int rsmipc_send_controlmsg(path_t *path, int msgtype);
511 
512 void rsmka_init_loopback();
513 
514 int rsmka_null_seg_create(
515     rsm_controller_handle_t,
516     rsm_memseg_export_handle_t *,
517     size_t,
518     uint_t,
519     rsm_memory_local_t *,
520     rsm_resource_callback_t,
521     rsm_resource_callback_arg_t);
522 
523 int rsmka_null_seg_destroy(
524     rsm_memseg_export_handle_t);
525 
526 int rsmka_null_bind(
527     rsm_memseg_export_handle_t,
528     off_t,
529     rsm_memory_local_t *,
530     rsm_resource_callback_t,
531     rsm_resource_callback_arg_t);
532 
533 int rsmka_null_unbind(
534     rsm_memseg_export_handle_t,
535     off_t,
536     size_t);
537 
538 int rsmka_null_rebind(
539     rsm_memseg_export_handle_t,
540     off_t,
541     rsm_memory_local_t *,
542     rsm_resource_callback_t,
543     rsm_resource_callback_arg_t);
544 
545 int rsmka_null_publish(
546     rsm_memseg_export_handle_t,
547     rsm_access_entry_t [],
548     uint_t,
549     rsm_memseg_id_t,
550     rsm_resource_callback_t,
551     rsm_resource_callback_arg_t);
552 
553 
554 int rsmka_null_republish(
555     rsm_memseg_export_handle_t,
556     rsm_access_entry_t [],
557     uint_t,
558     rsm_resource_callback_t,
559     rsm_resource_callback_arg_t);
560 
561 int rsmka_null_unpublish(
562     rsm_memseg_export_handle_t);
563 
564 rsm_ops_t null_rsmpi_ops;
565 
566 /*
567  * data and locks to keep track of total amount of exported memory
568  */
569 static	pgcnt_t		rsm_pgcnt;
570 static	pgcnt_t		rsm_pgcnt_max;	/* max allowed */
571 static	kmutex_t	rsm_pgcnt_lock;
572 
573 static	int		rsm_enable_dr;
574 
575 static	char		loopback_str[] = "loopback";
576 
577 int		rsm_hash_size;
578 
579 /*
580  * The locking model is as follows:
581  *
582  * Local operations:
583  *		find resource - grab reader lock on resource list
584  *		insert rc     - grab writer lock
585  *		delete rc     - grab writer lock and resource mutex
586  *		read/write    - no lock
587  *
588  * Remote invocations:
589  *		find resource - grab read lock and resource mutex
590  *
591  * State:
592  *		resource state - grab resource mutex
593  */
594 
595 int
596 _init(void)
597 {
598 	int e;
599 
600 	e = mod_install(&modlinkage);
601 	if (e != 0) {
602 		return (e);
603 	}
604 
605 	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
606 
607 	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
608 
609 
610 	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
611 
612 	rsm_hash_size = RSM_HASHSZ;
613 
614 	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
615 
616 	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
617 
618 	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
619 
620 	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
621 	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
622 
623 	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
624 	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
625 
626 	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
627 	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
628 
629 	rsm_ipc.count = RSMIPC_SZ;
630 	rsm_ipc.wanted = 0;
631 	rsm_ipc.sequence = 0;
632 
633 	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
634 
635 	for (e = 0; e < RSMIPC_SZ; e++) {
636 		rsmipc_slot_t *slot = &rsm_ipc.slots[e];
637 
638 		RSMIPC_SET(slot, RSMIPC_FREE);
639 		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
640 		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
641 	}
642 
643 	/*
644 	 * Initialize the suspend message list
645 	 */
646 	rsm_suspend_list.list_head = NULL;
647 	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
648 
649 	/*
650 	 * It is assumed here that configuration data is available
651 	 * during system boot since _init may be called at that time.
652 	 */
653 
654 	rsmka_pathmanager_init();
655 
656 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
657 	    "rsm: _init done\n"));
658 
659 	return (DDI_SUCCESS);
660 
661 }
662 
663 int
664 _info(struct modinfo *modinfop)
665 {
666 
667 	return (mod_info(&modlinkage, modinfop));
668 }
669 
670 int
671 _fini(void)
672 {
673 	int e;
674 
675 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
676 	    "rsm: _fini enter\n"));
677 
678 	/*
679 	 * The rsmka_modunloadok flag is simply used to help with
680 	 * the PIT testing. Make this flag 0 to disallow modunload.
681 	 */
682 	if (rsmka_modunloadok == 0)
683 		return (EBUSY);
684 
685 	/* rsm_detach will be called as a result of mod_remove */
686 	e = mod_remove(&modlinkage);
687 	if (e) {
688 		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
689 		    "Unable to fini RSM %x\n", e));
690 		return (e);
691 	}
692 
693 	rsmka_pathmanager_cleanup();
694 
695 	rw_destroy(&rsm_resource.rsmrc_lock);
696 
697 	rw_destroy(&rsm_export_segs.rsmhash_rw);
698 	rw_destroy(&rsm_import_segs.rsmhash_rw);
699 	rw_destroy(&rsm_event_queues.rsmhash_rw);
700 
701 	mutex_destroy(&importer_list.lock);
702 
703 	mutex_destroy(&rsm_ipc.lock);
704 	cv_destroy(&rsm_ipc.cv);
705 
706 	(void) mutex_destroy(&rsm_suspend_list.list_lock);
707 
708 	(void) mutex_destroy(&rsm_pgcnt_lock);
709 
710 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
711 
712 	return (DDI_SUCCESS);
713 
714 }
715 
716 /*ARGSUSED1*/
717 static int
718 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
719 {
720 	minor_t	rnum;
721 	int	percent;
722 	int	ret;
723 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
724 
725 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
726 
727 	switch (cmd) {
728 	case DDI_ATTACH:
729 		break;
730 	case DDI_RESUME:
731 	default:
732 		DBG_PRINTF((category, RSM_ERR,
733 		    "rsm:rsm_attach - cmd not supported\n"));
734 		return (DDI_FAILURE);
735 	}
736 
737 	if (rsm_dip != NULL) {
738 		DBG_PRINTF((category, RSM_ERR,
739 		    "rsm:rsm_attach - supports only "
740 		    "one instance\n"));
741 		return (DDI_FAILURE);
742 	}
743 
744 	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
745 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
746 	    "enable-dynamic-reconfiguration", 1);
747 
748 	mutex_enter(&rsm_drv_data.drv_lock);
749 	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
750 	mutex_exit(&rsm_drv_data.drv_lock);
751 
752 	if (rsm_enable_dr) {
753 #ifdef	RSM_DRTEST
754 		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
755 		    (void *)NULL);
756 #else
757 		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
758 		    (void *)NULL);
759 #endif
760 		if (ret != 0) {
761 			mutex_exit(&rsm_drv_data.drv_lock);
762 			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
763 			    "reconfiguration setup failed\n");
764 			return (DDI_FAILURE);
765 		}
766 	}
767 
768 	mutex_enter(&rsm_drv_data.drv_lock);
769 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
770 	rsm_drv_data.drv_state = RSM_DRV_OK;
771 	cv_broadcast(&rsm_drv_data.drv_cv);
772 	mutex_exit(&rsm_drv_data.drv_lock);
773 
774 	/*
775 	 * page_list_read_lock();
776 	 * xx_setup();
777 	 * page_list_read_unlock();
778 	 */
779 
780 	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
781 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
782 	    "segment-hashtable-size", RSM_HASHSZ);
783 	if (rsm_hash_size == 0) {
784 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
785 		    "rsm: segment-hashtable-size in rsm.conf "
786 		    "must be greater than 0, defaulting to 128\n"));
787 		rsm_hash_size = RSM_HASHSZ;
788 	}
789 
790 	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
791 	    rsm_hash_size));
792 
793 	rsm_pgcnt = 0;
794 
795 	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
796 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
797 	    "max-exported-memory", 0);
798 	if (percent < 0) {
799 		DBG_PRINTF((category, RSM_ERR,
800 		    "rsm:rsm_attach not enough memory available to "
801 		    "export, or max-exported-memory set incorrectly.\n"));
802 		return (DDI_FAILURE);
803 	}
804 	/* 0 indicates no fixed upper limit. maxmem is the max	*/
805 	/* available pageable physical mem			*/
806 	rsm_pgcnt_max = (percent*maxmem)/100;
807 
808 	if (rsm_pgcnt_max > 0) {
809 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
810 		    "rsm: Available physical memory = %lu pages, "
811 		    "Max exportable memory = %lu pages",
812 		    maxmem, rsm_pgcnt_max));
813 	}
814 
815 	/*
816 	 * Create minor number
817 	 */
818 	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
819 		DBG_PRINTF((category, RSM_ERR,
820 		    "rsm: rsm_attach - Unable to get "
821 		    "minor number\n"));
822 		return (DDI_FAILURE);
823 	}
824 
825 	ASSERT(rnum == RSM_DRIVER_MINOR);
826 
827 	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
828 	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
829 		DBG_PRINTF((category, RSM_ERR,
830 		    "rsm: rsm_attach - unable to allocate "
831 		    "minor #\n"));
832 		return (DDI_FAILURE);
833 	}
834 
835 	rsm_dip = devi;
836 	/*
837 	 * Allocate the hashtables
838 	 */
839 	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
840 	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
841 
842 	importer_list.bucket = (importing_token_t **)
843 	    kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
844 
845 	/*
846 	 * Allocate a resource struct
847 	 */
848 	{
849 		rsmresource_t *p;
850 
851 		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
852 
853 		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
854 
855 		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
856 	}
857 
858 	/*
859 	 * Based on the rsm.conf property max-segments, determine the maximum
860 	 * number of segments that can be exported/imported. This is then used
861 	 * to determine the size for barrier failure pages.
862 	 */
863 
864 	/* First get the max number of segments from the rsm.conf file */
865 	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
866 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
867 	    "max-segments", 0);
868 	if (max_segs == 0) {
869 		/* Use default number of segments */
870 		max_segs = RSM_MAX_NUM_SEG;
871 	}
872 
873 	/*
874 	 * Based on the max number of segments allowed, determine the barrier
875 	 * page size. add 1 to max_segs since the barrier page itself uses
876 	 * a slot
877 	 */
878 	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
879 	    PAGESIZE);
880 
881 	/*
882 	 * allocation of the barrier failure page
883 	 */
884 	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
885 	    DDI_UMEM_SLEEP, &bar_cookie);
886 
887 	/*
888 	 * Set the barrier_offset
889 	 */
890 	barrier_offset = 0;
891 
892 	/*
893 	 * Allocate a trash memory and get a cookie for it. This will be used
894 	 * when remapping segments during force disconnects. Allocate the
895 	 * trash memory with a large size which is page aligned.
896 	 */
897 	(void) ddi_umem_alloc((size_t)TRASHSIZE,
898 	    DDI_UMEM_TRASH, &remap_cookie);
899 
900 	/* initialize user segment id allocation variable */
901 	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
902 
903 	/*
904 	 * initialize the null_rsmpi_ops vector and the loopback adapter
905 	 */
906 	rsmka_init_loopback();
907 
908 
909 	ddi_report_dev(devi);
910 
911 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
912 
913 	return (DDI_SUCCESS);
914 }
915 
916 /*
917  * The call to mod_remove in the _fine routine will cause the system
918  * to call rsm_detach
919  */
920 /*ARGSUSED*/
921 static int
922 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
923 {
924 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
925 
926 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
927 
928 	switch (cmd) {
929 	case DDI_DETACH:
930 		break;
931 	default:
932 		DBG_PRINTF((category, RSM_ERR,
933 		    "rsm:rsm_detach - cmd %x not supported\n",
934 		    cmd));
935 		return (DDI_FAILURE);
936 	}
937 
938 	mutex_enter(&rsm_drv_data.drv_lock);
939 	while (rsm_drv_data.drv_state != RSM_DRV_OK)
940 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
941 	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
942 	mutex_exit(&rsm_drv_data.drv_lock);
943 
944 	/*
945 	 * Unregister the DR callback functions
946 	 */
947 	if (rsm_enable_dr) {
948 #ifdef	RSM_DRTEST
949 		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
950 		    (void *)NULL);
951 #else
952 		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
953 		    (void *)NULL);
954 #endif
955 	}
956 
957 	mutex_enter(&rsm_drv_data.drv_lock);
958 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
959 	rsm_drv_data.drv_state = RSM_DRV_NEW;
960 	mutex_exit(&rsm_drv_data.drv_lock);
961 
962 	ASSERT(rsm_suspend_list.list_head == NULL);
963 
964 	/*
965 	 * Release all resources, seglist, controller, ...
966 	 */
967 
968 	/* remove intersend queues */
969 	/* remove registered services */
970 
971 
972 	ddi_remove_minor_node(dip, DRIVER_NAME);
973 	rsm_dip = NULL;
974 
975 	/*
976 	 * Free minor zero resource
977 	 */
978 	{
979 		rsmresource_t *p;
980 
981 		p = rsmresource_free(RSM_DRIVER_MINOR);
982 		if (p) {
983 			mutex_destroy(&p->rsmrc_lock);
984 			kmem_free((void *)p, sizeof (*p));
985 		}
986 	}
987 
988 	/*
989 	 * Free resource table
990 	 */
991 
992 	rsmresource_destroy();
993 
994 	/*
995 	 * Free the hash tables
996 	 */
997 	rsmhash_free(&rsm_export_segs, rsm_hash_size);
998 	rsmhash_free(&rsm_import_segs, rsm_hash_size);
999 
1000 	kmem_free((void *)importer_list.bucket,
1001 	    rsm_hash_size * sizeof (importing_token_t *));
1002 	importer_list.bucket = NULL;
1003 
1004 
1005 	/* free barrier page */
1006 	if (bar_cookie != NULL) {
1007 		ddi_umem_free(bar_cookie);
1008 	}
1009 	bar_va = NULL;
1010 	bar_cookie = NULL;
1011 
1012 	/*
1013 	 * Free the memory allocated for the trash
1014 	 */
1015 	if (remap_cookie != NULL) {
1016 		ddi_umem_free(remap_cookie);
1017 	}
1018 	remap_cookie = NULL;
1019 
1020 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1021 
1022 	return (DDI_SUCCESS);
1023 }
1024 
1025 /*ARGSUSED*/
1026 static int
1027 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1028 {
1029 	register int error;
1030 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1031 
1032 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1033 
1034 	switch (infocmd) {
1035 	case DDI_INFO_DEVT2DEVINFO:
1036 		if (rsm_dip == NULL)
1037 			error = DDI_FAILURE;
1038 		else {
1039 			*result = (void *)rsm_dip;
1040 			error = DDI_SUCCESS;
1041 		}
1042 		break;
1043 	case DDI_INFO_DEVT2INSTANCE:
1044 		*result = (void *)0;
1045 		error = DDI_SUCCESS;
1046 		break;
1047 	default:
1048 		error = DDI_FAILURE;
1049 	}
1050 
1051 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1052 	return (error);
1053 }
1054 
1055 adapter_t *
1056 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1057 {
1058 	adapter_t *adapter;
1059 	char adapter_devname[MAXNAMELEN];
1060 	int instance;
1061 	DBG_DEFINE(category,
1062 	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1063 
1064 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1065 
1066 	instance = msg->cnum;
1067 
1068 	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1069 		return (NULL);
1070 	}
1071 
1072 	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1073 		return (NULL);
1074 
1075 	if (strcmp(adapter_devname, "loopback") == 0)
1076 		return (&loopback_adapter);
1077 
1078 	adapter = rsmka_lookup_adapter(adapter_devname, instance);
1079 
1080 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1081 
1082 	return (adapter);
1083 }
1084 
1085 
1086 /*
1087  * *********************** Resource Number Management ********************
1088  * All resources are stored in a simple hash table. The table is an array
1089  * of pointers to resource blks. Each blk contains:
1090  *	base	- base number of this blk
1091  *	used	- number of used slots in this blk.
1092  *	blks    - array of pointers to resource items.
1093  * An entry in a resource blk is empty if it's NULL.
1094  *
1095  * We start with no resource array. Each time we run out of slots, we
1096  * reallocate a new larger array and copy the pointer to the new array and
1097  * a new resource blk is allocated and added to the hash table.
1098  *
1099  * The resource control block contains:
1100  *      root    - array of pointer of resource blks
1101  *      sz      - current size of array.
1102  *      len     - last valid entry in array.
1103  *
1104  * A search operation based on a resource number is as follows:
1105  *      index = rnum / RESOURCE_BLKSZ;
1106  *      ASSERT(index < resource_block.len);
1107  *      ASSERT(index < resource_block.sz);
1108  *	offset = rnum % RESOURCE_BLKSZ;
1109  *      ASSERT(offset >= resource_block.root[index]->base);
1110  *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1111  *	return resource_block.root[index]->blks[offset];
1112  *
 * A resource blk is freed when its used count reaches zero.
1114  */
1115 static int
1116 rsmresource_alloc(minor_t *rnum)
1117 {
1118 
1119 	/* search for available resource slot */
1120 	int i, j, empty = -1;
1121 	rsmresource_blk_t *blk;
1122 
1123 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1124 	    "rsmresource_alloc enter\n"));
1125 
1126 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1127 
1128 	/* Try to find an empty slot */
1129 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1130 		blk = rsm_resource.rsmrc_root[i];
1131 		if (blk != NULL && blk->rsmrcblk_avail > 0) {
1132 			/* found an empty slot in this blk */
1133 			for (j = 0; j < RSMRC_BLKSZ; j++) {
1134 				if (blk->rsmrcblk_blks[j] == NULL) {
1135 					*rnum = (minor_t)
1136 					    (j + (i * RSMRC_BLKSZ));
1137 					/*
1138 					 * obey gen page limits
1139 					 */
1140 					if (*rnum >= max_segs + 1) {
1141 						if (empty < 0) {
1142 							rw_exit(&rsm_resource.
1143 							    rsmrc_lock);
1144 							DBG_PRINTF((
1145 							    RSM_KERNEL_ALL,
1146 							    RSM_ERR,
1147 							    "rsmresource"
1148 							    "_alloc failed:"
1149 							    "not enough res"
1150 							    "%d\n", *rnum));
1151 					return (RSMERR_INSUFFICIENT_RESOURCES);
1152 						} else {
1153 							/* use empty slot */
1154 							break;
1155 						}
1156 
1157 					}
1158 
1159 					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1160 					blk->rsmrcblk_avail--;
1161 					rw_exit(&rsm_resource.rsmrc_lock);
1162 					DBG_PRINTF((RSM_KERNEL_ALL,
1163 					    RSM_DEBUG_VERBOSE,
1164 					    "rsmresource_alloc done\n"));
1165 					return (RSM_SUCCESS);
1166 				}
1167 			}
1168 		} else if (blk == NULL && empty < 0) {
1169 			/* remember first empty slot */
1170 			empty = i;
1171 		}
1172 	}
1173 
1174 	/* Couldn't find anything, allocate a new blk */
1175 	/*
1176 	 * Do we need to reallocate the root array
1177 	 */
1178 	if (empty < 0) {
1179 		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1180 			/*
1181 			 * Allocate new array and copy current stuff into it
1182 			 */
1183 			rsmresource_blk_t	**p;
1184 			uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1185 			    RSMRC_BLKSZ;
1186 			/*
1187 			 * Don't allocate more that max valid rnum
1188 			 */
1189 			if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1190 			    max_segs + 1) {
1191 				rw_exit(&rsm_resource.rsmrc_lock);
1192 				return (RSMERR_INSUFFICIENT_RESOURCES);
1193 			}
1194 
1195 			p = (rsmresource_blk_t **)kmem_zalloc(
1196 			    newsz * sizeof (*p),
1197 			    KM_SLEEP);
1198 
1199 			if (rsm_resource.rsmrc_root) {
1200 				uint_t oldsz;
1201 
1202 				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1203 				    (int)sizeof (*p));
1204 
1205 				/*
1206 				 * Copy old data into new space and
1207 				 * free old stuff
1208 				 */
1209 				bcopy(rsm_resource.rsmrc_root, p, oldsz);
1210 				kmem_free(rsm_resource.rsmrc_root, oldsz);
1211 			}
1212 
1213 			rsm_resource.rsmrc_root = p;
1214 			rsm_resource.rsmrc_sz = (int)newsz;
1215 		}
1216 
1217 		empty = rsm_resource.rsmrc_len;
1218 		rsm_resource.rsmrc_len++;
1219 	}
1220 
1221 	/*
1222 	 * Allocate a new blk
1223 	 */
1224 	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1225 	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1226 	rsm_resource.rsmrc_root[empty] = blk;
1227 	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1228 
1229 	/*
1230 	 * Allocate slot
1231 	 */
1232 
1233 	*rnum = (minor_t)(empty * RSMRC_BLKSZ);
1234 
1235 	/*
1236 	 * watch out not to exceed bounds of barrier page
1237 	 */
1238 	if (*rnum >= max_segs + 1) {
1239 		rw_exit(&rsm_resource.rsmrc_lock);
1240 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1241 		    "rsmresource_alloc failed %d\n", *rnum));
1242 
1243 		return (RSMERR_INSUFFICIENT_RESOURCES);
1244 	}
1245 	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1246 
1247 
1248 	rw_exit(&rsm_resource.rsmrc_lock);
1249 
1250 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1251 	    "rsmresource_alloc done\n"));
1252 
1253 	return (RSM_SUCCESS);
1254 }
1255 
1256 static rsmresource_t *
1257 rsmresource_free(minor_t rnum)
1258 {
1259 
1260 	/* search for available resource slot */
1261 	int i, j;
1262 	rsmresource_blk_t *blk;
1263 	rsmresource_t *p;
1264 
1265 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1266 	    "rsmresource_free enter\n"));
1267 
1268 	i = (int)(rnum / RSMRC_BLKSZ);
1269 	j = (int)(rnum % RSMRC_BLKSZ);
1270 
1271 	if (i >= rsm_resource.rsmrc_len) {
1272 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1273 		    "rsmresource_free done\n"));
1274 		return (NULL);
1275 	}
1276 
1277 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1278 
1279 	ASSERT(rsm_resource.rsmrc_root);
1280 	ASSERT(i < rsm_resource.rsmrc_len);
1281 	ASSERT(i < rsm_resource.rsmrc_sz);
1282 	blk = rsm_resource.rsmrc_root[i];
1283 	if (blk == NULL) {
1284 		rw_exit(&rsm_resource.rsmrc_lock);
1285 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1286 		    "rsmresource_free done\n"));
1287 		return (NULL);
1288 	}
1289 
1290 	ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1291 
1292 	p = blk->rsmrcblk_blks[j];
1293 	if (p == RSMRC_RESERVED) {
1294 		p = NULL;
1295 	}
1296 
1297 	blk->rsmrcblk_blks[j] = NULL;
1298 	blk->rsmrcblk_avail++;
1299 	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1300 		/* free this blk */
1301 		kmem_free(blk, sizeof (*blk));
1302 		rsm_resource.rsmrc_root[i] = NULL;
1303 	}
1304 
1305 	rw_exit(&rsm_resource.rsmrc_lock);
1306 
1307 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1308 	    "rsmresource_free done\n"));
1309 
1310 	return (p);
1311 }
1312 
1313 static rsmresource_t *
1314 rsmresource_lookup(minor_t rnum, int lock)
1315 {
1316 	int i, j;
1317 	rsmresource_blk_t *blk;
1318 	rsmresource_t *p;
1319 
1320 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1321 	    "rsmresource_lookup enter\n"));
1322 
1323 	/* Find resource and lock it in READER mode */
1324 	/* search for available resource slot */
1325 
1326 	i = (int)(rnum / RSMRC_BLKSZ);
1327 	j = (int)(rnum % RSMRC_BLKSZ);
1328 
1329 	if (i >= rsm_resource.rsmrc_len) {
1330 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1331 		    "rsmresource_lookup done\n"));
1332 		return (NULL);
1333 	}
1334 
1335 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1336 
1337 	blk = rsm_resource.rsmrc_root[i];
1338 	if (blk != NULL) {
1339 		ASSERT(i < rsm_resource.rsmrc_len);
1340 		ASSERT(i < rsm_resource.rsmrc_sz);
1341 
1342 		p = blk->rsmrcblk_blks[j];
1343 		if (lock == RSM_LOCK) {
1344 			if (p != RSMRC_RESERVED) {
1345 				mutex_enter(&p->rsmrc_lock);
1346 			} else {
1347 				p = NULL;
1348 			}
1349 		}
1350 	} else {
1351 		p = NULL;
1352 	}
1353 	rw_exit(&rsm_resource.rsmrc_lock);
1354 
1355 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1356 	    "rsmresource_lookup done\n"));
1357 
1358 	return (p);
1359 }
1360 
1361 static void
1362 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1363 {
1364 	/* Find resource and lock it in READER mode */
1365 	/* Caller can upgrade if need be */
1366 	/* search for available resource slot */
1367 	int i, j;
1368 	rsmresource_blk_t *blk;
1369 
1370 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1371 	    "rsmresource_insert enter\n"));
1372 
1373 	i = (int)(rnum / RSMRC_BLKSZ);
1374 	j = (int)(rnum % RSMRC_BLKSZ);
1375 
1376 	p->rsmrc_type = type;
1377 	p->rsmrc_num = rnum;
1378 
1379 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1380 
1381 	ASSERT(rsm_resource.rsmrc_root);
1382 	ASSERT(i < rsm_resource.rsmrc_len);
1383 	ASSERT(i < rsm_resource.rsmrc_sz);
1384 
1385 	blk = rsm_resource.rsmrc_root[i];
1386 	ASSERT(blk);
1387 
1388 	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1389 
1390 	blk->rsmrcblk_blks[j] = p;
1391 
1392 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1393 	    "rsmresource_insert done\n"));
1394 
1395 	rw_exit(&rsm_resource.rsmrc_lock);
1396 }
1397 
1398 static void
1399 rsmresource_destroy()
1400 {
1401 	int i, j;
1402 
1403 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1404 	    "rsmresource_destroy enter\n"));
1405 
1406 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1407 
1408 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1409 		rsmresource_blk_t	*blk;
1410 
1411 		blk = rsm_resource.rsmrc_root[i];
1412 		if (blk == NULL) {
1413 			continue;
1414 		}
1415 		for (j = 0; j < RSMRC_BLKSZ; j++) {
1416 			if (blk->rsmrcblk_blks[j] != NULL) {
1417 				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1418 				    "Not null slot %d, %lx\n", j,
1419 				    (size_t)blk->rsmrcblk_blks[j]));
1420 			}
1421 		}
1422 		kmem_free(blk, sizeof (*blk));
1423 		rsm_resource.rsmrc_root[i] = NULL;
1424 	}
1425 	if (rsm_resource.rsmrc_root) {
1426 		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1427 		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1428 		rsm_resource.rsmrc_root = NULL;
1429 		rsm_resource.rsmrc_len = 0;
1430 		rsm_resource.rsmrc_sz = 0;
1431 	}
1432 
1433 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1434 	    "rsmresource_destroy done\n"));
1435 
1436 	rw_exit(&rsm_resource.rsmrc_lock);
1437 }
1438 
1439 
1440 /* ******************** Generic Key Hash Table Management ********* */
1441 static rsmresource_t *
1442 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1443     rsm_resource_state_t state)
1444 {
1445 	rsmresource_t	*p;
1446 	uint_t		hashval;
1447 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1448 
1449 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1450 
1451 	hashval = rsmhash(key);
1452 
1453 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1454 	    key, hashval));
1455 
1456 	rw_enter(&rhash->rsmhash_rw, RW_READER);
1457 
1458 	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1459 
1460 	for (; p; p = p->rsmrc_next) {
1461 		if (p->rsmrc_key == key) {
1462 			/* acquire resource lock */
1463 			RSMRC_LOCK(p);
1464 			break;
1465 		}
1466 	}
1467 
1468 	rw_exit(&rhash->rsmhash_rw);
1469 
1470 	if (p != NULL && p->rsmrc_state != state) {
1471 		/* state changed, release lock and return null */
1472 		RSMRC_UNLOCK(p);
1473 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1474 		    "rsmhash_lookup done: state changed\n"));
1475 		return (NULL);
1476 	}
1477 
1478 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1479 
1480 	return (p);
1481 }
1482 
1483 static void
1484 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1485 {
1486 	rsmresource_t		*p, **back;
1487 	uint_t			hashval;
1488 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1489 
1490 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1491 
1492 	hashval = rsmhash(rcelm->rsmrc_key);
1493 
1494 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1495 	    rcelm->rsmrc_key, hashval));
1496 
1497 	/*
1498 	 * It's ok not to find the segment.
1499 	 */
1500 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1501 
1502 	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1503 
1504 	for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1505 		if (p == rcelm) {
1506 			*back = rcelm->rsmrc_next;
1507 			break;
1508 		}
1509 	}
1510 
1511 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1512 
1513 	rw_exit(&rhash->rsmhash_rw);
1514 }
1515 
1516 static int
1517 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1518     int dup_check, rsm_resource_state_t state)
1519 {
1520 	rsmresource_t	*p = NULL, **bktp;
1521 	uint_t		hashval;
1522 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1523 
1524 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1525 
1526 	/* lock table */
1527 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1528 
1529 	/*
1530 	 * If the current resource state is other than the state passed in
1531 	 * then the resource is (probably) already on the list. eg. for an
1532 	 * import segment if the state is not RSM_STATE_NEW then it's on the
1533 	 * list already.
1534 	 */
1535 	RSMRC_LOCK(new);
1536 	if (new->rsmrc_state != state) {
1537 		RSMRC_UNLOCK(new);
1538 		rw_exit(&rhash->rsmhash_rw);
1539 		return (RSMERR_BAD_SEG_HNDL);
1540 	}
1541 
1542 	hashval = rsmhash(key);
1543 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1544 
1545 	if (dup_check) {
1546 		/*
1547 		 * Used for checking export segments; don't want to have
1548 		 * the same key used for multiple segments.
1549 		 */
1550 
1551 		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1552 
1553 		for (; p; p = p->rsmrc_next) {
1554 			if (p->rsmrc_key == key) {
1555 				RSMRC_UNLOCK(new);
1556 				break;
1557 			}
1558 		}
1559 	}
1560 
1561 	if (p == NULL) {
1562 		/* Key doesn't exist, add it */
1563 
1564 		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1565 
1566 		new->rsmrc_key = key;
1567 		new->rsmrc_next = *bktp;
1568 		*bktp = new;
1569 	}
1570 
1571 	rw_exit(&rhash->rsmhash_rw);
1572 
1573 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1574 
1575 	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1576 }
1577 
1578 /*
1579  * XOR each byte of the key.
1580  */
1581 static uint_t
1582 rsmhash(rsm_memseg_id_t key)
1583 {
1584 	uint_t	hash = key;
1585 
1586 	hash ^=  (key >> 8);
1587 	hash ^=  (key >> 16);
1588 	hash ^=  (key >> 24);
1589 
1590 	return (hash % rsm_hash_size);
1591 
1592 }
1593 
1594 /*
1595  * generic function to get a specific bucket
1596  */
1597 static void *
1598 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1599 {
1600 
1601 	if (rhash->bucket == NULL)
1602 		return (NULL);
1603 	else
1604 		return ((void *)rhash->bucket[hashval]);
1605 }
1606 
1607 /*
1608  * generic function to get a specific bucket's address
1609  */
1610 static void **
1611 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1612 {
1613 	if (rhash->bucket == NULL)
1614 		return (NULL);
1615 	else
1616 		return ((void **)&(rhash->bucket[hashval]));
1617 }
1618 
1619 /*
1620  * generic function to alloc a hash table
1621  */
1622 static void
1623 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1624 {
1625 	rhash->bucket = (rsmresource_t **)
1626 	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1627 }
1628 
1629 /*
1630  * generic function to free a hash table
1631  */
1632 static void
1633 rsmhash_free(rsmhash_table_t *rhash, int size)
1634 {
1635 
1636 	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1637 	rhash->bucket = NULL;
1638 
1639 }
1640 /* *********************** Exported Segment Key Management ************ */
1641 
/*
 * Export segment hash table wrappers.
 *
 * rsmexport_add    - add a segment in RSM_STATE_BIND under key, rejecting
 *                    duplicate keys (dup_check == 1)
 * rsmexport_rm     - remove a segment from the export table
 * rsmexport_lookup - find a segment in RSM_STATE_EXPORT by key
 *
 * All arguments are parenthesized so that an expression argument is
 * evaluated as a whole before the cast is applied.
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), RSM_STATE_EXPORT))
1651 
1652 /* ************************** Import Segment List Management ********** */
1653 
/*
 * Import segment hash table wrappers.
 *
 * rsmimport_add adds a segment to the import list; this will be useful
 * for paging and loopback segment unloading.  The segment is expected to
 * be in RSM_STATE_NEW and no duplicate key check is done (dup_check == 0),
 * so several import segments may share one key.
 *
 * rsmimport_rm removes a segment from the import list.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1664 
1665 /*
1666  *	#define	rsmimport_lookup(key)	\
1667  *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1668  */
1669 
1670 /*
1671  * increase the ref count and make the import segment point to the
1672  * shared data structure. Return a pointer to the share data struct
1673  * and the shared data struct is locked upon return
1674  */
1675 static rsm_import_share_t *
1676 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1677     rsmseg_t *segp)
1678 {
1679 	uint_t		hash;
1680 	rsmresource_t		*p;
1681 	rsm_import_share_t	*shdatap;
1682 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1683 
1684 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1685 
1686 	hash = rsmhash(key);
1687 	/* lock table */
1688 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1689 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1690 	    key, hash));
1691 
1692 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1693 
1694 	for (; p; p = p->rsmrc_next) {
1695 		/*
1696 		 * Look for an entry that is importing the same exporter
1697 		 * with the share data structure allocated.
1698 		 */
1699 		if ((p->rsmrc_key == key) &&
1700 		    (p->rsmrc_node == node) &&
1701 		    (p->rsmrc_adapter == adapter) &&
1702 		    (((rsmseg_t *)p)->s_share != NULL)) {
1703 			shdatap = ((rsmseg_t *)p)->s_share;
1704 			break;
1705 		}
1706 	}
1707 
1708 	if (p == NULL) {
1709 		/* we are the first importer, create the shared data struct */
1710 		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1711 		shdatap->rsmsi_state = RSMSI_STATE_NEW;
1712 		shdatap->rsmsi_segid = key;
1713 		shdatap->rsmsi_node = node;
1714 		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1715 		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1716 	}
1717 
1718 	rsmseglock_acquire(segp);
1719 
1720 	/* we grab the shared lock before returning from this function */
1721 	mutex_enter(&shdatap->rsmsi_lock);
1722 
1723 	shdatap->rsmsi_refcnt++;
1724 	segp->s_share = shdatap;
1725 
1726 	rsmseglock_release(segp);
1727 
1728 	rw_exit(&rsm_import_segs.rsmhash_rw);
1729 
1730 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1731 
1732 	return (shdatap);
1733 }
1734 
1735 /*
1736  * the shared data structure should be locked before calling
1737  * rsmsharecv_signal().
1738  * Change the state and signal any waiting segments.
1739  */
1740 void
1741 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1742 {
1743 	ASSERT(rsmsharelock_held(seg));
1744 
1745 	if (seg->s_share->rsmsi_state == oldstate) {
1746 		seg->s_share->rsmsi_state = newstate;
1747 		cv_broadcast(&seg->s_share->rsmsi_cv);
1748 	}
1749 }
1750 
1751 /*
1752  * Add to the hash table
1753  */
1754 static void
1755 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1756     void *cookie)
1757 {
1758 
1759 	importing_token_t	*head;
1760 	importing_token_t	*new_token;
1761 	int			index;
1762 
1763 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1764 
1765 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1766 
1767 	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1768 	new_token->importing_node = node;
1769 	new_token->key = key;
1770 	new_token->import_segment_cookie = cookie;
1771 	new_token->importing_adapter_hwaddr = hwaddr;
1772 
1773 	index = rsmhash(key);
1774 
1775 	mutex_enter(&importer_list.lock);
1776 
1777 	head = importer_list.bucket[index];
1778 	importer_list.bucket[index] = new_token;
1779 	new_token->next = head;
1780 	mutex_exit(&importer_list.lock);
1781 
1782 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1783 }
1784 
1785 static void
1786 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1787 {
1788 
1789 	importing_token_t	*prev, *token = NULL;
1790 	int			index;
1791 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1792 
1793 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1794 
1795 	index = rsmhash(key);
1796 
1797 	mutex_enter(&importer_list.lock);
1798 
1799 	token = importer_list.bucket[index];
1800 
1801 	prev = token;
1802 	while (token != NULL) {
1803 		if (token->importing_node == node &&
1804 		    token->import_segment_cookie == cookie) {
1805 			if (prev == token)
1806 				importer_list.bucket[index] = token->next;
1807 			else
1808 				prev->next = token->next;
1809 			kmem_free((void *)token, sizeof (*token));
1810 			break;
1811 		} else {
1812 			prev = token;
1813 			token = token->next;
1814 		}
1815 	}
1816 
1817 	mutex_exit(&importer_list.lock);
1818 
1819 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1820 
1821 
1822 }
1823 
1824 /* **************************Segment Structure Management ************* */
1825 
1826 /*
1827  * Free segment structure
1828  */
1829 static void
1830 rsmseg_free(rsmseg_t *seg)
1831 {
1832 
1833 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1834 
1835 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1836 
1837 	/* need to take seglock here to avoid race with rsmmap_unmap() */
1838 	rsmseglock_acquire(seg);
1839 	if (seg->s_ckl != NULL) {
1840 		/* Segment is still busy */
1841 		seg->s_state = RSM_STATE_END;
1842 		rsmseglock_release(seg);
1843 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1844 		    "rsmseg_free done\n"));
1845 		return;
1846 	}
1847 
1848 	rsmseglock_release(seg);
1849 
1850 	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1851 
1852 	/*
1853 	 * If it's an importer decrement the refcount
1854 	 * and if its down to zero free the shared data structure.
1855 	 * This is where failures during rsm_connect() are unrefcounted
1856 	 */
1857 	if (seg->s_share != NULL) {
1858 
1859 		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1860 
1861 		rsmsharelock_acquire(seg);
1862 
1863 		ASSERT(seg->s_share->rsmsi_refcnt > 0);
1864 
1865 		seg->s_share->rsmsi_refcnt--;
1866 
1867 		if (seg->s_share->rsmsi_refcnt == 0) {
1868 			rsmsharelock_release(seg);
1869 			mutex_destroy(&seg->s_share->rsmsi_lock);
1870 			cv_destroy(&seg->s_share->rsmsi_cv);
1871 			kmem_free((void *)(seg->s_share),
1872 			    sizeof (rsm_import_share_t));
1873 		} else {
1874 			rsmsharelock_release(seg);
1875 		}
1876 		/*
1877 		 * The following needs to be done after any
1878 		 * rsmsharelock calls which use seg->s_share.
1879 		 */
1880 		seg->s_share = NULL;
1881 	}
1882 
1883 	cv_destroy(&seg->s_cv);
1884 	mutex_destroy(&seg->s_lock);
1885 	rsmacl_free(seg->s_acl, seg->s_acl_len);
1886 	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1887 	if (seg->s_adapter)
1888 		rsmka_release_adapter(seg->s_adapter);
1889 
1890 	kmem_free((void *)seg, sizeof (*seg));
1891 
1892 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1893 
1894 }
1895 
1896 
1897 static rsmseg_t *
1898 rsmseg_alloc(minor_t num, struct cred *cred)
1899 {
1900 	rsmseg_t	*new;
1901 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1902 
1903 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1904 	/*
1905 	 * allocate memory for new segment. This should be a segkmem cache.
1906 	 */
1907 	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1908 
1909 	new->s_state = RSM_STATE_NEW;
1910 	new->s_minor	= num;
1911 	new->s_acl_len	= 0;
1912 	new->s_cookie = NULL;
1913 	new->s_adapter = NULL;
1914 
1915 	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1916 	/* we don't have a key yet, will set at export/connect */
1917 	new->s_uid  = crgetuid(cred);
1918 	new->s_gid  = crgetgid(cred);
1919 
1920 	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1921 	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1922 
1923 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1924 
1925 	return (new);
1926 }
1927 
1928 /* ******************************** Driver Open/Close/Poll *************** */
1929 
1930 /*ARGSUSED1*/
1931 static int
1932 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1933 {
1934 	minor_t rnum;
1935 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1936 
1937 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1938 	/*
1939 	 * Char only
1940 	 */
1941 	if (otyp != OTYP_CHR) {
1942 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1943 		return (EINVAL);
1944 	}
1945 
1946 	/*
1947 	 * Only zero can be opened, clones are used for resources.
1948 	 */
1949 	if (getminor(*devp) != RSM_DRIVER_MINOR) {
1950 		DBG_PRINTF((category, RSM_ERR,
1951 		    "rsm_open: bad minor %d\n", getminor(*devp)));
1952 		return (ENODEV);
1953 	}
1954 
1955 	if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1956 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1957 		return (EPERM);
1958 	}
1959 
1960 	if (!(flag & FWRITE)) {
1961 		/*
1962 		 * The library function _rsm_librsm_init calls open for
1963 		 * /dev/rsm with flag set to O_RDONLY.  We want a valid
1964 		 * file descriptor to be returned for minor device zero.
1965 		 */
1966 
1967 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1968 		    "rsm_open RDONLY done\n"));
1969 		return (DDI_SUCCESS);
1970 	}
1971 
1972 	/*
1973 	 * - allocate new minor number and segment.
1974 	 * - add segment to list of all segments.
1975 	 * - set minordev data to segment
1976 	 * - update devp argument to new device
1977 	 * - update s_cred to cred; make sure you do crhold(cred);
1978 	 */
1979 
1980 	/* allocate a new resource number */
1981 	if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1982 		/*
1983 		 * We will bind this minor to a specific resource in first
1984 		 * ioctl
1985 		 */
1986 		*devp = makedevice(getmajor(*devp), rnum);
1987 	} else {
1988 		return (EAGAIN);
1989 	}
1990 
1991 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1992 	return (DDI_SUCCESS);
1993 }
1994 
/*
 * rsmseg_close
 *
 * Description: tear a segment down according to its current state:
 *		export-side states are unpublished and/or unbound,
 *		import-side states are disconnected. After the teardown the
 *		segment is asserted to be in RSM_STATE_NEW and is then
 *		either freed or parked in RSM_STATE_ZOMBIE.
 *
 * Parameters:	rsmseg_t *seg;	segment to close
 *		int force_flag;	0 when called from rsm_close: the segment
 *				is released with rsmseg_free.
 *				non-zero for the forced destroy path: the
 *				segment is kept and left in RSM_STATE_ZOMBIE
 *				so that a later rsm_close can free it.
 */
static void
rsmseg_close(rsmseg_t *seg, int force_flag)
{
	int e = RSM_SUCCESS;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));

	rsmseglock_acquire(seg);
	if (!force_flag && (seg->s_hdr.rsmrc_type ==
	    RSM_RESOURCE_EXPORT_SEGMENT)) {
		/*
		 * If we are processing rsm_close, wait for force_destroy
		 * processing to complete, since it must finish before the
		 * segment can be freed. force_destroy applies only to
		 * export segments.
		 */
		while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
			cv_wait(&seg->s_cv, &seg->s_lock);
		}
	}
	rsmseglock_release(seg);

	/* It's ok to read the state without a lock */
	switch (seg->s_state) {
	case RSM_STATE_EXPORT:
	case RSM_STATE_EXPORT_QUIESCING:
	case RSM_STATE_EXPORT_QUIESCED:
		/* the result is not examined; e is overwritten by rsm_unbind */
		e = rsm_unpublish(seg, 1);
		/* FALLTHRU */
	case RSM_STATE_BIND_QUIESCED:
		/* FALLTHRU */
	case RSM_STATE_BIND:
		e = rsm_unbind(seg);
		/*
		 * A forced close gives up here if the unbind failed; the
		 * state is not changed to ZOMBIE in that case.
		 */
		if (e != RSM_SUCCESS && force_flag == 1)
			return;
		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
		/* FALLTHRU */
	case RSM_STATE_NEW_QUIESCED:
		/* force the state to NEW and wake any thread waiting on s_cv */
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_NEW;
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		break;
	case RSM_STATE_NEW:
		break;
	case RSM_STATE_ZOMBIE:
		/*
		 * Segments in this state have already been removed from
		 * the exported segments list, unpublished and unbound.
		 * That was done by the rsm_export_force_destroy callback,
		 * whose purpose is to unlock exported memory when a
		 * process exits while leaving segments locked down
		 * because rsm_close was not called for them. This can
		 * happen when a process calls fork or exec and then exits.
		 * Once a segment is in the ZOMBIE state, all that remains
		 * is to destroy it when rsm_close is finally called, which
		 * is now. The state is therefore changed to NEW so that
		 * rsmseg_free is called later in this function.
		 */
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_NEW;
		rsmseglock_release(seg);
		break;
	case RSM_STATE_MAP_QUIESCE:
	case RSM_STATE_ACTIVE:
		/* Disconnect will handle the unmap */
	case RSM_STATE_CONN_QUIESCE:
	case RSM_STATE_CONNECT:
	case RSM_STATE_DISCONNECT:
		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
		(void) rsm_disconnect(seg);
		break;
	case RSM_STATE_MAPPING:
		/*FALLTHRU*/
	case RSM_STATE_END:
		/* these states are not expected at close time; only logged */
		DBG_PRINTF((category, RSM_ERR,
		    "Invalid segment state %d in rsm_close\n", seg->s_state));
		break;
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "Invalid segment state %d in rsm_close\n", seg->s_state));
		break;
	}

	/*
	 * check state.
	 * - make sure you do crfree(s_cred);
	 * release segment and minor number
	 */
	ASSERT(seg->s_state == RSM_STATE_NEW);

	/*
	 * The export_force_destroy callback exists to unlock the exported
	 * segments of a process that did a fork or exec and then exited.
	 * It calls this function with the force flag set to 1, which means
	 * the segment must be converted to the ZOMBIE state rather than
	 * freed. In that state the segment still exists but has been
	 * unlocked, and the only operation allowed on it is destruction
	 * through rsm_close.
	 */
	if (force_flag) {
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_ZOMBIE;
		rsmseglock_release(seg);
	} else {
		rsmseg_free(seg);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
}
2110 
2111 static int
2112 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2113 {
2114 	minor_t	rnum = getminor(dev);
2115 	rsmresource_t *res;
2116 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2117 
2118 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2119 
2120 	flag = flag; cred = cred;
2121 
2122 	if (otyp != OTYP_CHR)
2123 		return (EINVAL);
2124 
2125 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2126 
2127 	/*
2128 	 * At this point we are the last reference to the resource.
2129 	 * Free resource number from resource table.
2130 	 * It's ok to remove number before we free the segment.
2131 	 * We need to lock the resource to protect against remote calls.
2132 	 */
2133 	if (rnum == RSM_DRIVER_MINOR ||
2134 	    (res = rsmresource_free(rnum)) == NULL) {
2135 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2136 		return (DDI_SUCCESS);
2137 	}
2138 
2139 	switch (res->rsmrc_type) {
2140 	case RSM_RESOURCE_EXPORT_SEGMENT:
2141 	case RSM_RESOURCE_IMPORT_SEGMENT:
2142 		rsmseg_close((rsmseg_t *)res, 0);
2143 		break;
2144 	case RSM_RESOURCE_BAR:
2145 		DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2146 		break;
2147 	default:
2148 		break;
2149 	}
2150 
2151 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2152 
2153 	return (DDI_SUCCESS);
2154 }
2155 
2156 /*
2157  * rsm_inc_pgcnt
2158  *
2159  * Description: increment rsm page counter.
2160  *
2161  * Parameters:	pgcnt_t	pnum;	number of pages to be used
2162  *
2163  * Returns:	RSM_SUCCESS	if memory limit not exceeded
2164  *		ENOSPC		if memory limit exceeded. In this case, the
2165  *				page counter remains unchanged.
2166  *
2167  */
2168 static int
2169 rsm_inc_pgcnt(pgcnt_t pnum)
2170 {
2171 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2172 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2173 		return (RSM_SUCCESS);
2174 	}
2175 
2176 	mutex_enter(&rsm_pgcnt_lock);
2177 
2178 	if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2179 		/* ensure that limits have not been exceeded */
2180 		mutex_exit(&rsm_pgcnt_lock);
2181 		return (RSMERR_INSUFFICIENT_MEM);
2182 	}
2183 
2184 	rsm_pgcnt += pnum;
2185 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2186 	    rsm_pgcnt));
2187 	mutex_exit(&rsm_pgcnt_lock);
2188 
2189 	return (RSM_SUCCESS);
2190 }
2191 
2192 /*
2193  * rsm_dec_pgcnt
2194  *
2195  * Description:	decrement rsm page counter.
2196  *
2197  * Parameters:	pgcnt_t	pnum;	number of pages freed
2198  *
2199  */
2200 static void
2201 rsm_dec_pgcnt(pgcnt_t pnum)
2202 {
2203 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2204 
2205 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2206 		return;
2207 	}
2208 
2209 	mutex_enter(&rsm_pgcnt_lock);
2210 	ASSERT(rsm_pgcnt >= pnum);
2211 	rsm_pgcnt -= pnum;
2212 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2213 	    rsm_pgcnt));
2214 	mutex_exit(&rsm_pgcnt_lock);
2215 }
2216 
/*
 * Callback operations vector handed to umem_lockmemory() by
 * rsm_bind_pages(). It names rsm_export_force_destroy as the callback
 * that unlocks a process's exported segments when the process goes away
 * without rsm_close having been called for them.
 */
static struct umem_callback_ops rsm_as_ops = {
	UMEM_CALLBACK_VERSION, /* version number */
	rsm_export_force_destroy, /* forced-destroy callback */
};
2221 
2222 static int
2223 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2224     proc_t *procp)
2225 {
2226 	int error = RSM_SUCCESS;
2227 	ulong_t pnum;
2228 	struct umem_callback_ops *callbackops = &rsm_as_ops;
2229 
2230 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2231 
2232 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2233 
2234 	/*
2235 	 * Make sure vaddr and len are aligned on a page boundary
2236 	 */
2237 	if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2238 		return (RSMERR_BAD_ADDR);
2239 	}
2240 
2241 	if (len & (PAGESIZE - 1)) {
2242 		return (RSMERR_BAD_LENGTH);
2243 	}
2244 
2245 	/*
2246 	 * Find number of pages
2247 	 */
2248 	pnum = btopr(len);
2249 	error = rsm_inc_pgcnt(pnum);
2250 	if (error != RSM_SUCCESS) {
2251 		DBG_PRINTF((category, RSM_ERR,
2252 		    "rsm_bind_pages:mem limit exceeded\n"));
2253 		return (RSMERR_INSUFFICIENT_MEM);
2254 	}
2255 
2256 	error = umem_lockmemory(vaddr, len,
2257 	    DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2258 	    cookie,
2259 	    callbackops, procp);
2260 
2261 	if (error) {
2262 		rsm_dec_pgcnt(pnum);
2263 		DBG_PRINTF((category, RSM_ERR,
2264 		    "rsm_bind_pages:ddi_umem_lock failed\n"));
2265 		/*
2266 		 * ddi_umem_lock, in the case of failure, returns one of
2267 		 * the following three errors. These are translated into
2268 		 * the RSMERR namespace and returned.
2269 		 */
2270 		if (error == EFAULT)
2271 			return (RSMERR_BAD_ADDR);
2272 		else if (error == EACCES)
2273 			return (RSMERR_PERM_DENIED);
2274 		else
2275 			return (RSMERR_INSUFFICIENT_MEM);
2276 	}
2277 
2278 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2279 
2280 	return (error);
2281 
2282 }
2283 
2284 static int
2285 rsm_unbind_pages(rsmseg_t *seg)
2286 {
2287 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2288 
2289 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2290 
2291 	ASSERT(rsmseglock_held(seg));
2292 
2293 	if (seg->s_cookie != NULL) {
2294 		/* unlock address range */
2295 		ddi_umem_unlock(seg->s_cookie);
2296 		rsm_dec_pgcnt(btopr(seg->s_len));
2297 		seg->s_cookie = NULL;
2298 	}
2299 
2300 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2301 
2302 	return (RSM_SUCCESS);
2303 }
2304 
2305 
2306 static int
2307 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2308 {
2309 	int e;
2310 	adapter_t *adapter;
2311 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2312 
2313 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2314 
2315 	adapter = rsm_getadapter(msg, mode);
2316 	if (adapter == NULL) {
2317 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2318 		    "rsm_bind done:no adapter\n"));
2319 		return (RSMERR_CTLR_NOT_PRESENT);
2320 	}
2321 
2322 	/* lock address range */
2323 	if (msg->vaddr == NULL) {
2324 		rsmka_release_adapter(adapter);
2325 		DBG_PRINTF((category, RSM_ERR,
2326 		    "rsm: rsm_bind done: invalid vaddr\n"));
2327 		return (RSMERR_BAD_ADDR);
2328 	}
2329 	if (msg->len <= 0) {
2330 		rsmka_release_adapter(adapter);
2331 		DBG_PRINTF((category, RSM_ERR,
2332 		    "rsm_bind: invalid length\n"));
2333 		return (RSMERR_BAD_LENGTH);
2334 	}
2335 
2336 	/* Lock segment */
2337 	rsmseglock_acquire(seg);
2338 
2339 	while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2340 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2341 			DBG_PRINTF((category, RSM_DEBUG,
2342 			    "rsm_bind done: cv_wait INTERRUPTED"));
2343 			rsmka_release_adapter(adapter);
2344 			rsmseglock_release(seg);
2345 			return (RSMERR_INTERRUPTED);
2346 		}
2347 	}
2348 
2349 	ASSERT(seg->s_state == RSM_STATE_NEW);
2350 
2351 	ASSERT(seg->s_cookie == NULL);
2352 
2353 	e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2354 	if (e == RSM_SUCCESS) {
2355 		seg->s_flags |= RSM_USER_MEMORY;
2356 		if (msg->perm & RSM_ALLOW_REBIND) {
2357 			seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2358 		}
2359 		if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2360 			seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2361 		}
2362 		seg->s_region.r_vaddr = msg->vaddr;
2363 		/*
2364 		 * Set the s_pid value in the segment structure. This is used
2365 		 * to identify exported segments belonging to a particular
2366 		 * process so that when the process exits, these segments can
2367 		 * be unlocked forcefully even if rsm_close is not called on
2368 		 * process exit since there maybe other processes referencing
2369 		 * them (for example on a fork or exec).
2370 		 * The s_pid value is also used to authenticate the process
2371 		 * doing a publish or unpublish on the export segment. Only
2372 		 * the creator of the export segment has a right to do a
2373 		 * publish or unpublish and unbind on the segment.
2374 		 */
2375 		seg->s_pid = ddi_get_pid();
2376 		seg->s_len = msg->len;
2377 		seg->s_state = RSM_STATE_BIND;
2378 		seg->s_adapter = adapter;
2379 		seg->s_proc = curproc;
2380 	} else {
2381 		rsmka_release_adapter(adapter);
2382 		DBG_PRINTF((category, RSM_WARNING,
2383 		    "unable to lock down pages\n"));
2384 	}
2385 
2386 	msg->rnum = seg->s_minor;
2387 	/* Unlock segment */
2388 	rsmseglock_release(seg);
2389 
2390 	if (e == RSM_SUCCESS) {
2391 		/* copyout the resource number */
2392 #ifdef _MULTI_DATAMODEL
2393 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2394 			rsm_ioctlmsg32_t msg32;
2395 
2396 			msg32.rnum = msg->rnum;
2397 			if (ddi_copyout((caddr_t)&msg32.rnum,
2398 			    (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2399 			    sizeof (minor_t), mode)) {
2400 				rsmka_release_adapter(adapter);
2401 				e = RSMERR_BAD_ADDR;
2402 			}
2403 		}
2404 #endif
2405 		if (ddi_copyout((caddr_t)&msg->rnum,
2406 		    (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2407 		    sizeof (minor_t), mode)) {
2408 			rsmka_release_adapter(adapter);
2409 			e = RSMERR_BAD_ADDR;
2410 		}
2411 	}
2412 
2413 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2414 
2415 	return (e);
2416 }
2417 
2418 static void
2419 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2420     rsm_memseg_id_t ex_segid,
2421     ddi_umem_cookie_t cookie)
2422 
2423 {
2424 	rsmresource_t	*p = NULL;
2425 	rsmhash_table_t *rhash = &rsm_import_segs;
2426 	uint_t		index;
2427 
2428 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2429 	    "rsm_remap_local_importers enter\n"));
2430 
2431 	index = rsmhash(ex_segid);
2432 
2433 	rw_enter(&rhash->rsmhash_rw, RW_READER);
2434 
2435 	p = rsmhash_getbkt(rhash, index);
2436 
2437 	for (; p; p = p->rsmrc_next) {
2438 		rsmseg_t *seg = (rsmseg_t *)p;
2439 		rsmseglock_acquire(seg);
2440 		/*
2441 		 * Change the s_cookie value of only the local importers
2442 		 * which have been mapped (in state RSM_STATE_ACTIVE).
2443 		 * Note that there is no need to change the s_cookie value
2444 		 * if the imported segment is in RSM_STATE_MAPPING since
2445 		 * eventually the s_cookie will be updated via the mapping
2446 		 * functionality.
2447 		 */
2448 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2449 		    (seg->s_state == RSM_STATE_ACTIVE)) {
2450 			seg->s_cookie = cookie;
2451 		}
2452 		rsmseglock_release(seg);
2453 	}
2454 	rw_exit(&rhash->rsmhash_rw);
2455 
2456 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2457 	    "rsm_remap_local_importers done\n"));
2458 }
2459 
/*
 * rsm_rebind
 *
 * Description: replace the memory backing a bound or exported segment
 *		with a different user address range of the same length.
 *		The new range is locked down, handed to the adapter's
 *		RSMPI rsm_rebind operation and, on success, swapped in for
 *		the old range; local importers are then pointed at the new
 *		memory.
 *
 * Parameters:	rsmseg_t *seg;		segment to rebind
 *		rsm_ioctlmsg_t *msg;	ioctl message; vaddr and len give
 *					the new range
 *
 * Returns:	RSM_SUCCESS on success, including the case where vaddr is
 *					already the segment's address
 *		RSMERR_REBIND_NOT_ALLOWED segment was not created with
 *					rebind permission
 *		RSMERR_NOT_CREATOR	caller is not the segment creator
 *		RSMERR_BAD_ADDR		NULL vaddr
 *		RSMERR_BAD_LENGTH	len differs from the segment length
 *		RSMERR_INTERRUPTED	signal received while waiting for a
 *					quiesced segment
 *		RSMERR_BAD_SEG_HNDL	segment is neither bound nor exported
 *		any error from rsm_bind_pages or the RSMPI rsm_rebind call
 */
static int
rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
{
	int e;
	adapter_t *adapter;
	ddi_umem_cookie_t cookie;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));

	/* Check for permissions to rebind */
	if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
		return (RSMERR_REBIND_NOT_ALLOWED);
	}

	/*
	 * Only the process recorded in s_pid may rebind; a caller whose
	 * pid is 0 is exempt from the ownership check.
	 */
	if (seg->s_pid != ddi_get_pid() &&
	    ddi_get_pid() != 0) {
		DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
		return (RSMERR_NOT_CREATOR);
	}

	/*
	 * We will not be allowing partial rebind and hence length passed
	 * in must be same as segment length
	 */
	if (msg->vaddr == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_rebind done: null msg->vaddr\n"));
		return (RSMERR_BAD_ADDR);
	}
	if (msg->len != seg->s_len) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_rebind: invalid length\n"));
		return (RSMERR_BAD_LENGTH);
	}

	/* Lock segment */
	rsmseglock_acquire(seg);

	/* wait, interruptibly, for the segment to leave any quiesce state */
	while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_rebind done: cv_wait INTERRUPTED"));
			return (RSMERR_INTERRUPTED);
		}
	}

	/* verify segment state */
	if ((seg->s_state != RSM_STATE_BIND) &&
	    (seg->s_state != RSM_STATE_EXPORT)) {
		/* Unlock segment */
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_rebind done: invalid state\n"));
		return (RSMERR_BAD_SEG_HNDL);
	}

	ASSERT(seg->s_cookie != NULL);

	/* rebinding to the address already in use is a no-op */
	if (msg->vaddr == seg->s_region.r_vaddr) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
		return (RSM_SUCCESS);
	}

	/* lock the new range first; the old one stays bound until success */
	e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
	if (e == RSM_SUCCESS) {
		struct buf *xbuf;
		dev_t sdev = 0;
		rsm_memory_local_t mem;

		/* describe the new memory to RSMPI through a buf */
		xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
		    sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = seg->s_adapter;
		e = adapter->rsmpi_ops->rsm_rebind(
		    seg->s_handle.out, 0, &mem,
		    RSM_RESOURCE_DONTWAIT, NULL);

		if (e == RSM_SUCCESS) {
			/*
			 * unbind the older pages, and unload local importers;
			 * but don't disconnect importers
			 */
			(void) rsm_unbind_pages(seg);
			seg->s_cookie = cookie;
			seg->s_region.r_vaddr = msg->vaddr;
			rsm_remap_local_importers(my_nodeid, seg->s_segid,
			    cookie);
		} else {
			/*
			 * Release the pages that the rsm_bind_pages call
			 * above locked under "cookie". This mirrors what
			 * rsm_unbind_pages does for seg->s_cookie; the
			 * segment keeps its original memory.
			 */
			ddi_umem_unlock(cookie);
			rsm_dec_pgcnt(btopr(msg->len));
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_rebind failed with %d\n", e));
		}
		/*
		 * At present there is no dependency on the existence of xbuf.
		 * So we can free it here. If in the future this changes, it can
		 * be freed sometime during the segment destroy.
		 */
		freerbuf(xbuf);
	}

	/* Unlock segment */
	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));

	return (e);
}
2583 
2584 static int
2585 rsm_unbind(rsmseg_t *seg)
2586 {
2587 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2588 
2589 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2590 
2591 	rsmseglock_acquire(seg);
2592 
2593 	/* verify segment state */
2594 	if ((seg->s_state != RSM_STATE_BIND) &&
2595 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2596 		rsmseglock_release(seg);
2597 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2598 		    "rsm_unbind: invalid state\n"));
2599 		return (RSMERR_BAD_SEG_HNDL);
2600 	}
2601 
2602 	/* unlock current range */
2603 	(void) rsm_unbind_pages(seg);
2604 
2605 	if (seg->s_state == RSM_STATE_BIND) {
2606 		seg->s_state = RSM_STATE_NEW;
2607 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2608 		seg->s_state = RSM_STATE_NEW_QUIESCED;
2609 	}
2610 
2611 	rsmseglock_release(seg);
2612 
2613 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2614 
2615 	return (RSM_SUCCESS);
2616 }
2617 
2618 /* **************************** Exporter Access List Management ******* */
2619 static void
2620 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2621 {
2622 	int	acl_sz;
2623 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2624 
2625 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2626 
2627 	/* acl could be NULL */
2628 
2629 	if (acl != NULL && acl_len > 0) {
2630 		acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2631 		kmem_free((void *)acl, acl_sz);
2632 	}
2633 
2634 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2635 }
2636 
2637 static void
2638 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2639 {
2640 	int	acl_sz;
2641 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2642 
2643 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2644 
2645 	if (acl != NULL && acl_len > 0) {
2646 		acl_sz = acl_len * sizeof (rsm_access_entry_t);
2647 		kmem_free((void *)acl, acl_sz);
2648 	}
2649 
2650 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2651 
2652 }
2653 
2654 static int
2655 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2656     rsmapi_access_entry_t **list, int *len, int loopback)
2657 {
2658 	rsmapi_access_entry_t *acl;
2659 	int	acl_len;
2660 	int i;
2661 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2662 
2663 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2664 
2665 	*len = 0;
2666 	*list = NULL;
2667 
2668 	acl_len = msg->acl_len;
2669 	if ((loopback && acl_len > 1) || (acl_len < 0) ||
2670 	    (acl_len > MAX_NODES)) {
2671 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2672 		    "rsmacl_build done: acl invalid\n"));
2673 		return (RSMERR_BAD_ACL);
2674 	}
2675 
2676 	if (acl_len > 0 && acl_len <= MAX_NODES) {
2677 		size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2678 
2679 		acl = kmem_alloc(acl_size, KM_SLEEP);
2680 
2681 		if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2682 		    acl_size, mode)) {
2683 			kmem_free((void *) acl, acl_size);
2684 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2685 			    "rsmacl_build done: BAD_ADDR\n"));
2686 			return (RSMERR_BAD_ADDR);
2687 		}
2688 
2689 		/*
2690 		 * Verify access list
2691 		 */
2692 		for (i = 0; i < acl_len; i++) {
2693 			if (acl[i].ae_node > MAX_NODES ||
2694 			    (loopback && (acl[i].ae_node != my_nodeid)) ||
2695 			    acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2696 				/* invalid entry */
2697 				kmem_free((void *) acl, acl_size);
2698 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2699 				    "rsmacl_build done: EINVAL\n"));
2700 				return (RSMERR_BAD_ACL);
2701 			}
2702 		}
2703 
2704 		*len = acl_len;
2705 		*list = acl;
2706 	}
2707 
2708 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2709 
2710 	return (DDI_SUCCESS);
2711 }
2712 
2713 static int
2714 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2715     int acl_len, adapter_t *adapter)
2716 {
2717 	rsm_access_entry_t *acl;
2718 	rsm_addr_t hwaddr;
2719 	int i;
2720 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2721 
2722 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2723 
2724 	if (src != NULL) {
2725 		size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2726 		acl = kmem_alloc(acl_size, KM_SLEEP);
2727 
2728 		/*
2729 		 * translate access list
2730 		 */
2731 		for (i = 0; i < acl_len; i++) {
2732 			if (src[i].ae_node == my_nodeid) {
2733 				acl[i].ae_addr = adapter->hwaddr;
2734 			} else {
2735 				hwaddr = get_remote_hwaddr(adapter,
2736 				    src[i].ae_node);
2737 				if ((int64_t)hwaddr < 0) {
2738 					/* invalid hwaddr */
2739 					kmem_free((void *) acl, acl_size);
2740 					DBG_PRINTF((category,
2741 					    RSM_DEBUG_VERBOSE,
2742 					    "rsmpiacl_create done:"
2743 					    "EINVAL hwaddr\n"));
2744 					return (RSMERR_INTERNAL_ERROR);
2745 				}
2746 				acl[i].ae_addr = hwaddr;
2747 			}
2748 			/* rsmpi understands only RSM_PERM_XXXX */
2749 			acl[i].ae_permission =
2750 			    src[i].ae_permission & RSM_PERM_RDWR;
2751 		}
2752 		*dest = acl;
2753 	} else {
2754 		*dest = NULL;
2755 	}
2756 
2757 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2758 
2759 	return (RSM_SUCCESS);
2760 }
2761 
2762 static int
2763 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2764     rsmipc_reply_t *reply)
2765 {
2766 
2767 	int		i;
2768 	rsmseg_t	*seg;
2769 	rsm_memseg_id_t key = req->rsmipc_key;
2770 	rsm_permission_t perm = req->rsmipc_perm;
2771 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2772 
2773 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2774 	    "rsmsegacl_validate enter\n"));
2775 
2776 	/*
2777 	 * Find segment and grab its lock. The reason why we grab the segment
2778 	 * lock in side the search is to avoid the race when the segment is
2779 	 * being deleted and we already have a pointer to it.
2780 	 */
2781 	seg = rsmexport_lookup(key);
2782 	if (!seg) {
2783 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2784 		    "rsmsegacl_validate done: %u ENXIO\n", key));
2785 		return (RSMERR_SEG_NOT_PUBLISHED);
2786 	}
2787 
2788 	ASSERT(rsmseglock_held(seg));
2789 	ASSERT(seg->s_state == RSM_STATE_EXPORT);
2790 
2791 	/*
2792 	 * We implement a 2-level protection scheme.
2793 	 * First, we check if local/remote host has access rights.
2794 	 * Second, we check if the user has access rights.
2795 	 *
2796 	 * This routine only validates the rnode access_list
2797 	 */
2798 	if (seg->s_acl_len > 0) {
2799 		/*
2800 		 * Check host access list
2801 		 */
2802 		ASSERT(seg->s_acl != NULL);
2803 		for (i = 0; i < seg->s_acl_len; i++) {
2804 			if (seg->s_acl[i].ae_node == rnode) {
2805 				perm &= seg->s_acl[i].ae_permission;
2806 				goto found;
2807 			}
2808 		}
2809 		/* rnode is not found in the list */
2810 		rsmseglock_release(seg);
2811 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2812 		    "rsmsegacl_validate done: EPERM\n"));
2813 		return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2814 	} else {
2815 		/* use default owner creation umask */
2816 		perm &= seg->s_mode;
2817 	}
2818 
2819 found:
2820 	/* update perm for this node */
2821 	reply->rsmipc_mode = perm;
2822 	reply->rsmipc_uid = seg->s_uid;
2823 	reply->rsmipc_gid = seg->s_gid;
2824 	reply->rsmipc_segid = seg->s_segid;
2825 	reply->rsmipc_seglen = seg->s_len;
2826 
2827 	/*
2828 	 * Perm of requesting node is valid; source will validate user
2829 	 */
2830 	rsmseglock_release(seg);
2831 
2832 	/*
2833 	 * Add the importer to the list right away, if connect fails
2834 	 * the importer will ask the exporter to remove it.
2835 	 */
2836 	importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2837 	    req->rsmipc_segment_cookie);
2838 
2839 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2840 
2841 	return (RSM_SUCCESS);
2842 }
2843 
2844 
2845 /* ************************** Exporter Calls ************************* */
2846 
2847 static int
2848 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2849 {
2850 	int			e;
2851 	int			acl_len;
2852 	rsmapi_access_entry_t	*acl;
2853 	rsm_access_entry_t	*rsmpi_acl;
2854 	rsm_memory_local_t	mem;
2855 	struct buf		*xbuf;
2856 	dev_t 			sdev = 0;
2857 	adapter_t		*adapter;
2858 	rsm_memseg_id_t		segment_id = 0;
2859 	int			loopback_flag = 0;
2860 	int			create_flags = 0;
2861 	rsm_resource_callback_t	callback_flag;
2862 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2863 
2864 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2865 
2866 	if (seg->s_adapter == &loopback_adapter)
2867 		loopback_flag = 1;
2868 
2869 	if (seg->s_pid != ddi_get_pid() &&
2870 	    ddi_get_pid() != 0) {
2871 		DBG_PRINTF((category, RSM_ERR,
2872 		    "rsm_publish: Not creator\n"));
2873 		return (RSMERR_NOT_CREATOR);
2874 	}
2875 
2876 	/*
2877 	 * Get per node access list
2878 	 */
2879 	e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2880 	if (e != DDI_SUCCESS) {
2881 		DBG_PRINTF((category, RSM_ERR,
2882 		    "rsm_publish done: rsmacl_build failed\n"));
2883 		return (e);
2884 	}
2885 
2886 	/*
2887 	 * The application provided msg->key is used for resolving a
2888 	 * segment id according to the following:
2889 	 *    key = 0   		Kernel Agent selects the segment id
2890 	 *    key <= RSM_DLPI_ID_END	Reserved for system usage except
2891 	 *				RSMLIB range
2892 	 *    key < RSM_USER_APP_ID_BASE segment id = key
2893 	 *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2894 	 *
2895 	 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2896 	 * overflows to zero after 0x80000000 allocations.
2897 	 * An algorithm is needed which allows reinitialization and provides
2898 	 * for reallocation after overflow.  For now, ENOMEM is returned
2899 	 * once the overflow condition has occurred.
2900 	 */
2901 	if (msg->key == 0) {
2902 		mutex_enter(&rsm_lock);
2903 		segment_id = rsm_nextavail_segmentid;
2904 		if (segment_id != 0) {
2905 			rsm_nextavail_segmentid++;
2906 			mutex_exit(&rsm_lock);
2907 		} else {
2908 			mutex_exit(&rsm_lock);
2909 			DBG_PRINTF((category, RSM_ERR,
2910 			    "rsm_publish done: no more keys avlbl\n"));
2911 			return (RSMERR_INSUFFICIENT_RESOURCES);
2912 		}
2913 	} else	if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2914 		/* range reserved for internal use by base/ndi libraries */
2915 		segment_id = msg->key;
2916 	else	if (msg->key <= RSM_DLPI_ID_END)
2917 		return (RSMERR_RESERVED_SEGID);
2918 	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2919 		segment_id = msg->key;
2920 	else {
2921 		DBG_PRINTF((category, RSM_ERR,
2922 		    "rsm_publish done: invalid key %u\n", msg->key));
2923 		return (RSMERR_RESERVED_SEGID);
2924 	}
2925 
2926 	/* Add key to exportlist; The segment lock is held on success */
2927 	e = rsmexport_add(seg, segment_id);
2928 	if (e) {
2929 		rsmacl_free(acl, acl_len);
2930 		DBG_PRINTF((category, RSM_ERR,
2931 		    "rsm_publish done: export_add failed: %d\n", e));
2932 		return (e);
2933 	}
2934 
2935 	seg->s_segid = segment_id;
2936 
2937 	if ((seg->s_state != RSM_STATE_BIND) &&
2938 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2939 		/* state changed since then, free acl and return */
2940 		rsmseglock_release(seg);
2941 		rsmexport_rm(seg);
2942 		rsmacl_free(acl, acl_len);
2943 		DBG_PRINTF((category, RSM_ERR,
2944 		    "rsm_publish done: segment in wrong state: %d\n",
2945 		    seg->s_state));
2946 		return (RSMERR_BAD_SEG_HNDL);
2947 	}
2948 
2949 	/*
2950 	 * If this is for a local memory handle and permissions are zero,
2951 	 * then the surrogate segment is very large and we want to skip
2952 	 * allocation of DVMA space.
2953 	 *
2954 	 * Careful!  If the user didn't use an ACL list, acl will be a NULL
2955 	 * pointer.  Check that before dereferencing it.
2956 	 */
2957 	if (acl != (rsmapi_access_entry_t *)NULL) {
2958 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2959 			goto skipdriver;
2960 	}
2961 
2962 	/* create segment  */
2963 	xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2964 	    sdev, 0, NULL, DDI_UMEM_SLEEP);
2965 	ASSERT(xbuf != NULL);
2966 
2967 	mem.ms_type = RSM_MEM_BUF;
2968 	mem.ms_bp = xbuf;
2969 
2970 	/* This call includes a bind operations */
2971 
2972 	adapter = seg->s_adapter;
2973 	/*
2974 	 * create a acl list with hwaddr for RSMPI publish
2975 	 */
2976 	e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2977 
2978 	if (e != RSM_SUCCESS) {
2979 		rsmseglock_release(seg);
2980 		rsmexport_rm(seg);
2981 		rsmacl_free(acl, acl_len);
2982 		freerbuf(xbuf);
2983 		DBG_PRINTF((category, RSM_ERR,
2984 		    "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2985 		return (e);
2986 	}
2987 
2988 	if (seg->s_state == RSM_STATE_BIND) {
2989 		/* create segment  */
2990 
2991 		/* This call includes a bind operations */
2992 
2993 		if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2994 			create_flags = RSM_ALLOW_UNBIND_REBIND;
2995 		}
2996 
2997 		if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2998 			callback_flag  = RSM_RESOURCE_DONTWAIT;
2999 		} else {
3000 			callback_flag  = RSM_RESOURCE_SLEEP;
3001 		}
3002 
3003 		e = adapter->rsmpi_ops->rsm_seg_create(
3004 		    adapter->rsmpi_handle,
3005 		    &seg->s_handle.out, seg->s_len,
3006 		    create_flags, &mem,
3007 		    callback_flag, NULL);
3008 		/*
3009 		 * At present there is no dependency on the existence of xbuf.
3010 		 * So we can free it here. If in the future this changes, it can
3011 		 * be freed sometime during the segment destroy.
3012 		 */
3013 		freerbuf(xbuf);
3014 
3015 		if (e != RSM_SUCCESS) {
3016 			rsmseglock_release(seg);
3017 			rsmexport_rm(seg);
3018 			rsmacl_free(acl, acl_len);
3019 			rsmpiacl_free(rsmpi_acl, acl_len);
3020 			DBG_PRINTF((category, RSM_ERR,
3021 			    "rsm_publish done: export_create failed: %d\n", e));
3022 			/*
3023 			 * The following assertion ensures that the two errors
3024 			 * related to the length and its alignment do not occur
3025 			 * since they have been checked during export_create
3026 			 */
3027 			ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3028 			    e != RSMERR_BAD_LENGTH);
3029 			if (e == RSMERR_NOT_MEM)
3030 				e = RSMERR_INSUFFICIENT_MEM;
3031 
3032 			return (e);
3033 		}
3034 		/* export segment, this should create an IMMU mapping */
3035 		e = adapter->rsmpi_ops->rsm_publish(
3036 		    seg->s_handle.out,
3037 		    rsmpi_acl, acl_len,
3038 		    seg->s_segid,
3039 		    RSM_RESOURCE_DONTWAIT, NULL);
3040 
3041 		if (e != RSM_SUCCESS) {
3042 			adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3043 			rsmseglock_release(seg);
3044 			rsmexport_rm(seg);
3045 			rsmacl_free(acl, acl_len);
3046 			rsmpiacl_free(rsmpi_acl, acl_len);
3047 			DBG_PRINTF((category, RSM_ERR,
3048 			    "rsm_publish done: export_publish failed: %d\n",
3049 			    e));
3050 			return (e);
3051 		}
3052 	}
3053 
3054 	seg->s_acl_in = rsmpi_acl;
3055 
3056 skipdriver:
3057 	/* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3058 	seg->s_acl_len	= acl_len;
3059 	seg->s_acl	= acl;
3060 
3061 	if (seg->s_state == RSM_STATE_BIND) {
3062 		seg->s_state = RSM_STATE_EXPORT;
3063 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3064 		seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3065 		cv_broadcast(&seg->s_cv);
3066 	}
3067 
3068 	rsmseglock_release(seg);
3069 
3070 	/*
3071 	 * If the segment id was solicited, then return it in
3072 	 * the original incoming message.
3073 	 */
3074 	if (msg->key == 0) {
3075 		msg->key = segment_id;
3076 #ifdef _MULTI_DATAMODEL
3077 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3078 			rsm_ioctlmsg32_t msg32;
3079 
3080 			msg32.key = msg->key;
3081 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3082 			    "rsm_publish done\n"));
3083 			return (ddi_copyout((caddr_t)&msg32,
3084 			    (caddr_t)dataptr, sizeof (msg32), mode));
3085 		}
3086 #endif
3087 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3088 		    "rsm_publish done\n"));
3089 		return (ddi_copyout((caddr_t)msg,
3090 		    (caddr_t)dataptr, sizeof (*msg), mode));
3091 	}
3092 
3093 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3094 	return (DDI_SUCCESS);
3095 }
3096 
3097 /*
3098  * This function modifies the access control list of an already published
3099  * segment.  There is no effect on import segments which are already
3100  * connected.
3101  */
3102 static int
3103 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3104 {
3105 	rsmapi_access_entry_t	*new_acl, *old_acl, *tmp_acl;
3106 	rsm_access_entry_t	*rsmpi_new_acl, *rsmpi_old_acl;
3107 	int			new_acl_len, old_acl_len, tmp_acl_len;
3108 	int			e, i;
3109 	adapter_t		*adapter;
3110 	int			loopback_flag = 0;
3111 	rsm_memseg_id_t		key;
3112 	rsm_permission_t	permission;
3113 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3114 
3115 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3116 
3117 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3118 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3119 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3120 		return (RSMERR_SEG_NOT_PUBLISHED);
3121 
3122 	if (seg->s_pid != ddi_get_pid() &&
3123 	    ddi_get_pid() != 0) {
3124 		DBG_PRINTF((category, RSM_ERR,
3125 		    "rsm_republish: Not owner\n"));
3126 		return (RSMERR_NOT_CREATOR);
3127 	}
3128 
3129 	if (seg->s_adapter == &loopback_adapter)
3130 		loopback_flag = 1;
3131 
3132 	/*
3133 	 * Build new list first
3134 	 */
3135 	e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3136 	if (e) {
3137 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3138 		    "rsm_republish done: rsmacl_build failed %d", e));
3139 		return (e);
3140 	}
3141 
3142 	/* Lock segment */
3143 	rsmseglock_acquire(seg);
3144 	/*
3145 	 * a republish is in progress - REPUBLISH message is being
3146 	 * sent to the importers so wait for it to complete OR
3147 	 * wait till DR completes
3148 	 */
3149 	while (((seg->s_state == RSM_STATE_EXPORT) &&
3150 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3151 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3152 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3153 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3154 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3155 			    "rsm_republish done: cv_wait  INTERRUPTED"));
3156 			rsmseglock_release(seg);
3157 			rsmacl_free(new_acl, new_acl_len);
3158 			return (RSMERR_INTERRUPTED);
3159 		}
3160 	}
3161 
3162 	/* recheck if state is valid */
3163 	if (seg->s_state != RSM_STATE_EXPORT) {
3164 		rsmseglock_release(seg);
3165 		rsmacl_free(new_acl, new_acl_len);
3166 		return (RSMERR_SEG_NOT_PUBLISHED);
3167 	}
3168 
3169 	key = seg->s_key;
3170 	old_acl = seg->s_acl;
3171 	old_acl_len = seg->s_acl_len;
3172 
3173 	seg->s_acl = new_acl;
3174 	seg->s_acl_len = new_acl_len;
3175 
3176 	/*
3177 	 * This call will only be meaningful if and when the interconnect
3178 	 * layer makes use of the access list
3179 	 */
3180 	adapter = seg->s_adapter;
3181 	/*
3182 	 * create a acl list with hwaddr for RSMPI publish
3183 	 */
3184 	e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3185 
3186 	if (e != RSM_SUCCESS) {
3187 		seg->s_acl = old_acl;
3188 		seg->s_acl_len = old_acl_len;
3189 		rsmseglock_release(seg);
3190 		rsmacl_free(new_acl, new_acl_len);
3191 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3192 		    "rsm_republish done: rsmpiacl_create failed %d", e));
3193 		return (e);
3194 	}
3195 	rsmpi_old_acl = seg->s_acl_in;
3196 	seg->s_acl_in = rsmpi_new_acl;
3197 
3198 	e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3199 	    seg->s_acl_in, seg->s_acl_len,
3200 	    RSM_RESOURCE_DONTWAIT, NULL);
3201 
3202 	if (e != RSM_SUCCESS) {
3203 		seg->s_acl = old_acl;
3204 		seg->s_acl_in = rsmpi_old_acl;
3205 		seg->s_acl_len = old_acl_len;
3206 		rsmseglock_release(seg);
3207 		rsmacl_free(new_acl, new_acl_len);
3208 		rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3209 
3210 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3211 		    "rsm_republish done: rsmpi republish failed %d\n", e));
3212 		return (e);
3213 	}
3214 
3215 	/* create a tmp copy of the new acl */
3216 	tmp_acl_len = new_acl_len;
3217 	if (tmp_acl_len > 0) {
3218 		tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3219 		for (i = 0; i < tmp_acl_len; i++) {
3220 			tmp_acl[i].ae_node = new_acl[i].ae_node;
3221 			tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3222 		}
3223 		/*
3224 		 * The default permission of a node which was in the old
3225 		 * ACL but not in the new ACL is 0 ie no access.
3226 		 */
3227 		permission = 0;
3228 	} else {
3229 		/*
3230 		 * NULL acl means all importers can connect and
3231 		 * default permission will be owner creation umask
3232 		 */
3233 		tmp_acl = NULL;
3234 		permission = seg->s_mode;
3235 	}
3236 
3237 	/* make other republishers to wait for republish to complete */
3238 	seg->s_flags |= RSM_REPUBLISH_WAIT;
3239 
3240 	rsmseglock_release(seg);
3241 
3242 	/* send the new perms to the importing nodes */
3243 	rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3244 
3245 	rsmseglock_acquire(seg);
3246 	seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3247 	/* wake up any one waiting for republish to complete */
3248 	cv_broadcast(&seg->s_cv);
3249 	rsmseglock_release(seg);
3250 
3251 	rsmacl_free(tmp_acl, tmp_acl_len);
3252 	rsmacl_free(old_acl, old_acl_len);
3253 	rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3254 
3255 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3256 	return (DDI_SUCCESS);
3257 }
3258 
3259 static int
3260 rsm_unpublish(rsmseg_t *seg, int mode)
3261 {
3262 	rsmapi_access_entry_t	*acl;
3263 	rsm_access_entry_t	*rsmpi_acl;
3264 	int			acl_len;
3265 	int			e;
3266 	clock_t			ticks;
3267 	adapter_t *adapter;
3268 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3269 
3270 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3271 
3272 	if (seg->s_pid != ddi_get_pid() &&
3273 	    ddi_get_pid() != 0) {
3274 		DBG_PRINTF((category, RSM_ERR,
3275 		    "rsm_unpublish: Not creator\n"));
3276 		return (RSMERR_NOT_CREATOR);
3277 	}
3278 
3279 	rsmseglock_acquire(seg);
3280 	/*
3281 	 * wait for QUIESCING to complete here before rsmexport_rm
3282 	 * is called because the SUSPEND_COMPLETE mesg which changes
3283 	 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3284 	 * signals the cv_wait needs to find it in the hashtable.
3285 	 */
3286 	while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3287 	    ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3288 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3289 			rsmseglock_release(seg);
3290 			DBG_PRINTF((category, RSM_ERR,
3291 			    "rsm_unpublish done: cv_wait INTR qscing"
3292 			    "getv/putv in progress"));
3293 			return (RSMERR_INTERRUPTED);
3294 		}
3295 	}
3296 
3297 	/* verify segment state */
3298 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3299 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3300 		rsmseglock_release(seg);
3301 		DBG_PRINTF((category, RSM_ERR,
3302 		    "rsm_unpublish done: bad state %x\n", seg->s_state));
3303 		return (RSMERR_SEG_NOT_PUBLISHED);
3304 	}
3305 
3306 	rsmseglock_release(seg);
3307 
3308 	rsmexport_rm(seg);
3309 
3310 	rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3311 
3312 	rsmseglock_acquire(seg);
3313 	/*
3314 	 * wait for republish to complete
3315 	 */
3316 	while ((seg->s_state == RSM_STATE_EXPORT) &&
3317 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3318 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3319 			DBG_PRINTF((category, RSM_ERR,
3320 			    "rsm_unpublish done: cv_wait INTR repubing"));
3321 			rsmseglock_release(seg);
3322 			return (RSMERR_INTERRUPTED);
3323 		}
3324 	}
3325 
3326 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3327 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3328 		DBG_PRINTF((category, RSM_ERR,
3329 		    "rsm_unpublish done: invalid state"));
3330 		rsmseglock_release(seg);
3331 		return (RSMERR_SEG_NOT_PUBLISHED);
3332 	}
3333 
3334 	/*
3335 	 * check for putv/get surrogate segment which was not published
3336 	 * to the driver.
3337 	 *
3338 	 * Be certain to see if there is an ACL first!  If this segment was
3339 	 * not published with an ACL, acl will be a null pointer.  Check
3340 	 * that before dereferencing it.
3341 	 */
3342 	acl = seg->s_acl;
3343 	if (acl != (rsmapi_access_entry_t *)NULL) {
3344 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3345 			goto bypass;
3346 	}
3347 
3348 	/* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3349 	if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3350 		goto bypass;
3351 
3352 	adapter = seg->s_adapter;
3353 	for (;;) {
3354 		if (seg->s_state != RSM_STATE_EXPORT) {
3355 			rsmseglock_release(seg);
3356 			DBG_PRINTF((category, RSM_ERR,
3357 			    "rsm_unpublish done: bad state %x\n",
3358 			    seg->s_state));
3359 			return (RSMERR_SEG_NOT_PUBLISHED);
3360 		}
3361 
3362 		/* unpublish from adapter */
3363 		e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3364 
3365 		if (e == RSM_SUCCESS) {
3366 			break;
3367 		}
3368 
3369 		if (e == RSMERR_SEG_IN_USE && mode == 1) {
3370 			/*
3371 			 * wait for unpublish to succeed, it's busy.
3372 			 */
3373 			seg->s_flags |= RSM_EXPORT_WAIT;
3374 
3375 			/* wait for a max of 1 ms - this is an empirical */
3376 			/* value that was found by some minimal testing  */
3377 			/* can be fine tuned when we have better numbers */
3378 			/* A long term fix would be to send cv_signal	 */
3379 			/* from the intr callback routine		 */
3380 			(void) drv_getparm(LBOLT, &ticks);
3381 			ticks += drv_usectohz(1000);
3382 			/* currently nobody signals this wait		*/
3383 			(void) cv_timedwait(&seg->s_cv, &seg->s_lock, ticks);
3384 
3385 			DBG_PRINTF((category, RSM_ERR,
3386 			    "rsm_unpublish: SEG_IN_USE\n"));
3387 
3388 			seg->s_flags &= ~RSM_EXPORT_WAIT;
3389 		} else {
3390 			if (mode == 1) {
3391 				DBG_PRINTF((category, RSM_ERR,
3392 				    "rsm:rsmpi unpublish err %x\n", e));
3393 				seg->s_state = RSM_STATE_BIND;
3394 			}
3395 			rsmseglock_release(seg);
3396 			return (e);
3397 		}
3398 	}
3399 
3400 	/* Free segment */
3401 	e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3402 
3403 	if (e != RSM_SUCCESS) {
3404 		DBG_PRINTF((category, RSM_ERR,
3405 		    "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3406 		    seg->s_key, e));
3407 	}
3408 
3409 bypass:
3410 	acl = seg->s_acl;
3411 	rsmpi_acl = seg->s_acl_in;
3412 	acl_len = seg->s_acl_len;
3413 
3414 	seg->s_acl = NULL;
3415 	seg->s_acl_in = NULL;
3416 	seg->s_acl_len = 0;
3417 
3418 	if (seg->s_state == RSM_STATE_EXPORT) {
3419 		seg->s_state = RSM_STATE_BIND;
3420 	} else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3421 		seg->s_state = RSM_STATE_BIND_QUIESCED;
3422 		cv_broadcast(&seg->s_cv);
3423 	}
3424 
3425 	rsmseglock_release(seg);
3426 
3427 	rsmacl_free(acl, acl_len);
3428 	rsmpiacl_free(rsmpi_acl, acl_len);
3429 
3430 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3431 
3432 	return (DDI_SUCCESS);
3433 }
3434 
3435 /*
3436  * Called from rsm_unpublish to force an unload and disconnection of all
3437  * importers of the unpublished segment.
3438  *
3439  * First build the list of segments requiring a force disconnect, then
3440  * send a request for each.
3441  */
3442 static void
3443 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3444     rsm_node_id_t ex_nodeid)
3445 {
3446 	rsmipc_request_t 	request;
3447 	importing_token_t	*prev_token, *token, *tmp_token, *tokp;
3448 	importing_token_t	*force_disconnect_list = NULL;
3449 	int			index;
3450 
3451 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3452 	    "rsm_send_importer_disconnects enter\n"));
3453 
3454 	index = rsmhash(ex_segid);
3455 
3456 	mutex_enter(&importer_list.lock);
3457 
3458 	prev_token = NULL;
3459 	token = importer_list.bucket[index];
3460 
3461 	while (token != NULL) {
3462 		if (token->key == ex_segid) {
3463 			/*
3464 			 * take it off the importer list and add it
3465 			 * to the force disconnect list.
3466 			 */
3467 			if (prev_token == NULL)
3468 				importer_list.bucket[index] = token->next;
3469 			else
3470 				prev_token->next = token->next;
3471 			tmp_token = token;
3472 			token = token->next;
3473 			if (force_disconnect_list == NULL) {
3474 				force_disconnect_list = tmp_token;
3475 				tmp_token->next = NULL;
3476 			} else {
3477 				tokp = force_disconnect_list;
3478 				/*
3479 				 * make sure that the tmp_token's node
3480 				 * is not already on the force disconnect
3481 				 * list.
3482 				 */
3483 				while (tokp != NULL) {
3484 					if (tokp->importing_node ==
3485 					    tmp_token->importing_node) {
3486 						break;
3487 					}
3488 					tokp = tokp->next;
3489 				}
3490 				if (tokp == NULL) {
3491 					tmp_token->next =
3492 					    force_disconnect_list;
3493 					force_disconnect_list = tmp_token;
3494 				} else {
3495 					kmem_free((void *)tmp_token,
3496 					    sizeof (*token));
3497 				}
3498 			}
3499 
3500 		} else {
3501 			prev_token = token;
3502 			token = token->next;
3503 		}
3504 	}
3505 	mutex_exit(&importer_list.lock);
3506 
3507 	token = force_disconnect_list;
3508 	while (token != NULL) {
3509 		if (token->importing_node == my_nodeid) {
3510 			rsm_force_unload(ex_nodeid, ex_segid,
3511 			    DISCONNECT);
3512 		} else {
3513 			request.rsmipc_hdr.rsmipc_type =
3514 			    RSMIPC_MSG_DISCONNECT;
3515 			request.rsmipc_key = token->key;
3516 			for (;;) {
3517 				if (rsmipc_send(token->importing_node,
3518 				    &request,
3519 				    RSM_NO_REPLY) == RSM_SUCCESS) {
3520 					break;
3521 				} else {
3522 					delay(drv_usectohz(10000));
3523 				}
3524 			}
3525 		}
3526 		tmp_token = token;
3527 		token = token->next;
3528 		kmem_free((void *)tmp_token, sizeof (*token));
3529 	}
3530 
3531 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3532 	    "rsm_send_importer_disconnects done\n"));
3533 }
3534 
3535 /*
3536  * This function is used as a callback for unlocking the pages locked
3537  * down by a process which then does a fork or an exec.
3538  * It marks the export segments corresponding to umem cookie given by
3539  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3540  * destroyed later when an rsm_close occurs).
3541  */
3542 static void
3543 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3544 {
3545 	rsmresource_blk_t *blk;
3546 	rsmresource_t *p;
3547 	rsmseg_t *eseg = NULL;
3548 	int i, j;
3549 	int found = 0;
3550 
3551 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3552 	    "rsm_export_force_destroy enter\n"));
3553 
3554 	/*
3555 	 * Walk the resource list and locate the export segment (either
3556 	 * in the BIND or the EXPORT state) which corresponds to the
3557 	 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3558 	 * Change the state to ZOMBIE by calling rsmseg_close with the
3559 	 * force_flag argument (the second argument) set to 1. Also,
3560 	 * unpublish and unbind the segment, but don't free it. Free it
3561 	 * only on a rsm_close call for the segment.
3562 	 */
3563 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3564 
3565 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3566 		blk = rsm_resource.rsmrc_root[i];
3567 		if (blk == NULL) {
3568 			continue;
3569 		}
3570 
3571 		for (j = 0; j < RSMRC_BLKSZ; j++) {
3572 			p = blk->rsmrcblk_blks[j];
3573 			if ((p != NULL) && (p != RSMRC_RESERVED) &&
3574 			    (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3575 				eseg = (rsmseg_t *)p;
3576 				if (eseg->s_cookie != ck)
3577 					continue; /* continue searching */
3578 				/*
3579 				 * Found the segment, set flag to indicate
3580 				 * force destroy processing is in progress
3581 				 */
3582 				rsmseglock_acquire(eseg);
3583 				eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3584 				rsmseglock_release(eseg);
3585 				found = 1;
3586 				break;
3587 			}
3588 		}
3589 
3590 		if (found)
3591 			break;
3592 	}
3593 
3594 	rw_exit(&rsm_resource.rsmrc_lock);
3595 
3596 	if (found) {
3597 		ASSERT(eseg != NULL);
3598 		/* call rsmseg_close with force flag set to 1 */
3599 		rsmseg_close(eseg, 1);
3600 		/*
3601 		 * force destroy processing done, clear flag and signal any
3602 		 * thread waiting in rsmseg_close.
3603 		 */
3604 		rsmseglock_acquire(eseg);
3605 		eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3606 		cv_broadcast(&eseg->s_cv);
3607 		rsmseglock_release(eseg);
3608 	}
3609 
3610 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3611 	    "rsm_export_force_destroy done\n"));
3612 }
3613 
3614 /* ******************************* Remote Calls *********************** */
3615 static void
3616 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3617 {
3618 	rsmipc_reply_t reply;
3619 	DBG_DEFINE(category,
3620 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3621 
3622 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3623 	    "rsm_intr_segconnect enter\n"));
3624 
3625 	reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3626 
3627 	reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3628 	reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3629 
3630 	(void) rsmipc_send(src, NULL, &reply);
3631 
3632 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3633 	    "rsm_intr_segconnect done\n"));
3634 }
3635 
3636 
3637 /*
3638  * When an exported segment is unpublished the exporter sends an ipc
3639  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3640  * calls this function.  The import list is scanned; segments which match the
3641  * exported segment id are unloaded and disconnected.
3642  *
3643  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3644  *
3645  */
3646 static void
3647 rsm_force_unload(rsm_node_id_t src_nodeid,
3648     rsm_memseg_id_t ex_segid,
3649     boolean_t disconnect_flag)
3650 
3651 {
3652 	rsmresource_t	*p = NULL;
3653 	rsmhash_table_t *rhash = &rsm_import_segs;
3654 	uint_t		index;
3655 	DBG_DEFINE(category,
3656 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3657 
3658 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3659 
3660 	index = rsmhash(ex_segid);
3661 
3662 	rw_enter(&rhash->rsmhash_rw, RW_READER);
3663 
3664 	p = rsmhash_getbkt(rhash, index);
3665 
3666 	for (; p; p = p->rsmrc_next) {
3667 		rsmseg_t *seg = (rsmseg_t *)p;
3668 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3669 			/*
3670 			 * In order to make rsmseg_unload and rsm_force_unload
3671 			 * thread safe, acquire the segment lock here.
3672 			 * rsmseg_unload is responsible for releasing the lock.
3673 			 * rsmseg_unload releases the lock just before a call
3674 			 * to rsmipc_send or in case of an early exit which
3675 			 * occurs if the segment was in the state
3676 			 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3677 			 */
3678 			rsmseglock_acquire(seg);
3679 			if (disconnect_flag)
3680 				seg->s_flags |= RSM_FORCE_DISCONNECT;
3681 			rsmseg_unload(seg);
3682 		}
3683 	}
3684 	rw_exit(&rhash->rsmhash_rw);
3685 
3686 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3687 }
3688 
3689 static void
3690 rsm_intr_reply(rsmipc_msghdr_t *msg)
3691 {
3692 	/*
3693 	 * Find slot for cookie in reply.
3694 	 * Match sequence with sequence in cookie
3695 	 * If no match; return
3696 	 * Try to grap lock of slot, if locked return
3697 	 * copy data into reply slot area
3698 	 * signal waiter
3699 	 */
3700 	rsmipc_slot_t 	*slot;
3701 	rsmipc_cookie_t	*cookie;
3702 	void *data = (void *) msg;
3703 	size_t size = sizeof (rsmipc_reply_t);
3704 	DBG_DEFINE(category,
3705 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3706 
3707 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3708 
3709 	cookie = &msg->rsmipc_cookie;
3710 	if (cookie->ic.index >= RSMIPC_SZ) {
3711 		DBG_PRINTF((category, RSM_ERR,
3712 		    "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3713 		return;
3714 	}
3715 
3716 	ASSERT(cookie->ic.index < RSMIPC_SZ);
3717 	slot = &rsm_ipc.slots[cookie->ic.index];
3718 	mutex_enter(&slot->rsmipc_lock);
3719 	if (slot->rsmipc_cookie.value == cookie->value) {
3720 		/* found a match */
3721 		if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3722 			bcopy(data, slot->rsmipc_data, size);
3723 			RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3724 			cv_signal(&slot->rsmipc_cv);
3725 		}
3726 	} else {
3727 		DBG_PRINTF((category, RSM_DEBUG,
3728 		    "rsm: rsm_intr_reply mismatched reply %d\n",
3729 		    cookie->ic.index));
3730 	}
3731 	mutex_exit(&slot->rsmipc_lock);
3732 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3733 }
3734 
3735 /*
3736  * This function gets dispatched on the worker thread when we receive
3737  * the SQREADY message. This function sends the SQREADY_ACK message.
3738  */
3739 static void
3740 rsm_sqready_ack_deferred(void *arg)
3741 {
3742 	path_t	*path = (path_t *)arg;
3743 	DBG_DEFINE(category,
3744 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3745 
3746 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3747 	    "rsm_sqready_ack_deferred enter\n"));
3748 
3749 	mutex_enter(&path->mutex);
3750 
3751 	/*
3752 	 * If path is not active no point in sending the ACK
3753 	 * because the whole SQREADY protocol will again start
3754 	 * when the path becomes active.
3755 	 */
3756 	if (path->state != RSMKA_PATH_ACTIVE) {
3757 		/*
3758 		 * decrement the path refcnt incremented in rsm_proc_sqready
3759 		 */
3760 		PATH_RELE_NOLOCK(path);
3761 		mutex_exit(&path->mutex);
3762 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3763 		    "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3764 		return;
3765 	}
3766 
3767 	/* send an SQREADY_ACK message */
3768 	(void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3769 
3770 	/* initialize credits to the max level */
3771 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3772 
3773 	/* wake up any send that is waiting for credits */
3774 	cv_broadcast(&path->sendq_token.sendq_cv);
3775 
3776 	/*
3777 	 * decrement the path refcnt since we incremented it in
3778 	 * rsm_proc_sqready
3779 	 */
3780 	PATH_RELE_NOLOCK(path);
3781 
3782 	mutex_exit(&path->mutex);
3783 
3784 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3785 	    "rsm_sqready_ack_deferred done\n"));
3786 }
3787 
3788 /*
3789  * Process the SQREADY message
3790  */
3791 static void
3792 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3793     rsm_intr_hand_arg_t arg)
3794 {
3795 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3796 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3797 	path_t			*path;
3798 	DBG_DEFINE(category,
3799 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3800 
3801 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3802 
3803 	/* look up the path - incr the path refcnt */
3804 	path = rsm_find_path(hdlr_argp->adapter_name,
3805 	    hdlr_argp->adapter_instance, src_hwaddr);
3806 
3807 	/*
3808 	 * No path exists or path is not active - drop the message
3809 	 */
3810 	if (path == NULL) {
3811 		DBG_PRINTF((category, RSM_DEBUG,
3812 		    "rsm_proc_sqready done: msg dropped no path\n"));
3813 		return;
3814 	}
3815 
3816 	mutex_exit(&path->mutex);
3817 
3818 	/* drain any tasks from the previous incarnation */
3819 	taskq_wait(path->recv_taskq);
3820 
3821 	mutex_enter(&path->mutex);
3822 	/*
3823 	 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3824 	 * in the meanwhile we received an SQREADY message, blindly reset
3825 	 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3826 	 * and forget about the SQREADY that we sent.
3827 	 */
3828 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3829 
3830 	if (path->state != RSMKA_PATH_ACTIVE) {
3831 		/* decr refcnt and drop the mutex */
3832 		PATH_RELE_NOLOCK(path);
3833 		mutex_exit(&path->mutex);
3834 		DBG_PRINTF((category, RSM_DEBUG,
3835 		    "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3836 		return;
3837 	}
3838 
3839 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3840 	    " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3841 
3842 	/*
3843 	 * The sender's local incarnation number is our remote incarnation
3844 	 * number save it in the path data structure
3845 	 */
3846 	path->remote_incn = msg->rsmipc_local_incn;
3847 	path->sendq_token.msgbuf_avail = 0;
3848 	path->procmsg_cnt = 0;
3849 
3850 	/*
3851 	 * path is active - dispatch task to send SQREADY_ACK - remember
3852 	 * RSMPI calls can't be done in interrupt context
3853 	 *
3854 	 * We can use the recv_taskq to send because the remote endpoint
3855 	 * cannot start sending messages till it receives SQREADY_ACK hence
3856 	 * at this point there are no tasks on recv_taskq.
3857 	 *
3858 	 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3859 	 */
3860 	(void) taskq_dispatch(path->recv_taskq,
3861 	    rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3862 
3863 	mutex_exit(&path->mutex);
3864 
3865 
3866 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3867 }
3868 
3869 /*
3870  * Process the SQREADY_ACK message
3871  */
3872 static void
3873 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3874     rsm_intr_hand_arg_t arg)
3875 {
3876 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3877 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3878 	path_t			*path;
3879 	DBG_DEFINE(category,
3880 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3881 
3882 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3883 	    "rsm_proc_sqready_ack enter\n"));
3884 
3885 	/* look up the path - incr the path refcnt */
3886 	path = rsm_find_path(hdlr_argp->adapter_name,
3887 	    hdlr_argp->adapter_instance, src_hwaddr);
3888 
3889 	/*
3890 	 * drop the message if - no path exists or path is not active
3891 	 * or if its not waiting for SQREADY_ACK message
3892 	 */
3893 	if (path == NULL) {
3894 		DBG_PRINTF((category, RSM_DEBUG,
3895 		    "rsm_proc_sqready_ack done: msg dropped no path\n"));
3896 		return;
3897 	}
3898 
3899 	if ((path->state != RSMKA_PATH_ACTIVE) ||
3900 	    !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3901 		/* decrement the refcnt */
3902 		PATH_RELE_NOLOCK(path);
3903 		mutex_exit(&path->mutex);
3904 		DBG_PRINTF((category, RSM_DEBUG,
3905 		    "rsm_proc_sqready_ack done: msg dropped\n"));
3906 		return;
3907 	}
3908 
3909 	/*
3910 	 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3911 	 * sent, if not drop it.
3912 	 */
3913 	if (path->local_incn != msghdr->rsmipc_incn) {
3914 		/* decrement the refcnt */
3915 		PATH_RELE_NOLOCK(path);
3916 		mutex_exit(&path->mutex);
3917 		DBG_PRINTF((category, RSM_DEBUG,
3918 		    "rsm_proc_sqready_ack done: msg old incn %lld\n",
3919 		    msghdr->rsmipc_incn));
3920 		return;
3921 	}
3922 
3923 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3924 	    " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3925 
3926 	/*
3927 	 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3928 	 */
3929 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3930 
3931 	/* save the remote sendq incn number */
3932 	path->remote_incn = msg->rsmipc_local_incn;
3933 
3934 	/* initialize credits to the max level */
3935 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3936 
3937 	/* wake up any send that is waiting for credits */
3938 	cv_broadcast(&path->sendq_token.sendq_cv);
3939 
3940 	/* decrement the refcnt */
3941 	PATH_RELE_NOLOCK(path);
3942 
3943 	mutex_exit(&path->mutex);
3944 
3945 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3946 	    "rsm_proc_sqready_ack done\n"));
3947 }
3948 
3949 /*
3950  * process the RSMIPC_MSG_CREDIT message
3951  */
3952 static void
3953 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3954     rsm_intr_hand_arg_t arg)
3955 {
3956 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3957 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3958 	path_t			*path;
3959 	DBG_DEFINE(category,
3960 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3961 	    RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3962 
3963 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3964 
3965 	/* look up the path - incr the path refcnt */
3966 	path = rsm_find_path(hdlr_argp->adapter_name,
3967 	    hdlr_argp->adapter_instance, src_hwaddr);
3968 
3969 	if (path == NULL) {
3970 		DBG_PRINTF((category, RSM_DEBUG,
3971 		    "rsm_add_credits enter: path not found\n"));
3972 		return;
3973 	}
3974 
3975 	/* the path is not active - discard credits */
3976 	if (path->state != RSMKA_PATH_ACTIVE) {
3977 		PATH_RELE_NOLOCK(path);
3978 		mutex_exit(&path->mutex);
3979 		DBG_PRINTF((category, RSM_DEBUG,
3980 		    "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3981 		return;
3982 	}
3983 
3984 	/*
3985 	 * Check if these credits are for current incarnation of the path.
3986 	 */
3987 	if (path->local_incn != msghdr->rsmipc_incn) {
3988 		/* decrement the refcnt */
3989 		PATH_RELE_NOLOCK(path);
3990 		mutex_exit(&path->mutex);
3991 		DBG_PRINTF((category, RSM_DEBUG,
3992 		    "rsm_add_credits enter: old incn %lld\n",
3993 		    msghdr->rsmipc_incn));
3994 		return;
3995 	}
3996 
3997 	DBG_PRINTF((category, RSM_DEBUG,
3998 	    "rsm_add_credits:path=%lx new-creds=%d "
3999 	    "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
4000 	    path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4001 	    src_hwaddr));
4002 
4003 
4004 	/* add credits to the path's sendq */
4005 	path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4006 
4007 	ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4008 
4009 	/* wake up any send that is waiting for credits */
4010 	cv_broadcast(&path->sendq_token.sendq_cv);
4011 
4012 	/* decrement the refcnt */
4013 	PATH_RELE_NOLOCK(path);
4014 
4015 	mutex_exit(&path->mutex);
4016 
4017 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4018 }
4019 
4020 static void
4021 rsm_intr_event(rsmipc_request_t *msg)
4022 {
4023 	rsmseg_t	*seg;
4024 	rsmresource_t	*p;
4025 	rsm_node_id_t	src_node;
4026 	DBG_DEFINE(category,
4027 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4028 
4029 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4030 
4031 	src_node = msg->rsmipc_hdr.rsmipc_src;
4032 
4033 	if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4034 		/* This is for an import segment */
4035 		uint_t hashval = rsmhash(msg->rsmipc_key);
4036 
4037 		rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4038 
4039 		p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4040 
4041 		for (; p; p = p->rsmrc_next) {
4042 			if ((p->rsmrc_key == msg->rsmipc_key) &&
4043 			    (p->rsmrc_node == src_node)) {
4044 				seg = (rsmseg_t *)p;
4045 				rsmseglock_acquire(seg);
4046 
4047 				atomic_add_32(&seg->s_pollevent, 1);
4048 
4049 				if (seg->s_pollflag & RSM_SEGMENT_POLL)
4050 					pollwakeup(&seg->s_poll, POLLRDNORM);
4051 
4052 				rsmseglock_release(seg);
4053 			}
4054 		}
4055 
4056 		rw_exit(&rsm_import_segs.rsmhash_rw);
4057 	} else {
4058 		/* This is for an export segment */
4059 		seg = rsmexport_lookup(msg->rsmipc_key);
4060 		if (!seg) {
4061 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4062 			    "rsm_intr_event done: exp seg not found\n"));
4063 			return;
4064 		}
4065 
4066 		ASSERT(rsmseglock_held(seg));
4067 
4068 		atomic_add_32(&seg->s_pollevent, 1);
4069 
4070 		/*
4071 		 * We must hold the segment lock here, or else the segment
4072 		 * can be freed while pollwakeup is using it. This implies
4073 		 * that we MUST NOT grab the segment lock during rsm_chpoll,
4074 		 * as outlined in the chpoll(2) man page.
4075 		 */
4076 		if (seg->s_pollflag & RSM_SEGMENT_POLL)
4077 			pollwakeup(&seg->s_poll, POLLRDNORM);
4078 
4079 		rsmseglock_release(seg);
4080 	}
4081 
4082 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4083 }
4084 
4085 /*
4086  * The exporter did a republish and changed the ACL - this change is only
4087  * visible to new importers.
4088  */
4089 static void
4090 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4091     rsm_permission_t perm)
4092 {
4093 
4094 	rsmresource_t	*p;
4095 	rsmseg_t	*seg;
4096 	uint_t		hashval = rsmhash(key);
4097 	DBG_DEFINE(category,
4098 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4099 
4100 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4101 
4102 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4103 
4104 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4105 
4106 	for (; p; p = p->rsmrc_next) {
4107 		/*
4108 		 * find the importer and update the permission in the shared
4109 		 * data structure. Any new importers will use the new perms
4110 		 */
4111 		if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4112 			seg = (rsmseg_t *)p;
4113 
4114 			rsmseglock_acquire(seg);
4115 			rsmsharelock_acquire(seg);
4116 			seg->s_share->rsmsi_mode = perm;
4117 			rsmsharelock_release(seg);
4118 			rsmseglock_release(seg);
4119 
4120 			break;
4121 		}
4122 	}
4123 
4124 	rw_exit(&rsm_import_segs.rsmhash_rw);
4125 
4126 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4127 }
4128 
4129 void
4130 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4131 {
4132 	int		done = 1; /* indicate all SUSPENDS have been acked */
4133 	list_element_t	*elem;
4134 	DBG_DEFINE(category,
4135 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4136 
4137 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4138 	    "rsm_suspend_complete enter\n"));
4139 
4140 	mutex_enter(&rsm_suspend_list.list_lock);
4141 
4142 	if (rsm_suspend_list.list_head == NULL) {
4143 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4144 		    "rsm_suspend_complete done: suspend_list is empty\n"));
4145 		mutex_exit(&rsm_suspend_list.list_lock);
4146 		return;
4147 	}
4148 
4149 	elem = rsm_suspend_list.list_head;
4150 	while (elem != NULL) {
4151 		if (elem->nodeid == src_node) {
4152 			/* clear the pending flag for the node */
4153 			elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4154 			elem->flags |= flag;
4155 		}
4156 
4157 		if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4158 			done = 0; /* still some nodes have not yet ACKED */
4159 
4160 		elem = elem->next;
4161 	}
4162 
4163 	mutex_exit(&rsm_suspend_list.list_lock);
4164 
4165 	if (!done) {
4166 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4167 		    "rsm_suspend_complete done: acks pending\n"));
4168 		return;
4169 	}
4170 	/*
4171 	 * Now that we are done with suspending all the remote importers
4172 	 * time to quiesce the local exporters
4173 	 */
4174 	exporter_quiesce();
4175 
4176 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4177 	    "rsm_suspend_complete done\n"));
4178 }
4179 
4180 static void
4181 exporter_quiesce()
4182 {
4183 	int		i, e;
4184 	rsmresource_t	*current;
4185 	rsmseg_t	*seg;
4186 	adapter_t	*adapter;
4187 	DBG_DEFINE(category,
4188 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4189 
4190 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4191 	/*
4192 	 * The importers send a SUSPEND_COMPLETE to the exporter node
4193 	 *	Unpublish, unbind the export segment and
4194 	 *	move the segments to the EXPORT_QUIESCED state
4195 	 */
4196 
4197 	rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4198 
4199 	for (i = 0; i < rsm_hash_size; i++) {
4200 		current = rsm_export_segs.bucket[i];
4201 		while (current != NULL) {
4202 			seg = (rsmseg_t *)current;
4203 			rsmseglock_acquire(seg);
4204 			if (current->rsmrc_state ==
4205 			    RSM_STATE_EXPORT_QUIESCING) {
4206 				adapter = seg->s_adapter;
4207 				/*
4208 				 * some local memory handles are not published
4209 				 * check if it was published
4210 				 */
4211 				if ((seg->s_acl == NULL) ||
4212 				    (seg->s_acl[0].ae_node != my_nodeid) ||
4213 				    (seg->s_acl[0].ae_permission != 0)) {
4214 
4215 					e = adapter->rsmpi_ops->rsm_unpublish(
4216 					    seg->s_handle.out);
4217 					DBG_PRINTF((category, RSM_DEBUG,
4218 					    "exporter_quiesce:unpub %d\n", e));
4219 
4220 					e = adapter->rsmpi_ops->rsm_seg_destroy(
4221 					    seg->s_handle.out);
4222 
4223 					DBG_PRINTF((category, RSM_DEBUG,
4224 					    "exporter_quiesce:destroy %d\n",
4225 					    e));
4226 				}
4227 
4228 				(void) rsm_unbind_pages(seg);
4229 				seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4230 				cv_broadcast(&seg->s_cv);
4231 			}
4232 			rsmseglock_release(seg);
4233 			current = current->rsmrc_next;
4234 		}
4235 	}
4236 	rw_exit(&rsm_export_segs.rsmhash_rw);
4237 
4238 	/*
4239 	 * All the local segments we are done with the pre-del processing
4240 	 * - time to move to PREDEL_COMPLETED.
4241 	 */
4242 
4243 	mutex_enter(&rsm_drv_data.drv_lock);
4244 
4245 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4246 
4247 	rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4248 
4249 	cv_broadcast(&rsm_drv_data.drv_cv);
4250 
4251 	mutex_exit(&rsm_drv_data.drv_lock);
4252 
4253 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4254 }
4255 
4256 static void
4257 importer_suspend(rsm_node_id_t src_node)
4258 {
4259 	int		i;
4260 	int		susp_flg; /* true means already suspended */
4261 	int		num_importers;
4262 	rsmresource_t	*p = NULL, *curp;
4263 	rsmhash_table_t *rhash = &rsm_import_segs;
4264 	rsmseg_t	*seg;
4265 	rsmipc_request_t request;
4266 	DBG_DEFINE(category,
4267 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4268 
4269 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4270 
4271 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4272 	for (i = 0; i < rsm_hash_size; i++) {
4273 		p = rhash->bucket[i];
4274 
4275 		/*
4276 		 * Suspend all importers with same <node, key> pair.
4277 		 * After the last one of the shared importers has been
4278 		 * suspended - suspend the shared mappings/connection.
4279 		 */
4280 		for (; p; p = p->rsmrc_next) {
4281 			rsmseg_t *first = (rsmseg_t *)p;
4282 			if ((first->s_node != src_node) ||
4283 			    (first->s_state == RSM_STATE_DISCONNECT))
4284 				continue; /* go to next entry */
4285 			/*
4286 			 * search the rest of the bucket for
4287 			 * other siblings (imprtrs with the same key)
4288 			 * of "first" and suspend them.
4289 			 * All importers with same key fall in
4290 			 * the same bucket.
4291 			 */
4292 			num_importers = 0;
4293 			for (curp = p; curp; curp = curp->rsmrc_next) {
4294 				seg = (rsmseg_t *)curp;
4295 
4296 				rsmseglock_acquire(seg);
4297 
4298 				if ((seg->s_node != first->s_node) ||
4299 				    (seg->s_key != first->s_key) ||
4300 				    (seg->s_state == RSM_STATE_DISCONNECT)) {
4301 					/*
4302 					 * either not a peer segment or its a
4303 					 * disconnected segment - skip it
4304 					 */
4305 					rsmseglock_release(seg);
4306 					continue;
4307 				}
4308 
4309 				rsmseg_suspend(seg, &susp_flg);
4310 
4311 				if (susp_flg) { /* seg already suspended */
4312 					rsmseglock_release(seg);
4313 					break; /* the inner for loop */
4314 				}
4315 
4316 				num_importers++;
4317 				rsmsharelock_acquire(seg);
4318 				/*
4319 				 * we've processed all importers that are
4320 				 * siblings of "first"
4321 				 */
4322 				if (num_importers ==
4323 				    seg->s_share->rsmsi_refcnt) {
4324 					rsmsharelock_release(seg);
4325 					rsmseglock_release(seg);
4326 					break;
4327 				}
4328 				rsmsharelock_release(seg);
4329 				rsmseglock_release(seg);
4330 			}
4331 
4332 			/*
4333 			 * All the importers with the same key and
4334 			 * nodeid as "first" have been suspended.
4335 			 * Now suspend the shared connect/mapping.
4336 			 * This is done only once.
4337 			 */
4338 			if (!susp_flg) {
4339 				rsmsegshare_suspend(seg);
4340 			}
4341 		}
4342 	}
4343 
4344 	rw_exit(&rhash->rsmhash_rw);
4345 
4346 	/* send an ACK for SUSPEND message */
4347 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4348 	(void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4349 
4350 
4351 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4352 
4353 }
4354 
4355 static void
4356 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4357 {
4358 	int		recheck_state;
4359 	rsmcookie_t	*hdl;
4360 	DBG_DEFINE(category,
4361 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4362 
4363 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4364 	    "rsmseg_suspend enter: key=%u\n", seg->s_key));
4365 
4366 	*susp_flg = 0;
4367 
4368 	ASSERT(rsmseglock_held(seg));
4369 	/* wait if putv/getv is in progress */
4370 	while (seg->s_rdmacnt > 0)
4371 		cv_wait(&seg->s_cv, &seg->s_lock);
4372 
4373 	do {
4374 		recheck_state = 0;
4375 
4376 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4377 		    "rsmseg_suspend:segment %x state=%d\n",
4378 		    seg->s_key, seg->s_state));
4379 
4380 		switch (seg->s_state) {
4381 		case RSM_STATE_NEW:
4382 			/* not a valid state */
4383 			break;
4384 		case RSM_STATE_CONNECTING:
4385 			seg->s_state = RSM_STATE_ABORT_CONNECT;
4386 			break;
4387 		case RSM_STATE_ABORT_CONNECT:
4388 			break;
4389 		case RSM_STATE_CONNECT:
4390 			seg->s_handle.in = NULL;
4391 			seg->s_state = RSM_STATE_CONN_QUIESCE;
4392 			break;
4393 		case RSM_STATE_MAPPING:
4394 			/* wait until segment leaves the mapping state */
4395 			while (seg->s_state == RSM_STATE_MAPPING)
4396 				cv_wait(&seg->s_cv, &seg->s_lock);
4397 			recheck_state = 1;
4398 			break;
4399 		case RSM_STATE_ACTIVE:
4400 			/* unload the mappings */
4401 			if (seg->s_ckl != NULL) {
4402 				hdl = seg->s_ckl;
4403 				for (; hdl != NULL; hdl = hdl->c_next) {
4404 					(void) devmap_unload(hdl->c_dhp,
4405 					    hdl->c_off, hdl->c_len);
4406 				}
4407 			}
4408 			seg->s_mapinfo = NULL;
4409 			seg->s_state = RSM_STATE_MAP_QUIESCE;
4410 			break;
4411 		case RSM_STATE_CONN_QUIESCE:
4412 			/* FALLTHRU */
4413 		case RSM_STATE_MAP_QUIESCE:
4414 			/* rsmseg_suspend already done for seg */
4415 			*susp_flg = 1;
4416 			break;
4417 		case RSM_STATE_DISCONNECT:
4418 			break;
4419 		default:
4420 			ASSERT(0); /* invalid state */
4421 		}
4422 	} while (recheck_state);
4423 
4424 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4425 }
4426 
4427 static void
4428 rsmsegshare_suspend(rsmseg_t *seg)
4429 {
4430 	int			e;
4431 	adapter_t		*adapter;
4432 	rsm_import_share_t	*sharedp;
4433 	DBG_DEFINE(category,
4434 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4435 
4436 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4437 	    "rsmsegshare_suspend enter\n"));
4438 
4439 	rsmseglock_acquire(seg);
4440 	rsmsharelock_acquire(seg);
4441 
4442 	sharedp = seg->s_share;
4443 	adapter = seg->s_adapter;
4444 	switch (sharedp->rsmsi_state) {
4445 	case RSMSI_STATE_NEW:
4446 		break;
4447 	case RSMSI_STATE_CONNECTING:
4448 		sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4449 		break;
4450 	case RSMSI_STATE_ABORT_CONNECT:
4451 		break;
4452 	case RSMSI_STATE_CONNECTED:
4453 		/* do the rsmpi disconnect */
4454 		if (sharedp->rsmsi_node != my_nodeid) {
4455 			e = adapter->rsmpi_ops->
4456 			    rsm_disconnect(sharedp->rsmsi_handle);
4457 
4458 			DBG_PRINTF((category, RSM_DEBUG,
4459 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4460 			    sharedp->rsmsi_segid, e));
4461 		}
4462 
4463 		sharedp->rsmsi_handle = NULL;
4464 
4465 		sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4466 		break;
4467 	case RSMSI_STATE_CONN_QUIESCE:
4468 		break;
4469 	case RSMSI_STATE_MAPPED:
4470 		/* do the rsmpi unmap and disconnect */
4471 		if (sharedp->rsmsi_node != my_nodeid) {
4472 			e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4473 
4474 			DBG_PRINTF((category, RSM_DEBUG,
4475 			    "rsmshare_suspend: rsmpi unmap %d\n", e));
4476 
4477 			e = adapter->rsmpi_ops->
4478 			    rsm_disconnect(sharedp->rsmsi_handle);
4479 			DBG_PRINTF((category, RSM_DEBUG,
4480 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4481 			    sharedp->rsmsi_segid, e));
4482 		}
4483 
4484 		sharedp->rsmsi_handle = NULL;
4485 
4486 		sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4487 		break;
4488 	case RSMSI_STATE_MAP_QUIESCE:
4489 		break;
4490 	case RSMSI_STATE_DISCONNECTED:
4491 		break;
4492 	default:
4493 		ASSERT(0); /* invalid state */
4494 	}
4495 
4496 	rsmsharelock_release(seg);
4497 	rsmseglock_release(seg);
4498 
4499 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4500 	    "rsmsegshare_suspend done\n"));
4501 }
4502 
4503 /*
4504  * This should get called on receiving a RESUME message or from
4505  * the pathmanger if the node undergoing DR dies.
4506  */
4507 static void
4508 importer_resume(rsm_node_id_t src_node)
4509 {
4510 	int		i;
4511 	rsmresource_t	*p = NULL;
4512 	rsmhash_table_t *rhash = &rsm_import_segs;
4513 	void		*cookie;
4514 	DBG_DEFINE(category,
4515 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4516 
4517 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4518 
4519 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4520 
4521 	for (i = 0; i < rsm_hash_size; i++) {
4522 		p = rhash->bucket[i];
4523 
4524 		for (; p; p = p->rsmrc_next) {
4525 			rsmseg_t *seg = (rsmseg_t *)p;
4526 
4527 			rsmseglock_acquire(seg);
4528 
4529 			/* process only importers of node undergoing DR */
4530 			if (seg->s_node != src_node) {
4531 				rsmseglock_release(seg);
4532 				continue;
4533 			}
4534 
4535 			if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4536 				rsmipc_request_t	request;
4537 				/*
4538 				 * rsmpi map/connect failed
4539 				 * inform the exporter so that it can
4540 				 * remove the importer.
4541 				 */
4542 				request.rsmipc_hdr.rsmipc_type =
4543 				    RSMIPC_MSG_NOTIMPORTING;
4544 				request.rsmipc_key = seg->s_segid;
4545 				request.rsmipc_segment_cookie = cookie;
4546 				rsmseglock_release(seg);
4547 				(void) rsmipc_send(seg->s_node, &request,
4548 				    RSM_NO_REPLY);
4549 			} else {
4550 				rsmseglock_release(seg);
4551 			}
4552 		}
4553 	}
4554 
4555 	rw_exit(&rhash->rsmhash_rw);
4556 
4557 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4558 }
4559 
4560 static int
4561 rsmseg_resume(rsmseg_t *seg, void **cookie)
4562 {
4563 	int			e;
4564 	int			retc;
4565 	off_t			dev_offset;
4566 	size_t			maplen;
4567 	uint_t			maxprot;
4568 	rsm_mapinfo_t		*p;
4569 	rsmcookie_t		*hdl;
4570 	rsm_import_share_t	*sharedp;
4571 	DBG_DEFINE(category,
4572 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4573 
4574 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4575 	    "rsmseg_resume enter: key=%u\n", seg->s_key));
4576 
4577 	*cookie = NULL;
4578 
4579 	ASSERT(rsmseglock_held(seg));
4580 
4581 	if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4582 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4583 		return (RSM_SUCCESS);
4584 	}
4585 
4586 	sharedp = seg->s_share;
4587 
4588 	rsmsharelock_acquire(seg);
4589 
4590 	/* resume the shared connection and/or mapping */
4591 	retc = rsmsegshare_resume(seg);
4592 
4593 	if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4594 		/* shared state can either be connected or mapped */
4595 		if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4596 		    (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4597 			ASSERT(retc == RSM_SUCCESS);
4598 			seg->s_handle.in = sharedp->rsmsi_handle;
4599 			rsmsharelock_release(seg);
4600 			seg->s_state = RSM_STATE_CONNECT;
4601 
4602 		} else { /* error in rsmpi connect during resume */
4603 			seg->s_handle.in = NULL;
4604 			seg->s_state = RSM_STATE_DISCONNECT;
4605 
4606 			sharedp->rsmsi_refcnt--;
4607 			cookie = (void *)sharedp->rsmsi_cookie;
4608 
4609 			if (sharedp->rsmsi_refcnt == 0) {
4610 				ASSERT(sharedp->rsmsi_mapcnt == 0);
4611 				rsmsharelock_release(seg);
4612 
4613 				/* clean up the shared data structure */
4614 				mutex_destroy(&sharedp->rsmsi_lock);
4615 				cv_destroy(&sharedp->rsmsi_cv);
4616 				kmem_free((void *)(sharedp),
4617 				    sizeof (rsm_import_share_t));
4618 
4619 			} else {
4620 				rsmsharelock_release(seg);
4621 			}
4622 			/*
4623 			 * The following needs to be done after any
4624 			 * rsmsharelock calls which use seg->s_share.
4625 			 */
4626 			seg->s_share = NULL;
4627 		}
4628 
4629 		/* signal any waiting segment */
4630 		cv_broadcast(&seg->s_cv);
4631 
4632 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4633 		    "rsmseg_resume done:state=%d\n", seg->s_state));
4634 		return (retc);
4635 	}
4636 
4637 	ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4638 
4639 	/* Setup protections for remap */
4640 	maxprot = PROT_USER;
4641 	if (seg->s_mode & RSM_PERM_READ) {
4642 		maxprot |= PROT_READ;
4643 	}
4644 	if (seg->s_mode & RSM_PERM_WRITE) {
4645 		maxprot |= PROT_WRITE;
4646 	}
4647 
4648 	if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4649 		/* error in rsmpi connect or map during resume */
4650 
4651 		/* remap to trash page */
4652 		ASSERT(seg->s_ckl != NULL);
4653 
4654 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4655 			e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4656 			    remap_cookie, hdl->c_off, hdl->c_len,
4657 			    maxprot, 0, NULL);
4658 
4659 			DBG_PRINTF((category, RSM_ERR,
4660 			    "rsmseg_resume:remap=%d\n", e));
4661 		}
4662 
4663 		seg->s_handle.in = NULL;
4664 		seg->s_state = RSM_STATE_DISCONNECT;
4665 
4666 		sharedp->rsmsi_refcnt--;
4667 
4668 		sharedp->rsmsi_mapcnt--;
4669 		seg->s_mapinfo = NULL;
4670 
4671 		if (sharedp->rsmsi_refcnt == 0) {
4672 			ASSERT(sharedp->rsmsi_mapcnt == 0);
4673 			rsmsharelock_release(seg);
4674 
4675 			/* clean up the shared data structure */
4676 			mutex_destroy(&sharedp->rsmsi_lock);
4677 			cv_destroy(&sharedp->rsmsi_cv);
4678 			kmem_free((void *)(sharedp),
4679 			    sizeof (rsm_import_share_t));
4680 
4681 		} else {
4682 			rsmsharelock_release(seg);
4683 		}
4684 		/*
4685 		 * The following needs to be done after any
4686 		 * rsmsharelock calls which use seg->s_share.
4687 		 */
4688 		seg->s_share = NULL;
4689 
4690 		/* signal any waiting segment */
4691 		cv_broadcast(&seg->s_cv);
4692 
4693 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4694 		    "rsmseg_resume done:seg=%x,err=%d\n",
4695 		    seg->s_key, retc));
4696 		return (retc);
4697 
4698 	}
4699 
4700 	seg->s_handle.in = sharedp->rsmsi_handle;
4701 
4702 	if (seg->s_node == my_nodeid) { /* loopback */
4703 		ASSERT(seg->s_mapinfo == NULL);
4704 
4705 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4706 			e = devmap_umem_remap(hdl->c_dhp,
4707 			    rsm_dip, seg->s_cookie,
4708 			    hdl->c_off, hdl->c_len,
4709 			    maxprot, 0, NULL);
4710 
4711 			DBG_PRINTF((category, RSM_ERR,
4712 			    "rsmseg_resume:remap=%d\n", e));
4713 		}
4714 	} else { /* remote exporter */
4715 		/* remap to the new rsmpi maps */
4716 		seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4717 
4718 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4719 			p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4720 			    &dev_offset, &maplen);
4721 			e = devmap_devmem_remap(hdl->c_dhp,
4722 			    p->dip, p->dev_register, dev_offset,
4723 			    maplen, maxprot, 0, NULL);
4724 
4725 			DBG_PRINTF((category, RSM_ERR,
4726 			    "rsmseg_resume:remap=%d\n", e));
4727 		}
4728 	}
4729 
4730 	rsmsharelock_release(seg);
4731 
4732 	seg->s_state = RSM_STATE_ACTIVE;
4733 	cv_broadcast(&seg->s_cv);
4734 
4735 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4736 
4737 	return (retc);
4738 }
4739 
4740 static int
4741 rsmsegshare_resume(rsmseg_t *seg)
4742 {
4743 	int			e = RSM_SUCCESS;
4744 	adapter_t		*adapter;
4745 	rsm_import_share_t	*sharedp;
4746 	DBG_DEFINE(category,
4747 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4748 
4749 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4750 
4751 	ASSERT(rsmseglock_held(seg));
4752 	ASSERT(rsmsharelock_held(seg));
4753 
4754 	sharedp = seg->s_share;
4755 
4756 	/*
4757 	 * If we are not in a xxxx_QUIESCE state that means shared
4758 	 * connect/mapping processing has been already been done
4759 	 * so return success.
4760 	 */
4761 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4762 	    (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4763 		return (RSM_SUCCESS);
4764 	}
4765 
4766 	adapter = seg->s_adapter;
4767 
4768 	if (sharedp->rsmsi_node != my_nodeid) {
4769 		rsm_addr_t	hwaddr;
4770 		hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4771 
4772 		e = adapter->rsmpi_ops->rsm_connect(
4773 		    adapter->rsmpi_handle, hwaddr,
4774 		    sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4775 
4776 		DBG_PRINTF((category, RSM_DEBUG,
4777 		    "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4778 		    sharedp->rsmsi_segid, e));
4779 
4780 		if (e != RSM_SUCCESS) {
4781 			/* when do we send the NOT_IMPORTING message */
4782 			sharedp->rsmsi_handle = NULL;
4783 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4784 			/* signal any waiting segment */
4785 			cv_broadcast(&sharedp->rsmsi_cv);
4786 			return (e);
4787 		}
4788 	}
4789 
4790 	if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4791 		sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4792 		/* signal any waiting segment */
4793 		cv_broadcast(&sharedp->rsmsi_cv);
4794 		return (e);
4795 	}
4796 
4797 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4798 
4799 	/* do the rsmpi map of the whole segment here */
4800 	if (sharedp->rsmsi_node != my_nodeid) {
4801 		size_t mapped_len;
4802 		rsm_mapinfo_t *p;
4803 
4804 		/*
4805 		 * We need to do rsmpi maps with <off, lens> identical to
4806 		 * the old mapinfo list because the segment mapping handles
4807 		 * dhp and such need the fragmentation of rsmpi maps to be
4808 		 * identical to what it was during the mmap of the segment
4809 		 */
4810 		p = sharedp->rsmsi_mapinfo;
4811 
4812 		while (p != NULL) {
4813 			mapped_len = 0;
4814 
4815 			e = adapter->rsmpi_ops->rsm_map(
4816 			    sharedp->rsmsi_handle, p->start_offset,
4817 			    p->individual_len, &mapped_len,
4818 			    &p->dip, &p->dev_register, &p->dev_offset,
4819 			    NULL, NULL);
4820 
4821 			if (e != 0) {
4822 				DBG_PRINTF((category, RSM_ERR,
4823 				    "rsmsegshare_resume: rsmpi map err=%d\n",
4824 				    e));
4825 				break;
4826 			}
4827 
4828 			if (mapped_len != p->individual_len) {
4829 				DBG_PRINTF((category, RSM_ERR,
4830 				    "rsmsegshare_resume: rsmpi maplen"
4831 				    "< reqlen=%lx\n", mapped_len));
4832 				e = RSMERR_BAD_LENGTH;
4833 				break;
4834 			}
4835 
4836 			p = p->next;
4837 
4838 		}
4839 
4840 
4841 		if (e != RSM_SUCCESS) { /* rsmpi map failed */
4842 			int	err;
4843 			/* Check if this is the first rsm_map */
4844 			if (p != sharedp->rsmsi_mapinfo) {
4845 				/*
4846 				 * A single rsm_unmap undoes multiple rsm_maps.
4847 				 */
4848 				(void) seg->s_adapter->rsmpi_ops->
4849 				    rsm_unmap(sharedp->rsmsi_handle);
4850 			}
4851 
4852 			rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4853 			sharedp->rsmsi_mapinfo = NULL;
4854 
4855 			err = adapter->rsmpi_ops->
4856 			    rsm_disconnect(sharedp->rsmsi_handle);
4857 
4858 			DBG_PRINTF((category, RSM_DEBUG,
4859 			    "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4860 			    sharedp->rsmsi_segid, err));
4861 
4862 			sharedp->rsmsi_handle = NULL;
4863 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4864 
4865 			/* signal the waiting segments */
4866 			cv_broadcast(&sharedp->rsmsi_cv);
4867 			DBG_PRINTF((category, RSM_DEBUG,
4868 			    "rsmsegshare_resume done: rsmpi map err\n"));
4869 			return (e);
4870 		}
4871 	}
4872 
4873 	sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4874 
4875 	/* signal any waiting segment */
4876 	cv_broadcast(&sharedp->rsmsi_cv);
4877 
4878 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4879 
4880 	return (e);
4881 }
4882 
4883 /*
4884  * this is the routine that gets called by recv_taskq which is the
4885  * thread that processes messages that are flow-controlled.
4886  */
4887 static void
4888 rsm_intr_proc_deferred(void *arg)
4889 {
4890 	path_t			*path = (path_t *)arg;
4891 	rsmipc_request_t	*msg;
4892 	rsmipc_msghdr_t		*msghdr;
4893 	rsm_node_id_t		src_node;
4894 	msgbuf_elem_t		*head;
4895 	int			e;
4896 	DBG_DEFINE(category,
4897 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4898 
4899 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4900 	    "rsm_intr_proc_deferred enter\n"));
4901 
4902 	mutex_enter(&path->mutex);
4903 
4904 	/* use the head of the msgbuf_queue */
4905 	head = rsmka_gethead_msgbuf(path);
4906 
4907 	mutex_exit(&path->mutex);
4908 
4909 	msg = (rsmipc_request_t *)&(head->msg);
4910 	msghdr = (rsmipc_msghdr_t *)msg;
4911 
4912 	src_node = msghdr->rsmipc_src;
4913 
4914 	/*
4915 	 * messages that need to send a reply should check the message version
4916 	 * before processing the message. And all messages that need to
4917 	 * send a reply should be processed here by the worker thread.
4918 	 */
4919 	switch (msghdr->rsmipc_type) {
4920 	case RSMIPC_MSG_SEGCONNECT:
4921 		if (msghdr->rsmipc_version != RSM_VERSION) {
4922 			rsmipc_reply_t reply;
4923 			reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4924 			reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4925 			reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4926 			(void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4927 		} else {
4928 			rsm_intr_segconnect(src_node, msg);
4929 		}
4930 		break;
4931 	case RSMIPC_MSG_DISCONNECT:
4932 		rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4933 		break;
4934 	case RSMIPC_MSG_SUSPEND:
4935 		importer_suspend(src_node);
4936 		break;
4937 	case RSMIPC_MSG_SUSPEND_DONE:
4938 		rsm_suspend_complete(src_node, 0);
4939 		break;
4940 	case RSMIPC_MSG_RESUME:
4941 		importer_resume(src_node);
4942 		break;
4943 	default:
4944 		ASSERT(0);
4945 	}
4946 
4947 	mutex_enter(&path->mutex);
4948 
4949 	rsmka_dequeue_msgbuf(path);
4950 
4951 	/* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4952 	if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4953 		path->procmsg_cnt++;
4954 
4955 	ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4956 
4957 	/* No need to send credits if path is going down */
4958 	if ((path->state == RSMKA_PATH_ACTIVE) &&
4959 	    (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4960 		/*
4961 		 * send credits and reset procmsg_cnt if success otherwise
4962 		 * credits will be sent after processing the next message
4963 		 */
4964 		e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4965 		if (e == 0)
4966 			path->procmsg_cnt = 0;
4967 		else
4968 			DBG_PRINTF((category, RSM_ERR,
4969 			    "rsm_intr_proc_deferred:send credits err=%d\n", e));
4970 	}
4971 
4972 	/*
4973 	 * decrement the path refcnt since we incremented it in
4974 	 * rsm_intr_callback_dispatch
4975 	 */
4976 	PATH_RELE_NOLOCK(path);
4977 
4978 	mutex_exit(&path->mutex);
4979 
4980 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4981 	    "rsm_intr_proc_deferred done\n"));
4982 }
4983 
4984 /*
4985  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4986  */
4987 static void
4988 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4989     rsm_intr_hand_arg_t arg)
4990 {
4991 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
4992 	path_t			*path;
4993 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4994 	DBG_DEFINE(category,
4995 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4996 
4997 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4998 	    "rsm_intr_callback_dispatch enter\n"));
4999 	ASSERT(data && hdlr_argp);
5000 
5001 	/* look up the path - incr the path refcnt */
5002 	path = rsm_find_path(hdlr_argp->adapter_name,
5003 	    hdlr_argp->adapter_instance, src_hwaddr);
5004 
5005 	/* the path has been removed - drop this message */
5006 	if (path == NULL) {
5007 		DBG_PRINTF((category, RSM_DEBUG,
5008 		    "rsm_intr_callback_dispatch done: msg dropped\n"));
5009 		return;
5010 	}
5011 	/* the path is not active - don't accept new messages */
5012 	if (path->state != RSMKA_PATH_ACTIVE) {
5013 		PATH_RELE_NOLOCK(path);
5014 		mutex_exit(&path->mutex);
5015 		DBG_PRINTF((category, RSM_DEBUG,
5016 		    "rsm_intr_callback_dispatch done: msg dropped"
5017 		    " path=%lx !ACTIVE\n", path));
5018 		return;
5019 	}
5020 
5021 	/*
5022 	 * Check if this message was sent to an older incarnation
5023 	 * of the path/sendq.
5024 	 */
5025 	if (path->local_incn != msghdr->rsmipc_incn) {
5026 		/* decrement the refcnt */
5027 		PATH_RELE_NOLOCK(path);
5028 		mutex_exit(&path->mutex);
5029 		DBG_PRINTF((category, RSM_DEBUG,
5030 		    "rsm_intr_callback_dispatch done: old incn %lld\n",
5031 		    msghdr->rsmipc_incn));
5032 		return;
5033 	}
5034 
5035 	/* copy and enqueue msg on the path's msgbuf queue */
5036 	rsmka_enqueue_msgbuf(path, data);
5037 
5038 	/*
5039 	 * schedule task to process messages - ignore retval from
5040 	 * task_dispatch because we sender cannot send more than
5041 	 * what receiver can handle.
5042 	 */
5043 	(void) taskq_dispatch(path->recv_taskq,
5044 	    rsm_intr_proc_deferred, path, KM_NOSLEEP);
5045 
5046 	mutex_exit(&path->mutex);
5047 
5048 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5049 	    "rsm_intr_callback_dispatch done\n"));
5050 }
5051 
5052 /*
5053  * This procedure is called from rsm_srv_func when a remote node creates a
5054  * a send queue.  This event is used as a hint that an  earlier failed
5055  * attempt to create a send queue to that remote node may now succeed and
5056  * should be retried.  Indication of an earlier failed attempt is provided
5057  * by the RSMKA_SQCREATE_PENDING flag.
5058  */
5059 static void
5060 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5061 {
5062 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
5063 	path_t			*path;
5064 	DBG_DEFINE(category,
5065 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5066 
5067 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5068 	    "rsm_sqcreateop_callback enter\n"));
5069 
5070 	/* look up the path - incr the path refcnt */
5071 	path = rsm_find_path(hdlr_argp->adapter_name,
5072 	    hdlr_argp->adapter_instance, src_hwaddr);
5073 
5074 	if (path == NULL) {
5075 		DBG_PRINTF((category, RSM_DEBUG,
5076 		    "rsm_sqcreateop_callback done: no path\n"));
5077 		return;
5078 	}
5079 
5080 	if ((path->state == RSMKA_PATH_UP) &&
5081 	    (path->flags & RSMKA_SQCREATE_PENDING)) {
5082 		/*
5083 		 * previous attempt to create sendq had failed, retry
5084 		 * it and move to RSMKA_PATH_ACTIVE state if successful.
5085 		 * the refcnt will be decremented in the do_deferred_work
5086 		 */
5087 		(void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5088 	} else {
5089 		/* decrement the refcnt */
5090 		PATH_RELE_NOLOCK(path);
5091 	}
5092 	mutex_exit(&path->mutex);
5093 
5094 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5095 	    "rsm_sqcreateop_callback done\n"));
5096 }
5097 
5098 static void
5099 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5100 {
5101 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5102 	rsmipc_request_t *msg = (rsmipc_request_t *)data;
5103 	rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5104 	rsm_node_id_t src_node;
5105 	DBG_DEFINE(category,
5106 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5107 
5108 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5109 	    "src=%d, type=%d\n", msghdr->rsmipc_src,
5110 	    msghdr->rsmipc_type));
5111 
5112 	/*
5113 	 * Check for the version number in the msg header. If it is not
5114 	 * RSM_VERSION, drop the message. In the future, we need to manage
5115 	 * incompatible version numbers in some way
5116 	 */
5117 	if (msghdr->rsmipc_version != RSM_VERSION) {
5118 		DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5119 		/*
5120 		 * Drop requests that don't have a reply right here
5121 		 * Request with reply will send a BAD_VERSION reply
5122 		 * when they get processed by the worker thread.
5123 		 */
5124 		if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5125 			return;
5126 		}
5127 
5128 	}
5129 
5130 	src_node = msghdr->rsmipc_src;
5131 
5132 	switch (msghdr->rsmipc_type) {
5133 	case RSMIPC_MSG_SEGCONNECT:
5134 	case RSMIPC_MSG_DISCONNECT:
5135 	case RSMIPC_MSG_SUSPEND:
5136 	case RSMIPC_MSG_SUSPEND_DONE:
5137 	case RSMIPC_MSG_RESUME:
5138 		/*
5139 		 * These message types are handled by a worker thread using
5140 		 * the flow-control algorithm.
5141 		 * Any message processing that does one or more of the
5142 		 * following should be handled in a worker thread.
5143 		 *	- allocates resources and might sleep
5144 		 *	- makes RSMPI calls down to the interconnect driver
5145 		 *	this by defn include requests with reply.
5146 		 *	- takes a long duration of time
5147 		 */
5148 		rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5149 		break;
5150 	case RSMIPC_MSG_NOTIMPORTING:
5151 		importer_list_rm(src_node, msg->rsmipc_key,
5152 		    msg->rsmipc_segment_cookie);
5153 		break;
5154 	case RSMIPC_MSG_SQREADY:
5155 		rsm_proc_sqready(data, src_hwaddr, arg);
5156 		break;
5157 	case RSMIPC_MSG_SQREADY_ACK:
5158 		rsm_proc_sqready_ack(data, src_hwaddr, arg);
5159 		break;
5160 	case RSMIPC_MSG_CREDIT:
5161 		rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5162 		break;
5163 	case RSMIPC_MSG_REPLY:
5164 		rsm_intr_reply(msghdr);
5165 		break;
5166 	case RSMIPC_MSG_BELL:
5167 		rsm_intr_event(msg);
5168 		break;
5169 	case RSMIPC_MSG_IMPORTING:
5170 		importer_list_add(src_node, msg->rsmipc_key,
5171 		    msg->rsmipc_adapter_hwaddr,
5172 		    msg->rsmipc_segment_cookie);
5173 		break;
5174 	case RSMIPC_MSG_REPUBLISH:
5175 		importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5176 		break;
5177 	default:
5178 		DBG_PRINTF((category, RSM_DEBUG,
5179 		    "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5180 		    (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5181 	}
5182 
5183 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5184 
5185 }
5186 
5187 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5188     rsm_intr_q_op_t opcode, rsm_addr_t src,
5189     void *data, size_t size, rsm_intr_hand_arg_t arg)
5190 {
5191 	DBG_DEFINE(category,
5192 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5193 
5194 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5195 
5196 	switch (opcode) {
5197 	case RSM_INTR_Q_OP_CREATE:
5198 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5199 		rsm_sqcreateop_callback(src, arg);
5200 		break;
5201 	case RSM_INTR_Q_OP_DESTROY:
5202 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5203 		break;
5204 	case RSM_INTR_Q_OP_RECEIVE:
5205 		rsm_intr_callback(data, src, arg);
5206 		break;
5207 	default:
5208 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5209 		    "rsm_srv_func: unknown opcode = %x\n", opcode));
5210 	}
5211 
5212 	chd = chd;
5213 	size = size;
5214 
5215 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5216 
5217 	return (RSM_INTR_HAND_CLAIMED);
5218 }
5219 
5220 /* *************************** IPC slots ************************* */
5221 static rsmipc_slot_t *
5222 rsmipc_alloc()
5223 {
5224 	int i;
5225 	rsmipc_slot_t *slot;
5226 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5227 
5228 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5229 
5230 	/* try to find a free slot, if not wait */
5231 	mutex_enter(&rsm_ipc.lock);
5232 
5233 	while (rsm_ipc.count == 0) {
5234 		rsm_ipc.wanted = 1;
5235 		cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5236 	}
5237 
5238 	/* An empty slot is available, find it */
5239 	slot = &rsm_ipc.slots[0];
5240 	for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5241 		if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5242 			RSMIPC_CLEAR(slot, RSMIPC_FREE);
5243 			break;
5244 		}
5245 	}
5246 
5247 	ASSERT(i < RSMIPC_SZ);
5248 	rsm_ipc.count--;	/* one less is available */
5249 	rsm_ipc.sequence++; /* new sequence */
5250 
5251 	slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5252 	slot->rsmipc_cookie.ic.index = (uint_t)i;
5253 
5254 	mutex_exit(&rsm_ipc.lock);
5255 
5256 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5257 
5258 	return (slot);
5259 }
5260 
5261 static void
5262 rsmipc_free(rsmipc_slot_t *slot)
5263 {
5264 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5265 
5266 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5267 
5268 	ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5269 	ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5270 
5271 	mutex_enter(&rsm_ipc.lock);
5272 
5273 	RSMIPC_SET(slot, RSMIPC_FREE);
5274 
5275 	slot->rsmipc_cookie.ic.sequence = 0;
5276 
5277 	mutex_exit(&slot->rsmipc_lock);
5278 	rsm_ipc.count++;
5279 	ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5280 	if (rsm_ipc.wanted) {
5281 		rsm_ipc.wanted = 0;
5282 		cv_broadcast(&rsm_ipc.cv);
5283 	}
5284 
5285 	mutex_exit(&rsm_ipc.lock);
5286 
5287 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5288 }
5289 
5290 static int
5291 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5292 {
5293 	int		e = 0;
5294 	int		credit_check = 0;
5295 	int		retry_cnt = 0;
5296 	int		min_retry_cnt = 10;
5297 	clock_t		ticks;
5298 	rsm_send_t	is;
5299 	rsmipc_slot_t	*rslot;
5300 	adapter_t	*adapter;
5301 	path_t		*path;
5302 	sendq_token_t	*sendq_token;
5303 	sendq_token_t	*used_sendq_token = NULL;
5304 	rsm_send_q_handle_t	ipc_handle;
5305 	DBG_DEFINE(category,
5306 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5307 
5308 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5309 	    dest));
5310 
5311 	/*
5312 	 * Check if this is a local case
5313 	 */
5314 	if (dest == my_nodeid) {
5315 		switch (req->rsmipc_hdr.rsmipc_type) {
5316 		case RSMIPC_MSG_SEGCONNECT:
5317 			reply->rsmipc_status = (short)rsmsegacl_validate(
5318 			    req, dest, reply);
5319 			break;
5320 		case RSMIPC_MSG_BELL:
5321 			req->rsmipc_hdr.rsmipc_src = dest;
5322 			rsm_intr_event(req);
5323 			break;
5324 		case RSMIPC_MSG_IMPORTING:
5325 			importer_list_add(dest, req->rsmipc_key,
5326 			    req->rsmipc_adapter_hwaddr,
5327 			    req->rsmipc_segment_cookie);
5328 			break;
5329 		case RSMIPC_MSG_NOTIMPORTING:
5330 			importer_list_rm(dest, req->rsmipc_key,
5331 			    req->rsmipc_segment_cookie);
5332 			break;
5333 		case RSMIPC_MSG_REPUBLISH:
5334 			importer_update(dest, req->rsmipc_key,
5335 			    req->rsmipc_perm);
5336 			break;
5337 		case RSMIPC_MSG_SUSPEND:
5338 			importer_suspend(dest);
5339 			break;
5340 		case RSMIPC_MSG_SUSPEND_DONE:
5341 			rsm_suspend_complete(dest, 0);
5342 			break;
5343 		case RSMIPC_MSG_RESUME:
5344 			importer_resume(dest);
5345 			break;
5346 		default:
5347 			ASSERT(0);
5348 		}
5349 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5350 		    "rsmipc_send done\n"));
5351 		return (0);
5352 	}
5353 
5354 	if (dest >= MAX_NODES) {
5355 		DBG_PRINTF((category, RSM_ERR,
5356 		    "rsm: rsmipc_send bad node number %x\n", dest));
5357 		return (RSMERR_REMOTE_NODE_UNREACHABLE);
5358 	}
5359 
5360 	/*
5361 	 * Oh boy! we are going remote.
5362 	 */
5363 
5364 	/*
5365 	 * identify if we need to have credits to send this message
5366 	 * - only selected requests are flow controlled
5367 	 */
5368 	if (req != NULL) {
5369 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5370 		    "rsmipc_send:request type=%d\n",
5371 		    req->rsmipc_hdr.rsmipc_type));
5372 
5373 		switch (req->rsmipc_hdr.rsmipc_type) {
5374 		case RSMIPC_MSG_SEGCONNECT:
5375 		case RSMIPC_MSG_DISCONNECT:
5376 		case RSMIPC_MSG_IMPORTING:
5377 		case RSMIPC_MSG_SUSPEND:
5378 		case RSMIPC_MSG_SUSPEND_DONE:
5379 		case RSMIPC_MSG_RESUME:
5380 			credit_check = 1;
5381 			break;
5382 		default:
5383 			credit_check = 0;
5384 		}
5385 	}
5386 
5387 again:
5388 	if (retry_cnt++ == min_retry_cnt) {
5389 		/* backoff before further retries for 10ms */
5390 		delay(drv_usectohz(10000));
5391 		retry_cnt = 0; /* reset retry_cnt */
5392 	}
5393 	sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5394 	if (sendq_token == NULL) {
5395 		DBG_PRINTF((category, RSM_ERR,
5396 		    "rsm: rsmipc_send no device to reach node %d\n", dest));
5397 		return (RSMERR_REMOTE_NODE_UNREACHABLE);
5398 	}
5399 
5400 	if ((sendq_token == used_sendq_token) &&
5401 	    ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5402 	    (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5403 		rele_sendq_token(sendq_token);
5404 		DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5405 		return (RSMERR_CONN_ABORTED);
5406 	} else
5407 		used_sendq_token = sendq_token;
5408 
5409 /* lint -save -e413 */
5410 	path = SQ_TOKEN_TO_PATH(sendq_token);
5411 	adapter = path->local_adapter;
5412 /* lint -restore */
5413 	ipc_handle = sendq_token->rsmpi_sendq_handle;
5414 
5415 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5416 	    "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5417 
5418 	if (reply == NULL) {
5419 		/* Send request without ack */
5420 		/*
5421 		 * Set the rsmipc_version number in the msghdr for KA
5422 		 * communication versioning
5423 		 */
5424 		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5425 		req->rsmipc_hdr.rsmipc_src = my_nodeid;
5426 		/*
5427 		 * remote endpoints incn should match the value in our
5428 		 * path's remote_incn field. No need to grab any lock
5429 		 * since we have refcnted the path in rsmka_get_sendq_token
5430 		 */
5431 		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5432 
5433 		is.is_data = (void *)req;
5434 		is.is_size = sizeof (*req);
5435 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5436 		is.is_wait = 0;
5437 
5438 		if (credit_check) {
5439 			mutex_enter(&path->mutex);
5440 			/*
5441 			 * wait till we recv credits or path goes down. If path
5442 			 * goes down rsm_send will fail and we handle the error
5443 			 * then
5444 			 */
5445 			while ((sendq_token->msgbuf_avail == 0) &&
5446 			    (path->state == RSMKA_PATH_ACTIVE)) {
5447 				e = cv_wait_sig(&sendq_token->sendq_cv,
5448 				    &path->mutex);
5449 				if (e == 0) {
5450 					mutex_exit(&path->mutex);
5451 					no_reply_cnt++;
5452 					rele_sendq_token(sendq_token);
5453 					DBG_PRINTF((category, RSM_DEBUG,
5454 					    "rsmipc_send done: "
5455 					    "cv_wait INTERRUPTED"));
5456 					return (RSMERR_INTERRUPTED);
5457 				}
5458 			}
5459 
5460 			/*
5461 			 * path is not active retry on another path.
5462 			 */
5463 			if (path->state != RSMKA_PATH_ACTIVE) {
5464 				mutex_exit(&path->mutex);
5465 				rele_sendq_token(sendq_token);
5466 				e = RSMERR_CONN_ABORTED;
5467 				DBG_PRINTF((category, RSM_ERR,
5468 				    "rsm: rsmipc_send: path !ACTIVE"));
5469 				goto again;
5470 			}
5471 
5472 			ASSERT(sendq_token->msgbuf_avail > 0);
5473 
5474 			/*
5475 			 * reserve a msgbuf
5476 			 */
5477 			sendq_token->msgbuf_avail--;
5478 
5479 			mutex_exit(&path->mutex);
5480 
5481 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5482 			    NULL);
5483 
5484 			if (e != RSM_SUCCESS) {
5485 				mutex_enter(&path->mutex);
5486 				/*
5487 				 * release the reserved msgbuf since
5488 				 * the send failed
5489 				 */
5490 				sendq_token->msgbuf_avail++;
5491 				cv_broadcast(&sendq_token->sendq_cv);
5492 				mutex_exit(&path->mutex);
5493 			}
5494 		} else
5495 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5496 			    NULL);
5497 
5498 		no_reply_cnt++;
5499 		rele_sendq_token(sendq_token);
5500 		if (e != RSM_SUCCESS) {
5501 			DBG_PRINTF((category, RSM_ERR,
5502 			    "rsm: rsmipc_send no reply send"
5503 			    " err = %d no reply count = %d\n",
5504 			    e, no_reply_cnt));
5505 			ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5506 			    e != RSMERR_BAD_BARRIER_HNDL);
5507 			atomic_add_64(&rsm_ipcsend_errcnt, 1);
5508 			goto again;
5509 		} else {
5510 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5511 			    "rsmipc_send done\n"));
5512 			return (e);
5513 		}
5514 
5515 	}
5516 
5517 	if (req == NULL) {
5518 		/* Send reply - No flow control is done for reply */
5519 		/*
5520 		 * Set the version in the msg header for KA communication
5521 		 * versioning
5522 		 */
5523 		reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5524 		reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5525 		/* incn number is not used for reply msgs currently */
5526 		reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5527 
5528 		is.is_data = (void *)reply;
5529 		is.is_size = sizeof (*reply);
5530 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5531 		is.is_wait = 0;
5532 		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5533 		rele_sendq_token(sendq_token);
5534 		if (e != RSM_SUCCESS) {
5535 			DBG_PRINTF((category, RSM_ERR,
5536 			    "rsm: rsmipc_send reply send"
5537 			    " err = %d\n", e));
5538 			atomic_add_64(&rsm_ipcsend_errcnt, 1);
5539 			goto again;
5540 		} else {
5541 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5542 			    "rsmipc_send done\n"));
5543 			return (e);
5544 		}
5545 	}
5546 
5547 	/* Reply needed */
5548 	rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5549 
5550 	mutex_enter(&rslot->rsmipc_lock);
5551 
5552 	rslot->rsmipc_data = (void *)reply;
5553 	RSMIPC_SET(rslot, RSMIPC_PENDING);
5554 
5555 	while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5556 		/*
5557 		 * Set the rsmipc_version number in the msghdr for KA
5558 		 * communication versioning
5559 		 */
5560 		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5561 		req->rsmipc_hdr.rsmipc_src = my_nodeid;
5562 		req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5563 		/*
5564 		 * remote endpoints incn should match the value in our
5565 		 * path's remote_incn field. No need to grab any lock
5566 		 * since we have refcnted the path in rsmka_get_sendq_token
5567 		 */
5568 		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5569 
5570 		is.is_data = (void *)req;
5571 		is.is_size = sizeof (*req);
5572 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5573 		is.is_wait = 0;
5574 		if (credit_check) {
5575 
5576 			mutex_enter(&path->mutex);
5577 			/*
5578 			 * wait till we recv credits or path goes down. If path
5579 			 * goes down rsm_send will fail and we handle the error
5580 			 * then.
5581 			 */
5582 			while ((sendq_token->msgbuf_avail == 0) &&
5583 			    (path->state == RSMKA_PATH_ACTIVE)) {
5584 				e = cv_wait_sig(&sendq_token->sendq_cv,
5585 				    &path->mutex);
5586 				if (e == 0) {
5587 					mutex_exit(&path->mutex);
5588 					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5589 					rsmipc_free(rslot);
5590 					rele_sendq_token(sendq_token);
5591 					DBG_PRINTF((category, RSM_DEBUG,
5592 					    "rsmipc_send done: "
5593 					    "cv_wait INTERRUPTED"));
5594 					return (RSMERR_INTERRUPTED);
5595 				}
5596 			}
5597 
5598 			/*
5599 			 * path is not active retry on another path.
5600 			 */
5601 			if (path->state != RSMKA_PATH_ACTIVE) {
5602 				mutex_exit(&path->mutex);
5603 				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5604 				rsmipc_free(rslot);
5605 				rele_sendq_token(sendq_token);
5606 				e = RSMERR_CONN_ABORTED;
5607 				DBG_PRINTF((category, RSM_ERR,
5608 				    "rsm: rsmipc_send: path !ACTIVE"));
5609 				goto again;
5610 			}
5611 
5612 			ASSERT(sendq_token->msgbuf_avail > 0);
5613 
5614 			/*
5615 			 * reserve a msgbuf
5616 			 */
5617 			sendq_token->msgbuf_avail--;
5618 
5619 			mutex_exit(&path->mutex);
5620 
5621 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5622 			    NULL);
5623 
5624 			if (e != RSM_SUCCESS) {
5625 				mutex_enter(&path->mutex);
5626 				/*
5627 				 * release the reserved msgbuf since
5628 				 * the send failed
5629 				 */
5630 				sendq_token->msgbuf_avail++;
5631 				cv_broadcast(&sendq_token->sendq_cv);
5632 				mutex_exit(&path->mutex);
5633 			}
5634 		} else
5635 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5636 			    NULL);
5637 
5638 		if (e != RSM_SUCCESS) {
5639 			DBG_PRINTF((category, RSM_ERR,
5640 			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
5641 			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5642 			rsmipc_free(rslot);
5643 			rele_sendq_token(sendq_token);
5644 			atomic_add_64(&rsm_ipcsend_errcnt, 1);
5645 			goto again;
5646 		}
5647 
5648 		/* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5649 		(void) drv_getparm(LBOLT, &ticks);
5650 		ticks += drv_usectohz(5000000);
5651 		e = cv_timedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5652 		    ticks);
5653 		if (e < 0) {
5654 			/* timed out - retry */
5655 			e = RSMERR_TIMEOUT;
5656 		} else if (e == 0) {
5657 			/* signalled - return error */
5658 			e = RSMERR_INTERRUPTED;
5659 			break;
5660 		} else {
5661 			e = RSM_SUCCESS;
5662 		}
5663 	}
5664 
5665 	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5666 	rsmipc_free(rslot);
5667 	rele_sendq_token(sendq_token);
5668 
5669 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5670 	return (e);
5671 }
5672 
5673 static int
5674 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5675 {
5676 	rsmipc_request_t request;
5677 
5678 	/*
5679 	 *  inform the exporter to delete this importer
5680 	 */
5681 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5682 	request.rsmipc_key = segid;
5683 	request.rsmipc_segment_cookie = cookie;
5684 	return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5685 }
5686 
5687 static void
5688 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t	*acl,
5689     int acl_len, rsm_permission_t default_permission)
5690 {
5691 	int			i;
5692 	importing_token_t	*token;
5693 	rsmipc_request_t	request;
5694 	republish_token_t	*republish_list = NULL;
5695 	republish_token_t	*rp;
5696 	rsm_permission_t	permission;
5697 	int			index;
5698 
5699 	/*
5700 	 * send the new access mode to all the nodes that have imported
5701 	 * this segment.
5702 	 * If the new acl does not have a node that was present in
5703 	 * the old acl a access permission of 0 is sent.
5704 	 */
5705 
5706 	index = rsmhash(segid);
5707 
5708 	/*
5709 	 * create a list of node/permissions to send the republish message
5710 	 */
5711 	mutex_enter(&importer_list.lock);
5712 
5713 	token = importer_list.bucket[index];
5714 	while (token != NULL) {
5715 		if (segid == token->key) {
5716 			permission = default_permission;
5717 
5718 			for (i = 0; i < acl_len; i++) {
5719 				if (token->importing_node == acl[i].ae_node) {
5720 					permission = acl[i].ae_permission;
5721 					break;
5722 				}
5723 			}
5724 			rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5725 
5726 			rp->key = segid;
5727 			rp->importing_node = token->importing_node;
5728 			rp->permission = permission;
5729 			rp->next = republish_list;
5730 			republish_list = rp;
5731 		}
5732 		token = token->next;
5733 	}
5734 
5735 	mutex_exit(&importer_list.lock);
5736 
5737 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5738 	request.rsmipc_key = segid;
5739 
5740 	while (republish_list != NULL) {
5741 		request.rsmipc_perm = republish_list->permission;
5742 		(void) rsmipc_send(republish_list->importing_node,
5743 		    &request, RSM_NO_REPLY);
5744 		rp = republish_list;
5745 		republish_list = republish_list->next;
5746 		kmem_free(rp, sizeof (republish_token_t));
5747 	}
5748 }
5749 
5750 static void
5751 rsm_send_suspend()
5752 {
5753 	int			i, e;
5754 	rsmipc_request_t 	request;
5755 	list_element_t		*tokp;
5756 	list_element_t		*head = NULL;
5757 	importing_token_t	*token;
5758 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5759 	    "rsm_send_suspend enter\n"));
5760 
5761 	/*
5762 	 * create a list of node to send the suspend message
5763 	 *
5764 	 * Currently the whole importer list is scanned and we obtain
5765 	 * all the nodes - this basically gets all nodes that at least
5766 	 * import one segment from the local node.
5767 	 *
5768 	 * no need to grab the rsm_suspend_list lock here since we are
5769 	 * single threaded when suspend is called.
5770 	 */
5771 
5772 	mutex_enter(&importer_list.lock);
5773 	for (i = 0; i < rsm_hash_size; i++) {
5774 
5775 		token = importer_list.bucket[i];
5776 
5777 		while (token != NULL) {
5778 
5779 			tokp = head;
5780 
5781 			/*
5782 			 * make sure that the token's node
5783 			 * is not already on the suspend list
5784 			 */
5785 			while (tokp != NULL) {
5786 				if (tokp->nodeid == token->importing_node) {
5787 					break;
5788 				}
5789 				tokp = tokp->next;
5790 			}
5791 
5792 			if (tokp == NULL) { /* not in suspend list */
5793 				tokp = kmem_zalloc(sizeof (list_element_t),
5794 				    KM_SLEEP);
5795 				tokp->nodeid = token->importing_node;
5796 				tokp->next = head;
5797 				head = tokp;
5798 			}
5799 
5800 			token = token->next;
5801 		}
5802 	}
5803 	mutex_exit(&importer_list.lock);
5804 
5805 	if (head == NULL) { /* no importers so go ahead and quiesce segments */
5806 		exporter_quiesce();
5807 		return;
5808 	}
5809 
5810 	mutex_enter(&rsm_suspend_list.list_lock);
5811 	ASSERT(rsm_suspend_list.list_head == NULL);
5812 	/*
5813 	 * update the suspend list righaway so that if a node dies the
5814 	 * pathmanager can set the NODE dead flag
5815 	 */
5816 	rsm_suspend_list.list_head = head;
5817 	mutex_exit(&rsm_suspend_list.list_lock);
5818 
5819 	tokp = head;
5820 
5821 	while (tokp != NULL) {
5822 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5823 		e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5824 		/*
5825 		 * Error in rsmipc_send currently happens due to inaccessibility
5826 		 * of the remote node.
5827 		 */
5828 		if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5829 			tokp->flags |= RSM_SUSPEND_ACKPENDING;
5830 		}
5831 
5832 		tokp = tokp->next;
5833 	}
5834 
5835 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5836 	    "rsm_send_suspend done\n"));
5837 
5838 }
5839 
5840 static void
5841 rsm_send_resume()
5842 {
5843 	rsmipc_request_t 	request;
5844 	list_element_t		*elem, *head;
5845 
5846 	/*
5847 	 * save the suspend list so that we know where to send
5848 	 * the resume messages and make the suspend list head
5849 	 * NULL.
5850 	 */
5851 	mutex_enter(&rsm_suspend_list.list_lock);
5852 	head = rsm_suspend_list.list_head;
5853 	rsm_suspend_list.list_head = NULL;
5854 	mutex_exit(&rsm_suspend_list.list_lock);
5855 
5856 	while (head != NULL) {
5857 		elem = head;
5858 		head = head->next;
5859 
5860 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5861 
5862 		(void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5863 
5864 		kmem_free((void *)elem, sizeof (list_element_t));
5865 
5866 	}
5867 
5868 }
5869 
5870 /*
5871  * This function takes path and sends a message using the sendq
5872  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5873  * and RSMIPC_MSG_CREDIT are sent using this function.
5874  */
5875 int
5876 rsmipc_send_controlmsg(path_t *path, int msgtype)
5877 {
5878 	int			e;
5879 	int			retry_cnt = 0;
5880 	int			min_retry_cnt = 10;
5881 	clock_t			timeout;
5882 	adapter_t		*adapter;
5883 	rsm_send_t		is;
5884 	rsm_send_q_handle_t	ipc_handle;
5885 	rsmipc_controlmsg_t	msg;
5886 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5887 
5888 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5889 	    "rsmipc_send_controlmsg enter\n"));
5890 
5891 	ASSERT(MUTEX_HELD(&path->mutex));
5892 
5893 	adapter = path->local_adapter;
5894 
5895 	DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5896 	    "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5897 	    my_nodeid, adapter->hwaddr, path->remote_node,
5898 	    path->remote_hwaddr, path->procmsg_cnt));
5899 
5900 	if (path->state != RSMKA_PATH_ACTIVE) {
5901 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5902 		    "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5903 		return (1);
5904 	}
5905 
5906 	ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5907 
5908 	msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5909 	msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5910 	msg.rsmipc_hdr.rsmipc_type = msgtype;
5911 	msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5912 
5913 	if (msgtype == RSMIPC_MSG_CREDIT)
5914 		msg.rsmipc_credits = path->procmsg_cnt;
5915 
5916 	msg.rsmipc_local_incn = path->local_incn;
5917 
5918 	msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5919 	/* incr the sendq, path refcnt */
5920 	PATH_HOLD_NOLOCK(path);
5921 	SENDQ_TOKEN_HOLD(path);
5922 
5923 	do {
5924 		/* drop the path lock before doing the rsm_send */
5925 		mutex_exit(&path->mutex);
5926 
5927 		is.is_data = (void *)&msg;
5928 		is.is_size = sizeof (msg);
5929 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5930 		is.is_wait = 0;
5931 
5932 		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5933 
5934 		ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5935 		    e != RSMERR_BAD_BARRIER_HNDL);
5936 
5937 		mutex_enter(&path->mutex);
5938 
5939 		if (e == RSM_SUCCESS) {
5940 			break;
5941 		}
5942 		/* error counter for statistics */
5943 		atomic_add_64(&rsm_ctrlmsg_errcnt, 1);
5944 
5945 		DBG_PRINTF((category, RSM_ERR,
5946 		    "rsmipc_send_controlmsg:rsm_send error=%d", e));
5947 
5948 		if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5949 			timeout  = ddi_get_lbolt() + drv_usectohz(10000);
5950 			(void) cv_timedwait(&path->sendq_token.sendq_cv,
5951 			    &path->mutex, timeout);
5952 			retry_cnt = 0;
5953 		}
5954 	} while (path->state == RSMKA_PATH_ACTIVE);
5955 
5956 	/* decrement the sendq,path refcnt that we incr before rsm_send */
5957 	SENDQ_TOKEN_RELE(path);
5958 	PATH_RELE_NOLOCK(path);
5959 
5960 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5961 	    "rsmipc_send_controlmsg done=%d", e));
5962 	return (e);
5963 }
5964 
5965 /*
5966  * Called from rsm_force_unload and path_importer_disconnect. The memory
5967  * mapping for the imported segment is removed and the segment is
5968  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5969  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5970  * and FALSE from rsm_rebind.
5971  *
5972  * When subsequent accesses cause page faulting, the dummy page is mapped
5973  * to resolve the fault, and the mapping generation number is incremented
5974  * so that the application can be notified on a close barrier operation.
5975  *
5976  * It is important to note that the caller of rsmseg_unload is responsible for
5977  * acquiring the segment lock before making a call to rsmseg_unload. This is
5978  * required to make the caller and rsmseg_unload thread safe. The segment lock
5979  * will be released by the rsmseg_unload function.
5980  */
5981 void
5982 rsmseg_unload(rsmseg_t *im_seg)
5983 {
5984 	rsmcookie_t		*hdl;
5985 	void			*shared_cookie;
5986 	rsmipc_request_t	request;
5987 	uint_t			maxprot;
5988 
5989 	DBG_DEFINE(category,
5990 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5991 
5992 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5993 
5994 	ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5995 
5996 	/* wait until segment leaves the mapping state */
5997 	while (im_seg->s_state == RSM_STATE_MAPPING)
5998 		cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5999 	/*
6000 	 * An unload is only necessary if the segment is connected. However,
6001 	 * if the segment was on the import list in state RSM_STATE_CONNECTING
6002 	 * then a connection was in progress. Change to RSM_STATE_NEW
6003 	 * here to cause an early exit from the connection process.
6004 	 */
6005 	if (im_seg->s_state == RSM_STATE_NEW) {
6006 		rsmseglock_release(im_seg);
6007 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6008 		    "rsmseg_unload done: RSM_STATE_NEW\n"));
6009 		return;
6010 	} else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6011 		im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6012 		rsmsharelock_acquire(im_seg);
6013 		im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6014 		rsmsharelock_release(im_seg);
6015 		rsmseglock_release(im_seg);
6016 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6017 		    "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6018 		return;
6019 	}
6020 
6021 	if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6022 		if (im_seg->s_ckl != NULL) {
6023 			int e;
6024 			/* Setup protections for remap */
6025 			maxprot = PROT_USER;
6026 			if (im_seg->s_mode & RSM_PERM_READ) {
6027 				maxprot |= PROT_READ;
6028 			}
6029 			if (im_seg->s_mode & RSM_PERM_WRITE) {
6030 				maxprot |= PROT_WRITE;
6031 			}
6032 			hdl = im_seg->s_ckl;
6033 			for (; hdl != NULL; hdl = hdl->c_next) {
6034 				e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6035 				    remap_cookie,
6036 				    hdl->c_off, hdl->c_len,
6037 				    maxprot, 0, NULL);
6038 
6039 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6040 				    "remap returns %d\n", e));
6041 			}
6042 		}
6043 
6044 		(void) rsm_closeconnection(im_seg, &shared_cookie);
6045 
6046 		if (shared_cookie != NULL) {
6047 			/*
6048 			 * inform the exporting node so this import
6049 			 * can be deleted from the list of importers.
6050 			 */
6051 			request.rsmipc_hdr.rsmipc_type =
6052 			    RSMIPC_MSG_NOTIMPORTING;
6053 			request.rsmipc_key = im_seg->s_segid;
6054 			request.rsmipc_segment_cookie = shared_cookie;
6055 			rsmseglock_release(im_seg);
6056 			(void) rsmipc_send(im_seg->s_node, &request,
6057 			    RSM_NO_REPLY);
6058 		} else {
6059 			rsmseglock_release(im_seg);
6060 		}
6061 	}
6062 	else
6063 		rsmseglock_release(im_seg);
6064 
6065 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6066 
6067 }
6068 
6069 /* ****************************** Importer Calls ************************ */
6070 
6071 static int
6072 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6073 {
6074 	int shifts = 0;
6075 
6076 	if (crgetuid(cr) != owner) {
6077 		shifts += 3;
6078 		if (!groupmember(group, cr))
6079 			shifts += 3;
6080 	}
6081 
6082 	mode &= ~(perm << shifts);
6083 
6084 	if (mode == 0)
6085 		return (0);
6086 
6087 	return (secpolicy_rsm_access(cr, owner, mode));
6088 }
6089 
6090 
6091 static int
6092 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6093     intptr_t dataptr, int mode)
6094 {
6095 	int e;
6096 	int			recheck_state = 0;
6097 	void			*shared_cookie;
6098 	rsmipc_request_t	request;
6099 	rsmipc_reply_t		reply;
6100 	rsm_permission_t	access;
6101 	adapter_t		*adapter;
6102 	rsm_addr_t		addr = 0;
6103 	rsm_import_share_t	*sharedp;
6104 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6105 
6106 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6107 
6108 	adapter = rsm_getadapter(msg, mode);
6109 	if (adapter == NULL) {
6110 		DBG_PRINTF((category, RSM_ERR,
6111 		    "rsm_connect done:ENODEV adapter=NULL\n"));
6112 		return (RSMERR_CTLR_NOT_PRESENT);
6113 	}
6114 
6115 	if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6116 		rsmka_release_adapter(adapter);
6117 		DBG_PRINTF((category, RSM_ERR,
6118 		    "rsm_connect done:ENODEV loopback\n"));
6119 		return (RSMERR_CTLR_NOT_PRESENT);
6120 	}
6121 
6122 
6123 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6124 	ASSERT(seg->s_state == RSM_STATE_NEW);
6125 
6126 	/*
6127 	 * Translate perm to access
6128 	 */
6129 	if (msg->perm & ~RSM_PERM_RDWR) {
6130 		rsmka_release_adapter(adapter);
6131 		DBG_PRINTF((category, RSM_ERR,
6132 		    "rsm_connect done:EINVAL invalid perms\n"));
6133 		return (RSMERR_BAD_PERMS);
6134 	}
6135 	access = 0;
6136 	if (msg->perm & RSM_PERM_READ)
6137 		access |= RSM_ACCESS_READ;
6138 	if (msg->perm & RSM_PERM_WRITE)
6139 		access |= RSM_ACCESS_WRITE;
6140 
6141 	seg->s_node = msg->nodeid;
6142 
6143 	/*
6144 	 * Adding to the import list locks the segment; release the segment
6145 	 * lock so we can get the reply for the send.
6146 	 */
6147 	e = rsmimport_add(seg, msg->key);
6148 	if (e) {
6149 		rsmka_release_adapter(adapter);
6150 		DBG_PRINTF((category, RSM_ERR,
6151 		    "rsm_connect done:rsmimport_add failed %d\n", e));
6152 		return (e);
6153 	}
6154 	seg->s_state = RSM_STATE_CONNECTING;
6155 
6156 	/*
6157 	 * Set the s_adapter field here so as to have a valid comparison of
6158 	 * the adapter and the s_adapter value during rsmshare_get. For
6159 	 * any error, set s_adapter to NULL before doing a release_adapter
6160 	 */
6161 	seg->s_adapter = adapter;
6162 
6163 	rsmseglock_release(seg);
6164 
6165 	/*
6166 	 * get the pointer to the shared data structure; the
6167 	 * shared data is locked and refcount has been incremented
6168 	 */
6169 	sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6170 
6171 	ASSERT(rsmsharelock_held(seg));
6172 
6173 	do {
6174 		/* flag indicates whether we need to recheck the state */
6175 		recheck_state = 0;
6176 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6177 		    "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6178 		switch (sharedp->rsmsi_state) {
6179 		case RSMSI_STATE_NEW:
6180 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6181 			break;
6182 		case RSMSI_STATE_CONNECTING:
6183 			/* FALLTHRU */
6184 		case RSMSI_STATE_CONN_QUIESCE:
6185 			/* FALLTHRU */
6186 		case RSMSI_STATE_MAP_QUIESCE:
6187 			/* wait for the state to change */
6188 			while ((sharedp->rsmsi_state ==
6189 			    RSMSI_STATE_CONNECTING) ||
6190 			    (sharedp->rsmsi_state ==
6191 			    RSMSI_STATE_CONN_QUIESCE) ||
6192 			    (sharedp->rsmsi_state ==
6193 			    RSMSI_STATE_MAP_QUIESCE)) {
6194 				if (cv_wait_sig(&sharedp->rsmsi_cv,
6195 				    &sharedp->rsmsi_lock) == 0) {
6196 					/* signalled - clean up and return */
6197 					rsmsharelock_release(seg);
6198 					rsmimport_rm(seg);
6199 					seg->s_adapter = NULL;
6200 					rsmka_release_adapter(adapter);
6201 					seg->s_state = RSM_STATE_NEW;
6202 					DBG_PRINTF((category, RSM_ERR,
6203 					    "rsm_connect done: INTERRUPTED\n"));
6204 					return (RSMERR_INTERRUPTED);
6205 				}
6206 			}
6207 			/*
6208 			 * the state changed, loop back and check what it is
6209 			 */
6210 			recheck_state = 1;
6211 			break;
6212 		case RSMSI_STATE_ABORT_CONNECT:
6213 			/* exit the loop and clean up further down */
6214 			break;
6215 		case RSMSI_STATE_CONNECTED:
6216 			/* already connected, good - fall through */
6217 		case RSMSI_STATE_MAPPED:
6218 			/* already mapped, wow - fall through */
6219 			/* access validation etc is done further down */
6220 			break;
6221 		case RSMSI_STATE_DISCONNECTED:
6222 			/* disconnected - so reconnect now */
6223 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6224 			break;
6225 		default:
6226 			ASSERT(0); /* Invalid State */
6227 		}
6228 	} while (recheck_state);
6229 
6230 	if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6231 		/* we are the first to connect */
6232 		rsmsharelock_release(seg);
6233 
6234 		if (msg->nodeid != my_nodeid) {
6235 			addr = get_remote_hwaddr(adapter, msg->nodeid);
6236 
6237 			if ((int64_t)addr < 0) {
6238 				rsmsharelock_acquire(seg);
6239 				rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6240 				    RSMSI_STATE_NEW);
6241 				rsmsharelock_release(seg);
6242 				rsmimport_rm(seg);
6243 				seg->s_adapter = NULL;
6244 				rsmka_release_adapter(adapter);
6245 				seg->s_state = RSM_STATE_NEW;
6246 				DBG_PRINTF((category, RSM_ERR,
6247 				    "rsm_connect done: hwaddr<0\n"));
6248 				return (RSMERR_INTERNAL_ERROR);
6249 			}
6250 		} else {
6251 			addr = adapter->hwaddr;
6252 		}
6253 
6254 		/*
6255 		 * send request to node [src, dest, key, msgid] and get back
6256 		 * [status, msgid, cookie]
6257 		 */
6258 		request.rsmipc_key = msg->key;
6259 		/*
6260 		 * we need the s_mode of the exporter so pass
6261 		 * RSM_ACCESS_TRUSTED
6262 		 */
6263 		request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6264 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6265 		request.rsmipc_adapter_hwaddr = addr;
6266 		request.rsmipc_segment_cookie = sharedp;
6267 
6268 		e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6269 		if (e) {
6270 			rsmsharelock_acquire(seg);
6271 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6272 			    RSMSI_STATE_NEW);
6273 			rsmsharelock_release(seg);
6274 			rsmimport_rm(seg);
6275 			seg->s_adapter = NULL;
6276 			rsmka_release_adapter(adapter);
6277 			seg->s_state = RSM_STATE_NEW;
6278 			DBG_PRINTF((category, RSM_ERR,
6279 			    "rsm_connect done:rsmipc_send failed %d\n", e));
6280 			return (e);
6281 		}
6282 
6283 		if (reply.rsmipc_status != RSM_SUCCESS) {
6284 			rsmsharelock_acquire(seg);
6285 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6286 			    RSMSI_STATE_NEW);
6287 			rsmsharelock_release(seg);
6288 			rsmimport_rm(seg);
6289 			seg->s_adapter = NULL;
6290 			rsmka_release_adapter(adapter);
6291 			seg->s_state = RSM_STATE_NEW;
6292 			DBG_PRINTF((category, RSM_ERR,
6293 			    "rsm_connect done:rsmipc_send reply err %d\n",
6294 			    reply.rsmipc_status));
6295 			return (reply.rsmipc_status);
6296 		}
6297 
6298 		rsmsharelock_acquire(seg);
6299 		/* store the information recvd into the shared data struct */
6300 		sharedp->rsmsi_mode = reply.rsmipc_mode;
6301 		sharedp->rsmsi_uid = reply.rsmipc_uid;
6302 		sharedp->rsmsi_gid = reply.rsmipc_gid;
6303 		sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6304 		sharedp->rsmsi_cookie = sharedp;
6305 	}
6306 
6307 	rsmsharelock_release(seg);
6308 
6309 	/*
6310 	 * Get the segment lock and check for a force disconnect
6311 	 * from the export side which would have changed the state
6312 	 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6313 	 * force disconnect will be held off until the connection
6314 	 * has completed.
6315 	 */
6316 	rsmseglock_acquire(seg);
6317 	rsmsharelock_acquire(seg);
6318 	ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6319 	    seg->s_state == RSM_STATE_ABORT_CONNECT);
6320 
6321 	shared_cookie = sharedp->rsmsi_cookie;
6322 
6323 	if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6324 	    (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6325 		seg->s_state = RSM_STATE_NEW;
6326 		seg->s_adapter = NULL;
6327 		rsmsharelock_release(seg);
6328 		rsmseglock_release(seg);
6329 		rsmimport_rm(seg);
6330 		rsmka_release_adapter(adapter);
6331 
6332 		rsmsharelock_acquire(seg);
6333 		if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6334 			/*
6335 			 * set a flag indicating abort handling has been
6336 			 * done
6337 			 */
6338 			sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6339 			rsmsharelock_release(seg);
6340 			/* send a message to exporter - only once */
6341 			(void) rsm_send_notimporting(msg->nodeid,
6342 			    msg->key, shared_cookie);
6343 			rsmsharelock_acquire(seg);
6344 			/*
6345 			 * wake up any waiting importers and inform that
6346 			 * connection has been aborted
6347 			 */
6348 			cv_broadcast(&sharedp->rsmsi_cv);
6349 		}
6350 		rsmsharelock_release(seg);
6351 
6352 		DBG_PRINTF((category, RSM_ERR,
6353 		    "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6354 		return (RSMERR_INTERRUPTED);
6355 	}
6356 
6357 
6358 	/*
6359 	 * We need to verify that this process has access
6360 	 */
6361 	e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6362 	    access & sharedp->rsmsi_mode,
6363 	    (int)(msg->perm & RSM_PERM_RDWR), cred);
6364 	if (e) {
6365 		rsmsharelock_release(seg);
6366 		seg->s_state = RSM_STATE_NEW;
6367 		seg->s_adapter = NULL;
6368 		rsmseglock_release(seg);
6369 		rsmimport_rm(seg);
6370 		rsmka_release_adapter(adapter);
6371 		/*
6372 		 * No need to lock segment it has been removed
6373 		 * from the hash table
6374 		 */
6375 		rsmsharelock_acquire(seg);
6376 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6377 			rsmsharelock_release(seg);
6378 			/* this is the first importer */
6379 
6380 			(void) rsm_send_notimporting(msg->nodeid, msg->key,
6381 			    shared_cookie);
6382 			rsmsharelock_acquire(seg);
6383 			sharedp->rsmsi_state = RSMSI_STATE_NEW;
6384 			cv_broadcast(&sharedp->rsmsi_cv);
6385 		}
6386 		rsmsharelock_release(seg);
6387 
6388 		DBG_PRINTF((category, RSM_ERR,
6389 		    "rsm_connect done: ipcaccess failed\n"));
6390 		return (RSMERR_PERM_DENIED);
6391 	}
6392 
6393 	/* update state and cookie */
6394 	seg->s_segid = sharedp->rsmsi_segid;
6395 	seg->s_len = sharedp->rsmsi_seglen;
6396 	seg->s_mode = access & sharedp->rsmsi_mode;
6397 	seg->s_pid = ddi_get_pid();
6398 	seg->s_mapinfo = NULL;
6399 
6400 	if (seg->s_node != my_nodeid) {
6401 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6402 			e = adapter->rsmpi_ops->rsm_connect(
6403 			    adapter->rsmpi_handle,
6404 			    addr, seg->s_segid, &sharedp->rsmsi_handle);
6405 
6406 			if (e != RSM_SUCCESS) {
6407 				seg->s_state = RSM_STATE_NEW;
6408 				seg->s_adapter = NULL;
6409 				rsmsharelock_release(seg);
6410 				rsmseglock_release(seg);
6411 				rsmimport_rm(seg);
6412 				rsmka_release_adapter(adapter);
6413 				/*
6414 				 *  inform the exporter to delete this importer
6415 				 */
6416 				(void) rsm_send_notimporting(msg->nodeid,
6417 				    msg->key, shared_cookie);
6418 
6419 				/*
6420 				 * Now inform any waiting importers to
6421 				 * retry connect. This needs to be done
6422 				 * after sending notimporting so that
6423 				 * the notimporting is sent before a waiting
6424 				 * importer sends a segconnect while retrying
6425 				 *
6426 				 * No need to lock segment it has been removed
6427 				 * from the hash table
6428 				 */
6429 
6430 				rsmsharelock_acquire(seg);
6431 				sharedp->rsmsi_state = RSMSI_STATE_NEW;
6432 				cv_broadcast(&sharedp->rsmsi_cv);
6433 				rsmsharelock_release(seg);
6434 
6435 				DBG_PRINTF((category, RSM_ERR,
6436 				    "rsm_connect error %d\n", e));
6437 				if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6438 					return (
6439 					    RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6440 				else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6441 				    (e == RSMERR_UNKNOWN_RSM_ADDR))
6442 					return (RSMERR_REMOTE_NODE_UNREACHABLE);
6443 				else
6444 					return (e);
6445 			}
6446 
6447 		}
6448 		seg->s_handle.in = sharedp->rsmsi_handle;
6449 
6450 	}
6451 
6452 	seg->s_state = RSM_STATE_CONNECT;
6453 
6454 
6455 	seg->s_flags &= ~RSM_IMPORT_DUMMY;	/* clear dummy flag */
6456 	if (bar_va) {
6457 		/* increment generation number on barrier page */
6458 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6459 		/* return user off into barrier page where status will be */
6460 		msg->off = (int)seg->s_hdr.rsmrc_num;
6461 		msg->gnum = bar_va[msg->off]; 	/* gnum race */
6462 	} else {
6463 		msg->off = 0;
6464 		msg->gnum = 0;	/* gnum race */
6465 	}
6466 
6467 	msg->len = (int)sharedp->rsmsi_seglen;
6468 	msg->rnum = seg->s_minor;
6469 	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6470 	rsmsharelock_release(seg);
6471 	rsmseglock_release(seg);
6472 
6473 	/* Return back to user the segment size & perm in case it's needed */
6474 
6475 #ifdef _MULTI_DATAMODEL
6476 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6477 		rsm_ioctlmsg32_t msg32;
6478 
6479 		if (msg->len > UINT_MAX)
6480 			msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6481 		else
6482 			msg32.len = msg->len;
6483 		msg32.off = msg->off;
6484 		msg32.perm = msg->perm;
6485 		msg32.gnum = msg->gnum;
6486 		msg32.rnum = msg->rnum;
6487 
6488 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6489 		    "rsm_connect done\n"));
6490 
6491 		if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6492 		    sizeof (msg32), mode))
6493 			return (RSMERR_BAD_ADDR);
6494 		else
6495 			return (RSM_SUCCESS);
6496 	}
6497 #endif
6498 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6499 
6500 	if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6501 	    mode))
6502 		return (RSMERR_BAD_ADDR);
6503 	else
6504 		return (RSM_SUCCESS);
6505 }
6506 
6507 static int
6508 rsm_unmap(rsmseg_t *seg)
6509 {
6510 	int			err;
6511 	adapter_t		*adapter;
6512 	rsm_import_share_t	*sharedp;
6513 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6514 
6515 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6516 	    "rsm_unmap enter %u\n", seg->s_segid));
6517 
6518 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6519 
6520 	/* assert seg is locked */
6521 	ASSERT(rsmseglock_held(seg));
6522 	ASSERT(seg->s_state != RSM_STATE_MAPPING);
6523 
6524 	if ((seg->s_state != RSM_STATE_ACTIVE) &&
6525 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6526 		/* segment unmap has already been done */
6527 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6528 		return (RSM_SUCCESS);
6529 	}
6530 
6531 	sharedp = seg->s_share;
6532 
6533 	rsmsharelock_acquire(seg);
6534 
6535 	/*
6536 	 *	- shared data struct is in MAPPED or MAP_QUIESCE state
6537 	 */
6538 
6539 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6540 	    sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6541 
6542 	/*
6543 	 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6544 	 * the segment cookie list was NULL; but it is always NULL when
6545 	 * called from rsmmap_unmap and won't be NULL when called for
6546 	 * a force disconnect - so the check for NULL cookie list was removed
6547 	 */
6548 
6549 	ASSERT(sharedp->rsmsi_mapcnt > 0);
6550 
6551 	sharedp->rsmsi_mapcnt--;
6552 
6553 	if (sharedp->rsmsi_mapcnt == 0) {
6554 		if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6555 			/* unmap the shared RSMPI mapping */
6556 			adapter = seg->s_adapter;
6557 			if (seg->s_node != my_nodeid) {
6558 				ASSERT(sharedp->rsmsi_handle != NULL);
6559 				err = adapter->rsmpi_ops->
6560 				    rsm_unmap(sharedp->rsmsi_handle);
6561 				DBG_PRINTF((category, RSM_DEBUG,
6562 				    "rsm_unmap: rsmpi unmap %d\n", err));
6563 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6564 				sharedp->rsmsi_mapinfo = NULL;
6565 			}
6566 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6567 		} else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6568 			sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6569 		}
6570 	}
6571 
6572 	rsmsharelock_release(seg);
6573 
6574 	/*
6575 	 * The s_cookie field is used to store the cookie returned from the
6576 	 * ddi_umem_lock when binding the pages for an export segment. This
6577 	 * is the primary use of the s_cookie field and does not normally
6578 	 * pertain to any importing segment except in the loopback case.
6579 	 * For the loopback case, the import segment and export segment are
6580 	 * on the same node, the s_cookie field of the segment structure for
6581 	 * the importer is initialized to the s_cookie field in the exported
6582 	 * segment during the map operation and is used during the call to
6583 	 * devmap_umem_setup for the import mapping.
6584 	 * Thus, during unmap, we simply need to set s_cookie to NULL to
6585 	 * indicate that the mapping no longer exists.
6586 	 */
6587 	seg->s_cookie = NULL;
6588 
6589 	seg->s_mapinfo = NULL;
6590 
6591 	if (seg->s_state == RSM_STATE_ACTIVE)
6592 		seg->s_state = RSM_STATE_CONNECT;
6593 	else
6594 		seg->s_state = RSM_STATE_CONN_QUIESCE;
6595 
6596 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6597 
6598 	return (RSM_SUCCESS);
6599 }
6600 
6601 /*
6602  * cookie returned here if not null indicates that it is
6603  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6604  * message.
6605  */
6606 static int
6607 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6608 {
6609 	int			e;
6610 	adapter_t		*adapter;
6611 	rsm_import_share_t	*sharedp;
6612 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6613 
6614 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6615 	    "rsm_closeconnection enter\n"));
6616 
6617 	*cookie = (void *)NULL;
6618 
6619 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6620 
6621 	/* assert seg is locked */
6622 	ASSERT(rsmseglock_held(seg));
6623 
6624 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6625 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6626 		    "rsm_closeconnection done: already disconnected\n"));
6627 		return (RSM_SUCCESS);
6628 	}
6629 
6630 	/* wait for all putv/getv ops to get done */
6631 	while (seg->s_rdmacnt > 0) {
6632 		cv_wait(&seg->s_cv, &seg->s_lock);
6633 	}
6634 
6635 	(void) rsm_unmap(seg);
6636 
6637 	ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6638 	    seg->s_state == RSM_STATE_CONN_QUIESCE);
6639 
6640 	adapter = seg->s_adapter;
6641 	sharedp = seg->s_share;
6642 
6643 	ASSERT(sharedp != NULL);
6644 
6645 	rsmsharelock_acquire(seg);
6646 
6647 	/*
6648 	 * Disconnect on adapter
6649 	 *
6650 	 * The current algorithm is stateless, I don't have to contact
6651 	 * server when I go away. He only gives me permissions. Of course,
6652 	 * the adapters will talk to terminate the connect.
6653 	 *
6654 	 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6655 	 */
6656 	if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6657 	    (sharedp->rsmsi_node != my_nodeid)) {
6658 
6659 		if (sharedp->rsmsi_refcnt == 1) {
6660 			/* this is the last importer */
6661 			ASSERT(sharedp->rsmsi_mapcnt == 0);
6662 
6663 			e = adapter->rsmpi_ops->
6664 			    rsm_disconnect(sharedp->rsmsi_handle);
6665 			if (e != RSM_SUCCESS) {
6666 				DBG_PRINTF((category, RSM_DEBUG,
6667 				    "rsm:disconnect failed seg=%x:err=%d\n",
6668 				    seg->s_key, e));
6669 			}
6670 		}
6671 	}
6672 
6673 	seg->s_handle.in = NULL;
6674 
6675 	sharedp->rsmsi_refcnt--;
6676 
6677 	if (sharedp->rsmsi_refcnt == 0) {
6678 		*cookie = (void *)sharedp->rsmsi_cookie;
6679 		sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6680 		sharedp->rsmsi_handle = NULL;
6681 		rsmsharelock_release(seg);
6682 
6683 		/* clean up the shared data structure */
6684 		mutex_destroy(&sharedp->rsmsi_lock);
6685 		cv_destroy(&sharedp->rsmsi_cv);
6686 		kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6687 
6688 	} else {
6689 		rsmsharelock_release(seg);
6690 	}
6691 
6692 	/* increment generation number on barrier page */
6693 	if (bar_va) {
6694 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6695 	}
6696 
6697 	/*
6698 	 * The following needs to be done after any
6699 	 * rsmsharelock calls which use seg->s_share.
6700 	 */
6701 	seg->s_share = NULL;
6702 
6703 	seg->s_state = RSM_STATE_DISCONNECT;
6704 	/* signal anyone waiting in the CONN_QUIESCE state */
6705 	cv_broadcast(&seg->s_cv);
6706 
6707 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6708 	    "rsm_closeconnection done\n"));
6709 
6710 	return (RSM_SUCCESS);
6711 }
6712 
6713 int
6714 rsm_disconnect(rsmseg_t *seg)
6715 {
6716 	rsmipc_request_t	request;
6717 	void			*shared_cookie;
6718 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6719 
6720 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6721 
6722 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6723 
6724 	/* assert seg isn't locked */
6725 	ASSERT(!rsmseglock_held(seg));
6726 
6727 
6728 	/* Remove segment from imported list */
6729 	rsmimport_rm(seg);
6730 
6731 	/* acquire the segment */
6732 	rsmseglock_acquire(seg);
6733 
6734 	/* wait until segment leaves the mapping state */
6735 	while (seg->s_state == RSM_STATE_MAPPING)
6736 		cv_wait(&seg->s_cv, &seg->s_lock);
6737 
6738 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6739 		seg->s_state = RSM_STATE_NEW;
6740 		rsmseglock_release(seg);
6741 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6742 		    "rsm_disconnect done: already disconnected\n"));
6743 		return (RSM_SUCCESS);
6744 	}
6745 
6746 	(void) rsm_closeconnection(seg, &shared_cookie);
6747 
6748 	/* update state */
6749 	seg->s_state = RSM_STATE_NEW;
6750 
6751 	if (shared_cookie != NULL) {
6752 		/*
6753 		 *  This is the last importer so inform the exporting node
6754 		 *  so this import can be deleted from the list of importers.
6755 		 */
6756 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6757 		request.rsmipc_key = seg->s_segid;
6758 		request.rsmipc_segment_cookie = shared_cookie;
6759 		rsmseglock_release(seg);
6760 		(void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6761 	} else {
6762 		rsmseglock_release(seg);
6763 	}
6764 
6765 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6766 
6767 	return (DDI_SUCCESS);
6768 }
6769 
6770 /*ARGSUSED*/
6771 static int
6772 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6773     struct pollhead **phpp)
6774 {
6775 	minor_t		rnum;
6776 	rsmresource_t	*res;
6777 	rsmseg_t 	*seg;
6778 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6779 
6780 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6781 
6782 	/* find minor, no lock */
6783 	rnum = getminor(dev);
6784 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
6785 
6786 	/* poll is supported only for export/import segments */
6787 	if ((res == NULL) || (res == RSMRC_RESERVED) ||
6788 	    (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6789 		return (ENXIO);
6790 	}
6791 
6792 	*reventsp = 0;
6793 
6794 	/*
6795 	 * An exported segment must be in state RSM_STATE_EXPORT; an
6796 	 * imported segment must be in state RSM_STATE_ACTIVE.
6797 	 */
6798 	seg = (rsmseg_t *)res;
6799 
6800 	if (seg->s_pollevent) {
6801 		*reventsp = POLLRDNORM;
6802 	} else if (!anyyet) {
6803 		/* cannot take segment lock here */
6804 		*phpp = &seg->s_poll;
6805 		seg->s_pollflag |= RSM_SEGMENT_POLL;
6806 	}
6807 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6808 	return (0);
6809 }
6810 
6811 
6812 
6813 /* ************************* IOCTL Commands ********************* */
6814 
6815 static rsmseg_t *
6816 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6817     rsm_resource_type_t type)
6818 {
6819 	/* get segment from resource handle */
6820 	rsmseg_t *seg;
6821 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6822 
6823 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6824 
6825 
6826 	if (res != RSMRC_RESERVED) {
6827 		seg = (rsmseg_t *)res;
6828 	} else {
6829 		/* Allocate segment now and bind it */
6830 		seg = rsmseg_alloc(rnum, credp);
6831 
6832 		/*
6833 		 * if DR pre-processing is going on or DR is in progress
6834 		 * then the new export segments should be in the NEW_QSCD state
6835 		 */
6836 		if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6837 			mutex_enter(&rsm_drv_data.drv_lock);
6838 			if ((rsm_drv_data.drv_state ==
6839 			    RSM_DRV_PREDEL_STARTED) ||
6840 			    (rsm_drv_data.drv_state ==
6841 			    RSM_DRV_PREDEL_COMPLETED) ||
6842 			    (rsm_drv_data.drv_state ==
6843 			    RSM_DRV_DR_IN_PROGRESS)) {
6844 				seg->s_state = RSM_STATE_NEW_QUIESCED;
6845 			}
6846 			mutex_exit(&rsm_drv_data.drv_lock);
6847 		}
6848 
6849 		rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6850 	}
6851 
6852 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6853 
6854 	return (seg);
6855 }
6856 
6857 static int
6858 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6859     int mode, cred_t *credp)
6860 {
6861 	int error;
6862 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6863 
6864 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6865 
6866 	arg = arg;
6867 	credp = credp;
6868 
6869 	ASSERT(seg != NULL);
6870 
6871 	switch (cmd) {
6872 	case RSM_IOCTL_BIND:
6873 		error = rsm_bind(seg, msg, arg, mode);
6874 		break;
6875 	case RSM_IOCTL_REBIND:
6876 		error = rsm_rebind(seg, msg);
6877 		break;
6878 	case RSM_IOCTL_UNBIND:
6879 		error = ENOTSUP;
6880 		break;
6881 	case RSM_IOCTL_PUBLISH:
6882 		error = rsm_publish(seg, msg, arg, mode);
6883 		break;
6884 	case RSM_IOCTL_REPUBLISH:
6885 		error = rsm_republish(seg, msg, mode);
6886 		break;
6887 	case RSM_IOCTL_UNPUBLISH:
6888 		error = rsm_unpublish(seg, 1);
6889 		break;
6890 	default:
6891 		error = EINVAL;
6892 		break;
6893 	}
6894 
6895 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6896 	    error));
6897 
6898 	return (error);
6899 }
6900 static int
6901 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6902     int mode, cred_t *credp)
6903 {
6904 	int error;
6905 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6906 
6907 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6908 
6909 	ASSERT(seg);
6910 
6911 	switch (cmd) {
6912 	case RSM_IOCTL_CONNECT:
6913 		error = rsm_connect(seg, msg, credp, arg, mode);
6914 		break;
6915 	default:
6916 		error = EINVAL;
6917 		break;
6918 	}
6919 
6920 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6921 	    error));
6922 	return (error);
6923 }
6924 
6925 static int
6926 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6927     int mode)
6928 {
6929 	int e;
6930 	adapter_t *adapter;
6931 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6932 
6933 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6934 
6935 
6936 	if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6937 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6938 		    "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6939 		return (RSMERR_CONN_ABORTED);
6940 	} else if (seg->s_node == my_nodeid) {
6941 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6942 		    "rsmbar_ioctl done: loopback\n"));
6943 		return (RSM_SUCCESS);
6944 	}
6945 
6946 	adapter = seg->s_adapter;
6947 
6948 	switch (cmd) {
6949 	case RSM_IOCTL_BAR_CHECK:
6950 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6951 		    "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6952 		return (bar_va ? RSM_SUCCESS : EINVAL);
6953 	case RSM_IOCTL_BAR_OPEN:
6954 		e = adapter->rsmpi_ops->
6955 		    rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6956 		break;
6957 	case RSM_IOCTL_BAR_ORDER:
6958 		e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6959 		break;
6960 	case RSM_IOCTL_BAR_CLOSE:
6961 		e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6962 		break;
6963 	default:
6964 		e = EINVAL;
6965 		break;
6966 	}
6967 
6968 	if (e == RSM_SUCCESS) {
6969 #ifdef _MULTI_DATAMODEL
6970 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6971 			rsm_ioctlmsg32_t msg32;
6972 			int i;
6973 
6974 			for (i = 0; i < 4; i++) {
6975 				msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6976 			}
6977 
6978 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6979 			    "rsmbar_ioctl done\n"));
6980 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6981 			    sizeof (msg32), mode))
6982 				return (RSMERR_BAD_ADDR);
6983 			else
6984 				return (RSM_SUCCESS);
6985 		}
6986 #endif
6987 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6988 		    "rsmbar_ioctl done\n"));
6989 		if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6990 		    sizeof (*msg), mode))
6991 			return (RSMERR_BAD_ADDR);
6992 		else
6993 			return (RSM_SUCCESS);
6994 	}
6995 
6996 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6997 	    "rsmbar_ioctl done: error=%d\n", e));
6998 
6999 	return (e);
7000 }
7001 
7002 /*
7003  * Ring the doorbell of the export segment to which this segment is
7004  * connected.
7005  */
7006 static int
7007 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7008 {
7009 	int e = 0;
7010 	rsmipc_request_t request;
7011 
7012 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7013 
7014 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7015 
7016 	request.rsmipc_key = seg->s_segid;
7017 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7018 	request.rsmipc_segment_cookie = NULL;
7019 	e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7020 
7021 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7022 	    "exportbell_ioctl done: %d\n", e));
7023 
7024 	return (e);
7025 }
7026 
7027 /*
7028  * Ring the doorbells of all segments importing this segment
7029  */
7030 static int
7031 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7032 {
7033 	importing_token_t	*token = NULL;
7034 	rsmipc_request_t	request;
7035 	int			index;
7036 
7037 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7038 
7039 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7040 
7041 	ASSERT(seg->s_state != RSM_STATE_NEW &&
7042 	    seg->s_state != RSM_STATE_NEW_QUIESCED);
7043 
7044 	request.rsmipc_key = seg->s_segid;
7045 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7046 
7047 	index = rsmhash(seg->s_segid);
7048 
7049 	token = importer_list.bucket[index];
7050 
7051 	while (token != NULL) {
7052 		if (seg->s_key == token->key) {
7053 			request.rsmipc_segment_cookie =
7054 			    token->import_segment_cookie;
7055 			(void) rsmipc_send(token->importing_node,
7056 			    &request, RSM_NO_REPLY);
7057 		}
7058 		token = token->next;
7059 	}
7060 
7061 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7062 	    "importbell_ioctl done\n"));
7063 	return (RSM_SUCCESS);
7064 }
7065 
7066 static int
7067 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7068     rsm_poll_event_t **eventspp, int mode)
7069 {
7070 	rsm_poll_event_t	*evlist = NULL;
7071 	size_t			evlistsz;
7072 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7073 
7074 #ifdef _MULTI_DATAMODEL
7075 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7076 		int i;
7077 		rsm_consume_event_msg32_t cemsg32 = {0};
7078 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7079 		rsm_poll_event32_t	*evlist32;
7080 		size_t			evlistsz32;
7081 
7082 		/* copyin the ioctl message */
7083 		if (ddi_copyin(arg, (caddr_t)&cemsg32,
7084 		    sizeof (rsm_consume_event_msg32_t), mode)) {
7085 			DBG_PRINTF((category, RSM_ERR,
7086 			    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7087 			return (RSMERR_BAD_ADDR);
7088 		}
7089 		msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7090 		msgp->numents = (int)cemsg32.numents;
7091 
7092 		evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7093 		/*
7094 		 * If numents is large alloc events list on heap otherwise
7095 		 * use the address of array that was passed in.
7096 		 */
7097 		if (msgp->numents > RSM_MAX_POLLFDS) {
7098 			if (msgp->numents > max_segs) { /* validate numents */
7099 				DBG_PRINTF((category, RSM_ERR,
7100 				    "consumeevent_copyin: "
7101 				    "RSMERR_BAD_ARGS_ERRORS\n"));
7102 				return (RSMERR_BAD_ARGS_ERRORS);
7103 			}
7104 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7105 		} else {
7106 			evlist32 = event32;
7107 		}
7108 
7109 		/* copyin the seglist into the rsm_poll_event32_t array */
7110 		if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7111 		    evlistsz32, mode)) {
7112 			if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7113 				kmem_free(evlist32, evlistsz32);
7114 			}
7115 			DBG_PRINTF((category, RSM_ERR,
7116 			    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7117 			return (RSMERR_BAD_ADDR);
7118 		}
7119 
7120 		/* evlist and evlistsz are based on rsm_poll_event_t type */
7121 		evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7122 
7123 		if (msgp->numents > RSM_MAX_POLLFDS) {
7124 			evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7125 			*eventspp = evlist;
7126 		} else {
7127 			evlist = *eventspp;
7128 		}
7129 		/*
7130 		 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7131 		 * array
7132 		 */
7133 		for (i = 0; i < msgp->numents; i++) {
7134 			evlist[i].rnum = evlist32[i].rnum;
7135 			evlist[i].fdsidx = evlist32[i].fdsidx;
7136 			evlist[i].revent = evlist32[i].revent;
7137 		}
7138 		/* free the temp 32-bit event list */
7139 		if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7140 			kmem_free(evlist32, evlistsz32);
7141 		}
7142 
7143 		return (RSM_SUCCESS);
7144 	}
7145 #endif
7146 	/* copyin the ioctl message */
7147 	if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7148 	    mode)) {
7149 		DBG_PRINTF((category, RSM_ERR,
7150 		    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7151 		return (RSMERR_BAD_ADDR);
7152 	}
7153 	/*
7154 	 * If numents is large alloc events list on heap otherwise
7155 	 * use the address of array that was passed in.
7156 	 */
7157 	if (msgp->numents > RSM_MAX_POLLFDS) {
7158 		if (msgp->numents > max_segs) { /* validate numents */
7159 			DBG_PRINTF((category, RSM_ERR,
7160 			    "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7161 			return (RSMERR_BAD_ARGS_ERRORS);
7162 		}
7163 		evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7164 		evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7165 		*eventspp  = evlist;
7166 	}
7167 
7168 	/* copyin the seglist */
7169 	if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7170 	    sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7171 		if (evlist) {
7172 			kmem_free(evlist, evlistsz);
7173 			*eventspp = NULL;
7174 		}
7175 		DBG_PRINTF((category, RSM_ERR,
7176 		    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7177 		return (RSMERR_BAD_ADDR);
7178 	}
7179 
7180 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7181 	    "consumeevent_copyin done\n"));
7182 	return (RSM_SUCCESS);
7183 }
7184 
7185 static int
7186 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7187     rsm_poll_event_t *eventsp, int mode)
7188 {
7189 	size_t			evlistsz;
7190 	int			err = RSM_SUCCESS;
7191 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7192 
7193 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7194 	    "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7195 	    msgp->numents, eventsp));
7196 
7197 #ifdef _MULTI_DATAMODEL
7198 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7199 		int i;
7200 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7201 		rsm_poll_event32_t	*evlist32;
7202 		size_t			evlistsz32;
7203 
7204 		evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7205 		if (msgp->numents > RSM_MAX_POLLFDS) {
7206 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7207 		} else {
7208 			evlist32 = event32;
7209 		}
7210 
7211 		/*
7212 		 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7213 		 * array
7214 		 */
7215 		for (i = 0; i < msgp->numents; i++) {
7216 			evlist32[i].rnum = eventsp[i].rnum;
7217 			evlist32[i].fdsidx = eventsp[i].fdsidx;
7218 			evlist32[i].revent = eventsp[i].revent;
7219 		}
7220 
7221 		if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7222 		    evlistsz32, mode)) {
7223 			err = RSMERR_BAD_ADDR;
7224 		}
7225 
7226 		if (msgp->numents > RSM_MAX_POLLFDS) {
7227 			if (evlist32) {	/* free the temp 32-bit event list */
7228 				kmem_free(evlist32, evlistsz32);
7229 			}
7230 			/*
7231 			 * eventsp and evlistsz are based on rsm_poll_event_t
7232 			 * type
7233 			 */
7234 			evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7235 			/* event list on the heap and needs to be freed here */
7236 			if (eventsp) {
7237 				kmem_free(eventsp, evlistsz);
7238 			}
7239 		}
7240 
7241 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7242 		    "consumeevent_copyout done: err=%d\n", err));
7243 		return (err);
7244 	}
7245 #endif
7246 	evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7247 
7248 	if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7249 	    mode)) {
7250 		err = RSMERR_BAD_ADDR;
7251 	}
7252 
7253 	if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7254 		/* event list on the heap and needs to be freed here */
7255 		kmem_free(eventsp, evlistsz);
7256 	}
7257 
7258 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7259 	    "consumeevent_copyout done: err=%d\n", err));
7260 	return (err);
7261 }
7262 
7263 static int
7264 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7265 {
7266 	int	rc;
7267 	int	i;
7268 	minor_t	rnum;
7269 	rsm_consume_event_msg_t	msg = {0};
7270 	rsmseg_t		*seg;
7271 	rsm_poll_event_t	*event_list;
7272 	rsm_poll_event_t	events[RSM_MAX_POLLFDS];
7273 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7274 
7275 	event_list = events;
7276 
7277 	if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7278 	    RSM_SUCCESS) {
7279 		return (rc);
7280 	}
7281 
7282 	for (i = 0; i < msg.numents; i++) {
7283 		rnum = event_list[i].rnum;
7284 		event_list[i].revent = 0;
7285 		/* get the segment structure */
7286 		seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7287 		if (seg) {
7288 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7289 			    "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7290 			    seg));
7291 			if (seg->s_pollevent) {
7292 				/* consume the event */
7293 				atomic_add_32(&seg->s_pollevent, -1);
7294 				event_list[i].revent = POLLRDNORM;
7295 			}
7296 			rsmseglock_release(seg);
7297 		}
7298 	}
7299 
7300 	if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7301 	    RSM_SUCCESS) {
7302 		return (rc);
7303 	}
7304 
7305 	return (RSM_SUCCESS);
7306 }
7307 
7308 static int
7309 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7310 {
7311 	int size;
7312 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7313 
7314 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7315 
7316 #ifdef _MULTI_DATAMODEL
7317 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7318 		rsmka_iovec32_t	*iovec32, *iovec32_base;
7319 		int i;
7320 
7321 		size = count * sizeof (rsmka_iovec32_t);
7322 		iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7323 		if (ddi_copyin((caddr_t)user_vec,
7324 		    (caddr_t)iovec32, size, mode)) {
7325 			kmem_free(iovec32, size);
7326 			DBG_PRINTF((category, RSM_DEBUG,
7327 			    "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7328 			return (RSMERR_BAD_ADDR);
7329 		}
7330 
7331 		for (i = 0; i < count; i++, iovec++, iovec32++) {
7332 			iovec->io_type = (int)iovec32->io_type;
7333 			if (iovec->io_type == RSM_HANDLE_TYPE)
7334 				iovec->local.segid = (rsm_memseg_id_t)
7335 				    iovec32->local;
7336 			else
7337 				iovec->local.vaddr =
7338 				    (caddr_t)(uintptr_t)iovec32->local;
7339 			iovec->local_offset = (size_t)iovec32->local_offset;
7340 			iovec->remote_offset = (size_t)iovec32->remote_offset;
7341 			iovec->transfer_len = (size_t)iovec32->transfer_len;
7342 
7343 		}
7344 		kmem_free(iovec32_base, size);
7345 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7346 		    "iovec_copyin done\n"));
7347 		return (DDI_SUCCESS);
7348 	}
7349 #endif
7350 
7351 	size = count * sizeof (rsmka_iovec_t);
7352 	if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7353 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7354 		    "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7355 		return (RSMERR_BAD_ADDR);
7356 	}
7357 
7358 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7359 
7360 	return (DDI_SUCCESS);
7361 }
7362 
7363 
7364 static int
7365 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7366 {
7367 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7368 
7369 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7370 
7371 #ifdef _MULTI_DATAMODEL
7372 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7373 		rsmka_scat_gath32_t sg_io32;
7374 
7375 		if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7376 		    mode)) {
7377 			DBG_PRINTF((category, RSM_DEBUG,
7378 			    "sgio_copyin done: returning EFAULT\n"));
7379 			return (RSMERR_BAD_ADDR);
7380 		}
7381 		sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7382 		sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7383 		sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7384 		sg_io->flags = (size_t)sg_io32.flags;
7385 		sg_io->remote_handle = (rsm_memseg_import_handle_t)
7386 		    (uintptr_t)sg_io32.remote_handle;
7387 		sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7388 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7389 		    "sgio_copyin done\n"));
7390 		return (DDI_SUCCESS);
7391 	}
7392 #endif
7393 	if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7394 	    mode)) {
7395 		DBG_PRINTF((category, RSM_DEBUG,
7396 		    "sgio_copyin done: returning EFAULT\n"));
7397 		return (RSMERR_BAD_ADDR);
7398 	}
7399 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7400 	return (DDI_SUCCESS);
7401 }
7402 
7403 static int
7404 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7405 {
7406 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7407 
7408 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7409 	    "sgio_resid_copyout enter\n"));
7410 
7411 #ifdef _MULTI_DATAMODEL
7412 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7413 		rsmka_scat_gath32_t sg_io32;
7414 
7415 		sg_io32.io_residual_count = sg_io->io_residual_count;
7416 		sg_io32.flags = sg_io->flags;
7417 
7418 		if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7419 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7420 		    sizeof (uint32_t), mode)) {
7421 
7422 			DBG_PRINTF((category, RSM_ERR,
7423 			    "sgio_resid_copyout error: rescnt\n"));
7424 			return (RSMERR_BAD_ADDR);
7425 		}
7426 
7427 		if (ddi_copyout((caddr_t)&sg_io32.flags,
7428 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7429 		    sizeof (uint32_t), mode)) {
7430 
7431 			DBG_PRINTF((category, RSM_ERR,
7432 			    "sgio_resid_copyout error: flags\n"));
7433 			return (RSMERR_BAD_ADDR);
7434 		}
7435 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7436 		    "sgio_resid_copyout done\n"));
7437 		return (DDI_SUCCESS);
7438 	}
7439 #endif
7440 	if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7441 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7442 	    sizeof (ulong_t), mode)) {
7443 
7444 		DBG_PRINTF((category, RSM_ERR,
7445 		    "sgio_resid_copyout error:rescnt\n"));
7446 		return (RSMERR_BAD_ADDR);
7447 	}
7448 
7449 	if (ddi_copyout((caddr_t)&sg_io->flags,
7450 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7451 	    sizeof (uint_t), mode)) {
7452 
7453 		DBG_PRINTF((category, RSM_ERR,
7454 		    "sgio_resid_copyout error:flags\n"));
7455 		return (RSMERR_BAD_ADDR);
7456 	}
7457 
7458 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7459 	return (DDI_SUCCESS);
7460 }
7461 
7462 
7463 static int
7464 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7465 {
7466 	rsmka_scat_gath_t	sg_io;
7467 	rsmka_iovec_t		ka_iovec_arr[RSM_MAX_IOVLEN];
7468 	rsmka_iovec_t		*ka_iovec;
7469 	rsmka_iovec_t		*ka_iovec_start;
7470 	rsmpi_scat_gath_t	rsmpi_sg_io;
7471 	rsmpi_iovec_t		iovec_arr[RSM_MAX_IOVLEN];
7472 	rsmpi_iovec_t		*iovec;
7473 	rsmpi_iovec_t		*iovec_start = NULL;
7474 	rsmapi_access_entry_t	*acl;
7475 	rsmresource_t		*res;
7476 	minor_t			rnum;
7477 	rsmseg_t		*im_seg, *ex_seg;
7478 	int			e;
7479 	int			error = 0;
7480 	uint_t			i;
7481 	uint_t			iov_proc = 0; /* num of iovecs processed */
7482 	size_t			size = 0;
7483 	size_t			ka_size;
7484 
7485 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7486 
7487 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7488 
7489 	credp = credp;
7490 
7491 	/*
7492 	 * Copyin the scatter/gather structure  and build new structure
7493 	 * for rsmpi.
7494 	 */
7495 	e = sgio_copyin(arg, &sg_io, mode);
7496 	if (e != DDI_SUCCESS) {
7497 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7498 		    "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7499 		return (e);
7500 	}
7501 
7502 	if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7503 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7504 		    "rsm_iovec_ioctl done: request_count(%d) too large\n",
7505 		    sg_io.io_request_count));
7506 		return (RSMERR_BAD_SGIO);
7507 	}
7508 
7509 	rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7510 	rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7511 	rsmpi_sg_io.io_segflg = 0;
7512 
7513 	/* Allocate memory and copyin io vector array  */
7514 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7515 		ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7516 		ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7517 	} else {
7518 		ka_iovec_start = ka_iovec = ka_iovec_arr;
7519 	}
7520 	e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7521 	    sg_io.io_request_count, mode);
7522 	if (e != DDI_SUCCESS) {
7523 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7524 			kmem_free(ka_iovec, ka_size);
7525 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7526 		    "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7527 		return (e);
7528 	}
7529 
7530 	/* get the import segment descriptor */
7531 	rnum = getminor(dev);
7532 	res = rsmresource_lookup(rnum, RSM_LOCK);
7533 
7534 	/*
7535 	 * The following sequence of locking may (or MAY NOT) cause a
7536 	 * deadlock but this is currently not addressed here since the
7537 	 * implementation will be changed to incorporate the use of
7538 	 * reference counting for both the import and the export segments.
7539 	 */
7540 
7541 	/* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7542 
7543 	im_seg = (rsmseg_t *)res;
7544 
7545 	if (im_seg == NULL) {
7546 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7547 			kmem_free(ka_iovec, ka_size);
7548 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7549 		    "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7550 		return (EINVAL);
7551 	}
7552 	/* putv/getv supported is supported only on import segments */
7553 	if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7554 		rsmseglock_release(im_seg);
7555 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7556 			kmem_free(ka_iovec, ka_size);
7557 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7558 		    "rsm_iovec_ioctl done: not an import segment\n"));
7559 		return (EINVAL);
7560 	}
7561 
7562 	/*
7563 	 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7564 	 * as well as wait for a local DR to complete.
7565 	 */
7566 	while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7567 	    (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7568 	    (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7569 		if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7570 			DBG_PRINTF((category, RSM_DEBUG,
7571 			    "rsm_iovec_ioctl done: cv_wait INTR"));
7572 			rsmseglock_release(im_seg);
7573 			return (RSMERR_INTERRUPTED);
7574 		}
7575 	}
7576 
7577 	if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7578 	    (im_seg->s_state != RSM_STATE_ACTIVE)) {
7579 
7580 		ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7581 		    im_seg->s_state == RSM_STATE_NEW);
7582 
7583 		DBG_PRINTF((category, RSM_DEBUG,
7584 		    "rsm_iovec_ioctl done: im_seg not conn/map"));
7585 		rsmseglock_release(im_seg);
7586 		e = RSMERR_BAD_SGIO;
7587 		goto out;
7588 	}
7589 
7590 	im_seg->s_rdmacnt++;
7591 	rsmseglock_release(im_seg);
7592 
7593 	/*
7594 	 * Allocate and set up the io vector for rsmpi
7595 	 */
7596 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7597 		size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7598 		iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7599 	} else {
7600 		iovec_start = iovec = iovec_arr;
7601 	}
7602 
7603 	rsmpi_sg_io.iovec = iovec;
7604 	for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7605 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7606 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7607 
7608 			if (ex_seg == NULL) {
7609 				e = RSMERR_BAD_SGIO;
7610 				break;
7611 			}
7612 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7613 
7614 			acl = ex_seg->s_acl;
7615 			if (acl[0].ae_permission == 0) {
7616 				struct buf *xbuf;
7617 				dev_t sdev = 0;
7618 
7619 				xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7620 				    0, ex_seg->s_len, B_WRITE,
7621 				    sdev, 0, NULL, DDI_UMEM_SLEEP);
7622 
7623 				ASSERT(xbuf != NULL);
7624 
7625 				iovec->local_mem.ms_type = RSM_MEM_BUF;
7626 				iovec->local_mem.ms_memory.bp = xbuf;
7627 			} else {
7628 				iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7629 				iovec->local_mem.ms_memory.handle =
7630 				    ex_seg->s_handle.out;
7631 			}
7632 			ex_seg->s_rdmacnt++; /* refcnt the handle */
7633 			rsmseglock_release(ex_seg);
7634 		} else {
7635 			iovec->local_mem.ms_type = RSM_MEM_VADDR;
7636 			iovec->local_mem.ms_memory.vr.vaddr =
7637 			    ka_iovec->local.vaddr;
7638 		}
7639 
7640 		iovec->local_offset = ka_iovec->local_offset;
7641 		iovec->remote_handle = im_seg->s_handle.in;
7642 		iovec->remote_offset = ka_iovec->remote_offset;
7643 		iovec->transfer_length = ka_iovec->transfer_len;
7644 		iovec++;
7645 		ka_iovec++;
7646 	}
7647 
7648 	if (iov_proc <  sg_io.io_request_count) {
7649 		/* error while processing handle */
7650 		rsmseglock_acquire(im_seg);
7651 		im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7652 		if (im_seg->s_rdmacnt == 0) {
7653 			cv_broadcast(&im_seg->s_cv);
7654 		}
7655 		rsmseglock_release(im_seg);
7656 		goto out;
7657 	}
7658 
7659 	/* call rsmpi */
7660 	if (cmd == RSM_IOCTL_PUTV)
7661 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7662 		    im_seg->s_adapter->rsmpi_handle,
7663 		    &rsmpi_sg_io);
7664 	else if (cmd == RSM_IOCTL_GETV)
7665 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7666 		    im_seg->s_adapter->rsmpi_handle,
7667 		    &rsmpi_sg_io);
7668 	else {
7669 		e = EINVAL;
7670 		DBG_PRINTF((category, RSM_DEBUG,
7671 		    "iovec_ioctl: bad command = %x\n", cmd));
7672 	}
7673 
7674 
7675 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7676 	    "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7677 
7678 	sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7679 
7680 	/*
7681 	 * Check for implicit signal post flag and do the signal
7682 	 * post if needed
7683 	 */
7684 	if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7685 	    e == RSM_SUCCESS) {
7686 		rsmipc_request_t request;
7687 
7688 		request.rsmipc_key = im_seg->s_segid;
7689 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7690 		request.rsmipc_segment_cookie = NULL;
7691 		e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7692 		/*
7693 		 * Reset the implicit signal post flag to 0 to indicate
7694 		 * that the signal post has been done and need not be
7695 		 * done in the RSMAPI library
7696 		 */
7697 		sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7698 	}
7699 
7700 	rsmseglock_acquire(im_seg);
7701 	im_seg->s_rdmacnt--;
7702 	if (im_seg->s_rdmacnt == 0) {
7703 		cv_broadcast(&im_seg->s_cv);
7704 	}
7705 	rsmseglock_release(im_seg);
7706 	error = sgio_resid_copyout(arg, &sg_io, mode);
7707 out:
7708 	iovec = iovec_start;
7709 	ka_iovec = ka_iovec_start;
7710 	for (i = 0; i < iov_proc; i++) {
7711 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7712 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7713 
7714 			ASSERT(ex_seg != NULL);
7715 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7716 
7717 			ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7718 			if (ex_seg->s_rdmacnt == 0) {
7719 				cv_broadcast(&ex_seg->s_cv);
7720 			}
7721 			rsmseglock_release(ex_seg);
7722 		}
7723 
7724 		ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7725 
7726 		/*
7727 		 * At present there is no dependency on the existence of xbufs
7728 		 * created by ddi_umem_iosetup for each of the iovecs. So we
7729 		 * can these xbufs here.
7730 		 */
7731 		if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7732 			freerbuf(iovec->local_mem.ms_memory.bp);
7733 		}
7734 
7735 		iovec++;
7736 		ka_iovec++;
7737 	}
7738 
7739 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7740 		if (iovec_start)
7741 			kmem_free(iovec_start, size);
7742 		kmem_free(ka_iovec_start, ka_size);
7743 	}
7744 
7745 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7746 	    "rsm_iovec_ioctl done %d\n", e));
7747 	/* if RSMPI call fails return that else return copyout's retval */
7748 	return ((e != RSM_SUCCESS) ? e : error);
7749 
7750 }
7751 
7752 
7753 static int
7754 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7755 {
7756 	adapter_t	*adapter;
7757 	rsm_addr_t	addr;
7758 	rsm_node_id_t	node;
7759 	int		rval = DDI_SUCCESS;
7760 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7761 
7762 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7763 
7764 	adapter =  rsm_getadapter(msg, mode);
7765 	if (adapter == NULL) {
7766 		DBG_PRINTF((category, RSM_DEBUG,
7767 		    "rsmaddr_ioctl done: adapter not found\n"));
7768 		return (RSMERR_CTLR_NOT_PRESENT);
7769 	}
7770 
7771 	switch (cmd) {
7772 	case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7773 		/* returns the hwaddr in msg->hwaddr */
7774 		if (msg->nodeid == my_nodeid) {
7775 			msg->hwaddr = adapter->hwaddr;
7776 		} else {
7777 			addr = get_remote_hwaddr(adapter, msg->nodeid);
7778 			if ((int64_t)addr < 0) {
7779 				rval = RSMERR_INTERNAL_ERROR;
7780 			} else {
7781 				msg->hwaddr = addr;
7782 			}
7783 		}
7784 		break;
7785 	case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7786 		/* returns the nodeid in msg->nodeid */
7787 		if (msg->hwaddr == adapter->hwaddr) {
7788 			msg->nodeid = my_nodeid;
7789 		} else {
7790 			node = get_remote_nodeid(adapter, msg->hwaddr);
7791 			if ((int)node < 0) {
7792 				rval = RSMERR_INTERNAL_ERROR;
7793 			} else {
7794 				msg->nodeid = (rsm_node_id_t)node;
7795 			}
7796 		}
7797 		break;
7798 	default:
7799 		rval = EINVAL;
7800 		break;
7801 	}
7802 
7803 	rsmka_release_adapter(adapter);
7804 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7805 	    "rsmaddr_ioctl done: %d\n", rval));
7806 	return (rval);
7807 }
7808 
7809 static int
7810 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7811 {
7812 	DBG_DEFINE(category,
7813 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7814 
7815 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7816 
7817 #ifdef _MULTI_DATAMODEL
7818 
7819 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7820 		rsm_ioctlmsg32_t msg32;
7821 		int i;
7822 
7823 		if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7824 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7825 			    "rsm_ddi_copyin done: EFAULT\n"));
7826 			return (RSMERR_BAD_ADDR);
7827 		}
7828 		msg->len = msg32.len;
7829 		msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7830 		msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7831 		msg->key = msg32.key;
7832 		msg->acl_len = msg32.acl_len;
7833 		msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7834 		msg->cnum = msg32.cnum;
7835 		msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7836 		msg->cname_len = msg32.cname_len;
7837 		msg->nodeid = msg32.nodeid;
7838 		msg->hwaddr = msg32.hwaddr;
7839 		msg->perm = msg32.perm;
7840 		for (i = 0; i < 4; i++) {
7841 			msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7842 		}
7843 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7844 		    "rsm_ddi_copyin done\n"));
7845 		return (RSM_SUCCESS);
7846 	}
7847 #endif
7848 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7849 	if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7850 		return (RSMERR_BAD_ADDR);
7851 	else
7852 		return (RSM_SUCCESS);
7853 }
7854 
7855 static int
7856 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7857 {
7858 	rsmka_int_controller_attr_t	rsm_cattr;
7859 	DBG_DEFINE(category,
7860 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7861 
7862 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7863 	    "rsmattr_ddi_copyout enter\n"));
7864 	/*
7865 	 * need to copy appropriate data from rsm_controller_attr_t
7866 	 * to rsmka_int_controller_attr_t
7867 	 */
7868 #ifdef	_MULTI_DATAMODEL
7869 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7870 		rsmka_int_controller_attr32_t rsm_cattr32;
7871 
7872 		rsm_cattr32.attr_direct_access_sizes =
7873 		    adapter->rsm_attr.attr_direct_access_sizes;
7874 		rsm_cattr32.attr_atomic_sizes =
7875 		    adapter->rsm_attr.attr_atomic_sizes;
7876 		rsm_cattr32.attr_page_size =
7877 		    adapter->rsm_attr.attr_page_size;
7878 		if (adapter->rsm_attr.attr_max_export_segment_size >
7879 		    UINT_MAX)
7880 			rsm_cattr32.attr_max_export_segment_size =
7881 			    RSM_MAXSZ_PAGE_ALIGNED;
7882 		else
7883 			rsm_cattr32.attr_max_export_segment_size =
7884 			    adapter->rsm_attr.attr_max_export_segment_size;
7885 		if (adapter->rsm_attr.attr_tot_export_segment_size >
7886 		    UINT_MAX)
7887 			rsm_cattr32.attr_tot_export_segment_size =
7888 			    RSM_MAXSZ_PAGE_ALIGNED;
7889 		else
7890 			rsm_cattr32.attr_tot_export_segment_size =
7891 			    adapter->rsm_attr.attr_tot_export_segment_size;
7892 		if (adapter->rsm_attr.attr_max_export_segments >
7893 		    UINT_MAX)
7894 			rsm_cattr32.attr_max_export_segments =
7895 			    UINT_MAX;
7896 		else
7897 			rsm_cattr32.attr_max_export_segments =
7898 			    adapter->rsm_attr.attr_max_export_segments;
7899 		if (adapter->rsm_attr.attr_max_import_map_size >
7900 		    UINT_MAX)
7901 			rsm_cattr32.attr_max_import_map_size =
7902 			    RSM_MAXSZ_PAGE_ALIGNED;
7903 		else
7904 			rsm_cattr32.attr_max_import_map_size =
7905 			    adapter->rsm_attr.attr_max_import_map_size;
7906 		if (adapter->rsm_attr.attr_tot_import_map_size >
7907 		    UINT_MAX)
7908 			rsm_cattr32.attr_tot_import_map_size =
7909 			    RSM_MAXSZ_PAGE_ALIGNED;
7910 		else
7911 			rsm_cattr32.attr_tot_import_map_size =
7912 			    adapter->rsm_attr.attr_tot_import_map_size;
7913 		if (adapter->rsm_attr.attr_max_import_segments >
7914 		    UINT_MAX)
7915 			rsm_cattr32.attr_max_import_segments =
7916 			    UINT_MAX;
7917 		else
7918 			rsm_cattr32.attr_max_import_segments =
7919 			    adapter->rsm_attr.attr_max_import_segments;
7920 		rsm_cattr32.attr_controller_addr =
7921 		    adapter->rsm_attr.attr_controller_addr;
7922 
7923 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7924 		    "rsmattr_ddi_copyout done\n"));
7925 		if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7926 		    sizeof (rsmka_int_controller_attr32_t), mode)) {
7927 			return (RSMERR_BAD_ADDR);
7928 		}
7929 		else
7930 			return (RSM_SUCCESS);
7931 	}
7932 #endif
7933 	rsm_cattr.attr_direct_access_sizes =
7934 	    adapter->rsm_attr.attr_direct_access_sizes;
7935 	rsm_cattr.attr_atomic_sizes =
7936 	    adapter->rsm_attr.attr_atomic_sizes;
7937 	rsm_cattr.attr_page_size =
7938 	    adapter->rsm_attr.attr_page_size;
7939 	rsm_cattr.attr_max_export_segment_size =
7940 	    adapter->rsm_attr.attr_max_export_segment_size;
7941 	rsm_cattr.attr_tot_export_segment_size =
7942 	    adapter->rsm_attr.attr_tot_export_segment_size;
7943 	rsm_cattr.attr_max_export_segments =
7944 	    adapter->rsm_attr.attr_max_export_segments;
7945 	rsm_cattr.attr_max_import_map_size =
7946 	    adapter->rsm_attr.attr_max_import_map_size;
7947 	rsm_cattr.attr_tot_import_map_size =
7948 	    adapter->rsm_attr.attr_tot_import_map_size;
7949 	rsm_cattr.attr_max_import_segments =
7950 	    adapter->rsm_attr.attr_max_import_segments;
7951 	rsm_cattr.attr_controller_addr =
7952 	    adapter->rsm_attr.attr_controller_addr;
7953 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7954 	    "rsmattr_ddi_copyout done\n"));
7955 	if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7956 	    sizeof (rsmka_int_controller_attr_t), mode)) {
7957 		return (RSMERR_BAD_ADDR);
7958 	}
7959 	else
7960 		return (RSM_SUCCESS);
7961 }
7962 
7963 /*ARGSUSED*/
7964 static int
7965 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7966     int *rvalp)
7967 {
7968 	rsmseg_t *seg;
7969 	rsmresource_t	*res;
7970 	minor_t		rnum;
7971 	rsm_ioctlmsg_t msg = {0};
7972 	int error;
7973 	adapter_t *adapter;
7974 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7975 
7976 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7977 
7978 	if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7979 		error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7980 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7981 		    "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7982 		return (error);
7983 	}
7984 
7985 	/* topology cmd does not use the arg common to other cmds */
7986 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7987 		error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7988 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7989 		    "rsm_ioctl done: %d\n", error));
7990 		return (error);
7991 	}
7992 
7993 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7994 		error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7995 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7996 		    "rsm_ioctl done: %d\n", error));
7997 		return (error);
7998 	}
7999 
8000 	/*
8001 	 * try to load arguments
8002 	 */
8003 	if (cmd != RSM_IOCTL_RING_BELL &&
8004 	    rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
8005 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8006 		    "rsm_ioctl done: EFAULT\n"));
8007 		return (RSMERR_BAD_ADDR);
8008 	}
8009 
8010 	if (cmd == RSM_IOCTL_ATTR) {
8011 		adapter =  rsm_getadapter(&msg, mode);
8012 		if (adapter == NULL) {
8013 			DBG_PRINTF((category, RSM_DEBUG,
8014 			    "rsm_ioctl done: ENODEV\n"));
8015 			return (RSMERR_CTLR_NOT_PRESENT);
8016 		}
8017 		error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8018 		rsmka_release_adapter(adapter);
8019 		DBG_PRINTF((category, RSM_DEBUG,
8020 		    "rsm_ioctl:after copyout %d\n", error));
8021 		return (error);
8022 	}
8023 
8024 	if (cmd == RSM_IOCTL_BAR_INFO) {
8025 		/* Return library off,len of barrier page */
8026 		msg.off = barrier_offset;
8027 		msg.len = (int)barrier_size;
8028 #ifdef _MULTI_DATAMODEL
8029 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8030 			rsm_ioctlmsg32_t msg32;
8031 
8032 			if (msg.len > UINT_MAX)
8033 				msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8034 			else
8035 				msg32.len = (int32_t)msg.len;
8036 			msg32.off = (int32_t)msg.off;
8037 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8038 			    "rsm_ioctl done\n"));
8039 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8040 			    sizeof (msg32), mode))
8041 				return (RSMERR_BAD_ADDR);
8042 			else
8043 				return (RSM_SUCCESS);
8044 		}
8045 #endif
8046 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8047 		    "rsm_ioctl done\n"));
8048 		if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8049 		    sizeof (msg), mode))
8050 			return (RSMERR_BAD_ADDR);
8051 		else
8052 			return (RSM_SUCCESS);
8053 	}
8054 
8055 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8056 		/* map the nodeid or hwaddr */
8057 		error = rsmaddr_ioctl(cmd, &msg, mode);
8058 		if (error == RSM_SUCCESS) {
8059 #ifdef _MULTI_DATAMODEL
8060 			if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8061 				rsm_ioctlmsg32_t msg32;
8062 
8063 				msg32.hwaddr = (uint64_t)msg.hwaddr;
8064 				msg32.nodeid = (uint32_t)msg.nodeid;
8065 
8066 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8067 				    "rsm_ioctl done\n"));
8068 				if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8069 				    sizeof (msg32), mode))
8070 					return (RSMERR_BAD_ADDR);
8071 				else
8072 					return (RSM_SUCCESS);
8073 			}
8074 #endif
8075 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8076 			    "rsm_ioctl done\n"));
8077 			if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8078 			    sizeof (msg), mode))
8079 				return (RSMERR_BAD_ADDR);
8080 			else
8081 				return (RSM_SUCCESS);
8082 		}
8083 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8084 		    "rsm_ioctl done: %d\n", error));
8085 		return (error);
8086 	}
8087 
8088 	/* Find resource and look it in read mode */
8089 	rnum = getminor(dev);
8090 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
8091 	ASSERT(res != NULL);
8092 
8093 	/*
8094 	 * Find command group
8095 	 */
8096 	switch (RSM_IOCTL_CMDGRP(cmd)) {
8097 	case RSM_IOCTL_EXPORT_SEG:
8098 		/*
8099 		 * Export list is searched during publish, loopback and
8100 		 * remote lookup call.
8101 		 */
8102 		seg = rsmresource_seg(res, rnum, credp,
8103 		    RSM_RESOURCE_EXPORT_SEGMENT);
8104 		if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8105 			error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8106 			    credp);
8107 		} else { /* export ioctl on an import/barrier resource */
8108 			error = RSMERR_BAD_SEG_HNDL;
8109 		}
8110 		break;
8111 	case RSM_IOCTL_IMPORT_SEG:
8112 		/* Import list is searched during remote unmap call. */
8113 		seg = rsmresource_seg(res, rnum, credp,
8114 		    RSM_RESOURCE_IMPORT_SEGMENT);
8115 		if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8116 			error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8117 			    credp);
8118 		} else  { /* import ioctl on an export/barrier resource */
8119 			error = RSMERR_BAD_SEG_HNDL;
8120 		}
8121 		break;
8122 	case RSM_IOCTL_BAR:
8123 		if (res != RSMRC_RESERVED &&
8124 		    res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8125 			error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8126 			    mode);
8127 		} else { /* invalid res value */
8128 			error = RSMERR_BAD_SEG_HNDL;
8129 		}
8130 		break;
8131 	case RSM_IOCTL_BELL:
8132 		if (res != RSMRC_RESERVED) {
8133 			if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8134 				error = exportbell_ioctl((rsmseg_t *)res, cmd);
8135 			else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8136 				error = importbell_ioctl((rsmseg_t *)res, cmd);
8137 			else /* RSM_RESOURCE_BAR */
8138 				error = RSMERR_BAD_SEG_HNDL;
8139 		} else { /* invalid res value */
8140 			error = RSMERR_BAD_SEG_HNDL;
8141 		}
8142 		break;
8143 	default:
8144 		error = EINVAL;
8145 	}
8146 
8147 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8148 	    error));
8149 	return (error);
8150 }
8151 
8152 
8153 /* **************************** Segment Mapping Operations ********* */
8154 static rsm_mapinfo_t *
8155 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8156     size_t *map_len)
8157 {
8158 	rsm_mapinfo_t	*p;
8159 	/*
8160 	 * Find the correct mapinfo structure to use during the mapping
8161 	 * from the seg->s_mapinfo list.
8162 	 * The seg->s_mapinfo list contains in reverse order the mappings
8163 	 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8164 	 * access the correct entry within this list for the mapping
8165 	 * requested.
8166 	 *
8167 	 * The algorithm for selecting a list entry is as follows:
8168 	 *
8169 	 * When start_offset of an entry <= off we have found the entry
8170 	 * we were looking for. Adjust the dev_offset and map_len (needs
8171 	 * to be PAGESIZE aligned).
8172 	 */
8173 	p = seg->s_mapinfo;
8174 	for (; p; p = p->next) {
8175 		if (p->start_offset <= off) {
8176 			*dev_offset = p->dev_offset + off - p->start_offset;
8177 			*map_len = (len > p->individual_len) ?
8178 			    p->individual_len : ptob(btopr(len));
8179 			return (p);
8180 		}
8181 		p = p->next;
8182 	}
8183 
8184 	return (NULL);
8185 }
8186 
8187 static void
8188 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8189 {
8190 	rsm_mapinfo_t *p;
8191 
8192 	while (mapinfo != NULL) {
8193 		p = mapinfo;
8194 		mapinfo = mapinfo->next;
8195 		kmem_free(p, sizeof (*p));
8196 	}
8197 }
8198 
8199 static int
8200 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8201     size_t len, void **pvtp)
8202 {
8203 	rsmcookie_t	*p;
8204 	rsmresource_t	*res;
8205 	rsmseg_t	*seg;
8206 	minor_t rnum;
8207 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8208 
8209 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8210 
8211 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8212 	    "rsmmap_map: dhp = %x\n", dhp));
8213 
8214 	flags = flags;
8215 
8216 	rnum = getminor(dev);
8217 	res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8218 	ASSERT(res != NULL);
8219 
8220 	seg = (rsmseg_t *)res;
8221 
8222 	rsmseglock_acquire(seg);
8223 
8224 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8225 
8226 	/*
8227 	 * Allocate structure and add cookie to segment list
8228 	 */
8229 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8230 
8231 	p->c_dhp = dhp;
8232 	p->c_off = off;
8233 	p->c_len = len;
8234 	p->c_next = seg->s_ckl;
8235 	seg->s_ckl = p;
8236 
8237 	*pvtp = (void *)seg;
8238 
8239 	rsmseglock_release(seg);
8240 
8241 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8242 	return (DDI_SUCCESS);
8243 }
8244 
8245 /*
8246  * Page fault handling is done here. The prerequisite mapping setup
8247  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8248  * ddi_umem_setup
8249  */
8250 static int
8251 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8252     uint_t type, uint_t rw)
8253 {
8254 	int e;
8255 	rsmseg_t *seg = (rsmseg_t *)pvt;
8256 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8257 
8258 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8259 
8260 	rsmseglock_acquire(seg);
8261 
8262 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8263 
8264 	while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8265 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8266 			DBG_PRINTF((category, RSM_DEBUG,
8267 			    "rsmmap_access done: cv_wait INTR"));
8268 			rsmseglock_release(seg);
8269 			return (RSMERR_INTERRUPTED);
8270 		}
8271 	}
8272 
8273 	ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8274 	    seg->s_state == RSM_STATE_ACTIVE);
8275 
8276 	if (seg->s_state == RSM_STATE_DISCONNECT)
8277 		seg->s_flags |= RSM_IMPORT_DUMMY;
8278 
8279 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8280 	    "rsmmap_access: dhp = %x\n", dhp));
8281 
8282 	rsmseglock_release(seg);
8283 
8284 	if (e = devmap_load(dhp, offset, len, type, rw)) {
8285 		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8286 	}
8287 
8288 
8289 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8290 
8291 	return (e);
8292 }
8293 
8294 static int
8295 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8296 	void **newpvt)
8297 {
8298 	rsmseg_t	*seg = (rsmseg_t *)oldpvt;
8299 	rsmcookie_t	*p, *old;
8300 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8301 
8302 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8303 
8304 	/*
8305 	 * Same as map, create an entry to hold cookie and add it to
8306 	 * connect segment list. The oldpvt is a pointer to segment.
8307 	 * Return segment pointer in newpvt.
8308 	 */
8309 	rsmseglock_acquire(seg);
8310 
8311 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8312 
8313 	/*
8314 	 * Find old cookie
8315 	 */
8316 	for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8317 		if (old->c_dhp == dhp) {
8318 			break;
8319 		}
8320 	}
8321 	if (old == NULL) {
8322 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8323 		    "rsmmap_dup done: EINVAL\n"));
8324 		rsmseglock_release(seg);
8325 		return (EINVAL);
8326 	}
8327 
8328 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8329 
8330 	p->c_dhp = new_dhp;
8331 	p->c_off = old->c_off;
8332 	p->c_len = old->c_len;
8333 	p->c_next = seg->s_ckl;
8334 	seg->s_ckl = p;
8335 
8336 	*newpvt = (void *)seg;
8337 
8338 	rsmseglock_release(seg);
8339 
8340 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8341 
8342 	return (DDI_SUCCESS);
8343 }
8344 
8345 static void
8346 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8347 	devmap_cookie_t new_dhp1, void **pvtp1,
8348 	devmap_cookie_t new_dhp2, void **pvtp2)
8349 {
8350 	/*
8351 	 * Remove pvtp structure from segment list.
8352 	 */
8353 	rsmseg_t	*seg = (rsmseg_t *)pvtp;
8354 	int freeflag;
8355 
8356 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8357 
8358 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8359 
8360 	off = off; len = len;
8361 	pvtp1 = pvtp1; pvtp2 = pvtp2;
8362 
8363 	rsmseglock_acquire(seg);
8364 
8365 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8366 
8367 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8368 	    "rsmmap_unmap: dhp = %x\n", dhp));
8369 	/*
8370 	 * We can go ahead and remove the dhps even if we are in
8371 	 * the MAPPING state because the dhps being removed here
8372 	 * belong to a different mmap and we are holding the segment
8373 	 * lock.
8374 	 */
8375 	if (new_dhp1 == NULL && new_dhp2 == NULL) {
8376 		/* find and remove dhp handle */
8377 		rsmcookie_t *tmp, **back = &seg->s_ckl;
8378 
8379 		while (*back != NULL) {
8380 			tmp = *back;
8381 			if (tmp->c_dhp == dhp) {
8382 				*back = tmp->c_next;
8383 				kmem_free(tmp, sizeof (*tmp));
8384 				break;
8385 			}
8386 			back = &tmp->c_next;
8387 		}
8388 	} else {
8389 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8390 		    "rsmmap_unmap:parital unmap"
8391 		    "new_dhp1 %lx, new_dhp2 %lx\n",
8392 		    (size_t)new_dhp1, (size_t)new_dhp2));
8393 	}
8394 
8395 	/*
8396 	 * rsmmap_unmap is called for each mapping cookie on the list.
8397 	 * When the list becomes empty and we are not in the MAPPING
8398 	 * state then unmap in the rsmpi driver.
8399 	 */
8400 	if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8401 		(void) rsm_unmap(seg);
8402 
8403 	if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8404 		freeflag = 1;
8405 	} else {
8406 		freeflag = 0;
8407 	}
8408 
8409 	rsmseglock_release(seg);
8410 
8411 	if (freeflag) {
8412 		/* Free the segment structure */
8413 		rsmseg_free(seg);
8414 	}
8415 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8416 
8417 }
8418 
/*
 * devmap callback vector of the kernel agent. rsm_devmap passes it to
 * devmap_devmem_setup/devmap_umem_setup when it sets up the mapping of
 * an import segment; the barrier page mapping is set up without
 * callbacks. The initializers are positional.
 */
static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* map routine: records a cookie for the new mapping */
	rsmmap_access,	/* access routine: handles faults on the mapping */
	rsmmap_dup,		/* dup routine: records a cookie for the copy */
	rsmmap_unmap,	/* unmap routine: drops the cookie of the mapping */
};
8426 
8427 static int
8428 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8429     size_t *maplen, uint_t model /*ARGSUSED*/)
8430 {
8431 	struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8432 	int		err;
8433 	uint_t		maxprot;
8434 	minor_t		rnum;
8435 	rsmseg_t	*seg;
8436 	off_t		dev_offset;
8437 	size_t		cur_len;
8438 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8439 
8440 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8441 
8442 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8443 	    "rsm_devmap: off = %lx, len = %lx\n", off, len));
8444 	rnum = getminor(dev);
8445 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8446 	ASSERT(seg != NULL);
8447 
8448 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8449 		if ((off == barrier_offset) &&
8450 		    (len == barrier_size)) {
8451 
8452 			ASSERT(bar_va != NULL && bar_cookie != NULL);
8453 
8454 			/*
8455 			 * The offset argument in devmap_umem_setup represents
8456 			 * the offset within the kernel memory defined by the
8457 			 * cookie. We use this offset as barrier_offset.
8458 			 */
8459 			err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8460 			    barrier_offset, len, PROT_USER|PROT_READ,
8461 			    DEVMAP_DEFAULTS, 0);
8462 
8463 			if (err != 0) {
8464 				DBG_PRINTF((category, RSM_ERR,
8465 				    "rsm_devmap done: %d\n", err));
8466 				return (RSMERR_MAP_FAILED);
8467 			}
8468 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8469 			    "rsm_devmap done: %d\n", err));
8470 
8471 			*maplen = barrier_size;
8472 
8473 			return (err);
8474 		} else {
8475 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8476 			    "rsm_devmap done: %d\n", err));
8477 			return (RSMERR_MAP_FAILED);
8478 		}
8479 	}
8480 
8481 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8482 	ASSERT(seg->s_state == RSM_STATE_MAPPING);
8483 
8484 	/*
8485 	 * Make sure we still have permission for the map operation.
8486 	 */
8487 	maxprot = PROT_USER;
8488 	if (seg->s_mode & RSM_PERM_READ) {
8489 		maxprot |= PROT_READ;
8490 	}
8491 
8492 	if (seg->s_mode & RSM_PERM_WRITE) {
8493 		maxprot |= PROT_WRITE;
8494 	}
8495 
8496 	/*
8497 	 * For each devmap call, rsmmap_map is called. This maintains driver
8498 	 * private information for the mapping. Thus, if there are multiple
8499 	 * devmap calls there will be multiple rsmmap_map calls and for each
8500 	 * call, the mapping information will be stored.
8501 	 * In case of an error during the processing of the devmap call, error
8502 	 * will be returned. This error return causes the caller of rsm_devmap
8503 	 * to undo all the mappings by calling rsmmap_unmap for each one.
8504 	 * rsmmap_unmap will free up the private information for the requested
8505 	 * mapping.
8506 	 */
8507 	if (seg->s_node != my_nodeid) {
8508 		rsm_mapinfo_t *p;
8509 
8510 		p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8511 		if (p == NULL) {
8512 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8513 			    "rsm_devmap: incorrect mapping info\n"));
8514 			return (RSMERR_MAP_FAILED);
8515 		}
8516 		err = devmap_devmem_setup(dhc, p->dip,
8517 		    callbackops, p->dev_register,
8518 		    dev_offset, cur_len, maxprot,
8519 		    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8520 
8521 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8522 		    "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8523 		    "off=%lx,len=%lx\n",
8524 		    p->dip, p->dev_register, dev_offset, off, cur_len));
8525 
8526 		if (err != 0) {
8527 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8528 			    "rsm_devmap: devmap_devmem_setup failed %d\n",
8529 			    err));
8530 			return (RSMERR_MAP_FAILED);
8531 		}
8532 		/* cur_len is always an integral multiple pagesize */
8533 		ASSERT((cur_len & (PAGESIZE-1)) == 0);
8534 		*maplen = cur_len;
8535 		return (err);
8536 
8537 	} else {
8538 		err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8539 		    seg->s_cookie, off, len, maxprot,
8540 		    DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8541 		if (err != 0) {
8542 			DBG_PRINTF((category, RSM_DEBUG,
8543 			    "rsm_devmap: devmap_umem_setup failed %d\n",
8544 			    err));
8545 			return (RSMERR_MAP_FAILED);
8546 		}
8547 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8548 		    "rsm_devmap: loopback done\n"));
8549 
8550 		*maplen = ptob(btopr(len));
8551 
8552 		return (err);
8553 	}
8554 }
8555 
8556 /*
8557  * We can use the devmap framework for mapping device memory to user space by
8558  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8559  * processing calls this entry point and devmap_setup is called within this
8560  * function, which eventually calls rsm_devmap
8561  */
8562 static int
8563 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8564     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8565 {
8566 	int			error = 0;
8567 	int			old_state;
8568 	minor_t			rnum;
8569 	rsmseg_t		*seg, *eseg;
8570 	adapter_t		*adapter;
8571 	rsm_import_share_t	*sharedp;
8572 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8573 
8574 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8575 
8576 	/*
8577 	 * find segment
8578 	 */
8579 	rnum = getminor(dev);
8580 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8581 
8582 	if (seg == NULL) {
8583 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8584 		    "rsm_segmap done: invalid segment\n"));
8585 		return (EINVAL);
8586 	}
8587 
8588 	/*
8589 	 * the user is trying to map a resource that has not been
8590 	 * defined yet. The library uses this to map in the
8591 	 * barrier page.
8592 	 */
8593 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8594 		rsmseglock_release(seg);
8595 
8596 		/*
8597 		 * The mapping for the barrier page is identified
8598 		 * by the special offset barrier_offset
8599 		 */
8600 
8601 		if (off == (off_t)barrier_offset ||
8602 		    len == (off_t)barrier_size) {
8603 			if (bar_cookie == NULL || bar_va == NULL) {
8604 				DBG_PRINTF((category, RSM_DEBUG,
8605 				    "rsm_segmap: bar cookie/va is NULL\n"));
8606 				return (EINVAL);
8607 			}
8608 
8609 			error = devmap_setup(dev, (offset_t)off, as, addrp,
8610 			    (size_t)len, prot, maxprot, flags,  cred);
8611 
8612 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8613 			    "rsm_segmap done: %d\n", error));
8614 			return (error);
8615 		} else {
8616 			DBG_PRINTF((category, RSM_DEBUG,
8617 			    "rsm_segmap: bad offset/length\n"));
8618 			return (EINVAL);
8619 		}
8620 	}
8621 
8622 	/* Make sure you can only map imported segments */
8623 	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8624 		rsmseglock_release(seg);
8625 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8626 		    "rsm_segmap done: not an import segment\n"));
8627 		return (EINVAL);
8628 	}
8629 	/* check means library is broken */
8630 	ASSERT(seg->s_hdr.rsmrc_num == rnum);
8631 
8632 	/* wait for the segment to become unquiesced */
8633 	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8634 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8635 			rsmseglock_release(seg);
8636 			DBG_PRINTF((category, RSM_DEBUG,
8637 			    "rsm_segmap done: cv_wait INTR"));
8638 			return (ENODEV);
8639 		}
8640 	}
8641 
8642 	/* wait until segment leaves the mapping state */
8643 	while (seg->s_state == RSM_STATE_MAPPING)
8644 		cv_wait(&seg->s_cv, &seg->s_lock);
8645 
8646 	/*
8647 	 * we allow multiple maps of the same segment in the KA
8648 	 * and it works because we do an rsmpi map of the whole
8649 	 * segment during the first map and all the device mapping
8650 	 * information needed in rsm_devmap is in the mapinfo list.
8651 	 */
8652 	if ((seg->s_state != RSM_STATE_CONNECT) &&
8653 	    (seg->s_state != RSM_STATE_ACTIVE)) {
8654 		rsmseglock_release(seg);
8655 		DBG_PRINTF((category, RSM_DEBUG,
8656 		    "rsm_segmap done: segment not connected\n"));
8657 		return (ENODEV);
8658 	}
8659 
8660 	/*
8661 	 * Make sure we are not mapping a larger segment than what's
8662 	 * exported
8663 	 */
8664 	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8665 		rsmseglock_release(seg);
8666 		DBG_PRINTF((category, RSM_DEBUG,
8667 		    "rsm_segmap done: off+len>seg size\n"));
8668 		return (ENXIO);
8669 	}
8670 
8671 	/*
8672 	 * Make sure we still have permission for the map operation.
8673 	 */
8674 	maxprot = PROT_USER;
8675 	if (seg->s_mode & RSM_PERM_READ) {
8676 		maxprot |= PROT_READ;
8677 	}
8678 
8679 	if (seg->s_mode & RSM_PERM_WRITE) {
8680 		maxprot |= PROT_WRITE;
8681 	}
8682 
8683 	if ((prot & maxprot) != prot) {
8684 		/* No permission */
8685 		rsmseglock_release(seg);
8686 		DBG_PRINTF((category, RSM_DEBUG,
8687 		    "rsm_segmap done: no permission\n"));
8688 		return (EACCES);
8689 	}
8690 
8691 	old_state = seg->s_state;
8692 
8693 	ASSERT(seg->s_share != NULL);
8694 
8695 	rsmsharelock_acquire(seg);
8696 
8697 	sharedp = seg->s_share;
8698 
8699 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8700 	    "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8701 
8702 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8703 	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8704 		rsmsharelock_release(seg);
8705 		rsmseglock_release(seg);
8706 		DBG_PRINTF((category, RSM_DEBUG,
8707 		    "rsm_segmap done:RSMSI_STATE %d invalid\n",
8708 		    sharedp->rsmsi_state));
8709 		return (ENODEV);
8710 	}
8711 
8712 	/*
8713 	 * Do the map - since we want importers to share mappings
8714 	 * we do the rsmpi map for the whole segment
8715 	 */
8716 	if (seg->s_node != my_nodeid) {
8717 		uint_t dev_register;
8718 		off_t dev_offset;
8719 		dev_info_t *dip;
8720 		size_t tmp_len;
8721 		size_t total_length_mapped = 0;
8722 		size_t length_to_map = seg->s_len;
8723 		off_t tmp_off = 0;
8724 		rsm_mapinfo_t *p;
8725 
8726 		/*
8727 		 * length_to_map = seg->s_len is always an integral
8728 		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8729 		 * list is a multiple of PAGESIZE - RSMPI map ensures this
8730 		 */
8731 
8732 		adapter = seg->s_adapter;
8733 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8734 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8735 
8736 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8737 			error = 0;
8738 			/* map the whole segment */
8739 			while (total_length_mapped < seg->s_len) {
8740 				tmp_len = 0;
8741 
8742 				error = adapter->rsmpi_ops->rsm_map(
8743 				    seg->s_handle.in, tmp_off,
8744 				    length_to_map, &tmp_len,
8745 				    &dip, &dev_register, &dev_offset,
8746 				    NULL, NULL);
8747 
8748 				if (error != 0)
8749 					break;
8750 
8751 				/*
8752 				 * Store the mapping info obtained from rsm_map
8753 				 */
8754 				p = kmem_alloc(sizeof (*p), KM_SLEEP);
8755 				p->dev_register = dev_register;
8756 				p->dev_offset = dev_offset;
8757 				p->dip = dip;
8758 				p->individual_len = tmp_len;
8759 				p->start_offset = tmp_off;
8760 				p->next = sharedp->rsmsi_mapinfo;
8761 				sharedp->rsmsi_mapinfo = p;
8762 
8763 				total_length_mapped += tmp_len;
8764 				length_to_map -= tmp_len;
8765 				tmp_off += tmp_len;
8766 			}
8767 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8768 
8769 			if (error != RSM_SUCCESS) {
8770 				/* Check if this is the the first rsm_map */
8771 				if (sharedp->rsmsi_mapinfo != NULL) {
8772 					/*
8773 					 * A single rsm_unmap undoes
8774 					 * multiple rsm_maps.
8775 					 */
8776 					(void) seg->s_adapter->rsmpi_ops->
8777 					    rsm_unmap(sharedp->rsmsi_handle);
8778 					rsm_free_mapinfo(sharedp->
8779 					    rsmsi_mapinfo);
8780 				}
8781 				sharedp->rsmsi_mapinfo = NULL;
8782 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8783 				rsmsharelock_release(seg);
8784 				rsmseglock_release(seg);
8785 				DBG_PRINTF((category, RSM_DEBUG,
8786 				    "rsm_segmap done: rsmpi map err %d\n",
8787 				    error));
8788 				ASSERT(error != RSMERR_BAD_LENGTH &&
8789 				    error != RSMERR_BAD_MEM_ALIGNMENT &&
8790 				    error != RSMERR_BAD_SEG_HNDL);
8791 				if (error == RSMERR_UNSUPPORTED_OPERATION)
8792 					return (ENOTSUP);
8793 				else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8794 					return (EAGAIN);
8795 				else if (error == RSMERR_CONN_ABORTED)
8796 					return (ENODEV);
8797 				else
8798 					return (error);
8799 			} else {
8800 				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8801 			}
8802 		} else {
8803 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8804 		}
8805 
8806 		sharedp->rsmsi_mapcnt++;
8807 
8808 		rsmsharelock_release(seg);
8809 
8810 		/* move to an intermediate mapping state */
8811 		seg->s_state = RSM_STATE_MAPPING;
8812 		rsmseglock_release(seg);
8813 
8814 		error = devmap_setup(dev, (offset_t)off, as, addrp,
8815 		    len, prot, maxprot, flags, cred);
8816 
8817 		rsmseglock_acquire(seg);
8818 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8819 
8820 		if (error == DDI_SUCCESS) {
8821 			seg->s_state = RSM_STATE_ACTIVE;
8822 		} else {
8823 			rsmsharelock_acquire(seg);
8824 
8825 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8826 
8827 			sharedp->rsmsi_mapcnt--;
8828 			if (sharedp->rsmsi_mapcnt == 0) {
8829 				/* unmap the shared RSMPI mapping */
8830 				ASSERT(sharedp->rsmsi_handle != NULL);
8831 				(void) adapter->rsmpi_ops->
8832 				    rsm_unmap(sharedp->rsmsi_handle);
8833 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8834 				sharedp->rsmsi_mapinfo = NULL;
8835 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8836 			}
8837 
8838 			rsmsharelock_release(seg);
8839 			seg->s_state = old_state;
8840 			DBG_PRINTF((category, RSM_ERR,
8841 			    "rsm: devmap_setup failed %d\n", error));
8842 		}
8843 		cv_broadcast(&seg->s_cv);
8844 		rsmseglock_release(seg);
8845 		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8846 		    error));
8847 		return (error);
8848 	} else {
8849 		/*
8850 		 * For loopback, the export segment mapping cookie (s_cookie)
8851 		 * is also used as the s_cookie value for its import segments
8852 		 * during mapping.
8853 		 * Note that reference counting for s_cookie of the export
8854 		 * segment is not required due to the following:
8855 		 * We never have a case of the export segment being destroyed,
8856 		 * leaving the import segments with a stale value for the
8857 		 * s_cookie field, since a force disconnect is done prior to a
8858 		 * destroy of an export segment. The force disconnect causes
8859 		 * the s_cookie value to be reset to NULL. Also for the
8860 		 * rsm_rebind operation, we change the s_cookie value of the
8861 		 * export segment as well as of all its local (loopback)
8862 		 * importers.
8863 		 */
8864 		DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8865 
8866 		rsmsharelock_release(seg);
8867 		/*
8868 		 * In order to maintain the lock ordering between the export
8869 		 * and import segment locks, we need to acquire the export
8870 		 * segment lock first and only then acquire the import
8871 		 * segment lock.
8872 		 * The above is necessary to avoid any deadlock scenarios
8873 		 * with rsm_rebind which also acquires both the export
8874 		 * and import segment locks in the above mentioned order.
8875 		 * Based on code inspection, there seem to be no other
8876 		 * situations in which both the export and import segment
8877 		 * locks are acquired either in the same or opposite order
8878 		 * as mentioned above.
8879 		 * Thus in order to conform to the above lock order, we
8880 		 * need to change the state of the import segment to
8881 		 * RSM_STATE_MAPPING, release the lock. Once this is done we
8882 		 * can now safely acquire the export segment lock first
8883 		 * followed by the import segment lock which is as per
8884 		 * the lock order mentioned above.
8885 		 */
8886 		/* move to an intermediate mapping state */
8887 		seg->s_state = RSM_STATE_MAPPING;
8888 		rsmseglock_release(seg);
8889 
8890 		eseg = rsmexport_lookup(seg->s_key);
8891 
8892 		if (eseg == NULL) {
8893 			rsmseglock_acquire(seg);
8894 			/*
8895 			 * Revert to old_state and signal any waiters
8896 			 * The shared state is not changed
8897 			 */
8898 
8899 			seg->s_state = old_state;
8900 			cv_broadcast(&seg->s_cv);
8901 			rsmseglock_release(seg);
8902 			DBG_PRINTF((category, RSM_DEBUG,
8903 			    "rsm_segmap done: key %d not found\n", seg->s_key));
8904 			return (ENODEV);
8905 		}
8906 
8907 		rsmsharelock_acquire(seg);
8908 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8909 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8910 
8911 		sharedp->rsmsi_mapcnt++;
8912 		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8913 		rsmsharelock_release(seg);
8914 
8915 		ASSERT(eseg->s_cookie != NULL);
8916 
8917 		/*
8918 		 * It is not required or necessary to acquire the import
8919 		 * segment lock here to change the value of s_cookie since
8920 		 * no one will touch the import segment as long as it is
8921 		 * in the RSM_STATE_MAPPING state.
8922 		 */
8923 		seg->s_cookie = eseg->s_cookie;
8924 
8925 		rsmseglock_release(eseg);
8926 
8927 		error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8928 		    prot, maxprot, flags, cred);
8929 
8930 		rsmseglock_acquire(seg);
8931 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8932 		if (error == 0) {
8933 			seg->s_state = RSM_STATE_ACTIVE;
8934 		} else {
8935 			rsmsharelock_acquire(seg);
8936 
8937 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8938 
8939 			sharedp->rsmsi_mapcnt--;
8940 			if (sharedp->rsmsi_mapcnt == 0) {
8941 				sharedp->rsmsi_mapinfo = NULL;
8942 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8943 			}
8944 			rsmsharelock_release(seg);
8945 			seg->s_state = old_state;
8946 			seg->s_cookie = NULL;
8947 		}
8948 		cv_broadcast(&seg->s_cv);
8949 		rsmseglock_release(seg);
8950 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8951 		    "rsm_segmap done: %d\n", error));
8952 		return (error);
8953 	}
8954 }
8955 
8956 int
8957 rsmka_null_seg_create(
8958     rsm_controller_handle_t argcp,
8959     rsm_memseg_export_handle_t *handle,
8960     size_t size,
8961     uint_t flags,
8962     rsm_memory_local_t *memory,
8963     rsm_resource_callback_t callback,
8964     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8965 {
8966 	return (RSM_SUCCESS);
8967 }
8968 
8969 
8970 int
8971 rsmka_null_seg_destroy(
8972     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
8973 {
8974 	return (RSM_SUCCESS);
8975 }
8976 
8977 
8978 int
8979 rsmka_null_bind(
8980     rsm_memseg_export_handle_t argmemseg,
8981     off_t offset,
8982     rsm_memory_local_t *argmemory,
8983     rsm_resource_callback_t callback,
8984     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8985 {
8986 	return (RSM_SUCCESS);
8987 }
8988 
8989 
8990 int
8991 rsmka_null_unbind(
8992     rsm_memseg_export_handle_t argmemseg,
8993     off_t offset,
8994     size_t length	/*ARGSUSED*/)
8995 {
8996 	return (DDI_SUCCESS);
8997 }
8998 
8999 int
9000 rsmka_null_rebind(
9001     rsm_memseg_export_handle_t argmemseg,
9002     off_t offset,
9003     rsm_memory_local_t *memory,
9004     rsm_resource_callback_t callback,
9005     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9006 {
9007 	return (RSM_SUCCESS);
9008 }
9009 
9010 int
9011 rsmka_null_publish(
9012     rsm_memseg_export_handle_t argmemseg,
9013     rsm_access_entry_t access_list[],
9014     uint_t access_list_length,
9015     rsm_memseg_id_t segment_id,
9016     rsm_resource_callback_t callback,
9017     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9018 {
9019 	return (RSM_SUCCESS);
9020 }
9021 
9022 
9023 int
9024 rsmka_null_republish(
9025     rsm_memseg_export_handle_t memseg,
9026     rsm_access_entry_t access_list[],
9027     uint_t access_list_length,
9028     rsm_resource_callback_t callback,
9029     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9030 {
9031 	return (RSM_SUCCESS);
9032 }
9033 
9034 int
9035 rsmka_null_unpublish(
9036     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
9037 {
9038 	return (RSM_SUCCESS);
9039 }
9040 
9041 
9042 void
9043 rsmka_init_loopback()
9044 {
9045 	rsm_ops_t	*ops = &null_rsmpi_ops;
9046 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9047 
9048 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9049 	    "rsmka_init_loopback enter\n"));
9050 
9051 	/* initialize null ops vector */
9052 	ops->rsm_seg_create = rsmka_null_seg_create;
9053 	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9054 	ops->rsm_bind = rsmka_null_bind;
9055 	ops->rsm_unbind = rsmka_null_unbind;
9056 	ops->rsm_rebind = rsmka_null_rebind;
9057 	ops->rsm_publish = rsmka_null_publish;
9058 	ops->rsm_unpublish = rsmka_null_unpublish;
9059 	ops->rsm_republish = rsmka_null_republish;
9060 
9061 	/* initialize attributes for loopback adapter */
9062 	loopback_attr.attr_name = loopback_str;
9063 	loopback_attr.attr_page_size = 0x8; /* 8K */
9064 
9065 	/* initialize loopback adapter */
9066 	loopback_adapter.rsm_attr = loopback_attr;
9067 	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9068 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9069 	    "rsmka_init_loopback done\n"));
9070 }
9071 
9072 /* ************** DR functions ********************************** */
9073 static void
9074 rsm_quiesce_exp_seg(rsmresource_t *resp)
9075 {
9076 	int		recheck_state;
9077 	rsmseg_t	*segp = (rsmseg_t *)resp;
9078 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9079 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9080 
9081 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9082 	    "%s enter: key=%u\n", function, segp->s_key));
9083 
9084 	rsmseglock_acquire(segp);
9085 	do {
9086 		recheck_state = 0;
9087 		if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9088 		    (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9089 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9090 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9091 			rsmseglock_release(segp);
9092 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9093 			    "%s done:state =%d\n", function,
9094 			    segp->s_state));
9095 			return;
9096 		}
9097 
9098 		if (segp->s_state == RSM_STATE_NEW) {
9099 			segp->s_state = RSM_STATE_NEW_QUIESCED;
9100 			rsmseglock_release(segp);
9101 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9102 			    "%s done:state =%d\n", function,
9103 			    segp->s_state));
9104 			return;
9105 		}
9106 
9107 		if (segp->s_state == RSM_STATE_BIND) {
9108 			/* unbind */
9109 			(void) rsm_unbind_pages(segp);
9110 			segp->s_state = RSM_STATE_BIND_QUIESCED;
9111 			rsmseglock_release(segp);
9112 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9113 			    "%s done:state =%d\n", function,
9114 			    segp->s_state));
9115 			return;
9116 		}
9117 
9118 		if (segp->s_state == RSM_STATE_EXPORT) {
9119 			/*
9120 			 * wait for putv/getv to complete if the segp is
9121 			 * a local memory handle
9122 			 */
9123 			while ((segp->s_state == RSM_STATE_EXPORT) &&
9124 			    (segp->s_rdmacnt != 0)) {
9125 				cv_wait(&segp->s_cv, &segp->s_lock);
9126 			}
9127 
9128 			if (segp->s_state != RSM_STATE_EXPORT) {
9129 				/*
9130 				 * state changed need to see what it
9131 				 * should be changed to.
9132 				 */
9133 				recheck_state = 1;
9134 				continue;
9135 			}
9136 
9137 			segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9138 			rsmseglock_release(segp);
9139 			/*
9140 			 * send SUSPEND messages - currently it will be
9141 			 * done at the end
9142 			 */
9143 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9144 			    "%s done:state =%d\n", function,
9145 			    segp->s_state));
9146 			return;
9147 		}
9148 	} while (recheck_state);
9149 
9150 	rsmseglock_release(segp);
9151 }
9152 
9153 static void
9154 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9155 {
9156 	int			ret;
9157 	rsmseg_t		*segp = (rsmseg_t *)resp;
9158 	rsmapi_access_entry_t	*acl;
9159 	rsm_access_entry_t	*rsmpi_acl;
9160 	int			acl_len;
9161 	int			create_flags = 0;
9162 	struct buf		*xbuf;
9163 	rsm_memory_local_t	mem;
9164 	adapter_t		*adapter;
9165 	dev_t			sdev = 0;
9166 	rsm_resource_callback_t callback_flag;
9167 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9168 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9169 
9170 	rsmseglock_acquire(segp);
9171 
9172 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9173 	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
9174 	    segp->s_state));
9175 
9176 	if ((segp->s_state == RSM_STATE_NEW) ||
9177 	    (segp->s_state == RSM_STATE_BIND) ||
9178 	    (segp->s_state == RSM_STATE_EXPORT)) {
9179 		rsmseglock_release(segp);
9180 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9181 		    function, segp->s_state));
9182 		return;
9183 	}
9184 
9185 	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9186 		segp->s_state = RSM_STATE_NEW;
9187 		cv_broadcast(&segp->s_cv);
9188 		rsmseglock_release(segp);
9189 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9190 		    function, segp->s_state));
9191 		return;
9192 	}
9193 
9194 	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9195 		/* bind the segment */
9196 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9197 		    segp->s_len, segp->s_proc);
9198 		if (ret == RSM_SUCCESS) { /* bind successful */
9199 			segp->s_state = RSM_STATE_BIND;
9200 		} else { /* bind failed - resource unavailable */
9201 			segp->s_state = RSM_STATE_NEW;
9202 		}
9203 		cv_broadcast(&segp->s_cv);
9204 		rsmseglock_release(segp);
9205 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9206 		    "%s done: bind_qscd bind = %d\n", function, ret));
9207 		return;
9208 	}
9209 
9210 	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9211 		/* wait for the segment to move to EXPORT_QUIESCED state */
9212 		cv_wait(&segp->s_cv, &segp->s_lock);
9213 	}
9214 
9215 	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9216 		/* bind the segment */
9217 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9218 		    segp->s_len, segp->s_proc);
9219 
9220 		if (ret != RSM_SUCCESS) {
9221 			/* bind failed - resource unavailable */
9222 			acl_len = segp->s_acl_len;
9223 			acl = segp->s_acl;
9224 			rsmpi_acl = segp->s_acl_in;
9225 			segp->s_acl_len = 0;
9226 			segp->s_acl = NULL;
9227 			segp->s_acl_in = NULL;
9228 			rsmseglock_release(segp);
9229 
9230 			rsmexport_rm(segp);
9231 			rsmacl_free(acl, acl_len);
9232 			rsmpiacl_free(rsmpi_acl, acl_len);
9233 
9234 			rsmseglock_acquire(segp);
9235 			segp->s_state = RSM_STATE_NEW;
9236 			cv_broadcast(&segp->s_cv);
9237 			rsmseglock_release(segp);
9238 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9239 			    "%s done: exp_qscd bind failed = %d\n",
9240 			    function, ret));
9241 			return;
9242 		}
9243 		/*
9244 		 * publish the segment
9245 		 * if  successful
9246 		 *   segp->s_state = RSM_STATE_EXPORT;
9247 		 * else failed
9248 		 *   segp->s_state = RSM_STATE_BIND;
9249 		 */
9250 
9251 		/* check whether it is a local_memory_handle */
9252 		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9253 			if ((segp->s_acl[0].ae_node == my_nodeid) &&
9254 			    (segp->s_acl[0].ae_permission == 0)) {
9255 				segp->s_state = RSM_STATE_EXPORT;
9256 				cv_broadcast(&segp->s_cv);
9257 				rsmseglock_release(segp);
9258 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9259 				    "%s done:exp_qscd\n", function));
9260 				return;
9261 			}
9262 		}
9263 		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9264 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
9265 		ASSERT(xbuf != NULL);
9266 
9267 		mem.ms_type = RSM_MEM_BUF;
9268 		mem.ms_bp = xbuf;
9269 
9270 		adapter = segp->s_adapter;
9271 
9272 		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9273 			create_flags = RSM_ALLOW_UNBIND_REBIND;
9274 		}
9275 
9276 		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9277 			callback_flag  = RSM_RESOURCE_DONTWAIT;
9278 		} else {
9279 			callback_flag  = RSM_RESOURCE_SLEEP;
9280 		}
9281 
9282 		ret = adapter->rsmpi_ops->rsm_seg_create(
9283 		    adapter->rsmpi_handle, &segp->s_handle.out,
9284 		    segp->s_len, create_flags, &mem,
9285 		    callback_flag, NULL);
9286 
9287 		if (ret != RSM_SUCCESS) {
9288 			acl_len = segp->s_acl_len;
9289 			acl = segp->s_acl;
9290 			rsmpi_acl = segp->s_acl_in;
9291 			segp->s_acl_len = 0;
9292 			segp->s_acl = NULL;
9293 			segp->s_acl_in = NULL;
9294 			rsmseglock_release(segp);
9295 
9296 			rsmexport_rm(segp);
9297 			rsmacl_free(acl, acl_len);
9298 			rsmpiacl_free(rsmpi_acl, acl_len);
9299 
9300 			rsmseglock_acquire(segp);
9301 			segp->s_state = RSM_STATE_BIND;
9302 			cv_broadcast(&segp->s_cv);
9303 			rsmseglock_release(segp);
9304 			DBG_PRINTF((category, RSM_ERR,
9305 			    "%s done: exp_qscd create failed = %d\n",
9306 			    function, ret));
9307 			return;
9308 		}
9309 
9310 		ret = adapter->rsmpi_ops->rsm_publish(
9311 		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9312 		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9313 
9314 		if (ret != RSM_SUCCESS) {
9315 			acl_len = segp->s_acl_len;
9316 			acl = segp->s_acl;
9317 			rsmpi_acl = segp->s_acl_in;
9318 			segp->s_acl_len = 0;
9319 			segp->s_acl = NULL;
9320 			segp->s_acl_in = NULL;
9321 			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9322 			rsmseglock_release(segp);
9323 
9324 			rsmexport_rm(segp);
9325 			rsmacl_free(acl, acl_len);
9326 			rsmpiacl_free(rsmpi_acl, acl_len);
9327 
9328 			rsmseglock_acquire(segp);
9329 			segp->s_state = RSM_STATE_BIND;
9330 			cv_broadcast(&segp->s_cv);
9331 			rsmseglock_release(segp);
9332 			DBG_PRINTF((category, RSM_ERR,
9333 			    "%s done: exp_qscd publish failed = %d\n",
9334 			    function, ret));
9335 			return;
9336 		}
9337 
9338 		segp->s_state = RSM_STATE_EXPORT;
9339 		cv_broadcast(&segp->s_cv);
9340 		rsmseglock_release(segp);
9341 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9342 		    function));
9343 		return;
9344 	}
9345 
9346 	rsmseglock_release(segp);
9347 
9348 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9349 }
9350 
9351 static void
9352 rsm_quiesce_imp_seg(rsmresource_t *resp)
9353 {
9354 	rsmseg_t	*segp = (rsmseg_t *)resp;
9355 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9356 	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9357 
9358 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9359 	    "%s enter: key=%u\n", function, segp->s_key));
9360 
9361 	rsmseglock_acquire(segp);
9362 	segp->s_flags |= RSM_DR_INPROGRESS;
9363 
9364 	while (segp->s_rdmacnt != 0) {
9365 		/* wait for the RDMA to complete */
9366 		cv_wait(&segp->s_cv, &segp->s_lock);
9367 	}
9368 
9369 	rsmseglock_release(segp);
9370 
9371 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9372 
9373 }
9374 
9375 static void
9376 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9377 {
9378 	rsmseg_t	*segp = (rsmseg_t *)resp;
9379 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9380 	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9381 
9382 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9383 	    "%s enter: key=%u\n", function, segp->s_key));
9384 
9385 	rsmseglock_acquire(segp);
9386 
9387 	segp->s_flags &= ~RSM_DR_INPROGRESS;
9388 	/* wake up any waiting putv/getv ops */
9389 	cv_broadcast(&segp->s_cv);
9390 
9391 	rsmseglock_release(segp);
9392 
9393 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9394 
9395 
9396 }
9397 
9398 static void
9399 rsm_process_exp_seg(rsmresource_t *resp, int event)
9400 {
9401 	if (event == RSM_DR_QUIESCE)
9402 		rsm_quiesce_exp_seg(resp);
9403 	else /* UNQUIESCE */
9404 		rsm_unquiesce_exp_seg(resp);
9405 }
9406 
9407 static void
9408 rsm_process_imp_seg(rsmresource_t *resp, int event)
9409 {
9410 	if (event == RSM_DR_QUIESCE)
9411 		rsm_quiesce_imp_seg(resp);
9412 	else /* UNQUIESCE */
9413 		rsm_unquiesce_imp_seg(resp);
9414 }
9415 
9416 static void
9417 rsm_dr_process_local_segments(int event)
9418 {
9419 
9420 	int i, j;
9421 	rsmresource_blk_t	*blk;
9422 	rsmresource_t		*p;
9423 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9424 
9425 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9426 	    "rsm_dr_process_local_segments enter\n"));
9427 
9428 	/* iterate through the resource structure */
9429 
9430 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9431 
9432 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9433 		blk = rsm_resource.rsmrc_root[i];
9434 		if (blk != NULL) {
9435 			for (j = 0; j < RSMRC_BLKSZ; j++) {
9436 				p = blk->rsmrcblk_blks[j];
9437 				if ((p != NULL) && (p != RSMRC_RESERVED)) {
9438 					/* valid resource */
9439 					if (p->rsmrc_type ==
9440 					    RSM_RESOURCE_EXPORT_SEGMENT)
9441 						rsm_process_exp_seg(p, event);
9442 					else if (p->rsmrc_type ==
9443 					    RSM_RESOURCE_IMPORT_SEGMENT)
9444 						rsm_process_imp_seg(p, event);
9445 				}
9446 			}
9447 		}
9448 	}
9449 
9450 	rw_exit(&rsm_resource.rsmrc_lock);
9451 
9452 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9453 	    "rsm_dr_process_local_segments done\n"));
9454 }
9455 
9456 /* *************** DR callback functions ************ */
9457 static void
9458 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9459 {
9460 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9461 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9462 	    "rsm_dr_callback_post_add is a no-op\n"));
9463 	/* Noop */
9464 }
9465 
9466 static int
9467 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9468 {
9469 	int	recheck_state = 0;
9470 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9471 
9472 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9473 	    "rsm_dr_callback_pre_del enter\n"));
9474 
9475 	mutex_enter(&rsm_drv_data.drv_lock);
9476 
9477 	do {
9478 		recheck_state = 0;
9479 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9480 		    "rsm_dr_callback_pre_del:state=%d\n",
9481 		    rsm_drv_data.drv_state));
9482 
9483 		switch (rsm_drv_data.drv_state) {
9484 		case RSM_DRV_NEW:
9485 			/*
9486 			 * The state should usually never be RSM_DRV_NEW
9487 			 * since in this state the callbacks have not yet
9488 			 * been registered. So, ASSERT.
9489 			 */
9490 			ASSERT(0);
9491 			return (0);
9492 		case RSM_DRV_REG_PROCESSING:
9493 			/*
9494 			 * The driver is in the process of registering
9495 			 * with the DR framework. So, wait till the
9496 			 * registration process is complete.
9497 			 */
9498 			recheck_state = 1;
9499 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9500 			break;
9501 		case RSM_DRV_UNREG_PROCESSING:
9502 			/*
9503 			 * If the state is RSM_DRV_UNREG_PROCESSING, the
9504 			 * module is in the process of detaching and
9505 			 * unregistering the callbacks from the DR
9506 			 * framework. So, simply return.
9507 			 */
9508 			mutex_exit(&rsm_drv_data.drv_lock);
9509 			DBG_PRINTF((category, RSM_DEBUG,
9510 			    "rsm_dr_callback_pre_del:"
9511 			    "pre-del on NEW/UNREG\n"));
9512 			return (0);
9513 		case RSM_DRV_OK:
9514 			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9515 			break;
9516 		case RSM_DRV_PREDEL_STARTED:
9517 			/* FALLTHRU */
9518 		case RSM_DRV_PREDEL_COMPLETED:
9519 			/* FALLTHRU */
9520 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9521 			recheck_state = 1;
9522 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9523 			break;
9524 		case RSM_DRV_DR_IN_PROGRESS:
9525 			rsm_drv_data.drv_memdel_cnt++;
9526 			mutex_exit(&rsm_drv_data.drv_lock);
9527 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9528 			    "rsm_dr_callback_pre_del done\n"));
9529 			return (0);
9530 			/* break; */
9531 		default:
9532 			ASSERT(0);
9533 			break;
9534 		}
9535 
9536 	} while (recheck_state);
9537 
9538 	rsm_drv_data.drv_memdel_cnt++;
9539 
9540 	mutex_exit(&rsm_drv_data.drv_lock);
9541 
9542 	/* Do all the quiescing stuff here */
9543 	DBG_PRINTF((category, RSM_DEBUG,
9544 	    "rsm_dr_callback_pre_del: quiesce things now\n"));
9545 
9546 	rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9547 
9548 	/*
9549 	 * now that all local segments have been quiesced lets inform
9550 	 * the importers
9551 	 */
9552 	rsm_send_suspend();
9553 
9554 	/*
9555 	 * In response to the suspend message the remote node(s) will process
9556 	 * the segments and send a suspend_complete message. Till all
9557 	 * the nodes send the suspend_complete message we wait in the
9558 	 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9559 	 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9560 	 */
9561 	mutex_enter(&rsm_drv_data.drv_lock);
9562 
9563 	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9564 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9565 	}
9566 
9567 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9568 
9569 	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9570 	cv_broadcast(&rsm_drv_data.drv_cv);
9571 
9572 	mutex_exit(&rsm_drv_data.drv_lock);
9573 
9574 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9575 	    "rsm_dr_callback_pre_del done\n"));
9576 
9577 	return (0);
9578 }
9579 
9580 static void
9581 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9582 {
9583 	int	recheck_state = 0;
9584 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9585 
9586 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9587 	    "rsm_dr_callback_post_del enter\n"));
9588 
9589 	mutex_enter(&rsm_drv_data.drv_lock);
9590 
9591 	do {
9592 		recheck_state = 0;
9593 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9594 		    "rsm_dr_callback_post_del:state=%d\n",
9595 		    rsm_drv_data.drv_state));
9596 
9597 		switch (rsm_drv_data.drv_state) {
9598 		case RSM_DRV_NEW:
9599 			/*
9600 			 * The driver state cannot not be RSM_DRV_NEW
9601 			 * since in this state the callbacks have not
9602 			 * yet been registered.
9603 			 */
9604 			ASSERT(0);
9605 			return;
9606 		case RSM_DRV_REG_PROCESSING:
9607 			/*
9608 			 * The driver is in the process of registering with
9609 			 * the DR framework. Wait till the registration is
9610 			 * complete.
9611 			 */
9612 			recheck_state = 1;
9613 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9614 			break;
9615 		case RSM_DRV_UNREG_PROCESSING:
9616 			/*
9617 			 * RSM_DRV_UNREG_PROCESSING state means the module
9618 			 * is detaching and unregistering the callbacks
9619 			 * from the DR framework. So simply return.
9620 			 */
9621 			/* FALLTHRU */
9622 		case RSM_DRV_OK:
9623 			/*
9624 			 * RSM_DRV_OK means we missed the pre-del
9625 			 * corresponding to this post-del coz we had not
9626 			 * registered yet, so simply return.
9627 			 */
9628 			mutex_exit(&rsm_drv_data.drv_lock);
9629 			DBG_PRINTF((category, RSM_DEBUG,
9630 			    "rsm_dr_callback_post_del:"
9631 			    "post-del on OK/UNREG\n"));
9632 			return;
9633 			/* break; */
9634 		case RSM_DRV_PREDEL_STARTED:
9635 			/* FALLTHRU */
9636 		case RSM_DRV_PREDEL_COMPLETED:
9637 			/* FALLTHRU */
9638 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9639 			recheck_state = 1;
9640 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9641 			break;
9642 		case RSM_DRV_DR_IN_PROGRESS:
9643 			rsm_drv_data.drv_memdel_cnt--;
9644 			if (rsm_drv_data.drv_memdel_cnt > 0) {
9645 				mutex_exit(&rsm_drv_data.drv_lock);
9646 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9647 				    "rsm_dr_callback_post_del done:\n"));
9648 				return;
9649 			}
9650 			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9651 			break;
9652 		default:
9653 			ASSERT(0);
9654 			return;
9655 			/* break; */
9656 		}
9657 	} while (recheck_state);
9658 
9659 	mutex_exit(&rsm_drv_data.drv_lock);
9660 
9661 	/* Do all the unquiescing stuff here */
9662 	DBG_PRINTF((category, RSM_DEBUG,
9663 	    "rsm_dr_callback_post_del: unquiesce things now\n"));
9664 
9665 	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9666 
9667 	/*
9668 	 * now that all local segments have been unquiesced lets inform
9669 	 * the importers
9670 	 */
9671 	rsm_send_resume();
9672 
9673 	mutex_enter(&rsm_drv_data.drv_lock);
9674 
9675 	rsm_drv_data.drv_state = RSM_DRV_OK;
9676 
9677 	cv_broadcast(&rsm_drv_data.drv_cv);
9678 
9679 	mutex_exit(&rsm_drv_data.drv_lock);
9680 
9681 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9682 	    "rsm_dr_callback_post_del done\n"));
9683 
9684 	return;
9685 
9686 }
9687