xref: /titanic_50/usr/src/uts/common/io/rsm/rsm.c (revision 197c9523b8946cf70fab2bc4ee633b18fc5bde68)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2012 Milan Jurik. All rights reserved.
25  */
26 
27 
28 /*
29  * Overview of the RSM Kernel Agent:
30  * ---------------------------------
31  *
32  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
33  * kernel agent is a pseudo device driver which makes use of the RSMPI
34  * interface on behalf of the RSMAPI user library.
35  *
36  * The kernel agent functionality can be categorized into the following
37  * components:
38  * 1. Driver Infrastructure
39  * 2. Export/Import Segment Management
40  * 3. Internal resource allocation/deallocation
41  *
42  * The driver infrastructure includes the basic module loading entry points
43  * like _init, _info, _fini to load, unload and report information about
44  * the driver module. The driver infrastructure also includes the
45  * autoconfiguration entry points namely, attach, detach and getinfo for
46  * the device autoconfiguration.
47  *
48  * The kernel agent is a pseudo character device driver and exports
49  * a cb_ops structure which defines the driver entry points for character
50  * device access. This includes the open and close entry points. The
51  * other entry points provided include ioctl, devmap and segmap and chpoll.
52  * read and write entry points are not used since the device is memory
53  * mapped. Also ddi_prop_op is used for the prop_op entry point.
54  *
55  * The ioctl entry point supports a number of commands, which are used by
56  * the RSMAPI library in order to export and import segments. These
57  * commands include commands for binding and rebinding the physical pages
58  * allocated to the virtual address range, publishing the export segment,
59  * unpublishing and republishing an export segment, creating an
60  * import segment and a virtual connection from this import segment to
61  * an export segment, performing scatter-gather data transfer, barrier
62  * operations.
63  *
64  *
65  * Export and Import segments:
66  * ---------------------------
67  *
68  * In order to create an RSM export segment a process allocates a range in its
69  * virtual address space for the segment using standard Solaris interfaces.
70  * The process then calls RSMAPI, which in turn makes an ioctl call to the
71  * RSM kernel agent for an allocation of physical memory pages and for
72  * creation of the export segment by binding these pages to the virtual
73  * address range. These pages are locked in memory so that remote accesses
74  * are always applied to the correct page. Then the RSM segment is published,
75  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
76  * is assigned to it.
77  *
78  * In order to import a published RSM segment, RSMAPI creates an import
79  * segment and forms a virtual connection across the interconnect to the
80  * export segment, via an ioctl into the kernel agent with the connect
81  * command. The import segment setup is completed by mapping the
82  * local device memory into the importers virtual address space. The
83  * mapping of the import segment is handled by the segmap/devmap
84  * infrastructure described as follows.
85  *
86  * Segmap and Devmap interfaces:
87  *
88  * The RSM kernel agent allows device memory to be directly accessed by user
89  * threads via memory mapping. In order to do so, the RSM kernel agent
90  * supports the devmap and segmap entry points.
91  *
92  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
93  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
94  * responsible for exporting the device memory to the user applications.
95  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
96  * control is transferred to the devmap_setup call which calls rsm_devmap.
97  *
98  * rsm_devmap validates the user mapping to the device or kernel memory
99  * and passes the information to the system for setting up the mapping. The
100  * actual setting up of the mapping is done by devmap_devmem_setup(for
101  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
102  * registered for device context management via the devmap_devmem_setup
103  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
104  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
105  * is created, a mapping is freed, a mapping is accessed or an existing
106  * mapping is duplicated respectively. These callbacks allow the RSM kernel
107  * agent to maintain state information associated with the mappings.
108  * The state information is mainly in the form of a cookie list for the import
109  * segment for which mapping has been done.
110  *
111  * Forced disconnect of import segments:
112  *
113  * When an exported segment is unpublished, the exporter sends a forced
114  * disconnect message to all its importers. The importer segments are
115  * unloaded and disconnected. This involves unloading the original
116  * mappings and remapping to a preallocated kernel trash page. This is
117  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
118  * preallocated by the kernel agent during attach using ddi_umem_alloc with
119  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
120  * due to unloading of the original mappings.
121  *
122  * Additionally every segment has a mapping generation number associated
123  * with it. This is an entry in the barrier generation page, created
124  * during attach time. This mapping generation number for the import
125  * segments is incremented on a force disconnect to notify the application
126  * of the force disconnect. On this notification, the application needs
127  * to reconnect the segment to establish a new legitimate mapping.
128  *
129  *
130  * Locks used in the kernel agent:
131  * -------------------------------
132  *
133  * The kernel agent uses a variety of mutexes and condition variables for
134  * mutual exclusion of the shared data structures and for synchronization
135  * between the various threads. Some of the locks are described as follows.
136  *
137  * Each resource structure, which represents either an export/import segment
138  * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
139  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
140  * rsmseglock_acquire and rsmseglock_release macros. An additional
141  * lock called the rsmsi_lock is used for the shared import data structure
142  * that is relevant for resources representing import segments. There is
143  * also a condition variable associated with the resource called s_cv. This
144  * is used to wait for events like the segment state change etc.
145  *
146  * The resource structures are allocated from a pool of resource structures,
147  * called rsm_resource. This pool is protected via a reader-writer lock,
148  * called rsmrc_lock.
149  *
150  * There are two separate hash tables, one for the export segments and
151  * one for the import segments. The export segments are inserted into the
152  * export segment hash table only after they have been published and the
153  * import segments are inserted in the import segments list only after they
154  * have successfully connected to an exported segment. These tables are
155  * protected via reader-writer locks.
156  *
157  * Debug Support in the kernel agent:
158  * ----------------------------------
159  *
160  * Debugging support in the kernel agent is provided by the following
161  * macros.
162  *
163  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
164  * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer
165  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
166  * on the definition of the category and level. All messages that belong to
167  * the specified category(rsmdbg_category) and are of an equal or greater
168  * severity than the specified level(rsmdbg_level) are logged. The message
169  * is a string which uses the same formatting rules as the strings used in
170  * printf.
171  *
172  * The category defines which component of the kernel agent has logged this
173  * message. There are a number of categories that have been defined such as
174  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
175  * DBG_ADDCATEGORY is used to add in another category to the currently
176  * specified category value so that the component using this new category
177  * can also effectively log debug messages. Thus, the category of a specific
178  * message is some combination of the available categories and we can define
179  * sub-categories if we want a finer level of granularity.
180  *
181  * The level defines the severity of the message. Different level values are
182  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
183  * the least severe(debug level is 0).
184  *
185  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
186  * variable or a string respectively.
187  *
188  *
189  * NOTES:
190  *
191  * Special Fork and Exec Handling:
192  * -------------------------------
193  *
194  * The backing physical pages of an exported segment are always locked down.
195  * Thus, there are two cases in which a process having exported segments
196  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
197  * forks and invokes exit before the duped file descriptors for the export
198  * segments are closed in the child process. The hang is caused because the
199  * address space release algorithm in Solaris VM subsystem is based on a
200  * non-blocking loop which does not terminate while segments are locked
201  * down. In addition to this, Solaris VM subsystem lacks a callback
202  * mechanism to the rsm kernel agent to allow unlocking these export
203  * segment pages.
204  *
205  * In order to circumvent this problem, the kernel agent does the following.
206  * The Solaris VM subsystem keeps memory segments in increasing order of
207  * virtual addresses. Thus a special page(special_exit_offset) is allocated
208  * by the kernel agent and is mmapped into the heap area of the process address
209  * space(the mmap is done by the RSMAPI library). During the mmap processing
210  * of this special page by the devmap infrastructure, a callback(the same
211  * devmap context management callbacks discussed above) is registered for an
212  * unmap.
213  *
214  * As discussed above, this page is processed by the Solaris address space
215  * release code before any of the exported segments pages(which are allocated
216  * from high memory). It is during this processing that the unmap callback gets
217  * called and this callback is responsible for force destroying the exported
218  * segments and thus eliminating the problem of locked pages.
219  *
220  * Flow-control:
221  * ------------
222  *
223  * A credit based flow control algorithm is used for messages whose
224  * processing cannot be done in the interrupt context because it might
225  * involve invoking rsmpi calls, or might take a long time to complete
226  * or might need to allocate resources. The algorithm operates on a per
227  * path basis. To send a message the pathend needs to have a credit and
228  * it consumes one for every message that is flow controlled. On the
229  * receiving pathend the message is put on a msgbuf_queue and a task is
230  * dispatched on the worker thread - recv_taskq where it is processed.
231  * After processing the message, the receiving pathend dequeues the message,
232  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
233  * credits to the sender pathend.
234  *
235  * RSM_DRTEST:
236  * -----------
237  *
238  * This is used to enable the DR testing using a test driver on test
239  * platforms which do not support DR.
240  *
241  */
242 
243 #include <sys/types.h>
244 #include <sys/param.h>
245 #include <sys/user.h>
246 #include <sys/buf.h>
247 #include <sys/systm.h>
248 #include <sys/cred.h>
249 #include <sys/vm.h>
250 #include <sys/uio.h>
251 #include <vm/seg.h>
252 #include <vm/page.h>
253 #include <sys/stat.h>
254 
255 #include <sys/time.h>
256 #include <sys/errno.h>
257 
258 #include <sys/file.h>
259 #include <sys/uio.h>
260 #include <sys/proc.h>
261 #include <sys/mman.h>
262 #include <sys/open.h>
263 #include <sys/atomic.h>
264 #include <sys/mem_config.h>
265 
266 
267 #include <sys/ddi.h>
268 #include <sys/devops.h>
269 #include <sys/ddidevmap.h>
270 #include <sys/sunddi.h>
271 #include <sys/esunddi.h>
272 #include <sys/ddi_impldefs.h>
273 
274 #include <sys/kmem.h>
275 #include <sys/conf.h>
276 #include <sys/devops.h>
277 #include <sys/ddi_impldefs.h>
278 
279 #include <sys/modctl.h>
280 
281 #include <sys/policy.h>
282 #include <sys/types.h>
283 #include <sys/conf.h>
284 #include <sys/param.h>
285 
286 #include <sys/taskq.h>
287 
288 #include <sys/rsm/rsm_common.h>
289 #include <sys/rsm/rsmapi_common.h>
290 #include <sys/rsm/rsm.h>
291 #include <rsm_in.h>
292 #include <sys/rsm/rsmka_path_int.h>
293 #include <sys/rsm/rsmpi.h>
294 
295 #include <sys/modctl.h>
296 #include <sys/debug.h>
297 
298 #include <sys/tuneable.h>
299 
300 #ifdef	RSM_DRTEST
301 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
302 		void *arg);
303 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
304 		void *arg);
305 #endif
306 
307 extern void dbg_printf(int category, int level, char *fmt, ...);
308 extern void rsmka_pathmanager_init();
309 extern void rsmka_pathmanager_cleanup();
310 extern void rele_sendq_token(sendq_token_t *);
311 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
312 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
313 extern int rsmka_topology_ioctl(caddr_t, int, int);
314 
315 extern pri_t maxclsyspri;
316 extern work_queue_t work_queue;
317 extern kmutex_t ipc_info_lock;
318 extern kmutex_t ipc_info_cvlock;
319 extern kcondvar_t ipc_info_cv;
320 extern kmutex_t path_hold_cvlock;
321 extern kcondvar_t path_hold_cv;
322 
323 extern kmutex_t rsmka_buf_lock;
324 
325 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
326 extern adapter_t *rsmka_lookup_adapter(char *, int);
327 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
328 extern boolean_t rsmka_do_path_active(path_t *, int);
329 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
330 extern void rsmka_release_adapter(adapter_t *);
331 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
332 extern void rsmka_dequeue_msgbuf(path_t *path);
333 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
334 /* lint -w2 */
335 
336 static int rsm_open(dev_t *, int, int, cred_t *);
337 static int rsm_close(dev_t, int, int, cred_t *);
338 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
339     cred_t *credp, int *rvalp);
340 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
341     uint_t);
342 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
343     uint_t, uint_t, cred_t *);
344 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
345     struct pollhead **phpp);
346 
347 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
348 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
349 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
350 
351 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
352 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
353 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
354 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
355 				rsm_permission_t);
356 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
357 static void rsmacl_free(rsmapi_access_entry_t *, int);
358 static void rsmpiacl_free(rsm_access_entry_t *, int);
359 
360 static int rsm_inc_pgcnt(pgcnt_t);
361 static void rsm_dec_pgcnt(pgcnt_t);
362 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
363 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
364 					size_t *);
365 static void exporter_quiesce();
366 static void rsmseg_suspend(rsmseg_t *, int *);
367 static void rsmsegshare_suspend(rsmseg_t *);
368 static int rsmseg_resume(rsmseg_t *, void **);
369 static int rsmsegshare_resume(rsmseg_t *);
370 
/*
 * Character device entry points exported by the RSM kernel agent.
 *
 * Only open, close, ioctl, devmap, segmap and chpoll are implemented by
 * this driver; read and write are left as nodev because the device is
 * accessed through memory mappings, and property requests are handled by
 * the generic ddi_prop_op.
 */
static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* presumably cb_rev - confirm in cb_ops(9S) */
	0,			/* presumably cb_aread - confirm */
	0			/* presumably cb_awrite - confirm */
};
391 
/*
 * Autoconfiguration entry points for the RSM kernel agent: getinfo, attach
 * and detach are implemented by this file; identify and probe are no-ops
 * and reset is not supported. The character device entry points are
 * reached through rsm_cb_ops.
 */
static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0,			/* presumably devo_power - confirm in dev_ops(9S) */
	ddi_quiesce_not_needed,		/* quiesce */
};
406 
407 /*
408  * Module linkage information for the kernel.
409  */
410 
/*
 * Driver linkage record: identifies this module as a device driver,
 * gives the descriptive name reported for it and points at its dev_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops, /* Type of module.  This one is a pseudo driver */
	"Remote Shared Memory Driver",	/* descriptive module name */
	&rsm_ops,	/* driver ops */
};
416 
/*
 * Module linkage handed to mod_install, mod_info and mod_remove by
 * _init, _info and _fini. modldrv is the only linkage element.
 */
static struct modlinkage modlinkage = {
	MODREV_1,		/* module linkage revision */
	(void *)&modldrv,	/* the single linkage element */
	0,			/* presumably the list terminator - confirm */
	0,
	0
};
424 
/*
 * Dynamic reconfiguration (DR) callbacks. They are invoked around memory
 * add and delete operations once rsm_dr_callback_vec has been registered.
 */
static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

/*
 * Callback vector registered by rsm_attach (through
 * kphysm_setup_func_register, or its RSM_DRTEST counterpart) when the
 * "enable-dynamic-reconfiguration" property is non-zero.
 */
static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,	/* vector version */
	rsm_dr_callback_post_add,	/* called after memory is added */
	rsm_dr_callback_pre_del,	/* called before memory is deleted */
	rsm_dr_callback_post_del	/* called after a delete (or cancel) */
};
435 
436 /* This flag can be changed to 0 to help with PIT testing */
437 int rsmka_modunloadok = 1;
438 int no_reply_cnt = 0;
439 
440 uint64_t rsm_ctrlmsg_errcnt = 0;
441 uint64_t rsm_ipcsend_errcnt = 0;
442 
443 #define	MAX_NODES 64
444 
445 static struct rsm_driver_data rsm_drv_data;
446 static struct rsmresource_table rsm_resource;
447 
448 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
449 static void rsmresource_destroy(void);
450 static int rsmresource_alloc(minor_t *);
451 static rsmresource_t *rsmresource_free(minor_t rnum);
452 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
453 static int rsm_unpublish(rsmseg_t *seg, int mode);
454 static int rsm_unbind(rsmseg_t *seg);
455 static uint_t rsmhash(rsm_memseg_id_t key);
456 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
457 static void rsmhash_free(rsmhash_table_t *rhash, int size);
458 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
459 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
460 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
461 					void *cookie);
462 int rsm_disconnect(rsmseg_t *seg);
463 void rsmseg_unload(rsmseg_t *);
464 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
465 
466 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
467     rsm_intr_q_op_t opcode, rsm_addr_t src,
468     void *data, size_t size, rsm_intr_hand_arg_t arg);
469 
470 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
471 
472 rsm_node_id_t my_nodeid;
473 
474 /* cookie, va, offsets and length for the barrier */
475 static rsm_gnum_t		*bar_va;
476 static ddi_umem_cookie_t	bar_cookie;
477 static off_t			barrier_offset;
478 static size_t			barrier_size;
479 static int			max_segs;
480 
481 /* cookie for the trash memory */
482 static ddi_umem_cookie_t	remap_cookie;
483 
484 static rsm_memseg_id_t	rsm_nextavail_segmentid;
485 
486 extern taskq_t *work_taskq;
487 extern char *taskq_name;
488 
489 static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */
490 
491 static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
492 rsmhash_table_t rsm_import_segs;		/* list of imported segs */
493 static rsmhash_table_t rsm_event_queues;	/* list of event queues */
494 
495 static	rsm_ipc_t	rsm_ipc;		/* ipc info */
496 
497 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
498 static list_head_t	rsm_suspend_list;
499 
500 /* list of descriptors for remote importers */
501 static importers_table_t importer_list;
502 
503 kmutex_t rsm_suspend_cvlock;
504 kcondvar_t rsm_suspend_cv;
505 
506 static kmutex_t rsm_lock;
507 
508 adapter_t loopback_adapter;
509 rsm_controller_attr_t loopback_attr;
510 
511 int rsmipc_send_controlmsg(path_t *path, int msgtype);
512 
513 void rsmka_init_loopback();
514 
515 int rsmka_null_seg_create(
516     rsm_controller_handle_t,
517     rsm_memseg_export_handle_t *,
518     size_t,
519     uint_t,
520     rsm_memory_local_t *,
521     rsm_resource_callback_t,
522     rsm_resource_callback_arg_t);
523 
524 int rsmka_null_seg_destroy(
525     rsm_memseg_export_handle_t);
526 
527 int rsmka_null_bind(
528     rsm_memseg_export_handle_t,
529     off_t,
530     rsm_memory_local_t *,
531     rsm_resource_callback_t,
532     rsm_resource_callback_arg_t);
533 
534 int rsmka_null_unbind(
535     rsm_memseg_export_handle_t,
536     off_t,
537     size_t);
538 
539 int rsmka_null_rebind(
540     rsm_memseg_export_handle_t,
541     off_t,
542     rsm_memory_local_t *,
543     rsm_resource_callback_t,
544     rsm_resource_callback_arg_t);
545 
546 int rsmka_null_publish(
547     rsm_memseg_export_handle_t,
548     rsm_access_entry_t [],
549     uint_t,
550     rsm_memseg_id_t,
551     rsm_resource_callback_t,
552     rsm_resource_callback_arg_t);
553 
554 
555 int rsmka_null_republish(
556     rsm_memseg_export_handle_t,
557     rsm_access_entry_t [],
558     uint_t,
559     rsm_resource_callback_t,
560     rsm_resource_callback_arg_t);
561 
562 int rsmka_null_unpublish(
563     rsm_memseg_export_handle_t);
564 
565 rsm_ops_t null_rsmpi_ops;
566 
567 /*
568  * data and locks to keep track of total amount of exported memory
569  */
570 static	pgcnt_t		rsm_pgcnt;
571 static	pgcnt_t		rsm_pgcnt_max;	/* max allowed */
572 static	kmutex_t	rsm_pgcnt_lock;
573 
574 static	int		rsm_enable_dr;
575 
576 static	char		loopback_str[] = "loopback";
577 
578 int		rsm_hash_size;
579 
580 /*
581  * The locking model is as follows:
582  *
583  * Local operations:
584  *		find resource - grab reader lock on resource list
585  *		insert rc     - grab writer lock
586  *		delete rc     - grab writer lock and resource mutex
587  *		read/write    - no lock
588  *
589  * Remote invocations:
590  *		find resource - grab read lock and resource mutex
591  *
592  * State:
593  *		resource state - grab resource mutex
594  */
595 
596 int
597 _init(void)
598 {
599 	int e;
600 
601 	e = mod_install(&modlinkage);
602 	if (e != 0) {
603 		return (e);
604 	}
605 
606 	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
607 
608 	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
609 
610 
611 	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
612 
613 	rsm_hash_size = RSM_HASHSZ;
614 
615 	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
616 
617 	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
618 
619 	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
620 
621 	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
622 	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
623 
624 	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
625 	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
626 
627 	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
628 	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
629 
630 	rsm_ipc.count = RSMIPC_SZ;
631 	rsm_ipc.wanted = 0;
632 	rsm_ipc.sequence = 0;
633 
634 	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
635 
636 	for (e = 0; e < RSMIPC_SZ; e++) {
637 		rsmipc_slot_t *slot = &rsm_ipc.slots[e];
638 
639 		RSMIPC_SET(slot, RSMIPC_FREE);
640 		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
641 		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
642 	}
643 
644 	/*
645 	 * Initialize the suspend message list
646 	 */
647 	rsm_suspend_list.list_head = NULL;
648 	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
649 
650 	/*
651 	 * It is assumed here that configuration data is available
652 	 * during system boot since _init may be called at that time.
653 	 */
654 
655 	rsmka_pathmanager_init();
656 
657 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
658 	    "rsm: _init done\n"));
659 
660 	return (DDI_SUCCESS);
661 
662 }
663 
664 int
665 _info(struct modinfo *modinfop)
666 {
667 
668 	return (mod_info(&modlinkage, modinfop));
669 }
670 
671 int
672 _fini(void)
673 {
674 	int e;
675 
676 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
677 	    "rsm: _fini enter\n"));
678 
679 	/*
680 	 * The rsmka_modunloadok flag is simply used to help with
681 	 * the PIT testing. Make this flag 0 to disallow modunload.
682 	 */
683 	if (rsmka_modunloadok == 0)
684 		return (EBUSY);
685 
686 	/* rsm_detach will be called as a result of mod_remove */
687 	e = mod_remove(&modlinkage);
688 	if (e) {
689 		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
690 		    "Unable to fini RSM %x\n", e));
691 		return (e);
692 	}
693 
694 	rsmka_pathmanager_cleanup();
695 
696 	rw_destroy(&rsm_resource.rsmrc_lock);
697 
698 	rw_destroy(&rsm_export_segs.rsmhash_rw);
699 	rw_destroy(&rsm_import_segs.rsmhash_rw);
700 	rw_destroy(&rsm_event_queues.rsmhash_rw);
701 
702 	mutex_destroy(&importer_list.lock);
703 
704 	mutex_destroy(&rsm_ipc.lock);
705 	cv_destroy(&rsm_ipc.cv);
706 
707 	(void) mutex_destroy(&rsm_suspend_list.list_lock);
708 
709 	(void) mutex_destroy(&rsm_pgcnt_lock);
710 
711 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
712 
713 	return (DDI_SUCCESS);
714 
715 }
716 
717 /*ARGSUSED1*/
718 static int
719 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
720 {
721 	minor_t	rnum;
722 	int	percent;
723 	int	ret;
724 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
725 
726 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
727 
728 	switch (cmd) {
729 	case DDI_ATTACH:
730 		break;
731 	case DDI_RESUME:
732 	default:
733 		DBG_PRINTF((category, RSM_ERR,
734 		    "rsm:rsm_attach - cmd not supported\n"));
735 		return (DDI_FAILURE);
736 	}
737 
738 	if (rsm_dip != NULL) {
739 		DBG_PRINTF((category, RSM_ERR,
740 		    "rsm:rsm_attach - supports only "
741 		    "one instance\n"));
742 		return (DDI_FAILURE);
743 	}
744 
745 	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
746 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
747 	    "enable-dynamic-reconfiguration", 1);
748 
749 	mutex_enter(&rsm_drv_data.drv_lock);
750 	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
751 	mutex_exit(&rsm_drv_data.drv_lock);
752 
753 	if (rsm_enable_dr) {
754 #ifdef	RSM_DRTEST
755 		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
756 		    (void *)NULL);
757 #else
758 		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
759 		    (void *)NULL);
760 #endif
761 		if (ret != 0) {
762 			mutex_exit(&rsm_drv_data.drv_lock);
763 			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
764 			    "reconfiguration setup failed\n");
765 			return (DDI_FAILURE);
766 		}
767 	}
768 
769 	mutex_enter(&rsm_drv_data.drv_lock);
770 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
771 	rsm_drv_data.drv_state = RSM_DRV_OK;
772 	cv_broadcast(&rsm_drv_data.drv_cv);
773 	mutex_exit(&rsm_drv_data.drv_lock);
774 
775 	/*
776 	 * page_list_read_lock();
777 	 * xx_setup();
778 	 * page_list_read_unlock();
779 	 */
780 
781 	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
782 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
783 	    "segment-hashtable-size", RSM_HASHSZ);
784 	if (rsm_hash_size == 0) {
785 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
786 		    "rsm: segment-hashtable-size in rsm.conf "
787 		    "must be greater than 0, defaulting to 128\n"));
788 		rsm_hash_size = RSM_HASHSZ;
789 	}
790 
791 	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
792 	    rsm_hash_size));
793 
794 	rsm_pgcnt = 0;
795 
796 	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
797 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
798 	    "max-exported-memory", 0);
799 	if (percent < 0) {
800 		DBG_PRINTF((category, RSM_ERR,
801 		    "rsm:rsm_attach not enough memory available to "
802 		    "export, or max-exported-memory set incorrectly.\n"));
803 		return (DDI_FAILURE);
804 	}
805 	/* 0 indicates no fixed upper limit. maxmem is the max	*/
806 	/* available pageable physical mem			*/
807 	rsm_pgcnt_max = (percent*maxmem)/100;
808 
809 	if (rsm_pgcnt_max > 0) {
810 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
811 		    "rsm: Available physical memory = %lu pages, "
812 		    "Max exportable memory = %lu pages",
813 		    maxmem, rsm_pgcnt_max));
814 	}
815 
816 	/*
817 	 * Create minor number
818 	 */
819 	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
820 		DBG_PRINTF((category, RSM_ERR,
821 		    "rsm: rsm_attach - Unable to get "
822 		    "minor number\n"));
823 		return (DDI_FAILURE);
824 	}
825 
826 	ASSERT(rnum == RSM_DRIVER_MINOR);
827 
828 	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
829 	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
830 		DBG_PRINTF((category, RSM_ERR,
831 		    "rsm: rsm_attach - unable to allocate "
832 		    "minor #\n"));
833 		return (DDI_FAILURE);
834 	}
835 
836 	rsm_dip = devi;
837 	/*
838 	 * Allocate the hashtables
839 	 */
840 	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
841 	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
842 
843 	importer_list.bucket = (importing_token_t **)
844 	    kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
845 
846 	/*
847 	 * Allocate a resource struct
848 	 */
849 	{
850 		rsmresource_t *p;
851 
852 		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
853 
854 		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
855 
856 		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
857 	}
858 
859 	/*
860 	 * Based on the rsm.conf property max-segments, determine the maximum
861 	 * number of segments that can be exported/imported. This is then used
862 	 * to determine the size for barrier failure pages.
863 	 */
864 
865 	/* First get the max number of segments from the rsm.conf file */
866 	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
867 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
868 	    "max-segments", 0);
869 	if (max_segs == 0) {
870 		/* Use default number of segments */
871 		max_segs = RSM_MAX_NUM_SEG;
872 	}
873 
874 	/*
875 	 * Based on the max number of segments allowed, determine the barrier
876 	 * page size. add 1 to max_segs since the barrier page itself uses
877 	 * a slot
878 	 */
879 	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
880 	    PAGESIZE);
881 
882 	/*
883 	 * allocation of the barrier failure page
884 	 */
885 	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
886 	    DDI_UMEM_SLEEP, &bar_cookie);
887 
888 	/*
889 	 * Set the barrier_offset
890 	 */
891 	barrier_offset = 0;
892 
893 	/*
894 	 * Allocate a trash memory and get a cookie for it. This will be used
895 	 * when remapping segments during force disconnects. Allocate the
896 	 * trash memory with a large size which is page aligned.
897 	 */
898 	(void) ddi_umem_alloc((size_t)TRASHSIZE,
899 	    DDI_UMEM_TRASH, &remap_cookie);
900 
901 	/* initialize user segment id allocation variable */
902 	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
903 
904 	/*
905 	 * initialize the null_rsmpi_ops vector and the loopback adapter
906 	 */
907 	rsmka_init_loopback();
908 
909 
910 	ddi_report_dev(devi);
911 
912 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
913 
914 	return (DDI_SUCCESS);
915 }
916 
917 /*
918  * The call to mod_remove in the _fine routine will cause the system
919  * to call rsm_detach
920  */
921 /*ARGSUSED*/
922 static int
923 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
924 {
925 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
926 
927 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
928 
929 	switch (cmd) {
930 	case DDI_DETACH:
931 		break;
932 	default:
933 		DBG_PRINTF((category, RSM_ERR,
934 		    "rsm:rsm_detach - cmd %x not supported\n",
935 		    cmd));
936 		return (DDI_FAILURE);
937 	}
938 
939 	mutex_enter(&rsm_drv_data.drv_lock);
940 	while (rsm_drv_data.drv_state != RSM_DRV_OK)
941 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
942 	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
943 	mutex_exit(&rsm_drv_data.drv_lock);
944 
945 	/*
946 	 * Unregister the DR callback functions
947 	 */
948 	if (rsm_enable_dr) {
949 #ifdef	RSM_DRTEST
950 		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
951 		    (void *)NULL);
952 #else
953 		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
954 		    (void *)NULL);
955 #endif
956 	}
957 
958 	mutex_enter(&rsm_drv_data.drv_lock);
959 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
960 	rsm_drv_data.drv_state = RSM_DRV_NEW;
961 	mutex_exit(&rsm_drv_data.drv_lock);
962 
963 	ASSERT(rsm_suspend_list.list_head == NULL);
964 
965 	/*
966 	 * Release all resources, seglist, controller, ...
967 	 */
968 
969 	/* remove intersend queues */
970 	/* remove registered services */
971 
972 
973 	ddi_remove_minor_node(dip, DRIVER_NAME);
974 	rsm_dip = NULL;
975 
976 	/*
977 	 * Free minor zero resource
978 	 */
979 	{
980 		rsmresource_t *p;
981 
982 		p = rsmresource_free(RSM_DRIVER_MINOR);
983 		if (p) {
984 			mutex_destroy(&p->rsmrc_lock);
985 			kmem_free((void *)p, sizeof (*p));
986 		}
987 	}
988 
989 	/*
990 	 * Free resource table
991 	 */
992 
993 	rsmresource_destroy();
994 
995 	/*
996 	 * Free the hash tables
997 	 */
998 	rsmhash_free(&rsm_export_segs, rsm_hash_size);
999 	rsmhash_free(&rsm_import_segs, rsm_hash_size);
1000 
1001 	kmem_free((void *)importer_list.bucket,
1002 	    rsm_hash_size * sizeof (importing_token_t *));
1003 	importer_list.bucket = NULL;
1004 
1005 
1006 	/* free barrier page */
1007 	if (bar_cookie != NULL) {
1008 		ddi_umem_free(bar_cookie);
1009 	}
1010 	bar_va = NULL;
1011 	bar_cookie = NULL;
1012 
1013 	/*
1014 	 * Free the memory allocated for the trash
1015 	 */
1016 	if (remap_cookie != NULL) {
1017 		ddi_umem_free(remap_cookie);
1018 	}
1019 	remap_cookie = NULL;
1020 
1021 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1022 
1023 	return (DDI_SUCCESS);
1024 }
1025 
1026 /*ARGSUSED*/
1027 static int
1028 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1029 {
1030 	register int error;
1031 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1032 
1033 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1034 
1035 	switch (infocmd) {
1036 	case DDI_INFO_DEVT2DEVINFO:
1037 		if (rsm_dip == NULL)
1038 			error = DDI_FAILURE;
1039 		else {
1040 			*result = (void *)rsm_dip;
1041 			error = DDI_SUCCESS;
1042 		}
1043 		break;
1044 	case DDI_INFO_DEVT2INSTANCE:
1045 		*result = (void *)0;
1046 		error = DDI_SUCCESS;
1047 		break;
1048 	default:
1049 		error = DDI_FAILURE;
1050 	}
1051 
1052 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1053 	return (error);
1054 }
1055 
1056 adapter_t *
1057 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1058 {
1059 	adapter_t *adapter;
1060 	char adapter_devname[MAXNAMELEN];
1061 	int instance;
1062 	DBG_DEFINE(category,
1063 	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1064 
1065 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1066 
1067 	instance = msg->cnum;
1068 
1069 	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1070 		return (NULL);
1071 	}
1072 
1073 	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1074 		return (NULL);
1075 
1076 	if (strcmp(adapter_devname, "loopback") == 0)
1077 		return (&loopback_adapter);
1078 
1079 	adapter = rsmka_lookup_adapter(adapter_devname, instance);
1080 
1081 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1082 
1083 	return (adapter);
1084 }
1085 
1086 
1087 /*
1088  * *********************** Resource Number Management ********************
1089  * All resources are stored in a simple hash table. The table is an array
1090  * of pointers to resource blks. Each blk contains:
1091  *	base	- base number of this blk
1092  *	used	- number of used slots in this blk.
1093  *	blks    - array of pointers to resource items.
1094  * An entry in a resource blk is empty if it's NULL.
1095  *
1096  * We start with no resource array. Each time we run out of slots, we
1097  * reallocate a new larger array and copy the pointer to the new array and
1098  * a new resource blk is allocated and added to the hash table.
1099  *
1100  * The resource control block contains:
1101  *      root    - array of pointer of resource blks
1102  *      sz      - current size of array.
1103  *      len     - last valid entry in array.
1104  *
1105  * A search operation based on a resource number is as follows:
1106  *      index = rnum / RESOURCE_BLKSZ;
1107  *      ASSERT(index < resource_block.len);
1108  *      ASSERT(index < resource_block.sz);
1109  *	offset = rnum % RESOURCE_BLKSZ;
1110  *      ASSERT(offset >= resource_block.root[index]->base);
1111  *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1112  *	return resource_block.root[index]->blks[offset];
1113  *
1114  * A resource blk is freed with its used count reachs zero.
1115  */
1116 static int
1117 rsmresource_alloc(minor_t *rnum)
1118 {
1119 
1120 	/* search for available resource slot */
1121 	int i, j, empty = -1;
1122 	rsmresource_blk_t *blk;
1123 
1124 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1125 	    "rsmresource_alloc enter\n"));
1126 
1127 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1128 
1129 	/* Try to find an empty slot */
1130 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1131 		blk = rsm_resource.rsmrc_root[i];
1132 		if (blk != NULL && blk->rsmrcblk_avail > 0) {
1133 			/* found an empty slot in this blk */
1134 			for (j = 0; j < RSMRC_BLKSZ; j++) {
1135 				if (blk->rsmrcblk_blks[j] == NULL) {
1136 					*rnum = (minor_t)
1137 					    (j + (i * RSMRC_BLKSZ));
1138 					/*
1139 					 * obey gen page limits
1140 					 */
1141 					if (*rnum >= max_segs + 1) {
1142 						if (empty < 0) {
1143 							rw_exit(&rsm_resource.
1144 							    rsmrc_lock);
1145 							DBG_PRINTF((
1146 							    RSM_KERNEL_ALL,
1147 							    RSM_ERR,
1148 							    "rsmresource"
1149 							    "_alloc failed:"
1150 							    "not enough res"
1151 							    "%d\n", *rnum));
1152 					return (RSMERR_INSUFFICIENT_RESOURCES);
1153 						} else {
1154 							/* use empty slot */
1155 							break;
1156 						}
1157 
1158 					}
1159 
1160 					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1161 					blk->rsmrcblk_avail--;
1162 					rw_exit(&rsm_resource.rsmrc_lock);
1163 					DBG_PRINTF((RSM_KERNEL_ALL,
1164 					    RSM_DEBUG_VERBOSE,
1165 					    "rsmresource_alloc done\n"));
1166 					return (RSM_SUCCESS);
1167 				}
1168 			}
1169 		} else if (blk == NULL && empty < 0) {
1170 			/* remember first empty slot */
1171 			empty = i;
1172 		}
1173 	}
1174 
1175 	/* Couldn't find anything, allocate a new blk */
1176 	/*
1177 	 * Do we need to reallocate the root array
1178 	 */
1179 	if (empty < 0) {
1180 		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1181 			/*
1182 			 * Allocate new array and copy current stuff into it
1183 			 */
1184 			rsmresource_blk_t	**p;
1185 			uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1186 			    RSMRC_BLKSZ;
1187 			/*
1188 			 * Don't allocate more that max valid rnum
1189 			 */
1190 			if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1191 			    max_segs + 1) {
1192 				rw_exit(&rsm_resource.rsmrc_lock);
1193 				return (RSMERR_INSUFFICIENT_RESOURCES);
1194 			}
1195 
1196 			p = (rsmresource_blk_t **)kmem_zalloc(
1197 			    newsz * sizeof (*p),
1198 			    KM_SLEEP);
1199 
1200 			if (rsm_resource.rsmrc_root) {
1201 				uint_t oldsz;
1202 
1203 				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1204 				    (int)sizeof (*p));
1205 
1206 				/*
1207 				 * Copy old data into new space and
1208 				 * free old stuff
1209 				 */
1210 				bcopy(rsm_resource.rsmrc_root, p, oldsz);
1211 				kmem_free(rsm_resource.rsmrc_root, oldsz);
1212 			}
1213 
1214 			rsm_resource.rsmrc_root = p;
1215 			rsm_resource.rsmrc_sz = (int)newsz;
1216 		}
1217 
1218 		empty = rsm_resource.rsmrc_len;
1219 		rsm_resource.rsmrc_len++;
1220 	}
1221 
1222 	/*
1223 	 * Allocate a new blk
1224 	 */
1225 	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1226 	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1227 	rsm_resource.rsmrc_root[empty] = blk;
1228 	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1229 
1230 	/*
1231 	 * Allocate slot
1232 	 */
1233 
1234 	*rnum = (minor_t)(empty * RSMRC_BLKSZ);
1235 
1236 	/*
1237 	 * watch out not to exceed bounds of barrier page
1238 	 */
1239 	if (*rnum >= max_segs + 1) {
1240 		rw_exit(&rsm_resource.rsmrc_lock);
1241 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1242 		    "rsmresource_alloc failed %d\n", *rnum));
1243 
1244 		return (RSMERR_INSUFFICIENT_RESOURCES);
1245 	}
1246 	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1247 
1248 
1249 	rw_exit(&rsm_resource.rsmrc_lock);
1250 
1251 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1252 	    "rsmresource_alloc done\n"));
1253 
1254 	return (RSM_SUCCESS);
1255 }
1256 
1257 static rsmresource_t *
1258 rsmresource_free(minor_t rnum)
1259 {
1260 
1261 	/* search for available resource slot */
1262 	int i, j;
1263 	rsmresource_blk_t *blk;
1264 	rsmresource_t *p;
1265 
1266 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1267 	    "rsmresource_free enter\n"));
1268 
1269 	i = (int)(rnum / RSMRC_BLKSZ);
1270 	j = (int)(rnum % RSMRC_BLKSZ);
1271 
1272 	if (i >= rsm_resource.rsmrc_len) {
1273 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1274 		    "rsmresource_free done\n"));
1275 		return (NULL);
1276 	}
1277 
1278 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1279 
1280 	ASSERT(rsm_resource.rsmrc_root);
1281 	ASSERT(i < rsm_resource.rsmrc_len);
1282 	ASSERT(i < rsm_resource.rsmrc_sz);
1283 	blk = rsm_resource.rsmrc_root[i];
1284 	if (blk == NULL) {
1285 		rw_exit(&rsm_resource.rsmrc_lock);
1286 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1287 		    "rsmresource_free done\n"));
1288 		return (NULL);
1289 	}
1290 
1291 	ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1292 
1293 	p = blk->rsmrcblk_blks[j];
1294 	if (p == RSMRC_RESERVED) {
1295 		p = NULL;
1296 	}
1297 
1298 	blk->rsmrcblk_blks[j] = NULL;
1299 	blk->rsmrcblk_avail++;
1300 	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1301 		/* free this blk */
1302 		kmem_free(blk, sizeof (*blk));
1303 		rsm_resource.rsmrc_root[i] = NULL;
1304 	}
1305 
1306 	rw_exit(&rsm_resource.rsmrc_lock);
1307 
1308 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1309 	    "rsmresource_free done\n"));
1310 
1311 	return (p);
1312 }
1313 
1314 static rsmresource_t *
1315 rsmresource_lookup(minor_t rnum, int lock)
1316 {
1317 	int i, j;
1318 	rsmresource_blk_t *blk;
1319 	rsmresource_t *p;
1320 
1321 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1322 	    "rsmresource_lookup enter\n"));
1323 
1324 	/* Find resource and lock it in READER mode */
1325 	/* search for available resource slot */
1326 
1327 	i = (int)(rnum / RSMRC_BLKSZ);
1328 	j = (int)(rnum % RSMRC_BLKSZ);
1329 
1330 	if (i >= rsm_resource.rsmrc_len) {
1331 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1332 		    "rsmresource_lookup done\n"));
1333 		return (NULL);
1334 	}
1335 
1336 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1337 
1338 	blk = rsm_resource.rsmrc_root[i];
1339 	if (blk != NULL) {
1340 		ASSERT(i < rsm_resource.rsmrc_len);
1341 		ASSERT(i < rsm_resource.rsmrc_sz);
1342 
1343 		p = blk->rsmrcblk_blks[j];
1344 		if (lock == RSM_LOCK) {
1345 			if (p != RSMRC_RESERVED) {
1346 				mutex_enter(&p->rsmrc_lock);
1347 			} else {
1348 				p = NULL;
1349 			}
1350 		}
1351 	} else {
1352 		p = NULL;
1353 	}
1354 	rw_exit(&rsm_resource.rsmrc_lock);
1355 
1356 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1357 	    "rsmresource_lookup done\n"));
1358 
1359 	return (p);
1360 }
1361 
1362 static void
1363 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1364 {
1365 	/* Find resource and lock it in READER mode */
1366 	/* Caller can upgrade if need be */
1367 	/* search for available resource slot */
1368 	int i, j;
1369 	rsmresource_blk_t *blk;
1370 
1371 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1372 	    "rsmresource_insert enter\n"));
1373 
1374 	i = (int)(rnum / RSMRC_BLKSZ);
1375 	j = (int)(rnum % RSMRC_BLKSZ);
1376 
1377 	p->rsmrc_type = type;
1378 	p->rsmrc_num = rnum;
1379 
1380 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1381 
1382 	ASSERT(rsm_resource.rsmrc_root);
1383 	ASSERT(i < rsm_resource.rsmrc_len);
1384 	ASSERT(i < rsm_resource.rsmrc_sz);
1385 
1386 	blk = rsm_resource.rsmrc_root[i];
1387 	ASSERT(blk);
1388 
1389 	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1390 
1391 	blk->rsmrcblk_blks[j] = p;
1392 
1393 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1394 	    "rsmresource_insert done\n"));
1395 
1396 	rw_exit(&rsm_resource.rsmrc_lock);
1397 }
1398 
1399 static void
1400 rsmresource_destroy()
1401 {
1402 	int i, j;
1403 
1404 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1405 	    "rsmresource_destroy enter\n"));
1406 
1407 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1408 
1409 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1410 		rsmresource_blk_t	*blk;
1411 
1412 		blk = rsm_resource.rsmrc_root[i];
1413 		if (blk == NULL) {
1414 			continue;
1415 		}
1416 		for (j = 0; j < RSMRC_BLKSZ; j++) {
1417 			if (blk->rsmrcblk_blks[j] != NULL) {
1418 				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1419 				    "Not null slot %d, %lx\n", j,
1420 				    (size_t)blk->rsmrcblk_blks[j]));
1421 			}
1422 		}
1423 		kmem_free(blk, sizeof (*blk));
1424 		rsm_resource.rsmrc_root[i] = NULL;
1425 	}
1426 	if (rsm_resource.rsmrc_root) {
1427 		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1428 		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1429 		rsm_resource.rsmrc_root = NULL;
1430 		rsm_resource.rsmrc_len = 0;
1431 		rsm_resource.rsmrc_sz = 0;
1432 	}
1433 
1434 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1435 	    "rsmresource_destroy done\n"));
1436 
1437 	rw_exit(&rsm_resource.rsmrc_lock);
1438 }
1439 
1440 
1441 /* ******************** Generic Key Hash Table Management ********* */
1442 static rsmresource_t *
1443 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1444     rsm_resource_state_t state)
1445 {
1446 	rsmresource_t	*p;
1447 	uint_t		hashval;
1448 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1449 
1450 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1451 
1452 	hashval = rsmhash(key);
1453 
1454 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1455 	    key, hashval));
1456 
1457 	rw_enter(&rhash->rsmhash_rw, RW_READER);
1458 
1459 	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1460 
1461 	for (; p; p = p->rsmrc_next) {
1462 		if (p->rsmrc_key == key) {
1463 			/* acquire resource lock */
1464 			RSMRC_LOCK(p);
1465 			break;
1466 		}
1467 	}
1468 
1469 	rw_exit(&rhash->rsmhash_rw);
1470 
1471 	if (p != NULL && p->rsmrc_state != state) {
1472 		/* state changed, release lock and return null */
1473 		RSMRC_UNLOCK(p);
1474 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1475 		    "rsmhash_lookup done: state changed\n"));
1476 		return (NULL);
1477 	}
1478 
1479 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1480 
1481 	return (p);
1482 }
1483 
1484 static void
1485 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1486 {
1487 	rsmresource_t		*p, **back;
1488 	uint_t			hashval;
1489 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1490 
1491 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1492 
1493 	hashval = rsmhash(rcelm->rsmrc_key);
1494 
1495 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1496 	    rcelm->rsmrc_key, hashval));
1497 
1498 	/*
1499 	 * It's ok not to find the segment.
1500 	 */
1501 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1502 
1503 	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1504 
1505 	for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1506 		if (p == rcelm) {
1507 			*back = rcelm->rsmrc_next;
1508 			break;
1509 		}
1510 	}
1511 
1512 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1513 
1514 	rw_exit(&rhash->rsmhash_rw);
1515 }
1516 
1517 static int
1518 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1519     int dup_check, rsm_resource_state_t state)
1520 {
1521 	rsmresource_t	*p = NULL, **bktp;
1522 	uint_t		hashval;
1523 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1524 
1525 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1526 
1527 	/* lock table */
1528 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1529 
1530 	/*
1531 	 * If the current resource state is other than the state passed in
1532 	 * then the resource is (probably) already on the list. eg. for an
1533 	 * import segment if the state is not RSM_STATE_NEW then it's on the
1534 	 * list already.
1535 	 */
1536 	RSMRC_LOCK(new);
1537 	if (new->rsmrc_state != state) {
1538 		RSMRC_UNLOCK(new);
1539 		rw_exit(&rhash->rsmhash_rw);
1540 		return (RSMERR_BAD_SEG_HNDL);
1541 	}
1542 
1543 	hashval = rsmhash(key);
1544 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1545 
1546 	if (dup_check) {
1547 		/*
1548 		 * Used for checking export segments; don't want to have
1549 		 * the same key used for multiple segments.
1550 		 */
1551 
1552 		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1553 
1554 		for (; p; p = p->rsmrc_next) {
1555 			if (p->rsmrc_key == key) {
1556 				RSMRC_UNLOCK(new);
1557 				break;
1558 			}
1559 		}
1560 	}
1561 
1562 	if (p == NULL) {
1563 		/* Key doesn't exist, add it */
1564 
1565 		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1566 
1567 		new->rsmrc_key = key;
1568 		new->rsmrc_next = *bktp;
1569 		*bktp = new;
1570 	}
1571 
1572 	rw_exit(&rhash->rsmhash_rw);
1573 
1574 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1575 
1576 	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1577 }
1578 
1579 /*
1580  * XOR each byte of the key.
1581  */
1582 static uint_t
1583 rsmhash(rsm_memseg_id_t key)
1584 {
1585 	uint_t	hash = key;
1586 
1587 	hash ^=  (key >> 8);
1588 	hash ^=  (key >> 16);
1589 	hash ^=  (key >> 24);
1590 
1591 	return (hash % rsm_hash_size);
1592 
1593 }
1594 
1595 /*
1596  * generic function to get a specific bucket
1597  */
1598 static void *
1599 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1600 {
1601 
1602 	if (rhash->bucket == NULL)
1603 		return (NULL);
1604 	else
1605 		return ((void *)rhash->bucket[hashval]);
1606 }
1607 
1608 /*
1609  * generic function to get a specific bucket's address
1610  */
1611 static void **
1612 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1613 {
1614 	if (rhash->bucket == NULL)
1615 		return (NULL);
1616 	else
1617 		return ((void **)&(rhash->bucket[hashval]));
1618 }
1619 
1620 /*
1621  * generic function to alloc a hash table
1622  */
1623 static void
1624 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1625 {
1626 	rhash->bucket = (rsmresource_t **)
1627 	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1628 }
1629 
1630 /*
1631  * generic function to free a hash table
1632  */
1633 static void
1634 rsmhash_free(rsmhash_table_t *rhash, int size)
1635 {
1636 
1637 	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1638 	rhash->bucket = NULL;
1639 
1640 }
/* *********************** Exported Segment Key Management ************ */

/*
 * Wrappers around the generic hash functions for the export segment
 * table. Macro arguments and cast results are parenthesized so that any
 * expression can be passed in and the expansion can be used inside a
 * larger expression.
 *
 * rsmexport_add checks for duplicate keys and requires the segment to be
 * in RSM_STATE_BIND; rsmexport_lookup only returns segments that are in
 * RSM_STATE_EXPORT.
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))

/* ************************** Import Segment List Management ********** */

/*
 *  Add segment to import list. This will be useful for paging and loopback
 * segment unloading. No duplicate check is done and the segment has to be
 * in RSM_STATE_NEW.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))

/*
 *	#define	rsmimport_lookup(key)	\
 *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
 */
1670 
1671 /*
1672  * increase the ref count and make the import segment point to the
1673  * shared data structure. Return a pointer to the share data struct
1674  * and the shared data struct is locked upon return
1675  */
1676 static rsm_import_share_t *
1677 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1678     rsmseg_t *segp)
1679 {
1680 	uint_t		hash;
1681 	rsmresource_t		*p;
1682 	rsm_import_share_t	*shdatap;
1683 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1684 
1685 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1686 
1687 	hash = rsmhash(key);
1688 	/* lock table */
1689 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1690 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1691 	    key, hash));
1692 
1693 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1694 
1695 	for (; p; p = p->rsmrc_next) {
1696 		/*
1697 		 * Look for an entry that is importing the same exporter
1698 		 * with the share data structure allocated.
1699 		 */
1700 		if ((p->rsmrc_key == key) &&
1701 		    (p->rsmrc_node == node) &&
1702 		    (p->rsmrc_adapter == adapter) &&
1703 		    (((rsmseg_t *)p)->s_share != NULL)) {
1704 			shdatap = ((rsmseg_t *)p)->s_share;
1705 			break;
1706 		}
1707 	}
1708 
1709 	if (p == NULL) {
1710 		/* we are the first importer, create the shared data struct */
1711 		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1712 		shdatap->rsmsi_state = RSMSI_STATE_NEW;
1713 		shdatap->rsmsi_segid = key;
1714 		shdatap->rsmsi_node = node;
1715 		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1716 		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1717 	}
1718 
1719 	rsmseglock_acquire(segp);
1720 
1721 	/* we grab the shared lock before returning from this function */
1722 	mutex_enter(&shdatap->rsmsi_lock);
1723 
1724 	shdatap->rsmsi_refcnt++;
1725 	segp->s_share = shdatap;
1726 
1727 	rsmseglock_release(segp);
1728 
1729 	rw_exit(&rsm_import_segs.rsmhash_rw);
1730 
1731 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1732 
1733 	return (shdatap);
1734 }
1735 
1736 /*
1737  * the shared data structure should be locked before calling
1738  * rsmsharecv_signal().
1739  * Change the state and signal any waiting segments.
1740  */
1741 void
1742 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1743 {
1744 	ASSERT(rsmsharelock_held(seg));
1745 
1746 	if (seg->s_share->rsmsi_state == oldstate) {
1747 		seg->s_share->rsmsi_state = newstate;
1748 		cv_broadcast(&seg->s_share->rsmsi_cv);
1749 	}
1750 }
1751 
1752 /*
1753  * Add to the hash table
1754  */
1755 static void
1756 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1757     void *cookie)
1758 {
1759 
1760 	importing_token_t	*head;
1761 	importing_token_t	*new_token;
1762 	int			index;
1763 
1764 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1765 
1766 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1767 
1768 	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1769 	new_token->importing_node = node;
1770 	new_token->key = key;
1771 	new_token->import_segment_cookie = cookie;
1772 	new_token->importing_adapter_hwaddr = hwaddr;
1773 
1774 	index = rsmhash(key);
1775 
1776 	mutex_enter(&importer_list.lock);
1777 
1778 	head = importer_list.bucket[index];
1779 	importer_list.bucket[index] = new_token;
1780 	new_token->next = head;
1781 	mutex_exit(&importer_list.lock);
1782 
1783 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1784 }
1785 
1786 static void
1787 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1788 {
1789 
1790 	importing_token_t	*prev, *token = NULL;
1791 	int			index;
1792 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1793 
1794 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1795 
1796 	index = rsmhash(key);
1797 
1798 	mutex_enter(&importer_list.lock);
1799 
1800 	token = importer_list.bucket[index];
1801 
1802 	prev = token;
1803 	while (token != NULL) {
1804 		if (token->importing_node == node &&
1805 		    token->import_segment_cookie == cookie) {
1806 			if (prev == token)
1807 				importer_list.bucket[index] = token->next;
1808 			else
1809 				prev->next = token->next;
1810 			kmem_free((void *)token, sizeof (*token));
1811 			break;
1812 		} else {
1813 			prev = token;
1814 			token = token->next;
1815 		}
1816 	}
1817 
1818 	mutex_exit(&importer_list.lock);
1819 
1820 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1821 
1822 
1823 }
1824 
1825 /* **************************Segment Structure Management ************* */
1826 
1827 /*
1828  * Free segment structure
1829  */
1830 static void
1831 rsmseg_free(rsmseg_t *seg)
1832 {
1833 
1834 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1835 
1836 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1837 
1838 	/* need to take seglock here to avoid race with rsmmap_unmap() */
1839 	rsmseglock_acquire(seg);
1840 	if (seg->s_ckl != NULL) {
1841 		/* Segment is still busy */
1842 		seg->s_state = RSM_STATE_END;
1843 		rsmseglock_release(seg);
1844 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1845 		    "rsmseg_free done\n"));
1846 		return;
1847 	}
1848 
1849 	rsmseglock_release(seg);
1850 
1851 	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1852 
1853 	/*
1854 	 * If it's an importer decrement the refcount
1855 	 * and if its down to zero free the shared data structure.
1856 	 * This is where failures during rsm_connect() are unrefcounted
1857 	 */
1858 	if (seg->s_share != NULL) {
1859 
1860 		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1861 
1862 		rsmsharelock_acquire(seg);
1863 
1864 		ASSERT(seg->s_share->rsmsi_refcnt > 0);
1865 
1866 		seg->s_share->rsmsi_refcnt--;
1867 
1868 		if (seg->s_share->rsmsi_refcnt == 0) {
1869 			rsmsharelock_release(seg);
1870 			mutex_destroy(&seg->s_share->rsmsi_lock);
1871 			cv_destroy(&seg->s_share->rsmsi_cv);
1872 			kmem_free((void *)(seg->s_share),
1873 			    sizeof (rsm_import_share_t));
1874 		} else {
1875 			rsmsharelock_release(seg);
1876 		}
1877 		/*
1878 		 * The following needs to be done after any
1879 		 * rsmsharelock calls which use seg->s_share.
1880 		 */
1881 		seg->s_share = NULL;
1882 	}
1883 
1884 	cv_destroy(&seg->s_cv);
1885 	mutex_destroy(&seg->s_lock);
1886 	rsmacl_free(seg->s_acl, seg->s_acl_len);
1887 	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1888 	if (seg->s_adapter)
1889 		rsmka_release_adapter(seg->s_adapter);
1890 
1891 	kmem_free((void *)seg, sizeof (*seg));
1892 
1893 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1894 
1895 }
1896 
1897 
1898 static rsmseg_t *
1899 rsmseg_alloc(minor_t num, struct cred *cred)
1900 {
1901 	rsmseg_t	*new;
1902 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1903 
1904 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1905 	/*
1906 	 * allocate memory for new segment. This should be a segkmem cache.
1907 	 */
1908 	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1909 
1910 	new->s_state = RSM_STATE_NEW;
1911 	new->s_minor	= num;
1912 	new->s_acl_len	= 0;
1913 	new->s_cookie = NULL;
1914 	new->s_adapter = NULL;
1915 
1916 	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1917 	/* we don't have a key yet, will set at export/connect */
1918 	new->s_uid  = crgetuid(cred);
1919 	new->s_gid  = crgetgid(cred);
1920 
1921 	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1922 	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1923 
1924 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1925 
1926 	return (new);
1927 }
1928 
1929 /* ******************************** Driver Open/Close/Poll *************** */
1930 
1931 /*ARGSUSED1*/
1932 static int
1933 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1934 {
1935 	minor_t rnum;
1936 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1937 
1938 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1939 	/*
1940 	 * Char only
1941 	 */
1942 	if (otyp != OTYP_CHR) {
1943 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1944 		return (EINVAL);
1945 	}
1946 
1947 	/*
1948 	 * Only zero can be opened, clones are used for resources.
1949 	 */
1950 	if (getminor(*devp) != RSM_DRIVER_MINOR) {
1951 		DBG_PRINTF((category, RSM_ERR,
1952 		    "rsm_open: bad minor %d\n", getminor(*devp)));
1953 		return (ENODEV);
1954 	}
1955 
1956 	if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1957 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1958 		return (EPERM);
1959 	}
1960 
1961 	if (!(flag & FWRITE)) {
1962 		/*
1963 		 * The library function _rsm_librsm_init calls open for
1964 		 * /dev/rsm with flag set to O_RDONLY.  We want a valid
1965 		 * file descriptor to be returned for minor device zero.
1966 		 */
1967 
1968 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1969 		    "rsm_open RDONLY done\n"));
1970 		return (DDI_SUCCESS);
1971 	}
1972 
1973 	/*
1974 	 * - allocate new minor number and segment.
1975 	 * - add segment to list of all segments.
1976 	 * - set minordev data to segment
1977 	 * - update devp argument to new device
1978 	 * - update s_cred to cred; make sure you do crhold(cred);
1979 	 */
1980 
1981 	/* allocate a new resource number */
1982 	if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1983 		/*
1984 		 * We will bind this minor to a specific resource in first
1985 		 * ioctl
1986 		 */
1987 		*devp = makedevice(getmajor(*devp), rnum);
1988 	} else {
1989 		return (EAGAIN);
1990 	}
1991 
1992 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1993 	return (DDI_SUCCESS);
1994 }
1995 
1996 static void
1997 rsmseg_close(rsmseg_t *seg, int force_flag)
1998 {
1999 	int e = RSM_SUCCESS;
2000 
2001 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2002 
2003 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2004 
2005 	rsmseglock_acquire(seg);
2006 	if (!force_flag && (seg->s_hdr.rsmrc_type ==
2007 	    RSM_RESOURCE_EXPORT_SEGMENT)) {
2008 		/*
2009 		 * If we are processing rsm_close wait for force_destroy
2010 		 * processing to complete since force_destroy processing
2011 		 * needs to finish first before we can free the segment.
2012 		 * force_destroy is only for export segments
2013 		 */
2014 		while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2015 			cv_wait(&seg->s_cv, &seg->s_lock);
2016 		}
2017 	}
2018 	rsmseglock_release(seg);
2019 
2020 	/* It's ok to read the state without a lock */
2021 	switch (seg->s_state) {
2022 	case RSM_STATE_EXPORT:
2023 	case RSM_STATE_EXPORT_QUIESCING:
2024 	case RSM_STATE_EXPORT_QUIESCED:
2025 		e = rsm_unpublish(seg, 1);
2026 		/* FALLTHRU */
2027 	case RSM_STATE_BIND_QUIESCED:
2028 		/* FALLTHRU */
2029 	case RSM_STATE_BIND:
2030 		e = rsm_unbind(seg);
2031 		if (e != RSM_SUCCESS && force_flag == 1)
2032 			return;
2033 		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2034 		/* FALLTHRU */
2035 	case RSM_STATE_NEW_QUIESCED:
2036 		rsmseglock_acquire(seg);
2037 		seg->s_state = RSM_STATE_NEW;
2038 		cv_broadcast(&seg->s_cv);
2039 		rsmseglock_release(seg);
2040 		break;
2041 	case RSM_STATE_NEW:
2042 		break;
2043 	case RSM_STATE_ZOMBIE:
2044 		/*
2045 		 * Segments in this state have been removed off the
2046 		 * exported segments list and have been unpublished
2047 		 * and unbind. These segments have been removed during
2048 		 * a callback to the rsm_export_force_destroy, which
2049 		 * is called for the purpose of unlocking these
2050 		 * exported memory segments when a process exits but
2051 		 * leaves the segments locked down since rsm_close is
2052 		 * is not called for the segments. This can happen
2053 		 * when a process calls fork or exec and then exits.
2054 		 * Once the segments are in the ZOMBIE state, all that
2055 		 * remains is to destroy them when rsm_close is called.
2056 		 * This is done here. Thus, for such segments the
2057 		 * the state is changed to new so that later in this
2058 		 * function rsmseg_free is called.
2059 		 */
2060 		rsmseglock_acquire(seg);
2061 		seg->s_state = RSM_STATE_NEW;
2062 		rsmseglock_release(seg);
2063 		break;
2064 	case RSM_STATE_MAP_QUIESCE:
2065 	case RSM_STATE_ACTIVE:
2066 		/* Disconnect will handle the unmap */
2067 	case RSM_STATE_CONN_QUIESCE:
2068 	case RSM_STATE_CONNECT:
2069 	case RSM_STATE_DISCONNECT:
2070 		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2071 		(void) rsm_disconnect(seg);
2072 		break;
2073 	case RSM_STATE_MAPPING:
2074 		/*FALLTHRU*/
2075 	case RSM_STATE_END:
2076 		DBG_PRINTF((category, RSM_ERR,
2077 		    "Invalid segment state %d in rsm_close\n", seg->s_state));
2078 		break;
2079 	default:
2080 		DBG_PRINTF((category, RSM_ERR,
2081 		    "Invalid segment state %d in rsm_close\n", seg->s_state));
2082 		break;
2083 	}
2084 
2085 	/*
2086 	 * check state.
2087 	 * - make sure you do crfree(s_cred);
2088 	 * release segment and minor number
2089 	 */
2090 	ASSERT(seg->s_state == RSM_STATE_NEW);
2091 
2092 	/*
2093 	 * The export_force_destroy callback is created to unlock
2094 	 * the exported segments of a process
2095 	 * when the process does a fork or exec and then exits calls this
2096 	 * function with the force flag set to 1 which indicates that the
2097 	 * segment state must be converted to ZOMBIE. This state means that the
2098 	 * segments still exist and have been unlocked and most importantly the
2099 	 * only operation allowed is to destroy them on an rsm_close.
2100 	 */
2101 	if (force_flag) {
2102 		rsmseglock_acquire(seg);
2103 		seg->s_state = RSM_STATE_ZOMBIE;
2104 		rsmseglock_release(seg);
2105 	} else {
2106 		rsmseg_free(seg);
2107 	}
2108 
2109 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2110 }
2111 
2112 static int
2113 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2114 {
2115 	minor_t	rnum = getminor(dev);
2116 	rsmresource_t *res;
2117 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2118 
2119 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2120 
2121 	flag = flag; cred = cred;
2122 
2123 	if (otyp != OTYP_CHR)
2124 		return (EINVAL);
2125 
2126 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2127 
2128 	/*
2129 	 * At this point we are the last reference to the resource.
2130 	 * Free resource number from resource table.
2131 	 * It's ok to remove number before we free the segment.
2132 	 * We need to lock the resource to protect against remote calls.
2133 	 */
2134 	if (rnum == RSM_DRIVER_MINOR ||
2135 	    (res = rsmresource_free(rnum)) == NULL) {
2136 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2137 		return (DDI_SUCCESS);
2138 	}
2139 
2140 	switch (res->rsmrc_type) {
2141 	case RSM_RESOURCE_EXPORT_SEGMENT:
2142 	case RSM_RESOURCE_IMPORT_SEGMENT:
2143 		rsmseg_close((rsmseg_t *)res, 0);
2144 		break;
2145 	case RSM_RESOURCE_BAR:
2146 		DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2147 		break;
2148 	default:
2149 		break;
2150 	}
2151 
2152 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2153 
2154 	return (DDI_SUCCESS);
2155 }
2156 
2157 /*
2158  * rsm_inc_pgcnt
2159  *
2160  * Description: increment rsm page counter.
2161  *
2162  * Parameters:	pgcnt_t	pnum;	number of pages to be used
2163  *
2164  * Returns:	RSM_SUCCESS	if memory limit not exceeded
2165  *		ENOSPC		if memory limit exceeded. In this case, the
2166  *				page counter remains unchanged.
2167  *
2168  */
2169 static int
2170 rsm_inc_pgcnt(pgcnt_t pnum)
2171 {
2172 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2173 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2174 		return (RSM_SUCCESS);
2175 	}
2176 
2177 	mutex_enter(&rsm_pgcnt_lock);
2178 
2179 	if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2180 		/* ensure that limits have not been exceeded */
2181 		mutex_exit(&rsm_pgcnt_lock);
2182 		return (RSMERR_INSUFFICIENT_MEM);
2183 	}
2184 
2185 	rsm_pgcnt += pnum;
2186 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2187 	    rsm_pgcnt));
2188 	mutex_exit(&rsm_pgcnt_lock);
2189 
2190 	return (RSM_SUCCESS);
2191 }
2192 
2193 /*
2194  * rsm_dec_pgcnt
2195  *
2196  * Description:	decrement rsm page counter.
2197  *
2198  * Parameters:	pgcnt_t	pnum;	number of pages freed
2199  *
2200  */
2201 static void
2202 rsm_dec_pgcnt(pgcnt_t pnum)
2203 {
2204 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2205 
2206 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2207 		return;
2208 	}
2209 
2210 	mutex_enter(&rsm_pgcnt_lock);
2211 	ASSERT(rsm_pgcnt >= pnum);
2212 	rsm_pgcnt -= pnum;
2213 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2214 	    rsm_pgcnt));
2215 	mutex_exit(&rsm_pgcnt_lock);
2216 }
2217 
/*
 * umem callback table that rsm_bind_pages() passes to umem_lockmemory()
 * when locking down user pages; its callback entry is
 * rsm_export_force_destroy.
 */
static struct umem_callback_ops rsm_as_ops = {
	UMEM_CALLBACK_VERSION, /* version number */
	rsm_export_force_destroy,
};
2222 
2223 static int
2224 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2225     proc_t *procp)
2226 {
2227 	int error = RSM_SUCCESS;
2228 	ulong_t pnum;
2229 	struct umem_callback_ops *callbackops = &rsm_as_ops;
2230 
2231 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2232 
2233 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2234 
2235 	/*
2236 	 * Make sure vaddr and len are aligned on a page boundary
2237 	 */
2238 	if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2239 		return (RSMERR_BAD_ADDR);
2240 	}
2241 
2242 	if (len & (PAGESIZE - 1)) {
2243 		return (RSMERR_BAD_LENGTH);
2244 	}
2245 
2246 	/*
2247 	 * Find number of pages
2248 	 */
2249 	pnum = btopr(len);
2250 	error = rsm_inc_pgcnt(pnum);
2251 	if (error != RSM_SUCCESS) {
2252 		DBG_PRINTF((category, RSM_ERR,
2253 		    "rsm_bind_pages:mem limit exceeded\n"));
2254 		return (RSMERR_INSUFFICIENT_MEM);
2255 	}
2256 
2257 	error = umem_lockmemory(vaddr, len,
2258 	    DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2259 	    cookie,
2260 	    callbackops, procp);
2261 
2262 	if (error) {
2263 		rsm_dec_pgcnt(pnum);
2264 		DBG_PRINTF((category, RSM_ERR,
2265 		    "rsm_bind_pages:ddi_umem_lock failed\n"));
2266 		/*
2267 		 * ddi_umem_lock, in the case of failure, returns one of
2268 		 * the following three errors. These are translated into
2269 		 * the RSMERR namespace and returned.
2270 		 */
2271 		if (error == EFAULT)
2272 			return (RSMERR_BAD_ADDR);
2273 		else if (error == EACCES)
2274 			return (RSMERR_PERM_DENIED);
2275 		else
2276 			return (RSMERR_INSUFFICIENT_MEM);
2277 	}
2278 
2279 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2280 
2281 	return (error);
2282 
2283 }
2284 
2285 static int
2286 rsm_unbind_pages(rsmseg_t *seg)
2287 {
2288 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2289 
2290 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2291 
2292 	ASSERT(rsmseglock_held(seg));
2293 
2294 	if (seg->s_cookie != NULL) {
2295 		/* unlock address range */
2296 		ddi_umem_unlock(seg->s_cookie);
2297 		rsm_dec_pgcnt(btopr(seg->s_len));
2298 		seg->s_cookie = NULL;
2299 	}
2300 
2301 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2302 
2303 	return (RSM_SUCCESS);
2304 }
2305 
2306 
2307 static int
2308 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2309 {
2310 	int e;
2311 	adapter_t *adapter;
2312 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2313 
2314 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2315 
2316 	adapter = rsm_getadapter(msg, mode);
2317 	if (adapter == NULL) {
2318 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2319 		    "rsm_bind done:no adapter\n"));
2320 		return (RSMERR_CTLR_NOT_PRESENT);
2321 	}
2322 
2323 	/* lock address range */
2324 	if (msg->vaddr == NULL) {
2325 		rsmka_release_adapter(adapter);
2326 		DBG_PRINTF((category, RSM_ERR,
2327 		    "rsm: rsm_bind done: invalid vaddr\n"));
2328 		return (RSMERR_BAD_ADDR);
2329 	}
2330 	if (msg->len <= 0) {
2331 		rsmka_release_adapter(adapter);
2332 		DBG_PRINTF((category, RSM_ERR,
2333 		    "rsm_bind: invalid length\n"));
2334 		return (RSMERR_BAD_LENGTH);
2335 	}
2336 
2337 	/* Lock segment */
2338 	rsmseglock_acquire(seg);
2339 
2340 	while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2341 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2342 			DBG_PRINTF((category, RSM_DEBUG,
2343 			    "rsm_bind done: cv_wait INTERRUPTED"));
2344 			rsmka_release_adapter(adapter);
2345 			rsmseglock_release(seg);
2346 			return (RSMERR_INTERRUPTED);
2347 		}
2348 	}
2349 
2350 	ASSERT(seg->s_state == RSM_STATE_NEW);
2351 
2352 	ASSERT(seg->s_cookie == NULL);
2353 
2354 	e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2355 	if (e == RSM_SUCCESS) {
2356 		seg->s_flags |= RSM_USER_MEMORY;
2357 		if (msg->perm & RSM_ALLOW_REBIND) {
2358 			seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2359 		}
2360 		if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2361 			seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2362 		}
2363 		seg->s_region.r_vaddr = msg->vaddr;
2364 		/*
2365 		 * Set the s_pid value in the segment structure. This is used
2366 		 * to identify exported segments belonging to a particular
2367 		 * process so that when the process exits, these segments can
2368 		 * be unlocked forcefully even if rsm_close is not called on
2369 		 * process exit since there maybe other processes referencing
2370 		 * them (for example on a fork or exec).
2371 		 * The s_pid value is also used to authenticate the process
2372 		 * doing a publish or unpublish on the export segment. Only
2373 		 * the creator of the export segment has a right to do a
2374 		 * publish or unpublish and unbind on the segment.
2375 		 */
2376 		seg->s_pid = ddi_get_pid();
2377 		seg->s_len = msg->len;
2378 		seg->s_state = RSM_STATE_BIND;
2379 		seg->s_adapter = adapter;
2380 		seg->s_proc = curproc;
2381 	} else {
2382 		rsmka_release_adapter(adapter);
2383 		DBG_PRINTF((category, RSM_WARNING,
2384 		    "unable to lock down pages\n"));
2385 	}
2386 
2387 	msg->rnum = seg->s_minor;
2388 	/* Unlock segment */
2389 	rsmseglock_release(seg);
2390 
2391 	if (e == RSM_SUCCESS) {
2392 		/* copyout the resource number */
2393 #ifdef _MULTI_DATAMODEL
2394 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2395 			rsm_ioctlmsg32_t msg32;
2396 
2397 			msg32.rnum = msg->rnum;
2398 			if (ddi_copyout((caddr_t)&msg32.rnum,
2399 			    (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2400 			    sizeof (minor_t), mode)) {
2401 				rsmka_release_adapter(adapter);
2402 				e = RSMERR_BAD_ADDR;
2403 			}
2404 		}
2405 #endif
2406 		if (ddi_copyout((caddr_t)&msg->rnum,
2407 		    (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2408 		    sizeof (minor_t), mode)) {
2409 			rsmka_release_adapter(adapter);
2410 			e = RSMERR_BAD_ADDR;
2411 		}
2412 	}
2413 
2414 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2415 
2416 	return (e);
2417 }
2418 
2419 static void
2420 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2421     rsm_memseg_id_t ex_segid,
2422     ddi_umem_cookie_t cookie)
2423 
2424 {
2425 	rsmresource_t	*p = NULL;
2426 	rsmhash_table_t *rhash = &rsm_import_segs;
2427 	uint_t		index;
2428 
2429 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2430 	    "rsm_remap_local_importers enter\n"));
2431 
2432 	index = rsmhash(ex_segid);
2433 
2434 	rw_enter(&rhash->rsmhash_rw, RW_READER);
2435 
2436 	p = rsmhash_getbkt(rhash, index);
2437 
2438 	for (; p; p = p->rsmrc_next) {
2439 		rsmseg_t *seg = (rsmseg_t *)p;
2440 		rsmseglock_acquire(seg);
2441 		/*
2442 		 * Change the s_cookie value of only the local importers
2443 		 * which have been mapped (in state RSM_STATE_ACTIVE).
2444 		 * Note that there is no need to change the s_cookie value
2445 		 * if the imported segment is in RSM_STATE_MAPPING since
2446 		 * eventually the s_cookie will be updated via the mapping
2447 		 * functionality.
2448 		 */
2449 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2450 		    (seg->s_state == RSM_STATE_ACTIVE)) {
2451 			seg->s_cookie = cookie;
2452 		}
2453 		rsmseglock_release(seg);
2454 	}
2455 	rw_exit(&rhash->rsmhash_rw);
2456 
2457 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2458 	    "rsm_remap_local_importers done\n"));
2459 }
2460 
2461 static int
2462 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2463 {
2464 	int e;
2465 	adapter_t *adapter;
2466 	ddi_umem_cookie_t cookie;
2467 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2468 
2469 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2470 
2471 	/* Check for permissions to rebind */
2472 	if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2473 		return (RSMERR_REBIND_NOT_ALLOWED);
2474 	}
2475 
2476 	if (seg->s_pid != ddi_get_pid() &&
2477 	    ddi_get_pid() != 0) {
2478 		DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2479 		return (RSMERR_NOT_CREATOR);
2480 	}
2481 
2482 	/*
2483 	 * We will not be allowing partial rebind and hence length passed
2484 	 * in must be same as segment length
2485 	 */
2486 	if (msg->vaddr == NULL) {
2487 		DBG_PRINTF((category, RSM_ERR,
2488 		    "rsm_rebind done: null msg->vaddr\n"));
2489 		return (RSMERR_BAD_ADDR);
2490 	}
2491 	if (msg->len != seg->s_len) {
2492 		DBG_PRINTF((category, RSM_ERR,
2493 		    "rsm_rebind: invalid length\n"));
2494 		return (RSMERR_BAD_LENGTH);
2495 	}
2496 
2497 	/* Lock segment */
2498 	rsmseglock_acquire(seg);
2499 
2500 	while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2501 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2502 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2503 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2504 			rsmseglock_release(seg);
2505 			DBG_PRINTF((category, RSM_DEBUG,
2506 			    "rsm_rebind done: cv_wait INTERRUPTED"));
2507 			return (RSMERR_INTERRUPTED);
2508 		}
2509 	}
2510 
2511 	/* verify segment state */
2512 	if ((seg->s_state != RSM_STATE_BIND) &&
2513 	    (seg->s_state != RSM_STATE_EXPORT)) {
2514 		/* Unlock segment */
2515 		rsmseglock_release(seg);
2516 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2517 		    "rsm_rebind done: invalid state\n"));
2518 		return (RSMERR_BAD_SEG_HNDL);
2519 	}
2520 
2521 	ASSERT(seg->s_cookie != NULL);
2522 
2523 	if (msg->vaddr == seg->s_region.r_vaddr) {
2524 		rsmseglock_release(seg);
2525 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2526 		return (RSM_SUCCESS);
2527 	}
2528 
2529 	e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2530 	if (e == RSM_SUCCESS) {
2531 		struct buf *xbuf;
2532 		dev_t sdev = 0;
2533 		rsm_memory_local_t mem;
2534 
2535 		xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2536 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
2537 		ASSERT(xbuf != NULL);
2538 
2539 		mem.ms_type = RSM_MEM_BUF;
2540 		mem.ms_bp = xbuf;
2541 
2542 		adapter = seg->s_adapter;
2543 		e = adapter->rsmpi_ops->rsm_rebind(
2544 		    seg->s_handle.out, 0, &mem,
2545 		    RSM_RESOURCE_DONTWAIT, NULL);
2546 
2547 		if (e == RSM_SUCCESS) {
2548 			/*
2549 			 * unbind the older pages, and unload local importers;
2550 			 * but don't disconnect importers
2551 			 */
2552 			(void) rsm_unbind_pages(seg);
2553 			seg->s_cookie = cookie;
2554 			seg->s_region.r_vaddr = msg->vaddr;
2555 			rsm_remap_local_importers(my_nodeid, seg->s_segid,
2556 			    cookie);
2557 		} else {
2558 			/*
2559 			 * Unbind the pages associated with "cookie" by the
2560 			 * rsm_bind_pages calls prior to this. This is
2561 			 * similar to what is done in the rsm_unbind_pages
2562 			 * routine for the seg->s_cookie.
2563 			 */
2564 			ddi_umem_unlock(cookie);
2565 			rsm_dec_pgcnt(btopr(msg->len));
2566 			DBG_PRINTF((category, RSM_ERR,
2567 			    "rsm_rebind failed with %d\n", e));
2568 		}
2569 		/*
2570 		 * At present there is no dependency on the existence of xbuf.
2571 		 * So we can free it here. If in the future this changes, it can
2572 		 * be freed sometime during the segment destroy.
2573 		 */
2574 		freerbuf(xbuf);
2575 	}
2576 
2577 	/* Unlock segment */
2578 	rsmseglock_release(seg);
2579 
2580 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2581 
2582 	return (e);
2583 }
2584 
2585 static int
2586 rsm_unbind(rsmseg_t *seg)
2587 {
2588 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2589 
2590 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2591 
2592 	rsmseglock_acquire(seg);
2593 
2594 	/* verify segment state */
2595 	if ((seg->s_state != RSM_STATE_BIND) &&
2596 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2597 		rsmseglock_release(seg);
2598 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2599 		    "rsm_unbind: invalid state\n"));
2600 		return (RSMERR_BAD_SEG_HNDL);
2601 	}
2602 
2603 	/* unlock current range */
2604 	(void) rsm_unbind_pages(seg);
2605 
2606 	if (seg->s_state == RSM_STATE_BIND) {
2607 		seg->s_state = RSM_STATE_NEW;
2608 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2609 		seg->s_state = RSM_STATE_NEW_QUIESCED;
2610 	}
2611 
2612 	rsmseglock_release(seg);
2613 
2614 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2615 
2616 	return (RSM_SUCCESS);
2617 }
2618 
2619 /* **************************** Exporter Access List Management ******* */
2620 static void
2621 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2622 {
2623 	int	acl_sz;
2624 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2625 
2626 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2627 
2628 	/* acl could be NULL */
2629 
2630 	if (acl != NULL && acl_len > 0) {
2631 		acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2632 		kmem_free((void *)acl, acl_sz);
2633 	}
2634 
2635 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2636 }
2637 
2638 static void
2639 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2640 {
2641 	int	acl_sz;
2642 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2643 
2644 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2645 
2646 	if (acl != NULL && acl_len > 0) {
2647 		acl_sz = acl_len * sizeof (rsm_access_entry_t);
2648 		kmem_free((void *)acl, acl_sz);
2649 	}
2650 
2651 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2652 
2653 }
2654 
2655 static int
2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2657     rsmapi_access_entry_t **list, int *len, int loopback)
2658 {
2659 	rsmapi_access_entry_t *acl;
2660 	int	acl_len;
2661 	int i;
2662 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2663 
2664 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2665 
2666 	*len = 0;
2667 	*list = NULL;
2668 
2669 	acl_len = msg->acl_len;
2670 	if ((loopback && acl_len > 1) || (acl_len < 0) ||
2671 	    (acl_len > MAX_NODES)) {
2672 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2673 		    "rsmacl_build done: acl invalid\n"));
2674 		return (RSMERR_BAD_ACL);
2675 	}
2676 
2677 	if (acl_len > 0 && acl_len <= MAX_NODES) {
2678 		size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2679 
2680 		acl = kmem_alloc(acl_size, KM_SLEEP);
2681 
2682 		if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2683 		    acl_size, mode)) {
2684 			kmem_free((void *) acl, acl_size);
2685 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2686 			    "rsmacl_build done: BAD_ADDR\n"));
2687 			return (RSMERR_BAD_ADDR);
2688 		}
2689 
2690 		/*
2691 		 * Verify access list
2692 		 */
2693 		for (i = 0; i < acl_len; i++) {
2694 			if (acl[i].ae_node > MAX_NODES ||
2695 			    (loopback && (acl[i].ae_node != my_nodeid)) ||
2696 			    acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2697 				/* invalid entry */
2698 				kmem_free((void *) acl, acl_size);
2699 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2700 				    "rsmacl_build done: EINVAL\n"));
2701 				return (RSMERR_BAD_ACL);
2702 			}
2703 		}
2704 
2705 		*len = acl_len;
2706 		*list = acl;
2707 	}
2708 
2709 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2710 
2711 	return (DDI_SUCCESS);
2712 }
2713 
2714 static int
2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2716     int acl_len, adapter_t *adapter)
2717 {
2718 	rsm_access_entry_t *acl;
2719 	rsm_addr_t hwaddr;
2720 	int i;
2721 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2722 
2723 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2724 
2725 	if (src != NULL) {
2726 		size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2727 		acl = kmem_alloc(acl_size, KM_SLEEP);
2728 
2729 		/*
2730 		 * translate access list
2731 		 */
2732 		for (i = 0; i < acl_len; i++) {
2733 			if (src[i].ae_node == my_nodeid) {
2734 				acl[i].ae_addr = adapter->hwaddr;
2735 			} else {
2736 				hwaddr = get_remote_hwaddr(adapter,
2737 				    src[i].ae_node);
2738 				if ((int64_t)hwaddr < 0) {
2739 					/* invalid hwaddr */
2740 					kmem_free((void *) acl, acl_size);
2741 					DBG_PRINTF((category,
2742 					    RSM_DEBUG_VERBOSE,
2743 					    "rsmpiacl_create done:"
2744 					    "EINVAL hwaddr\n"));
2745 					return (RSMERR_INTERNAL_ERROR);
2746 				}
2747 				acl[i].ae_addr = hwaddr;
2748 			}
2749 			/* rsmpi understands only RSM_PERM_XXXX */
2750 			acl[i].ae_permission =
2751 			    src[i].ae_permission & RSM_PERM_RDWR;
2752 		}
2753 		*dest = acl;
2754 	} else {
2755 		*dest = NULL;
2756 	}
2757 
2758 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2759 
2760 	return (RSM_SUCCESS);
2761 }
2762 
2763 static int
2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2765     rsmipc_reply_t *reply)
2766 {
2767 
2768 	int		i;
2769 	rsmseg_t	*seg;
2770 	rsm_memseg_id_t key = req->rsmipc_key;
2771 	rsm_permission_t perm = req->rsmipc_perm;
2772 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2773 
2774 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2775 	    "rsmsegacl_validate enter\n"));
2776 
2777 	/*
2778 	 * Find segment and grab its lock. The reason why we grab the segment
2779 	 * lock in side the search is to avoid the race when the segment is
2780 	 * being deleted and we already have a pointer to it.
2781 	 */
2782 	seg = rsmexport_lookup(key);
2783 	if (!seg) {
2784 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2785 		    "rsmsegacl_validate done: %u ENXIO\n", key));
2786 		return (RSMERR_SEG_NOT_PUBLISHED);
2787 	}
2788 
2789 	ASSERT(rsmseglock_held(seg));
2790 	ASSERT(seg->s_state == RSM_STATE_EXPORT);
2791 
2792 	/*
2793 	 * We implement a 2-level protection scheme.
2794 	 * First, we check if local/remote host has access rights.
2795 	 * Second, we check if the user has access rights.
2796 	 *
2797 	 * This routine only validates the rnode access_list
2798 	 */
2799 	if (seg->s_acl_len > 0) {
2800 		/*
2801 		 * Check host access list
2802 		 */
2803 		ASSERT(seg->s_acl != NULL);
2804 		for (i = 0; i < seg->s_acl_len; i++) {
2805 			if (seg->s_acl[i].ae_node == rnode) {
2806 				perm &= seg->s_acl[i].ae_permission;
2807 				goto found;
2808 			}
2809 		}
2810 		/* rnode is not found in the list */
2811 		rsmseglock_release(seg);
2812 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2813 		    "rsmsegacl_validate done: EPERM\n"));
2814 		return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2815 	} else {
2816 		/* use default owner creation umask */
2817 		perm &= seg->s_mode;
2818 	}
2819 
2820 found:
2821 	/* update perm for this node */
2822 	reply->rsmipc_mode = perm;
2823 	reply->rsmipc_uid = seg->s_uid;
2824 	reply->rsmipc_gid = seg->s_gid;
2825 	reply->rsmipc_segid = seg->s_segid;
2826 	reply->rsmipc_seglen = seg->s_len;
2827 
2828 	/*
2829 	 * Perm of requesting node is valid; source will validate user
2830 	 */
2831 	rsmseglock_release(seg);
2832 
2833 	/*
2834 	 * Add the importer to the list right away, if connect fails
2835 	 * the importer will ask the exporter to remove it.
2836 	 */
2837 	importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2838 	    req->rsmipc_segment_cookie);
2839 
2840 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2841 
2842 	return (RSM_SUCCESS);
2843 }
2844 
2845 
2846 /* ************************** Exporter Calls ************************* */
2847 
2848 static int
2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2850 {
2851 	int			e;
2852 	int			acl_len;
2853 	rsmapi_access_entry_t	*acl;
2854 	rsm_access_entry_t	*rsmpi_acl;
2855 	rsm_memory_local_t	mem;
2856 	struct buf		*xbuf;
2857 	dev_t 			sdev = 0;
2858 	adapter_t		*adapter;
2859 	rsm_memseg_id_t		segment_id = 0;
2860 	int			loopback_flag = 0;
2861 	int			create_flags = 0;
2862 	rsm_resource_callback_t	callback_flag;
2863 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2864 
2865 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2866 
2867 	if (seg->s_adapter == &loopback_adapter)
2868 		loopback_flag = 1;
2869 
2870 	if (seg->s_pid != ddi_get_pid() &&
2871 	    ddi_get_pid() != 0) {
2872 		DBG_PRINTF((category, RSM_ERR,
2873 		    "rsm_publish: Not creator\n"));
2874 		return (RSMERR_NOT_CREATOR);
2875 	}
2876 
2877 	/*
2878 	 * Get per node access list
2879 	 */
2880 	e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2881 	if (e != DDI_SUCCESS) {
2882 		DBG_PRINTF((category, RSM_ERR,
2883 		    "rsm_publish done: rsmacl_build failed\n"));
2884 		return (e);
2885 	}
2886 
2887 	/*
2888 	 * The application provided msg->key is used for resolving a
2889 	 * segment id according to the following:
2890 	 *    key = 0   		Kernel Agent selects the segment id
2891 	 *    key <= RSM_DLPI_ID_END	Reserved for system usage except
2892 	 *				RSMLIB range
2893 	 *    key < RSM_USER_APP_ID_BASE segment id = key
2894 	 *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2895 	 *
2896 	 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2897 	 * overflows to zero after 0x80000000 allocations.
2898 	 * An algorithm is needed which allows reinitialization and provides
2899 	 * for reallocation after overflow.  For now, ENOMEM is returned
2900 	 * once the overflow condition has occurred.
2901 	 */
2902 	if (msg->key == 0) {
2903 		mutex_enter(&rsm_lock);
2904 		segment_id = rsm_nextavail_segmentid;
2905 		if (segment_id != 0) {
2906 			rsm_nextavail_segmentid++;
2907 			mutex_exit(&rsm_lock);
2908 		} else {
2909 			mutex_exit(&rsm_lock);
2910 			DBG_PRINTF((category, RSM_ERR,
2911 			    "rsm_publish done: no more keys avlbl\n"));
2912 			return (RSMERR_INSUFFICIENT_RESOURCES);
2913 		}
2914 	} else	if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2915 		/* range reserved for internal use by base/ndi libraries */
2916 		segment_id = msg->key;
2917 	else	if (msg->key <= RSM_DLPI_ID_END)
2918 		return (RSMERR_RESERVED_SEGID);
2919 	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2920 		segment_id = msg->key;
2921 	else {
2922 		DBG_PRINTF((category, RSM_ERR,
2923 		    "rsm_publish done: invalid key %u\n", msg->key));
2924 		return (RSMERR_RESERVED_SEGID);
2925 	}
2926 
2927 	/* Add key to exportlist; The segment lock is held on success */
2928 	e = rsmexport_add(seg, segment_id);
2929 	if (e) {
2930 		rsmacl_free(acl, acl_len);
2931 		DBG_PRINTF((category, RSM_ERR,
2932 		    "rsm_publish done: export_add failed: %d\n", e));
2933 		return (e);
2934 	}
2935 
2936 	seg->s_segid = segment_id;
2937 
2938 	if ((seg->s_state != RSM_STATE_BIND) &&
2939 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2940 		/* state changed since then, free acl and return */
2941 		rsmseglock_release(seg);
2942 		rsmexport_rm(seg);
2943 		rsmacl_free(acl, acl_len);
2944 		DBG_PRINTF((category, RSM_ERR,
2945 		    "rsm_publish done: segment in wrong state: %d\n",
2946 		    seg->s_state));
2947 		return (RSMERR_BAD_SEG_HNDL);
2948 	}
2949 
2950 	/*
2951 	 * If this is for a local memory handle and permissions are zero,
2952 	 * then the surrogate segment is very large and we want to skip
2953 	 * allocation of DVMA space.
2954 	 *
2955 	 * Careful!  If the user didn't use an ACL list, acl will be a NULL
2956 	 * pointer.  Check that before dereferencing it.
2957 	 */
2958 	if (acl != (rsmapi_access_entry_t *)NULL) {
2959 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2960 			goto skipdriver;
2961 	}
2962 
2963 	/* create segment  */
2964 	xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2965 	    sdev, 0, NULL, DDI_UMEM_SLEEP);
2966 	ASSERT(xbuf != NULL);
2967 
2968 	mem.ms_type = RSM_MEM_BUF;
2969 	mem.ms_bp = xbuf;
2970 
2971 	/* This call includes a bind operations */
2972 
2973 	adapter = seg->s_adapter;
2974 	/*
2975 	 * create a acl list with hwaddr for RSMPI publish
2976 	 */
2977 	e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2978 
2979 	if (e != RSM_SUCCESS) {
2980 		rsmseglock_release(seg);
2981 		rsmexport_rm(seg);
2982 		rsmacl_free(acl, acl_len);
2983 		freerbuf(xbuf);
2984 		DBG_PRINTF((category, RSM_ERR,
2985 		    "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2986 		return (e);
2987 	}
2988 
2989 	if (seg->s_state == RSM_STATE_BIND) {
2990 		/* create segment  */
2991 
2992 		/* This call includes a bind operations */
2993 
2994 		if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2995 			create_flags = RSM_ALLOW_UNBIND_REBIND;
2996 		}
2997 
2998 		if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2999 			callback_flag  = RSM_RESOURCE_DONTWAIT;
3000 		} else {
3001 			callback_flag  = RSM_RESOURCE_SLEEP;
3002 		}
3003 
3004 		e = adapter->rsmpi_ops->rsm_seg_create(
3005 		    adapter->rsmpi_handle,
3006 		    &seg->s_handle.out, seg->s_len,
3007 		    create_flags, &mem,
3008 		    callback_flag, NULL);
3009 		/*
3010 		 * At present there is no dependency on the existence of xbuf.
3011 		 * So we can free it here. If in the future this changes, it can
3012 		 * be freed sometime during the segment destroy.
3013 		 */
3014 		freerbuf(xbuf);
3015 
3016 		if (e != RSM_SUCCESS) {
3017 			rsmseglock_release(seg);
3018 			rsmexport_rm(seg);
3019 			rsmacl_free(acl, acl_len);
3020 			rsmpiacl_free(rsmpi_acl, acl_len);
3021 			DBG_PRINTF((category, RSM_ERR,
3022 			    "rsm_publish done: export_create failed: %d\n", e));
3023 			/*
3024 			 * The following assertion ensures that the two errors
3025 			 * related to the length and its alignment do not occur
3026 			 * since they have been checked during export_create
3027 			 */
3028 			ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3029 			    e != RSMERR_BAD_LENGTH);
3030 			if (e == RSMERR_NOT_MEM)
3031 				e = RSMERR_INSUFFICIENT_MEM;
3032 
3033 			return (e);
3034 		}
3035 		/* export segment, this should create an IMMU mapping */
3036 		e = adapter->rsmpi_ops->rsm_publish(
3037 		    seg->s_handle.out,
3038 		    rsmpi_acl, acl_len,
3039 		    seg->s_segid,
3040 		    RSM_RESOURCE_DONTWAIT, NULL);
3041 
3042 		if (e != RSM_SUCCESS) {
3043 			adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3044 			rsmseglock_release(seg);
3045 			rsmexport_rm(seg);
3046 			rsmacl_free(acl, acl_len);
3047 			rsmpiacl_free(rsmpi_acl, acl_len);
3048 			DBG_PRINTF((category, RSM_ERR,
3049 			    "rsm_publish done: export_publish failed: %d\n",
3050 			    e));
3051 			return (e);
3052 		}
3053 	}
3054 
3055 	seg->s_acl_in = rsmpi_acl;
3056 
3057 skipdriver:
3058 	/* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3059 	seg->s_acl_len	= acl_len;
3060 	seg->s_acl	= acl;
3061 
3062 	if (seg->s_state == RSM_STATE_BIND) {
3063 		seg->s_state = RSM_STATE_EXPORT;
3064 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3065 		seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3066 		cv_broadcast(&seg->s_cv);
3067 	}
3068 
3069 	rsmseglock_release(seg);
3070 
3071 	/*
3072 	 * If the segment id was solicited, then return it in
3073 	 * the original incoming message.
3074 	 */
3075 	if (msg->key == 0) {
3076 		msg->key = segment_id;
3077 #ifdef _MULTI_DATAMODEL
3078 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3079 			rsm_ioctlmsg32_t msg32;
3080 
3081 			msg32.key = msg->key;
3082 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3083 			    "rsm_publish done\n"));
3084 			return (ddi_copyout((caddr_t)&msg32,
3085 			    (caddr_t)dataptr, sizeof (msg32), mode));
3086 		}
3087 #endif
3088 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3089 		    "rsm_publish done\n"));
3090 		return (ddi_copyout((caddr_t)msg,
3091 		    (caddr_t)dataptr, sizeof (*msg), mode));
3092 	}
3093 
3094 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3095 	return (DDI_SUCCESS);
3096 }
3097 
3098 /*
3099  * This function modifies the access control list of an already published
3100  * segment.  There is no effect on import segments which are already
3101  * connected.
3102  */
3103 static int
3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3105 {
3106 	rsmapi_access_entry_t	*new_acl, *old_acl, *tmp_acl;
3107 	rsm_access_entry_t	*rsmpi_new_acl, *rsmpi_old_acl;
3108 	int			new_acl_len, old_acl_len, tmp_acl_len;
3109 	int			e, i;
3110 	adapter_t		*adapter;
3111 	int			loopback_flag = 0;
3112 	rsm_memseg_id_t		key;
3113 	rsm_permission_t	permission;
3114 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3115 
3116 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3117 
3118 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3119 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3120 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3121 		return (RSMERR_SEG_NOT_PUBLISHED);
3122 
3123 	if (seg->s_pid != ddi_get_pid() &&
3124 	    ddi_get_pid() != 0) {
3125 		DBG_PRINTF((category, RSM_ERR,
3126 		    "rsm_republish: Not owner\n"));
3127 		return (RSMERR_NOT_CREATOR);
3128 	}
3129 
3130 	if (seg->s_adapter == &loopback_adapter)
3131 		loopback_flag = 1;
3132 
3133 	/*
3134 	 * Build new list first
3135 	 */
3136 	e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3137 	if (e) {
3138 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3139 		    "rsm_republish done: rsmacl_build failed %d", e));
3140 		return (e);
3141 	}
3142 
3143 	/* Lock segment */
3144 	rsmseglock_acquire(seg);
3145 	/*
3146 	 * a republish is in progress - REPUBLISH message is being
3147 	 * sent to the importers so wait for it to complete OR
3148 	 * wait till DR completes
3149 	 */
3150 	while (((seg->s_state == RSM_STATE_EXPORT) &&
3151 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3152 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3153 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3154 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3155 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3156 			    "rsm_republish done: cv_wait  INTERRUPTED"));
3157 			rsmseglock_release(seg);
3158 			rsmacl_free(new_acl, new_acl_len);
3159 			return (RSMERR_INTERRUPTED);
3160 		}
3161 	}
3162 
3163 	/* recheck if state is valid */
3164 	if (seg->s_state != RSM_STATE_EXPORT) {
3165 		rsmseglock_release(seg);
3166 		rsmacl_free(new_acl, new_acl_len);
3167 		return (RSMERR_SEG_NOT_PUBLISHED);
3168 	}
3169 
3170 	key = seg->s_key;
3171 	old_acl = seg->s_acl;
3172 	old_acl_len = seg->s_acl_len;
3173 
3174 	seg->s_acl = new_acl;
3175 	seg->s_acl_len = new_acl_len;
3176 
3177 	/*
3178 	 * This call will only be meaningful if and when the interconnect
3179 	 * layer makes use of the access list
3180 	 */
3181 	adapter = seg->s_adapter;
3182 	/*
3183 	 * create a acl list with hwaddr for RSMPI publish
3184 	 */
3185 	e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3186 
3187 	if (e != RSM_SUCCESS) {
3188 		seg->s_acl = old_acl;
3189 		seg->s_acl_len = old_acl_len;
3190 		rsmseglock_release(seg);
3191 		rsmacl_free(new_acl, new_acl_len);
3192 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3193 		    "rsm_republish done: rsmpiacl_create failed %d", e));
3194 		return (e);
3195 	}
3196 	rsmpi_old_acl = seg->s_acl_in;
3197 	seg->s_acl_in = rsmpi_new_acl;
3198 
3199 	e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3200 	    seg->s_acl_in, seg->s_acl_len,
3201 	    RSM_RESOURCE_DONTWAIT, NULL);
3202 
3203 	if (e != RSM_SUCCESS) {
3204 		seg->s_acl = old_acl;
3205 		seg->s_acl_in = rsmpi_old_acl;
3206 		seg->s_acl_len = old_acl_len;
3207 		rsmseglock_release(seg);
3208 		rsmacl_free(new_acl, new_acl_len);
3209 		rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3210 
3211 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3212 		    "rsm_republish done: rsmpi republish failed %d\n", e));
3213 		return (e);
3214 	}
3215 
3216 	/* create a tmp copy of the new acl */
3217 	tmp_acl_len = new_acl_len;
3218 	if (tmp_acl_len > 0) {
3219 		tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3220 		for (i = 0; i < tmp_acl_len; i++) {
3221 			tmp_acl[i].ae_node = new_acl[i].ae_node;
3222 			tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3223 		}
3224 		/*
3225 		 * The default permission of a node which was in the old
3226 		 * ACL but not in the new ACL is 0 ie no access.
3227 		 */
3228 		permission = 0;
3229 	} else {
3230 		/*
3231 		 * NULL acl means all importers can connect and
3232 		 * default permission will be owner creation umask
3233 		 */
3234 		tmp_acl = NULL;
3235 		permission = seg->s_mode;
3236 	}
3237 
3238 	/* make other republishers to wait for republish to complete */
3239 	seg->s_flags |= RSM_REPUBLISH_WAIT;
3240 
3241 	rsmseglock_release(seg);
3242 
3243 	/* send the new perms to the importing nodes */
3244 	rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3245 
3246 	rsmseglock_acquire(seg);
3247 	seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3248 	/* wake up any one waiting for republish to complete */
3249 	cv_broadcast(&seg->s_cv);
3250 	rsmseglock_release(seg);
3251 
3252 	rsmacl_free(tmp_acl, tmp_acl_len);
3253 	rsmacl_free(old_acl, old_acl_len);
3254 	rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3255 
3256 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3257 	return (DDI_SUCCESS);
3258 }
3259 
3260 static int
3261 rsm_unpublish(rsmseg_t *seg, int mode)
3262 {
3263 	rsmapi_access_entry_t	*acl;
3264 	rsm_access_entry_t	*rsmpi_acl;
3265 	int			acl_len;
3266 	int			e;
3267 	adapter_t *adapter;
3268 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3269 
3270 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3271 
3272 	if (seg->s_pid != ddi_get_pid() &&
3273 	    ddi_get_pid() != 0) {
3274 		DBG_PRINTF((category, RSM_ERR,
3275 		    "rsm_unpublish: Not creator\n"));
3276 		return (RSMERR_NOT_CREATOR);
3277 	}
3278 
3279 	rsmseglock_acquire(seg);
3280 	/*
3281 	 * wait for QUIESCING to complete here before rsmexport_rm
3282 	 * is called because the SUSPEND_COMPLETE mesg which changes
3283 	 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3284 	 * signals the cv_wait needs to find it in the hashtable.
3285 	 */
3286 	while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3287 	    ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3288 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3289 			rsmseglock_release(seg);
3290 			DBG_PRINTF((category, RSM_ERR,
3291 			    "rsm_unpublish done: cv_wait INTR qscing"
3292 			    "getv/putv in progress"));
3293 			return (RSMERR_INTERRUPTED);
3294 		}
3295 	}
3296 
3297 	/* verify segment state */
3298 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3299 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3300 		rsmseglock_release(seg);
3301 		DBG_PRINTF((category, RSM_ERR,
3302 		    "rsm_unpublish done: bad state %x\n", seg->s_state));
3303 		return (RSMERR_SEG_NOT_PUBLISHED);
3304 	}
3305 
3306 	rsmseglock_release(seg);
3307 
3308 	rsmexport_rm(seg);
3309 
3310 	rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3311 
3312 	rsmseglock_acquire(seg);
3313 	/*
3314 	 * wait for republish to complete
3315 	 */
3316 	while ((seg->s_state == RSM_STATE_EXPORT) &&
3317 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3318 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3319 			DBG_PRINTF((category, RSM_ERR,
3320 			    "rsm_unpublish done: cv_wait INTR repubing"));
3321 			rsmseglock_release(seg);
3322 			return (RSMERR_INTERRUPTED);
3323 		}
3324 	}
3325 
3326 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3327 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3328 		DBG_PRINTF((category, RSM_ERR,
3329 		    "rsm_unpublish done: invalid state"));
3330 		rsmseglock_release(seg);
3331 		return (RSMERR_SEG_NOT_PUBLISHED);
3332 	}
3333 
3334 	/*
3335 	 * check for putv/get surrogate segment which was not published
3336 	 * to the driver.
3337 	 *
3338 	 * Be certain to see if there is an ACL first!  If this segment was
3339 	 * not published with an ACL, acl will be a null pointer.  Check
3340 	 * that before dereferencing it.
3341 	 */
3342 	acl = seg->s_acl;
3343 	if (acl != (rsmapi_access_entry_t *)NULL) {
3344 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3345 			goto bypass;
3346 	}
3347 
3348 	/* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3349 	if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3350 		goto bypass;
3351 
3352 	adapter = seg->s_adapter;
3353 	for (;;) {
3354 		if (seg->s_state != RSM_STATE_EXPORT) {
3355 			rsmseglock_release(seg);
3356 			DBG_PRINTF((category, RSM_ERR,
3357 			    "rsm_unpublish done: bad state %x\n",
3358 			    seg->s_state));
3359 			return (RSMERR_SEG_NOT_PUBLISHED);
3360 		}
3361 
3362 		/* unpublish from adapter */
3363 		e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3364 
3365 		if (e == RSM_SUCCESS) {
3366 			break;
3367 		}
3368 
3369 		if (e == RSMERR_SEG_IN_USE && mode == 1) {
3370 			/*
3371 			 * wait for unpublish to succeed, it's busy.
3372 			 */
3373 			seg->s_flags |= RSM_EXPORT_WAIT;
3374 
3375 			/* wait for a max of 1 ms - this is an empirical */
3376 			/* value that was found by some minimal testing  */
3377 			/* can be fine tuned when we have better numbers */
3378 			/* A long term fix would be to send cv_signal	 */
3379 			/* from the intr callback routine		 */
3380 			/* currently nobody signals this wait		 */
3381 			(void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3382 			    drv_usectohz(1000), TR_CLOCK_TICK);
3383 
3384 			DBG_PRINTF((category, RSM_ERR,
3385 			    "rsm_unpublish: SEG_IN_USE\n"));
3386 
3387 			seg->s_flags &= ~RSM_EXPORT_WAIT;
3388 		} else {
3389 			if (mode == 1) {
3390 				DBG_PRINTF((category, RSM_ERR,
3391 				    "rsm:rsmpi unpublish err %x\n", e));
3392 				seg->s_state = RSM_STATE_BIND;
3393 			}
3394 			rsmseglock_release(seg);
3395 			return (e);
3396 		}
3397 	}
3398 
3399 	/* Free segment */
3400 	e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3401 
3402 	if (e != RSM_SUCCESS) {
3403 		DBG_PRINTF((category, RSM_ERR,
3404 		    "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3405 		    seg->s_key, e));
3406 	}
3407 
3408 bypass:
3409 	acl = seg->s_acl;
3410 	rsmpi_acl = seg->s_acl_in;
3411 	acl_len = seg->s_acl_len;
3412 
3413 	seg->s_acl = NULL;
3414 	seg->s_acl_in = NULL;
3415 	seg->s_acl_len = 0;
3416 
3417 	if (seg->s_state == RSM_STATE_EXPORT) {
3418 		seg->s_state = RSM_STATE_BIND;
3419 	} else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3420 		seg->s_state = RSM_STATE_BIND_QUIESCED;
3421 		cv_broadcast(&seg->s_cv);
3422 	}
3423 
3424 	rsmseglock_release(seg);
3425 
3426 	rsmacl_free(acl, acl_len);
3427 	rsmpiacl_free(rsmpi_acl, acl_len);
3428 
3429 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3430 
3431 	return (DDI_SUCCESS);
3432 }
3433 
3434 /*
3435  * Called from rsm_unpublish to force an unload and disconnection of all
3436  * importers of the unpublished segment.
3437  *
3438  * First build the list of segments requiring a force disconnect, then
3439  * send a request for each.
3440  */
3441 static void
3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3443     rsm_node_id_t ex_nodeid)
3444 {
3445 	rsmipc_request_t 	request;
3446 	importing_token_t	*prev_token, *token, *tmp_token, *tokp;
3447 	importing_token_t	*force_disconnect_list = NULL;
3448 	int			index;
3449 
3450 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3451 	    "rsm_send_importer_disconnects enter\n"));
3452 
3453 	index = rsmhash(ex_segid);
3454 
3455 	mutex_enter(&importer_list.lock);
3456 
3457 	prev_token = NULL;
3458 	token = importer_list.bucket[index];
3459 
3460 	while (token != NULL) {
3461 		if (token->key == ex_segid) {
3462 			/*
3463 			 * take it off the importer list and add it
3464 			 * to the force disconnect list.
3465 			 */
3466 			if (prev_token == NULL)
3467 				importer_list.bucket[index] = token->next;
3468 			else
3469 				prev_token->next = token->next;
3470 			tmp_token = token;
3471 			token = token->next;
3472 			if (force_disconnect_list == NULL) {
3473 				force_disconnect_list = tmp_token;
3474 				tmp_token->next = NULL;
3475 			} else {
3476 				tokp = force_disconnect_list;
3477 				/*
3478 				 * make sure that the tmp_token's node
3479 				 * is not already on the force disconnect
3480 				 * list.
3481 				 */
3482 				while (tokp != NULL) {
3483 					if (tokp->importing_node ==
3484 					    tmp_token->importing_node) {
3485 						break;
3486 					}
3487 					tokp = tokp->next;
3488 				}
3489 				if (tokp == NULL) {
3490 					tmp_token->next =
3491 					    force_disconnect_list;
3492 					force_disconnect_list = tmp_token;
3493 				} else {
3494 					kmem_free((void *)tmp_token,
3495 					    sizeof (*token));
3496 				}
3497 			}
3498 
3499 		} else {
3500 			prev_token = token;
3501 			token = token->next;
3502 		}
3503 	}
3504 	mutex_exit(&importer_list.lock);
3505 
3506 	token = force_disconnect_list;
3507 	while (token != NULL) {
3508 		if (token->importing_node == my_nodeid) {
3509 			rsm_force_unload(ex_nodeid, ex_segid,
3510 			    DISCONNECT);
3511 		} else {
3512 			request.rsmipc_hdr.rsmipc_type =
3513 			    RSMIPC_MSG_DISCONNECT;
3514 			request.rsmipc_key = token->key;
3515 			for (;;) {
3516 				if (rsmipc_send(token->importing_node,
3517 				    &request,
3518 				    RSM_NO_REPLY) == RSM_SUCCESS) {
3519 					break;
3520 				} else {
3521 					delay(drv_usectohz(10000));
3522 				}
3523 			}
3524 		}
3525 		tmp_token = token;
3526 		token = token->next;
3527 		kmem_free((void *)tmp_token, sizeof (*token));
3528 	}
3529 
3530 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3531 	    "rsm_send_importer_disconnects done\n"));
3532 }
3533 
3534 /*
3535  * This function is used as a callback for unlocking the pages locked
3536  * down by a process which then does a fork or an exec.
3537  * It marks the export segments corresponding to umem cookie given by
3538  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3539  * destroyed later when an rsm_close occurs).
3540  */
3541 static void
3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3543 {
3544 	rsmresource_blk_t *blk;
3545 	rsmresource_t *p;
3546 	rsmseg_t *eseg = NULL;
3547 	int i, j;
3548 	int found = 0;
3549 
3550 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3551 	    "rsm_export_force_destroy enter\n"));
3552 
3553 	/*
3554 	 * Walk the resource list and locate the export segment (either
3555 	 * in the BIND or the EXPORT state) which corresponds to the
3556 	 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3557 	 * Change the state to ZOMBIE by calling rsmseg_close with the
3558 	 * force_flag argument (the second argument) set to 1. Also,
3559 	 * unpublish and unbind the segment, but don't free it. Free it
3560 	 * only on a rsm_close call for the segment.
3561 	 */
3562 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3563 
3564 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3565 		blk = rsm_resource.rsmrc_root[i];
3566 		if (blk == NULL) {
3567 			continue;
3568 		}
3569 
3570 		for (j = 0; j < RSMRC_BLKSZ; j++) {
3571 			p = blk->rsmrcblk_blks[j];
3572 			if ((p != NULL) && (p != RSMRC_RESERVED) &&
3573 			    (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3574 				eseg = (rsmseg_t *)p;
3575 				if (eseg->s_cookie != ck)
3576 					continue; /* continue searching */
3577 				/*
3578 				 * Found the segment, set flag to indicate
3579 				 * force destroy processing is in progress
3580 				 */
3581 				rsmseglock_acquire(eseg);
3582 				eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3583 				rsmseglock_release(eseg);
3584 				found = 1;
3585 				break;
3586 			}
3587 		}
3588 
3589 		if (found)
3590 			break;
3591 	}
3592 
3593 	rw_exit(&rsm_resource.rsmrc_lock);
3594 
3595 	if (found) {
3596 		ASSERT(eseg != NULL);
3597 		/* call rsmseg_close with force flag set to 1 */
3598 		rsmseg_close(eseg, 1);
3599 		/*
3600 		 * force destroy processing done, clear flag and signal any
3601 		 * thread waiting in rsmseg_close.
3602 		 */
3603 		rsmseglock_acquire(eseg);
3604 		eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3605 		cv_broadcast(&eseg->s_cv);
3606 		rsmseglock_release(eseg);
3607 	}
3608 
3609 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3610 	    "rsm_export_force_destroy done\n"));
3611 }
3612 
3613 /* ******************************* Remote Calls *********************** */
3614 static void
3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3616 {
3617 	rsmipc_reply_t reply;
3618 	DBG_DEFINE(category,
3619 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3620 
3621 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3622 	    "rsm_intr_segconnect enter\n"));
3623 
3624 	reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3625 
3626 	reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3627 	reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3628 
3629 	(void) rsmipc_send(src, NULL, &reply);
3630 
3631 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3632 	    "rsm_intr_segconnect done\n"));
3633 }
3634 
3635 
3636 /*
3637  * When an exported segment is unpublished the exporter sends an ipc
3638  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3639  * calls this function.  The import list is scanned; segments which match the
3640  * exported segment id are unloaded and disconnected.
3641  *
3642  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3643  *
3644  */
3645 static void
3646 rsm_force_unload(rsm_node_id_t src_nodeid,
3647     rsm_memseg_id_t ex_segid,
3648     boolean_t disconnect_flag)
3649 
3650 {
3651 	rsmresource_t	*p = NULL;
3652 	rsmhash_table_t *rhash = &rsm_import_segs;
3653 	uint_t		index;
3654 	DBG_DEFINE(category,
3655 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3656 
3657 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3658 
3659 	index = rsmhash(ex_segid);
3660 
3661 	rw_enter(&rhash->rsmhash_rw, RW_READER);
3662 
3663 	p = rsmhash_getbkt(rhash, index);
3664 
3665 	for (; p; p = p->rsmrc_next) {
3666 		rsmseg_t *seg = (rsmseg_t *)p;
3667 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3668 			/*
3669 			 * In order to make rsmseg_unload and rsm_force_unload
3670 			 * thread safe, acquire the segment lock here.
3671 			 * rsmseg_unload is responsible for releasing the lock.
3672 			 * rsmseg_unload releases the lock just before a call
3673 			 * to rsmipc_send or in case of an early exit which
3674 			 * occurs if the segment was in the state
3675 			 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3676 			 */
3677 			rsmseglock_acquire(seg);
3678 			if (disconnect_flag)
3679 				seg->s_flags |= RSM_FORCE_DISCONNECT;
3680 			rsmseg_unload(seg);
3681 		}
3682 	}
3683 	rw_exit(&rhash->rsmhash_rw);
3684 
3685 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3686 }
3687 
3688 static void
3689 rsm_intr_reply(rsmipc_msghdr_t *msg)
3690 {
3691 	/*
3692 	 * Find slot for cookie in reply.
3693 	 * Match sequence with sequence in cookie
3694 	 * If no match; return
3695 	 * Try to grap lock of slot, if locked return
3696 	 * copy data into reply slot area
3697 	 * signal waiter
3698 	 */
3699 	rsmipc_slot_t 	*slot;
3700 	rsmipc_cookie_t	*cookie;
3701 	void *data = (void *) msg;
3702 	size_t size = sizeof (rsmipc_reply_t);
3703 	DBG_DEFINE(category,
3704 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3705 
3706 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3707 
3708 	cookie = &msg->rsmipc_cookie;
3709 	if (cookie->ic.index >= RSMIPC_SZ) {
3710 		DBG_PRINTF((category, RSM_ERR,
3711 		    "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3712 		return;
3713 	}
3714 
3715 	ASSERT(cookie->ic.index < RSMIPC_SZ);
3716 	slot = &rsm_ipc.slots[cookie->ic.index];
3717 	mutex_enter(&slot->rsmipc_lock);
3718 	if (slot->rsmipc_cookie.value == cookie->value) {
3719 		/* found a match */
3720 		if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3721 			bcopy(data, slot->rsmipc_data, size);
3722 			RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3723 			cv_signal(&slot->rsmipc_cv);
3724 		}
3725 	} else {
3726 		DBG_PRINTF((category, RSM_DEBUG,
3727 		    "rsm: rsm_intr_reply mismatched reply %d\n",
3728 		    cookie->ic.index));
3729 	}
3730 	mutex_exit(&slot->rsmipc_lock);
3731 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3732 }
3733 
3734 /*
3735  * This function gets dispatched on the worker thread when we receive
3736  * the SQREADY message. This function sends the SQREADY_ACK message.
3737  */
3738 static void
3739 rsm_sqready_ack_deferred(void *arg)
3740 {
3741 	path_t	*path = (path_t *)arg;
3742 	DBG_DEFINE(category,
3743 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3744 
3745 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3746 	    "rsm_sqready_ack_deferred enter\n"));
3747 
3748 	mutex_enter(&path->mutex);
3749 
3750 	/*
3751 	 * If path is not active no point in sending the ACK
3752 	 * because the whole SQREADY protocol will again start
3753 	 * when the path becomes active.
3754 	 */
3755 	if (path->state != RSMKA_PATH_ACTIVE) {
3756 		/*
3757 		 * decrement the path refcnt incremented in rsm_proc_sqready
3758 		 */
3759 		PATH_RELE_NOLOCK(path);
3760 		mutex_exit(&path->mutex);
3761 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3762 		    "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3763 		return;
3764 	}
3765 
3766 	/* send an SQREADY_ACK message */
3767 	(void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3768 
3769 	/* initialize credits to the max level */
3770 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3771 
3772 	/* wake up any send that is waiting for credits */
3773 	cv_broadcast(&path->sendq_token.sendq_cv);
3774 
3775 	/*
3776 	 * decrement the path refcnt since we incremented it in
3777 	 * rsm_proc_sqready
3778 	 */
3779 	PATH_RELE_NOLOCK(path);
3780 
3781 	mutex_exit(&path->mutex);
3782 
3783 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3784 	    "rsm_sqready_ack_deferred done\n"));
3785 }
3786 
3787 /*
3788  * Process the SQREADY message
3789  */
3790 static void
3791 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3792     rsm_intr_hand_arg_t arg)
3793 {
3794 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3795 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3796 	path_t			*path;
3797 	DBG_DEFINE(category,
3798 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3799 
3800 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3801 
3802 	/* look up the path - incr the path refcnt */
3803 	path = rsm_find_path(hdlr_argp->adapter_name,
3804 	    hdlr_argp->adapter_instance, src_hwaddr);
3805 
3806 	/*
3807 	 * No path exists or path is not active - drop the message
3808 	 */
3809 	if (path == NULL) {
3810 		DBG_PRINTF((category, RSM_DEBUG,
3811 		    "rsm_proc_sqready done: msg dropped no path\n"));
3812 		return;
3813 	}
3814 
3815 	mutex_exit(&path->mutex);
3816 
3817 	/* drain any tasks from the previous incarnation */
3818 	taskq_wait(path->recv_taskq);
3819 
3820 	mutex_enter(&path->mutex);
3821 	/*
3822 	 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3823 	 * in the meanwhile we received an SQREADY message, blindly reset
3824 	 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3825 	 * and forget about the SQREADY that we sent.
3826 	 */
3827 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3828 
3829 	if (path->state != RSMKA_PATH_ACTIVE) {
3830 		/* decr refcnt and drop the mutex */
3831 		PATH_RELE_NOLOCK(path);
3832 		mutex_exit(&path->mutex);
3833 		DBG_PRINTF((category, RSM_DEBUG,
3834 		    "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3835 		return;
3836 	}
3837 
3838 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3839 	    " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3840 
3841 	/*
3842 	 * The sender's local incarnation number is our remote incarnation
3843 	 * number save it in the path data structure
3844 	 */
3845 	path->remote_incn = msg->rsmipc_local_incn;
3846 	path->sendq_token.msgbuf_avail = 0;
3847 	path->procmsg_cnt = 0;
3848 
3849 	/*
3850 	 * path is active - dispatch task to send SQREADY_ACK - remember
3851 	 * RSMPI calls can't be done in interrupt context
3852 	 *
3853 	 * We can use the recv_taskq to send because the remote endpoint
3854 	 * cannot start sending messages till it receives SQREADY_ACK hence
3855 	 * at this point there are no tasks on recv_taskq.
3856 	 *
3857 	 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3858 	 */
3859 	(void) taskq_dispatch(path->recv_taskq,
3860 	    rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3861 
3862 	mutex_exit(&path->mutex);
3863 
3864 
3865 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3866 }
3867 
3868 /*
3869  * Process the SQREADY_ACK message
3870  */
3871 static void
3872 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3873     rsm_intr_hand_arg_t arg)
3874 {
3875 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3876 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3877 	path_t			*path;
3878 	DBG_DEFINE(category,
3879 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3880 
3881 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3882 	    "rsm_proc_sqready_ack enter\n"));
3883 
3884 	/* look up the path - incr the path refcnt */
3885 	path = rsm_find_path(hdlr_argp->adapter_name,
3886 	    hdlr_argp->adapter_instance, src_hwaddr);
3887 
3888 	/*
3889 	 * drop the message if - no path exists or path is not active
3890 	 * or if its not waiting for SQREADY_ACK message
3891 	 */
3892 	if (path == NULL) {
3893 		DBG_PRINTF((category, RSM_DEBUG,
3894 		    "rsm_proc_sqready_ack done: msg dropped no path\n"));
3895 		return;
3896 	}
3897 
3898 	if ((path->state != RSMKA_PATH_ACTIVE) ||
3899 	    !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3900 		/* decrement the refcnt */
3901 		PATH_RELE_NOLOCK(path);
3902 		mutex_exit(&path->mutex);
3903 		DBG_PRINTF((category, RSM_DEBUG,
3904 		    "rsm_proc_sqready_ack done: msg dropped\n"));
3905 		return;
3906 	}
3907 
3908 	/*
3909 	 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3910 	 * sent, if not drop it.
3911 	 */
3912 	if (path->local_incn != msghdr->rsmipc_incn) {
3913 		/* decrement the refcnt */
3914 		PATH_RELE_NOLOCK(path);
3915 		mutex_exit(&path->mutex);
3916 		DBG_PRINTF((category, RSM_DEBUG,
3917 		    "rsm_proc_sqready_ack done: msg old incn %lld\n",
3918 		    msghdr->rsmipc_incn));
3919 		return;
3920 	}
3921 
3922 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3923 	    " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3924 
3925 	/*
3926 	 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3927 	 */
3928 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3929 
3930 	/* save the remote sendq incn number */
3931 	path->remote_incn = msg->rsmipc_local_incn;
3932 
3933 	/* initialize credits to the max level */
3934 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3935 
3936 	/* wake up any send that is waiting for credits */
3937 	cv_broadcast(&path->sendq_token.sendq_cv);
3938 
3939 	/* decrement the refcnt */
3940 	PATH_RELE_NOLOCK(path);
3941 
3942 	mutex_exit(&path->mutex);
3943 
3944 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3945 	    "rsm_proc_sqready_ack done\n"));
3946 }
3947 
3948 /*
3949  * process the RSMIPC_MSG_CREDIT message
3950  */
3951 static void
3952 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3953     rsm_intr_hand_arg_t arg)
3954 {
3955 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3956 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3957 	path_t			*path;
3958 	DBG_DEFINE(category,
3959 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3960 	    RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3961 
3962 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3963 
3964 	/* look up the path - incr the path refcnt */
3965 	path = rsm_find_path(hdlr_argp->adapter_name,
3966 	    hdlr_argp->adapter_instance, src_hwaddr);
3967 
3968 	if (path == NULL) {
3969 		DBG_PRINTF((category, RSM_DEBUG,
3970 		    "rsm_add_credits enter: path not found\n"));
3971 		return;
3972 	}
3973 
3974 	/* the path is not active - discard credits */
3975 	if (path->state != RSMKA_PATH_ACTIVE) {
3976 		PATH_RELE_NOLOCK(path);
3977 		mutex_exit(&path->mutex);
3978 		DBG_PRINTF((category, RSM_DEBUG,
3979 		    "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3980 		return;
3981 	}
3982 
3983 	/*
3984 	 * Check if these credits are for current incarnation of the path.
3985 	 */
3986 	if (path->local_incn != msghdr->rsmipc_incn) {
3987 		/* decrement the refcnt */
3988 		PATH_RELE_NOLOCK(path);
3989 		mutex_exit(&path->mutex);
3990 		DBG_PRINTF((category, RSM_DEBUG,
3991 		    "rsm_add_credits enter: old incn %lld\n",
3992 		    msghdr->rsmipc_incn));
3993 		return;
3994 	}
3995 
3996 	DBG_PRINTF((category, RSM_DEBUG,
3997 	    "rsm_add_credits:path=%lx new-creds=%d "
3998 	    "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3999 	    path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4000 	    src_hwaddr));
4001 
4002 
4003 	/* add credits to the path's sendq */
4004 	path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4005 
4006 	ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4007 
4008 	/* wake up any send that is waiting for credits */
4009 	cv_broadcast(&path->sendq_token.sendq_cv);
4010 
4011 	/* decrement the refcnt */
4012 	PATH_RELE_NOLOCK(path);
4013 
4014 	mutex_exit(&path->mutex);
4015 
4016 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4017 }
4018 
4019 static void
4020 rsm_intr_event(rsmipc_request_t *msg)
4021 {
4022 	rsmseg_t	*seg;
4023 	rsmresource_t	*p;
4024 	rsm_node_id_t	src_node;
4025 	DBG_DEFINE(category,
4026 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4027 
4028 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4029 
4030 	src_node = msg->rsmipc_hdr.rsmipc_src;
4031 
4032 	if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4033 		/* This is for an import segment */
4034 		uint_t hashval = rsmhash(msg->rsmipc_key);
4035 
4036 		rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4037 
4038 		p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4039 
4040 		for (; p; p = p->rsmrc_next) {
4041 			if ((p->rsmrc_key == msg->rsmipc_key) &&
4042 			    (p->rsmrc_node == src_node)) {
4043 				seg = (rsmseg_t *)p;
4044 				rsmseglock_acquire(seg);
4045 
4046 				atomic_add_32(&seg->s_pollevent, 1);
4047 
4048 				if (seg->s_pollflag & RSM_SEGMENT_POLL)
4049 					pollwakeup(&seg->s_poll, POLLRDNORM);
4050 
4051 				rsmseglock_release(seg);
4052 			}
4053 		}
4054 
4055 		rw_exit(&rsm_import_segs.rsmhash_rw);
4056 	} else {
4057 		/* This is for an export segment */
4058 		seg = rsmexport_lookup(msg->rsmipc_key);
4059 		if (!seg) {
4060 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4061 			    "rsm_intr_event done: exp seg not found\n"));
4062 			return;
4063 		}
4064 
4065 		ASSERT(rsmseglock_held(seg));
4066 
4067 		atomic_add_32(&seg->s_pollevent, 1);
4068 
4069 		/*
4070 		 * We must hold the segment lock here, or else the segment
4071 		 * can be freed while pollwakeup is using it. This implies
4072 		 * that we MUST NOT grab the segment lock during rsm_chpoll,
4073 		 * as outlined in the chpoll(2) man page.
4074 		 */
4075 		if (seg->s_pollflag & RSM_SEGMENT_POLL)
4076 			pollwakeup(&seg->s_poll, POLLRDNORM);
4077 
4078 		rsmseglock_release(seg);
4079 	}
4080 
4081 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4082 }
4083 
4084 /*
4085  * The exporter did a republish and changed the ACL - this change is only
4086  * visible to new importers.
4087  */
4088 static void
4089 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4090     rsm_permission_t perm)
4091 {
4092 
4093 	rsmresource_t	*p;
4094 	rsmseg_t	*seg;
4095 	uint_t		hashval = rsmhash(key);
4096 	DBG_DEFINE(category,
4097 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4098 
4099 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4100 
4101 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4102 
4103 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4104 
4105 	for (; p; p = p->rsmrc_next) {
4106 		/*
4107 		 * find the importer and update the permission in the shared
4108 		 * data structure. Any new importers will use the new perms
4109 		 */
4110 		if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4111 			seg = (rsmseg_t *)p;
4112 
4113 			rsmseglock_acquire(seg);
4114 			rsmsharelock_acquire(seg);
4115 			seg->s_share->rsmsi_mode = perm;
4116 			rsmsharelock_release(seg);
4117 			rsmseglock_release(seg);
4118 
4119 			break;
4120 		}
4121 	}
4122 
4123 	rw_exit(&rsm_import_segs.rsmhash_rw);
4124 
4125 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4126 }
4127 
4128 void
4129 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4130 {
4131 	int		done = 1; /* indicate all SUSPENDS have been acked */
4132 	list_element_t	*elem;
4133 	DBG_DEFINE(category,
4134 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4135 
4136 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4137 	    "rsm_suspend_complete enter\n"));
4138 
4139 	mutex_enter(&rsm_suspend_list.list_lock);
4140 
4141 	if (rsm_suspend_list.list_head == NULL) {
4142 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4143 		    "rsm_suspend_complete done: suspend_list is empty\n"));
4144 		mutex_exit(&rsm_suspend_list.list_lock);
4145 		return;
4146 	}
4147 
4148 	elem = rsm_suspend_list.list_head;
4149 	while (elem != NULL) {
4150 		if (elem->nodeid == src_node) {
4151 			/* clear the pending flag for the node */
4152 			elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4153 			elem->flags |= flag;
4154 		}
4155 
4156 		if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4157 			done = 0; /* still some nodes have not yet ACKED */
4158 
4159 		elem = elem->next;
4160 	}
4161 
4162 	mutex_exit(&rsm_suspend_list.list_lock);
4163 
4164 	if (!done) {
4165 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4166 		    "rsm_suspend_complete done: acks pending\n"));
4167 		return;
4168 	}
4169 	/*
4170 	 * Now that we are done with suspending all the remote importers
4171 	 * time to quiesce the local exporters
4172 	 */
4173 	exporter_quiesce();
4174 
4175 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4176 	    "rsm_suspend_complete done\n"));
4177 }
4178 
4179 static void
4180 exporter_quiesce()
4181 {
4182 	int		i, e;
4183 	rsmresource_t	*current;
4184 	rsmseg_t	*seg;
4185 	adapter_t	*adapter;
4186 	DBG_DEFINE(category,
4187 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4188 
4189 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4190 	/*
4191 	 * The importers send a SUSPEND_COMPLETE to the exporter node
4192 	 *	Unpublish, unbind the export segment and
4193 	 *	move the segments to the EXPORT_QUIESCED state
4194 	 */
4195 
4196 	rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4197 
4198 	for (i = 0; i < rsm_hash_size; i++) {
4199 		current = rsm_export_segs.bucket[i];
4200 		while (current != NULL) {
4201 			seg = (rsmseg_t *)current;
4202 			rsmseglock_acquire(seg);
4203 			if (current->rsmrc_state ==
4204 			    RSM_STATE_EXPORT_QUIESCING) {
4205 				adapter = seg->s_adapter;
4206 				/*
4207 				 * some local memory handles are not published
4208 				 * check if it was published
4209 				 */
4210 				if ((seg->s_acl == NULL) ||
4211 				    (seg->s_acl[0].ae_node != my_nodeid) ||
4212 				    (seg->s_acl[0].ae_permission != 0)) {
4213 
4214 					e = adapter->rsmpi_ops->rsm_unpublish(
4215 					    seg->s_handle.out);
4216 					DBG_PRINTF((category, RSM_DEBUG,
4217 					    "exporter_quiesce:unpub %d\n", e));
4218 
4219 					e = adapter->rsmpi_ops->rsm_seg_destroy(
4220 					    seg->s_handle.out);
4221 
4222 					DBG_PRINTF((category, RSM_DEBUG,
4223 					    "exporter_quiesce:destroy %d\n",
4224 					    e));
4225 				}
4226 
4227 				(void) rsm_unbind_pages(seg);
4228 				seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4229 				cv_broadcast(&seg->s_cv);
4230 			}
4231 			rsmseglock_release(seg);
4232 			current = current->rsmrc_next;
4233 		}
4234 	}
4235 	rw_exit(&rsm_export_segs.rsmhash_rw);
4236 
4237 	/*
4238 	 * All the local segments we are done with the pre-del processing
4239 	 * - time to move to PREDEL_COMPLETED.
4240 	 */
4241 
4242 	mutex_enter(&rsm_drv_data.drv_lock);
4243 
4244 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4245 
4246 	rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4247 
4248 	cv_broadcast(&rsm_drv_data.drv_cv);
4249 
4250 	mutex_exit(&rsm_drv_data.drv_lock);
4251 
4252 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4253 }
4254 
4255 static void
4256 importer_suspend(rsm_node_id_t src_node)
4257 {
4258 	int		i;
4259 	int		susp_flg; /* true means already suspended */
4260 	int		num_importers;
4261 	rsmresource_t	*p = NULL, *curp;
4262 	rsmhash_table_t *rhash = &rsm_import_segs;
4263 	rsmseg_t	*seg;
4264 	rsmipc_request_t request;
4265 	DBG_DEFINE(category,
4266 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4267 
4268 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4269 
4270 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4271 	for (i = 0; i < rsm_hash_size; i++) {
4272 		p = rhash->bucket[i];
4273 
4274 		/*
4275 		 * Suspend all importers with same <node, key> pair.
4276 		 * After the last one of the shared importers has been
4277 		 * suspended - suspend the shared mappings/connection.
4278 		 */
4279 		for (; p; p = p->rsmrc_next) {
4280 			rsmseg_t *first = (rsmseg_t *)p;
4281 			if ((first->s_node != src_node) ||
4282 			    (first->s_state == RSM_STATE_DISCONNECT))
4283 				continue; /* go to next entry */
4284 			/*
4285 			 * search the rest of the bucket for
4286 			 * other siblings (imprtrs with the same key)
4287 			 * of "first" and suspend them.
4288 			 * All importers with same key fall in
4289 			 * the same bucket.
4290 			 */
4291 			num_importers = 0;
4292 			for (curp = p; curp; curp = curp->rsmrc_next) {
4293 				seg = (rsmseg_t *)curp;
4294 
4295 				rsmseglock_acquire(seg);
4296 
4297 				if ((seg->s_node != first->s_node) ||
4298 				    (seg->s_key != first->s_key) ||
4299 				    (seg->s_state == RSM_STATE_DISCONNECT)) {
4300 					/*
4301 					 * either not a peer segment or its a
4302 					 * disconnected segment - skip it
4303 					 */
4304 					rsmseglock_release(seg);
4305 					continue;
4306 				}
4307 
4308 				rsmseg_suspend(seg, &susp_flg);
4309 
4310 				if (susp_flg) { /* seg already suspended */
4311 					rsmseglock_release(seg);
4312 					break; /* the inner for loop */
4313 				}
4314 
4315 				num_importers++;
4316 				rsmsharelock_acquire(seg);
4317 				/*
4318 				 * we've processed all importers that are
4319 				 * siblings of "first"
4320 				 */
4321 				if (num_importers ==
4322 				    seg->s_share->rsmsi_refcnt) {
4323 					rsmsharelock_release(seg);
4324 					rsmseglock_release(seg);
4325 					break;
4326 				}
4327 				rsmsharelock_release(seg);
4328 				rsmseglock_release(seg);
4329 			}
4330 
4331 			/*
4332 			 * All the importers with the same key and
4333 			 * nodeid as "first" have been suspended.
4334 			 * Now suspend the shared connect/mapping.
4335 			 * This is done only once.
4336 			 */
4337 			if (!susp_flg) {
4338 				rsmsegshare_suspend(seg);
4339 			}
4340 		}
4341 	}
4342 
4343 	rw_exit(&rhash->rsmhash_rw);
4344 
4345 	/* send an ACK for SUSPEND message */
4346 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4347 	(void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4348 
4349 
4350 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4351 
4352 }
4353 
4354 static void
4355 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4356 {
4357 	int		recheck_state;
4358 	rsmcookie_t	*hdl;
4359 	DBG_DEFINE(category,
4360 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4361 
4362 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4363 	    "rsmseg_suspend enter: key=%u\n", seg->s_key));
4364 
4365 	*susp_flg = 0;
4366 
4367 	ASSERT(rsmseglock_held(seg));
4368 	/* wait if putv/getv is in progress */
4369 	while (seg->s_rdmacnt > 0)
4370 		cv_wait(&seg->s_cv, &seg->s_lock);
4371 
4372 	do {
4373 		recheck_state = 0;
4374 
4375 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4376 		    "rsmseg_suspend:segment %x state=%d\n",
4377 		    seg->s_key, seg->s_state));
4378 
4379 		switch (seg->s_state) {
4380 		case RSM_STATE_NEW:
4381 			/* not a valid state */
4382 			break;
4383 		case RSM_STATE_CONNECTING:
4384 			seg->s_state = RSM_STATE_ABORT_CONNECT;
4385 			break;
4386 		case RSM_STATE_ABORT_CONNECT:
4387 			break;
4388 		case RSM_STATE_CONNECT:
4389 			seg->s_handle.in = NULL;
4390 			seg->s_state = RSM_STATE_CONN_QUIESCE;
4391 			break;
4392 		case RSM_STATE_MAPPING:
4393 			/* wait until segment leaves the mapping state */
4394 			while (seg->s_state == RSM_STATE_MAPPING)
4395 				cv_wait(&seg->s_cv, &seg->s_lock);
4396 			recheck_state = 1;
4397 			break;
4398 		case RSM_STATE_ACTIVE:
4399 			/* unload the mappings */
4400 			if (seg->s_ckl != NULL) {
4401 				hdl = seg->s_ckl;
4402 				for (; hdl != NULL; hdl = hdl->c_next) {
4403 					(void) devmap_unload(hdl->c_dhp,
4404 					    hdl->c_off, hdl->c_len);
4405 				}
4406 			}
4407 			seg->s_mapinfo = NULL;
4408 			seg->s_state = RSM_STATE_MAP_QUIESCE;
4409 			break;
4410 		case RSM_STATE_CONN_QUIESCE:
4411 			/* FALLTHRU */
4412 		case RSM_STATE_MAP_QUIESCE:
4413 			/* rsmseg_suspend already done for seg */
4414 			*susp_flg = 1;
4415 			break;
4416 		case RSM_STATE_DISCONNECT:
4417 			break;
4418 		default:
4419 			ASSERT(0); /* invalid state */
4420 		}
4421 	} while (recheck_state);
4422 
4423 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4424 }
4425 
4426 static void
4427 rsmsegshare_suspend(rsmseg_t *seg)
4428 {
4429 	int			e;
4430 	adapter_t		*adapter;
4431 	rsm_import_share_t	*sharedp;
4432 	DBG_DEFINE(category,
4433 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4434 
4435 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4436 	    "rsmsegshare_suspend enter\n"));
4437 
4438 	rsmseglock_acquire(seg);
4439 	rsmsharelock_acquire(seg);
4440 
4441 	sharedp = seg->s_share;
4442 	adapter = seg->s_adapter;
4443 	switch (sharedp->rsmsi_state) {
4444 	case RSMSI_STATE_NEW:
4445 		break;
4446 	case RSMSI_STATE_CONNECTING:
4447 		sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4448 		break;
4449 	case RSMSI_STATE_ABORT_CONNECT:
4450 		break;
4451 	case RSMSI_STATE_CONNECTED:
4452 		/* do the rsmpi disconnect */
4453 		if (sharedp->rsmsi_node != my_nodeid) {
4454 			e = adapter->rsmpi_ops->
4455 			    rsm_disconnect(sharedp->rsmsi_handle);
4456 
4457 			DBG_PRINTF((category, RSM_DEBUG,
4458 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4459 			    sharedp->rsmsi_segid, e));
4460 		}
4461 
4462 		sharedp->rsmsi_handle = NULL;
4463 
4464 		sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4465 		break;
4466 	case RSMSI_STATE_CONN_QUIESCE:
4467 		break;
4468 	case RSMSI_STATE_MAPPED:
4469 		/* do the rsmpi unmap and disconnect */
4470 		if (sharedp->rsmsi_node != my_nodeid) {
4471 			e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4472 
4473 			DBG_PRINTF((category, RSM_DEBUG,
4474 			    "rsmshare_suspend: rsmpi unmap %d\n", e));
4475 
4476 			e = adapter->rsmpi_ops->
4477 			    rsm_disconnect(sharedp->rsmsi_handle);
4478 			DBG_PRINTF((category, RSM_DEBUG,
4479 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4480 			    sharedp->rsmsi_segid, e));
4481 		}
4482 
4483 		sharedp->rsmsi_handle = NULL;
4484 
4485 		sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4486 		break;
4487 	case RSMSI_STATE_MAP_QUIESCE:
4488 		break;
4489 	case RSMSI_STATE_DISCONNECTED:
4490 		break;
4491 	default:
4492 		ASSERT(0); /* invalid state */
4493 	}
4494 
4495 	rsmsharelock_release(seg);
4496 	rsmseglock_release(seg);
4497 
4498 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4499 	    "rsmsegshare_suspend done\n"));
4500 }
4501 
4502 /*
4503  * This should get called on receiving a RESUME message or from
4504  * the pathmanger if the node undergoing DR dies.
4505  */
4506 static void
4507 importer_resume(rsm_node_id_t src_node)
4508 {
4509 	int		i;
4510 	rsmresource_t	*p = NULL;
4511 	rsmhash_table_t *rhash = &rsm_import_segs;
4512 	void		*cookie;
4513 	DBG_DEFINE(category,
4514 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4515 
4516 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4517 
4518 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4519 
4520 	for (i = 0; i < rsm_hash_size; i++) {
4521 		p = rhash->bucket[i];
4522 
4523 		for (; p; p = p->rsmrc_next) {
4524 			rsmseg_t *seg = (rsmseg_t *)p;
4525 
4526 			rsmseglock_acquire(seg);
4527 
4528 			/* process only importers of node undergoing DR */
4529 			if (seg->s_node != src_node) {
4530 				rsmseglock_release(seg);
4531 				continue;
4532 			}
4533 
4534 			if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4535 				rsmipc_request_t	request;
4536 				/*
4537 				 * rsmpi map/connect failed
4538 				 * inform the exporter so that it can
4539 				 * remove the importer.
4540 				 */
4541 				request.rsmipc_hdr.rsmipc_type =
4542 				    RSMIPC_MSG_NOTIMPORTING;
4543 				request.rsmipc_key = seg->s_segid;
4544 				request.rsmipc_segment_cookie = cookie;
4545 				rsmseglock_release(seg);
4546 				(void) rsmipc_send(seg->s_node, &request,
4547 				    RSM_NO_REPLY);
4548 			} else {
4549 				rsmseglock_release(seg);
4550 			}
4551 		}
4552 	}
4553 
4554 	rw_exit(&rhash->rsmhash_rw);
4555 
4556 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4557 }
4558 
4559 static int
4560 rsmseg_resume(rsmseg_t *seg, void **cookie)
4561 {
4562 	int			e;
4563 	int			retc;
4564 	off_t			dev_offset;
4565 	size_t			maplen;
4566 	uint_t			maxprot;
4567 	rsm_mapinfo_t		*p;
4568 	rsmcookie_t		*hdl;
4569 	rsm_import_share_t	*sharedp;
4570 	DBG_DEFINE(category,
4571 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4572 
4573 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4574 	    "rsmseg_resume enter: key=%u\n", seg->s_key));
4575 
4576 	*cookie = NULL;
4577 
4578 	ASSERT(rsmseglock_held(seg));
4579 
4580 	if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4581 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4582 		return (RSM_SUCCESS);
4583 	}
4584 
4585 	sharedp = seg->s_share;
4586 
4587 	rsmsharelock_acquire(seg);
4588 
4589 	/* resume the shared connection and/or mapping */
4590 	retc = rsmsegshare_resume(seg);
4591 
4592 	if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4593 		/* shared state can either be connected or mapped */
4594 		if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4595 		    (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4596 			ASSERT(retc == RSM_SUCCESS);
4597 			seg->s_handle.in = sharedp->rsmsi_handle;
4598 			rsmsharelock_release(seg);
4599 			seg->s_state = RSM_STATE_CONNECT;
4600 
4601 		} else { /* error in rsmpi connect during resume */
4602 			seg->s_handle.in = NULL;
4603 			seg->s_state = RSM_STATE_DISCONNECT;
4604 
4605 			sharedp->rsmsi_refcnt--;
4606 			cookie = (void *)sharedp->rsmsi_cookie;
4607 
4608 			if (sharedp->rsmsi_refcnt == 0) {
4609 				ASSERT(sharedp->rsmsi_mapcnt == 0);
4610 				rsmsharelock_release(seg);
4611 
4612 				/* clean up the shared data structure */
4613 				mutex_destroy(&sharedp->rsmsi_lock);
4614 				cv_destroy(&sharedp->rsmsi_cv);
4615 				kmem_free((void *)(sharedp),
4616 				    sizeof (rsm_import_share_t));
4617 
4618 			} else {
4619 				rsmsharelock_release(seg);
4620 			}
4621 			/*
4622 			 * The following needs to be done after any
4623 			 * rsmsharelock calls which use seg->s_share.
4624 			 */
4625 			seg->s_share = NULL;
4626 		}
4627 
4628 		/* signal any waiting segment */
4629 		cv_broadcast(&seg->s_cv);
4630 
4631 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4632 		    "rsmseg_resume done:state=%d\n", seg->s_state));
4633 		return (retc);
4634 	}
4635 
4636 	ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4637 
4638 	/* Setup protections for remap */
4639 	maxprot = PROT_USER;
4640 	if (seg->s_mode & RSM_PERM_READ) {
4641 		maxprot |= PROT_READ;
4642 	}
4643 	if (seg->s_mode & RSM_PERM_WRITE) {
4644 		maxprot |= PROT_WRITE;
4645 	}
4646 
4647 	if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4648 		/* error in rsmpi connect or map during resume */
4649 
4650 		/* remap to trash page */
4651 		ASSERT(seg->s_ckl != NULL);
4652 
4653 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4654 			e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4655 			    remap_cookie, hdl->c_off, hdl->c_len,
4656 			    maxprot, 0, NULL);
4657 
4658 			DBG_PRINTF((category, RSM_ERR,
4659 			    "rsmseg_resume:remap=%d\n", e));
4660 		}
4661 
4662 		seg->s_handle.in = NULL;
4663 		seg->s_state = RSM_STATE_DISCONNECT;
4664 
4665 		sharedp->rsmsi_refcnt--;
4666 
4667 		sharedp->rsmsi_mapcnt--;
4668 		seg->s_mapinfo = NULL;
4669 
4670 		if (sharedp->rsmsi_refcnt == 0) {
4671 			ASSERT(sharedp->rsmsi_mapcnt == 0);
4672 			rsmsharelock_release(seg);
4673 
4674 			/* clean up the shared data structure */
4675 			mutex_destroy(&sharedp->rsmsi_lock);
4676 			cv_destroy(&sharedp->rsmsi_cv);
4677 			kmem_free((void *)(sharedp),
4678 			    sizeof (rsm_import_share_t));
4679 
4680 		} else {
4681 			rsmsharelock_release(seg);
4682 		}
4683 		/*
4684 		 * The following needs to be done after any
4685 		 * rsmsharelock calls which use seg->s_share.
4686 		 */
4687 		seg->s_share = NULL;
4688 
4689 		/* signal any waiting segment */
4690 		cv_broadcast(&seg->s_cv);
4691 
4692 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4693 		    "rsmseg_resume done:seg=%x,err=%d\n",
4694 		    seg->s_key, retc));
4695 		return (retc);
4696 
4697 	}
4698 
4699 	seg->s_handle.in = sharedp->rsmsi_handle;
4700 
4701 	if (seg->s_node == my_nodeid) { /* loopback */
4702 		ASSERT(seg->s_mapinfo == NULL);
4703 
4704 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4705 			e = devmap_umem_remap(hdl->c_dhp,
4706 			    rsm_dip, seg->s_cookie,
4707 			    hdl->c_off, hdl->c_len,
4708 			    maxprot, 0, NULL);
4709 
4710 			DBG_PRINTF((category, RSM_ERR,
4711 			    "rsmseg_resume:remap=%d\n", e));
4712 		}
4713 	} else { /* remote exporter */
4714 		/* remap to the new rsmpi maps */
4715 		seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4716 
4717 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4718 			p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4719 			    &dev_offset, &maplen);
4720 			e = devmap_devmem_remap(hdl->c_dhp,
4721 			    p->dip, p->dev_register, dev_offset,
4722 			    maplen, maxprot, 0, NULL);
4723 
4724 			DBG_PRINTF((category, RSM_ERR,
4725 			    "rsmseg_resume:remap=%d\n", e));
4726 		}
4727 	}
4728 
4729 	rsmsharelock_release(seg);
4730 
4731 	seg->s_state = RSM_STATE_ACTIVE;
4732 	cv_broadcast(&seg->s_cv);
4733 
4734 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4735 
4736 	return (retc);
4737 }
4738 
4739 static int
4740 rsmsegshare_resume(rsmseg_t *seg)
4741 {
4742 	int			e = RSM_SUCCESS;
4743 	adapter_t		*adapter;
4744 	rsm_import_share_t	*sharedp;
4745 	DBG_DEFINE(category,
4746 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4747 
4748 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4749 
4750 	ASSERT(rsmseglock_held(seg));
4751 	ASSERT(rsmsharelock_held(seg));
4752 
4753 	sharedp = seg->s_share;
4754 
4755 	/*
4756 	 * If we are not in a xxxx_QUIESCE state that means shared
4757 	 * connect/mapping processing has been already been done
4758 	 * so return success.
4759 	 */
4760 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4761 	    (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4762 		return (RSM_SUCCESS);
4763 	}
4764 
4765 	adapter = seg->s_adapter;
4766 
4767 	if (sharedp->rsmsi_node != my_nodeid) {
4768 		rsm_addr_t	hwaddr;
4769 		hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4770 
4771 		e = adapter->rsmpi_ops->rsm_connect(
4772 		    adapter->rsmpi_handle, hwaddr,
4773 		    sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4774 
4775 		DBG_PRINTF((category, RSM_DEBUG,
4776 		    "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4777 		    sharedp->rsmsi_segid, e));
4778 
4779 		if (e != RSM_SUCCESS) {
4780 			/* when do we send the NOT_IMPORTING message */
4781 			sharedp->rsmsi_handle = NULL;
4782 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4783 			/* signal any waiting segment */
4784 			cv_broadcast(&sharedp->rsmsi_cv);
4785 			return (e);
4786 		}
4787 	}
4788 
4789 	if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4790 		sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4791 		/* signal any waiting segment */
4792 		cv_broadcast(&sharedp->rsmsi_cv);
4793 		return (e);
4794 	}
4795 
4796 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4797 
4798 	/* do the rsmpi map of the whole segment here */
4799 	if (sharedp->rsmsi_node != my_nodeid) {
4800 		size_t mapped_len;
4801 		rsm_mapinfo_t *p;
4802 
4803 		/*
4804 		 * We need to do rsmpi maps with <off, lens> identical to
4805 		 * the old mapinfo list because the segment mapping handles
4806 		 * dhp and such need the fragmentation of rsmpi maps to be
4807 		 * identical to what it was during the mmap of the segment
4808 		 */
4809 		p = sharedp->rsmsi_mapinfo;
4810 
4811 		while (p != NULL) {
4812 			mapped_len = 0;
4813 
4814 			e = adapter->rsmpi_ops->rsm_map(
4815 			    sharedp->rsmsi_handle, p->start_offset,
4816 			    p->individual_len, &mapped_len,
4817 			    &p->dip, &p->dev_register, &p->dev_offset,
4818 			    NULL, NULL);
4819 
4820 			if (e != 0) {
4821 				DBG_PRINTF((category, RSM_ERR,
4822 				    "rsmsegshare_resume: rsmpi map err=%d\n",
4823 				    e));
4824 				break;
4825 			}
4826 
4827 			if (mapped_len != p->individual_len) {
4828 				DBG_PRINTF((category, RSM_ERR,
4829 				    "rsmsegshare_resume: rsmpi maplen"
4830 				    "< reqlen=%lx\n", mapped_len));
4831 				e = RSMERR_BAD_LENGTH;
4832 				break;
4833 			}
4834 
4835 			p = p->next;
4836 
4837 		}
4838 
4839 
4840 		if (e != RSM_SUCCESS) { /* rsmpi map failed */
4841 			int	err;
4842 			/* Check if this is the first rsm_map */
4843 			if (p != sharedp->rsmsi_mapinfo) {
4844 				/*
4845 				 * A single rsm_unmap undoes multiple rsm_maps.
4846 				 */
4847 				(void) seg->s_adapter->rsmpi_ops->
4848 				    rsm_unmap(sharedp->rsmsi_handle);
4849 			}
4850 
4851 			rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4852 			sharedp->rsmsi_mapinfo = NULL;
4853 
4854 			err = adapter->rsmpi_ops->
4855 			    rsm_disconnect(sharedp->rsmsi_handle);
4856 
4857 			DBG_PRINTF((category, RSM_DEBUG,
4858 			    "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4859 			    sharedp->rsmsi_segid, err));
4860 
4861 			sharedp->rsmsi_handle = NULL;
4862 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4863 
4864 			/* signal the waiting segments */
4865 			cv_broadcast(&sharedp->rsmsi_cv);
4866 			DBG_PRINTF((category, RSM_DEBUG,
4867 			    "rsmsegshare_resume done: rsmpi map err\n"));
4868 			return (e);
4869 		}
4870 	}
4871 
4872 	sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4873 
4874 	/* signal any waiting segment */
4875 	cv_broadcast(&sharedp->rsmsi_cv);
4876 
4877 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4878 
4879 	return (e);
4880 }
4881 
4882 /*
4883  * this is the routine that gets called by recv_taskq which is the
4884  * thread that processes messages that are flow-controlled.
4885  */
4886 static void
4887 rsm_intr_proc_deferred(void *arg)
4888 {
4889 	path_t			*path = (path_t *)arg;
4890 	rsmipc_request_t	*msg;
4891 	rsmipc_msghdr_t		*msghdr;
4892 	rsm_node_id_t		src_node;
4893 	msgbuf_elem_t		*head;
4894 	int			e;
4895 	DBG_DEFINE(category,
4896 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4897 
4898 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4899 	    "rsm_intr_proc_deferred enter\n"));
4900 
4901 	mutex_enter(&path->mutex);
4902 
4903 	/* use the head of the msgbuf_queue */
4904 	head = rsmka_gethead_msgbuf(path);
4905 
4906 	mutex_exit(&path->mutex);
4907 
4908 	msg = (rsmipc_request_t *)&(head->msg);
4909 	msghdr = (rsmipc_msghdr_t *)msg;
4910 
4911 	src_node = msghdr->rsmipc_src;
4912 
4913 	/*
4914 	 * messages that need to send a reply should check the message version
4915 	 * before processing the message. And all messages that need to
4916 	 * send a reply should be processed here by the worker thread.
4917 	 */
4918 	switch (msghdr->rsmipc_type) {
4919 	case RSMIPC_MSG_SEGCONNECT:
4920 		if (msghdr->rsmipc_version != RSM_VERSION) {
4921 			rsmipc_reply_t reply;
4922 			reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4923 			reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4924 			reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4925 			(void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4926 		} else {
4927 			rsm_intr_segconnect(src_node, msg);
4928 		}
4929 		break;
4930 	case RSMIPC_MSG_DISCONNECT:
4931 		rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4932 		break;
4933 	case RSMIPC_MSG_SUSPEND:
4934 		importer_suspend(src_node);
4935 		break;
4936 	case RSMIPC_MSG_SUSPEND_DONE:
4937 		rsm_suspend_complete(src_node, 0);
4938 		break;
4939 	case RSMIPC_MSG_RESUME:
4940 		importer_resume(src_node);
4941 		break;
4942 	default:
4943 		ASSERT(0);
4944 	}
4945 
4946 	mutex_enter(&path->mutex);
4947 
4948 	rsmka_dequeue_msgbuf(path);
4949 
4950 	/* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4951 	if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4952 		path->procmsg_cnt++;
4953 
4954 	ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4955 
4956 	/* No need to send credits if path is going down */
4957 	if ((path->state == RSMKA_PATH_ACTIVE) &&
4958 	    (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4959 		/*
4960 		 * send credits and reset procmsg_cnt if success otherwise
4961 		 * credits will be sent after processing the next message
4962 		 */
4963 		e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4964 		if (e == 0)
4965 			path->procmsg_cnt = 0;
4966 		else
4967 			DBG_PRINTF((category, RSM_ERR,
4968 			    "rsm_intr_proc_deferred:send credits err=%d\n", e));
4969 	}
4970 
4971 	/*
4972 	 * decrement the path refcnt since we incremented it in
4973 	 * rsm_intr_callback_dispatch
4974 	 */
4975 	PATH_RELE_NOLOCK(path);
4976 
4977 	mutex_exit(&path->mutex);
4978 
4979 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4980 	    "rsm_intr_proc_deferred done\n"));
4981 }
4982 
4983 /*
4984  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4985  */
4986 static void
4987 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4988     rsm_intr_hand_arg_t arg)
4989 {
4990 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
4991 	path_t			*path;
4992 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4993 	DBG_DEFINE(category,
4994 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4995 
4996 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4997 	    "rsm_intr_callback_dispatch enter\n"));
4998 	ASSERT(data && hdlr_argp);
4999 
5000 	/* look up the path - incr the path refcnt */
5001 	path = rsm_find_path(hdlr_argp->adapter_name,
5002 	    hdlr_argp->adapter_instance, src_hwaddr);
5003 
5004 	/* the path has been removed - drop this message */
5005 	if (path == NULL) {
5006 		DBG_PRINTF((category, RSM_DEBUG,
5007 		    "rsm_intr_callback_dispatch done: msg dropped\n"));
5008 		return;
5009 	}
5010 	/* the path is not active - don't accept new messages */
5011 	if (path->state != RSMKA_PATH_ACTIVE) {
5012 		PATH_RELE_NOLOCK(path);
5013 		mutex_exit(&path->mutex);
5014 		DBG_PRINTF((category, RSM_DEBUG,
5015 		    "rsm_intr_callback_dispatch done: msg dropped"
5016 		    " path=%lx !ACTIVE\n", path));
5017 		return;
5018 	}
5019 
5020 	/*
5021 	 * Check if this message was sent to an older incarnation
5022 	 * of the path/sendq.
5023 	 */
5024 	if (path->local_incn != msghdr->rsmipc_incn) {
5025 		/* decrement the refcnt */
5026 		PATH_RELE_NOLOCK(path);
5027 		mutex_exit(&path->mutex);
5028 		DBG_PRINTF((category, RSM_DEBUG,
5029 		    "rsm_intr_callback_dispatch done: old incn %lld\n",
5030 		    msghdr->rsmipc_incn));
5031 		return;
5032 	}
5033 
5034 	/* copy and enqueue msg on the path's msgbuf queue */
5035 	rsmka_enqueue_msgbuf(path, data);
5036 
5037 	/*
5038 	 * schedule task to process messages - ignore retval from
5039 	 * task_dispatch because we sender cannot send more than
5040 	 * what receiver can handle.
5041 	 */
5042 	(void) taskq_dispatch(path->recv_taskq,
5043 	    rsm_intr_proc_deferred, path, KM_NOSLEEP);
5044 
5045 	mutex_exit(&path->mutex);
5046 
5047 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5048 	    "rsm_intr_callback_dispatch done\n"));
5049 }
5050 
5051 /*
5052  * This procedure is called from rsm_srv_func when a remote node creates a
5053  * a send queue.  This event is used as a hint that an  earlier failed
5054  * attempt to create a send queue to that remote node may now succeed and
5055  * should be retried.  Indication of an earlier failed attempt is provided
5056  * by the RSMKA_SQCREATE_PENDING flag.
5057  */
5058 static void
5059 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5060 {
5061 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
5062 	path_t			*path;
5063 	DBG_DEFINE(category,
5064 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5065 
5066 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5067 	    "rsm_sqcreateop_callback enter\n"));
5068 
5069 	/* look up the path - incr the path refcnt */
5070 	path = rsm_find_path(hdlr_argp->adapter_name,
5071 	    hdlr_argp->adapter_instance, src_hwaddr);
5072 
5073 	if (path == NULL) {
5074 		DBG_PRINTF((category, RSM_DEBUG,
5075 		    "rsm_sqcreateop_callback done: no path\n"));
5076 		return;
5077 	}
5078 
5079 	if ((path->state == RSMKA_PATH_UP) &&
5080 	    (path->flags & RSMKA_SQCREATE_PENDING)) {
5081 		/*
5082 		 * previous attempt to create sendq had failed, retry
5083 		 * it and move to RSMKA_PATH_ACTIVE state if successful.
5084 		 * the refcnt will be decremented in the do_deferred_work
5085 		 */
5086 		(void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5087 	} else {
5088 		/* decrement the refcnt */
5089 		PATH_RELE_NOLOCK(path);
5090 	}
5091 	mutex_exit(&path->mutex);
5092 
5093 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5094 	    "rsm_sqcreateop_callback done\n"));
5095 }
5096 
5097 static void
5098 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5099 {
5100 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5101 	rsmipc_request_t *msg = (rsmipc_request_t *)data;
5102 	rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5103 	rsm_node_id_t src_node;
5104 	DBG_DEFINE(category,
5105 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5106 
5107 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5108 	    "src=%d, type=%d\n", msghdr->rsmipc_src,
5109 	    msghdr->rsmipc_type));
5110 
5111 	/*
5112 	 * Check for the version number in the msg header. If it is not
5113 	 * RSM_VERSION, drop the message. In the future, we need to manage
5114 	 * incompatible version numbers in some way
5115 	 */
5116 	if (msghdr->rsmipc_version != RSM_VERSION) {
5117 		DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5118 		/*
5119 		 * Drop requests that don't have a reply right here
5120 		 * Request with reply will send a BAD_VERSION reply
5121 		 * when they get processed by the worker thread.
5122 		 */
5123 		if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5124 			return;
5125 		}
5126 
5127 	}
5128 
5129 	src_node = msghdr->rsmipc_src;
5130 
5131 	switch (msghdr->rsmipc_type) {
5132 	case RSMIPC_MSG_SEGCONNECT:
5133 	case RSMIPC_MSG_DISCONNECT:
5134 	case RSMIPC_MSG_SUSPEND:
5135 	case RSMIPC_MSG_SUSPEND_DONE:
5136 	case RSMIPC_MSG_RESUME:
5137 		/*
5138 		 * These message types are handled by a worker thread using
5139 		 * the flow-control algorithm.
5140 		 * Any message processing that does one or more of the
5141 		 * following should be handled in a worker thread.
5142 		 *	- allocates resources and might sleep
5143 		 *	- makes RSMPI calls down to the interconnect driver
5144 		 *	this by defn include requests with reply.
5145 		 *	- takes a long duration of time
5146 		 */
5147 		rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5148 		break;
5149 	case RSMIPC_MSG_NOTIMPORTING:
5150 		importer_list_rm(src_node, msg->rsmipc_key,
5151 		    msg->rsmipc_segment_cookie);
5152 		break;
5153 	case RSMIPC_MSG_SQREADY:
5154 		rsm_proc_sqready(data, src_hwaddr, arg);
5155 		break;
5156 	case RSMIPC_MSG_SQREADY_ACK:
5157 		rsm_proc_sqready_ack(data, src_hwaddr, arg);
5158 		break;
5159 	case RSMIPC_MSG_CREDIT:
5160 		rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5161 		break;
5162 	case RSMIPC_MSG_REPLY:
5163 		rsm_intr_reply(msghdr);
5164 		break;
5165 	case RSMIPC_MSG_BELL:
5166 		rsm_intr_event(msg);
5167 		break;
5168 	case RSMIPC_MSG_IMPORTING:
5169 		importer_list_add(src_node, msg->rsmipc_key,
5170 		    msg->rsmipc_adapter_hwaddr,
5171 		    msg->rsmipc_segment_cookie);
5172 		break;
5173 	case RSMIPC_MSG_REPUBLISH:
5174 		importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5175 		break;
5176 	default:
5177 		DBG_PRINTF((category, RSM_DEBUG,
5178 		    "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5179 		    (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5180 	}
5181 
5182 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5183 
5184 }
5185 
5186 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5187     rsm_intr_q_op_t opcode, rsm_addr_t src,
5188     void *data, size_t size, rsm_intr_hand_arg_t arg)
5189 {
5190 	DBG_DEFINE(category,
5191 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5192 
5193 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5194 
5195 	switch (opcode) {
5196 	case RSM_INTR_Q_OP_CREATE:
5197 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5198 		rsm_sqcreateop_callback(src, arg);
5199 		break;
5200 	case RSM_INTR_Q_OP_DESTROY:
5201 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5202 		break;
5203 	case RSM_INTR_Q_OP_RECEIVE:
5204 		rsm_intr_callback(data, src, arg);
5205 		break;
5206 	default:
5207 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5208 		    "rsm_srv_func: unknown opcode = %x\n", opcode));
5209 	}
5210 
5211 	chd = chd;
5212 	size = size;
5213 
5214 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5215 
5216 	return (RSM_INTR_HAND_CLAIMED);
5217 }
5218 
5219 /* *************************** IPC slots ************************* */
5220 static rsmipc_slot_t *
5221 rsmipc_alloc()
5222 {
5223 	int i;
5224 	rsmipc_slot_t *slot;
5225 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5226 
5227 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5228 
5229 	/* try to find a free slot, if not wait */
5230 	mutex_enter(&rsm_ipc.lock);
5231 
5232 	while (rsm_ipc.count == 0) {
5233 		rsm_ipc.wanted = 1;
5234 		cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5235 	}
5236 
5237 	/* An empty slot is available, find it */
5238 	slot = &rsm_ipc.slots[0];
5239 	for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5240 		if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5241 			RSMIPC_CLEAR(slot, RSMIPC_FREE);
5242 			break;
5243 		}
5244 	}
5245 
5246 	ASSERT(i < RSMIPC_SZ);
5247 	rsm_ipc.count--;	/* one less is available */
5248 	rsm_ipc.sequence++; /* new sequence */
5249 
5250 	slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5251 	slot->rsmipc_cookie.ic.index = (uint_t)i;
5252 
5253 	mutex_exit(&rsm_ipc.lock);
5254 
5255 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5256 
5257 	return (slot);
5258 }
5259 
5260 static void
5261 rsmipc_free(rsmipc_slot_t *slot)
5262 {
5263 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5264 
5265 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5266 
5267 	ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5268 	ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5269 
5270 	mutex_enter(&rsm_ipc.lock);
5271 
5272 	RSMIPC_SET(slot, RSMIPC_FREE);
5273 
5274 	slot->rsmipc_cookie.ic.sequence = 0;
5275 
5276 	mutex_exit(&slot->rsmipc_lock);
5277 	rsm_ipc.count++;
5278 	ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5279 	if (rsm_ipc.wanted) {
5280 		rsm_ipc.wanted = 0;
5281 		cv_broadcast(&rsm_ipc.cv);
5282 	}
5283 
5284 	mutex_exit(&rsm_ipc.lock);
5285 
5286 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5287 }
5288 
/*
 * Send a kernel agent IPC message to node `dest'.
 *
 * The combination of `req' and `reply' selects the mode of operation:
 *	reply == NULL	- send the request `req' and do not wait for a reply
 *	req == NULL	- send `reply' itself as a reply message
 *	both non-NULL	- send `req' and wait for the answer, which is
 *			  associated with an IPC slot whose data pointer
 *			  refers to `reply'
 *
 * When dest is the local node nothing is transmitted; the request is
 * handed directly to the routine that would process it and 0 is returned.
 * NOTE(review): the local case dereferences `req' and, for SEGCONNECT,
 * `reply' without a NULL check - confirm callers never pass NULL there.
 *
 * For a remote node a sendq token is obtained through
 * rsmka_get_sendq_token and the message goes out with the adapter's
 * rsm_send entry point.  A fixed set of request types is flow controlled:
 * the sender waits for sendq_token->msgbuf_avail to become non-zero and
 * reserves one message buffer before sending.  Failed sends are retried
 * via the `again' label.
 *
 * Returns 0/RSM_SUCCESS on success, RSMERR_REMOTE_NODE_UNREACHABLE when
 * dest is out of range or no sendq token is available,
 * RSMERR_CONN_ABORTED when the retry hands back the same token after a
 * connection abort, timeout or maybe-delivered error, and
 * RSMERR_INTERRUPTED when a wait is interrupted by a signal.
 */
static int
rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
{
	int		e = 0;
	int		credit_check = 0;
	int		retry_cnt = 0;
	int		min_retry_cnt = 10;
	rsm_send_t	is;
	rsmipc_slot_t	*rslot;
	adapter_t	*adapter;
	path_t		*path;
	sendq_token_t	*sendq_token;
	sendq_token_t	*used_sendq_token = NULL;
	rsm_send_q_handle_t	ipc_handle;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
	    dest));

	/*
	 * Check if this is a local case
	 */
	if (dest == my_nodeid) {
		/* short-circuit: invoke the receive-side handler directly */
		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
			reply->rsmipc_status = (short)rsmsegacl_validate(
			    req, dest, reply);
			break;
		case RSMIPC_MSG_BELL:
			req->rsmipc_hdr.rsmipc_src = dest;
			rsm_intr_event(req);
			break;
		case RSMIPC_MSG_IMPORTING:
			importer_list_add(dest, req->rsmipc_key,
			    req->rsmipc_adapter_hwaddr,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_NOTIMPORTING:
			importer_list_rm(dest, req->rsmipc_key,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_REPUBLISH:
			importer_update(dest, req->rsmipc_key,
			    req->rsmipc_perm);
			break;
		case RSMIPC_MSG_SUSPEND:
			importer_suspend(dest);
			break;
		case RSMIPC_MSG_SUSPEND_DONE:
			rsm_suspend_complete(dest, 0);
			break;
		case RSMIPC_MSG_RESUME:
			importer_resume(dest);
			break;
		default:
			ASSERT(0);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send done\n"));
		return (0);
	}

	if (dest >= MAX_NODES) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send bad node number %x\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Oh boy! we are going remote.
	 */

	/*
	 * identify if we need to have credits to send this message
	 * - only selected requests are flow controlled
	 */
	if (req != NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send:request type=%d\n",
		    req->rsmipc_hdr.rsmipc_type));

		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
		case RSMIPC_MSG_DISCONNECT:
		case RSMIPC_MSG_IMPORTING:
		case RSMIPC_MSG_SUSPEND:
		case RSMIPC_MSG_SUSPEND_DONE:
		case RSMIPC_MSG_RESUME:
			credit_check = 1;
			break;
		default:
			credit_check = 0;
		}
	}

	/*
	 * Retry entry point.  Every path that jumps back here has already
	 * released the sendq token (and the IPC slot, if one was held), and
	 * `e' holds the error from the failed attempt.
	 */
again:
	if (retry_cnt++ == min_retry_cnt) {
		/* backoff before further retries for 10ms */
		delay(drv_usectohz(10000));
		retry_cnt = 0; /* reset retry_cnt */
	}
	/* used_sendq_token is the token tried last time, NULL at first */
	sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
	if (sendq_token == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send no device to reach node %d\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Getting the same token back after one of these errors means there
	 * is nothing else to try - give up instead of retrying.
	 */
	if ((sendq_token == used_sendq_token) &&
	    ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
	    (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
		rele_sendq_token(sendq_token);
		DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
		return (RSMERR_CONN_ABORTED);
	} else
		used_sendq_token = sendq_token;

/* lint -save -e413 */
	path = SQ_TOKEN_TO_PATH(sendq_token);
	adapter = path->local_adapter;
/* lint -restore */
	ipc_handle = sendq_token->rsmpi_sendq_handle;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));

	if (reply == NULL) {
		/* Send request without ack */
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		/* the whole request structure is what goes on the wire */
		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;

		if (credit_check) {
			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				/* 0 is treated as "interrupted by a signal" */
				if (e == 0) {
					mutex_exit(&path->mutex);
					no_reply_cnt++;
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* the path mutex is not held across rsm_send */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		no_reply_cnt++;
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send no reply send"
			    " err = %d no reply count = %d\n",
			    e, no_reply_cnt));
			ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
			    e != RSMERR_BAD_BARRIER_HNDL);
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}

	}

	if (req == NULL) {
		/* Send reply - No flow control is done for reply */
		/*
		 * Set the version in the msg header for KA communication
		 * versioning
		 */
		reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		reply->rsmipc_hdr.rsmipc_src = my_nodeid;
		/* incn number is not used for reply msgs currently */
		reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)reply;
		is.is_size = sizeof (*reply);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send reply send"
			    " err = %d\n", e));
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}
	}

	/* Reply needed */
	rslot = rsmipc_alloc(); /* allocate a new ipc slot */

	/*
	 * The slot lock is held from here on; rsmipc_free() releases it on
	 * every exit path below.
	 */
	mutex_enter(&rslot->rsmipc_lock);

	rslot->rsmipc_data = (void *)reply;
	RSMIPC_SET(rslot, RSMIPC_PENDING);

	/* (re)send the request for as long as the slot is marked PENDING */
	while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		/* the cookie identifies this slot and its sequence number */
		req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		if (credit_check) {

			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then.
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				/* 0 is treated as "interrupted by a signal" */
				if (e == 0) {
					mutex_exit(&path->mutex);
					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
					rsmipc_free(rslot);
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
				rsmipc_free(rslot);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* the path mutex is not held across rsm_send */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		if (e != RSM_SUCCESS) {
			/*
			 * Give up the slot and the token before retrying; the
			 * retry comes back through rsmipc_alloc() above and
			 * gets a fresh slot.
			 */
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
			rsmipc_free(rslot);
			rele_sendq_token(sendq_token);
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		}

		/* wait for a reply signal, a SIGINT, or 5 sec. timeout */
		e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
		    drv_usectohz(5000000), TR_CLOCK_TICK);
		if (e < 0) {
			/*
			 * timed out - retry: if the slot is still PENDING the
			 * enclosing loop sends the request again
			 */
			e = RSMERR_TIMEOUT;
		} else if (e == 0) {
			/* signalled - return error */
			e = RSMERR_INTERRUPTED;
			break;
		} else {
			e = RSM_SUCCESS;
		}
	}

	/* common exit: release the slot (and its lock) and the sendq token */
	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
	rsmipc_free(rslot);
	rele_sendq_token(sendq_token);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
	return (e);
}
5668 
5669 static int
5670 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5671 {
5672 	rsmipc_request_t request;
5673 
5674 	/*
5675 	 *  inform the exporter to delete this importer
5676 	 */
5677 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5678 	request.rsmipc_key = segid;
5679 	request.rsmipc_segment_cookie = cookie;
5680 	return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5681 }
5682 
5683 static void
5684 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t	*acl,
5685     int acl_len, rsm_permission_t default_permission)
5686 {
5687 	int			i;
5688 	importing_token_t	*token;
5689 	rsmipc_request_t	request;
5690 	republish_token_t	*republish_list = NULL;
5691 	republish_token_t	*rp;
5692 	rsm_permission_t	permission;
5693 	int			index;
5694 
5695 	/*
5696 	 * send the new access mode to all the nodes that have imported
5697 	 * this segment.
5698 	 * If the new acl does not have a node that was present in
5699 	 * the old acl a access permission of 0 is sent.
5700 	 */
5701 
5702 	index = rsmhash(segid);
5703 
5704 	/*
5705 	 * create a list of node/permissions to send the republish message
5706 	 */
5707 	mutex_enter(&importer_list.lock);
5708 
5709 	token = importer_list.bucket[index];
5710 	while (token != NULL) {
5711 		if (segid == token->key) {
5712 			permission = default_permission;
5713 
5714 			for (i = 0; i < acl_len; i++) {
5715 				if (token->importing_node == acl[i].ae_node) {
5716 					permission = acl[i].ae_permission;
5717 					break;
5718 				}
5719 			}
5720 			rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5721 
5722 			rp->key = segid;
5723 			rp->importing_node = token->importing_node;
5724 			rp->permission = permission;
5725 			rp->next = republish_list;
5726 			republish_list = rp;
5727 		}
5728 		token = token->next;
5729 	}
5730 
5731 	mutex_exit(&importer_list.lock);
5732 
5733 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5734 	request.rsmipc_key = segid;
5735 
5736 	while (republish_list != NULL) {
5737 		request.rsmipc_perm = republish_list->permission;
5738 		(void) rsmipc_send(republish_list->importing_node,
5739 		    &request, RSM_NO_REPLY);
5740 		rp = republish_list;
5741 		republish_list = republish_list->next;
5742 		kmem_free(rp, sizeof (republish_token_t));
5743 	}
5744 }
5745 
5746 static void
5747 rsm_send_suspend()
5748 {
5749 	int			i, e;
5750 	rsmipc_request_t 	request;
5751 	list_element_t		*tokp;
5752 	list_element_t		*head = NULL;
5753 	importing_token_t	*token;
5754 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5755 	    "rsm_send_suspend enter\n"));
5756 
5757 	/*
5758 	 * create a list of node to send the suspend message
5759 	 *
5760 	 * Currently the whole importer list is scanned and we obtain
5761 	 * all the nodes - this basically gets all nodes that at least
5762 	 * import one segment from the local node.
5763 	 *
5764 	 * no need to grab the rsm_suspend_list lock here since we are
5765 	 * single threaded when suspend is called.
5766 	 */
5767 
5768 	mutex_enter(&importer_list.lock);
5769 	for (i = 0; i < rsm_hash_size; i++) {
5770 
5771 		token = importer_list.bucket[i];
5772 
5773 		while (token != NULL) {
5774 
5775 			tokp = head;
5776 
5777 			/*
5778 			 * make sure that the token's node
5779 			 * is not already on the suspend list
5780 			 */
5781 			while (tokp != NULL) {
5782 				if (tokp->nodeid == token->importing_node) {
5783 					break;
5784 				}
5785 				tokp = tokp->next;
5786 			}
5787 
5788 			if (tokp == NULL) { /* not in suspend list */
5789 				tokp = kmem_zalloc(sizeof (list_element_t),
5790 				    KM_SLEEP);
5791 				tokp->nodeid = token->importing_node;
5792 				tokp->next = head;
5793 				head = tokp;
5794 			}
5795 
5796 			token = token->next;
5797 		}
5798 	}
5799 	mutex_exit(&importer_list.lock);
5800 
5801 	if (head == NULL) { /* no importers so go ahead and quiesce segments */
5802 		exporter_quiesce();
5803 		return;
5804 	}
5805 
5806 	mutex_enter(&rsm_suspend_list.list_lock);
5807 	ASSERT(rsm_suspend_list.list_head == NULL);
5808 	/*
5809 	 * update the suspend list righaway so that if a node dies the
5810 	 * pathmanager can set the NODE dead flag
5811 	 */
5812 	rsm_suspend_list.list_head = head;
5813 	mutex_exit(&rsm_suspend_list.list_lock);
5814 
5815 	tokp = head;
5816 
5817 	while (tokp != NULL) {
5818 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5819 		e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5820 		/*
5821 		 * Error in rsmipc_send currently happens due to inaccessibility
5822 		 * of the remote node.
5823 		 */
5824 		if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5825 			tokp->flags |= RSM_SUSPEND_ACKPENDING;
5826 		}
5827 
5828 		tokp = tokp->next;
5829 	}
5830 
5831 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5832 	    "rsm_send_suspend done\n"));
5833 
5834 }
5835 
5836 static void
5837 rsm_send_resume()
5838 {
5839 	rsmipc_request_t 	request;
5840 	list_element_t		*elem, *head;
5841 
5842 	/*
5843 	 * save the suspend list so that we know where to send
5844 	 * the resume messages and make the suspend list head
5845 	 * NULL.
5846 	 */
5847 	mutex_enter(&rsm_suspend_list.list_lock);
5848 	head = rsm_suspend_list.list_head;
5849 	rsm_suspend_list.list_head = NULL;
5850 	mutex_exit(&rsm_suspend_list.list_lock);
5851 
5852 	while (head != NULL) {
5853 		elem = head;
5854 		head = head->next;
5855 
5856 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5857 
5858 		(void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5859 
5860 		kmem_free((void *)elem, sizeof (list_element_t));
5861 
5862 	}
5863 
5864 }
5865 
5866 /*
5867  * This function takes path and sends a message using the sendq
5868  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5869  * and RSMIPC_MSG_CREDIT are sent using this function.
5870  */
5871 int
5872 rsmipc_send_controlmsg(path_t *path, int msgtype)
5873 {
5874 	int			e;
5875 	int			retry_cnt = 0;
5876 	int			min_retry_cnt = 10;
5877 	adapter_t		*adapter;
5878 	rsm_send_t		is;
5879 	rsm_send_q_handle_t	ipc_handle;
5880 	rsmipc_controlmsg_t	msg;
5881 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5882 
5883 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5884 	    "rsmipc_send_controlmsg enter\n"));
5885 
5886 	ASSERT(MUTEX_HELD(&path->mutex));
5887 
5888 	adapter = path->local_adapter;
5889 
5890 	DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5891 	    "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5892 	    my_nodeid, adapter->hwaddr, path->remote_node,
5893 	    path->remote_hwaddr, path->procmsg_cnt));
5894 
5895 	if (path->state != RSMKA_PATH_ACTIVE) {
5896 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5897 		    "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5898 		return (1);
5899 	}
5900 
5901 	ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5902 
5903 	msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5904 	msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5905 	msg.rsmipc_hdr.rsmipc_type = msgtype;
5906 	msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5907 
5908 	if (msgtype == RSMIPC_MSG_CREDIT)
5909 		msg.rsmipc_credits = path->procmsg_cnt;
5910 
5911 	msg.rsmipc_local_incn = path->local_incn;
5912 
5913 	msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5914 	/* incr the sendq, path refcnt */
5915 	PATH_HOLD_NOLOCK(path);
5916 	SENDQ_TOKEN_HOLD(path);
5917 
5918 	do {
5919 		/* drop the path lock before doing the rsm_send */
5920 		mutex_exit(&path->mutex);
5921 
5922 		is.is_data = (void *)&msg;
5923 		is.is_size = sizeof (msg);
5924 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5925 		is.is_wait = 0;
5926 
5927 		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5928 
5929 		ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5930 		    e != RSMERR_BAD_BARRIER_HNDL);
5931 
5932 		mutex_enter(&path->mutex);
5933 
5934 		if (e == RSM_SUCCESS) {
5935 			break;
5936 		}
5937 		/* error counter for statistics */
5938 		atomic_add_64(&rsm_ctrlmsg_errcnt, 1);
5939 
5940 		DBG_PRINTF((category, RSM_ERR,
5941 		    "rsmipc_send_controlmsg:rsm_send error=%d", e));
5942 
5943 		if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5944 			(void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5945 			    &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5946 			retry_cnt = 0;
5947 		}
5948 	} while (path->state == RSMKA_PATH_ACTIVE);
5949 
5950 	/* decrement the sendq,path refcnt that we incr before rsm_send */
5951 	SENDQ_TOKEN_RELE(path);
5952 	PATH_RELE_NOLOCK(path);
5953 
5954 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5955 	    "rsmipc_send_controlmsg done=%d", e));
5956 	return (e);
5957 }
5958 
5959 /*
5960  * Called from rsm_force_unload and path_importer_disconnect. The memory
5961  * mapping for the imported segment is removed and the segment is
5962  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5963  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5964  * and FALSE from rsm_rebind.
5965  *
5966  * When subsequent accesses cause page faulting, the dummy page is mapped
5967  * to resolve the fault, and the mapping generation number is incremented
5968  * so that the application can be notified on a close barrier operation.
5969  *
5970  * It is important to note that the caller of rsmseg_unload is responsible for
5971  * acquiring the segment lock before making a call to rsmseg_unload. This is
5972  * required to make the caller and rsmseg_unload thread safe. The segment lock
5973  * will be released by the rsmseg_unload function.
5974  */
5975 void
5976 rsmseg_unload(rsmseg_t *im_seg)
5977 {
5978 	rsmcookie_t		*hdl;
5979 	void			*shared_cookie;
5980 	rsmipc_request_t	request;
5981 	uint_t			maxprot;
5982 
5983 	DBG_DEFINE(category,
5984 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5985 
5986 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5987 
5988 	ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5989 
5990 	/* wait until segment leaves the mapping state */
5991 	while (im_seg->s_state == RSM_STATE_MAPPING)
5992 		cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5993 	/*
5994 	 * An unload is only necessary if the segment is connected. However,
5995 	 * if the segment was on the import list in state RSM_STATE_CONNECTING
5996 	 * then a connection was in progress. Change to RSM_STATE_NEW
5997 	 * here to cause an early exit from the connection process.
5998 	 */
5999 	if (im_seg->s_state == RSM_STATE_NEW) {
6000 		rsmseglock_release(im_seg);
6001 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6002 		    "rsmseg_unload done: RSM_STATE_NEW\n"));
6003 		return;
6004 	} else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6005 		im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6006 		rsmsharelock_acquire(im_seg);
6007 		im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6008 		rsmsharelock_release(im_seg);
6009 		rsmseglock_release(im_seg);
6010 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6011 		    "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6012 		return;
6013 	}
6014 
6015 	if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6016 		if (im_seg->s_ckl != NULL) {
6017 			int e;
6018 			/* Setup protections for remap */
6019 			maxprot = PROT_USER;
6020 			if (im_seg->s_mode & RSM_PERM_READ) {
6021 				maxprot |= PROT_READ;
6022 			}
6023 			if (im_seg->s_mode & RSM_PERM_WRITE) {
6024 				maxprot |= PROT_WRITE;
6025 			}
6026 			hdl = im_seg->s_ckl;
6027 			for (; hdl != NULL; hdl = hdl->c_next) {
6028 				e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6029 				    remap_cookie,
6030 				    hdl->c_off, hdl->c_len,
6031 				    maxprot, 0, NULL);
6032 
6033 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6034 				    "remap returns %d\n", e));
6035 			}
6036 		}
6037 
6038 		(void) rsm_closeconnection(im_seg, &shared_cookie);
6039 
6040 		if (shared_cookie != NULL) {
6041 			/*
6042 			 * inform the exporting node so this import
6043 			 * can be deleted from the list of importers.
6044 			 */
6045 			request.rsmipc_hdr.rsmipc_type =
6046 			    RSMIPC_MSG_NOTIMPORTING;
6047 			request.rsmipc_key = im_seg->s_segid;
6048 			request.rsmipc_segment_cookie = shared_cookie;
6049 			rsmseglock_release(im_seg);
6050 			(void) rsmipc_send(im_seg->s_node, &request,
6051 			    RSM_NO_REPLY);
6052 		} else {
6053 			rsmseglock_release(im_seg);
6054 		}
6055 	}
6056 	else
6057 		rsmseglock_release(im_seg);
6058 
6059 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6060 
6061 }
6062 
6063 /* ****************************** Importer Calls ************************ */
6064 
/*
 * Decide whether the holder of credential `cr' may access a segment
 * owned by `owner'/`group'.
 *
 * `perm' holds the permission bits that are granted and `mode' the bits
 * being requested.  `perm' is shifted left by 0 bits when the caller's
 * uid equals `owner', by 3 bits when the caller is only a member of
 * `group', and by 6 bits otherwise, before it is masked out of `mode'.
 *
 * Returns 0 when no requested bit is left over; otherwise the leftover
 * bits are handed to secpolicy_rsm_access() and its result is returned.
 */
static int
rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
{
	int	shift;
	int	missing;

	if (crgetuid(cr) == owner)
		shift = 0;
	else if (groupmember(group, cr))
		shift = 3;
	else
		shift = 6;

	/* requested bits that the (shifted) granted bits do not cover */
	missing = mode & ~(perm << shift);

	return ((missing == 0) ? 0 :
	    secpolicy_rsm_access(cr, owner, missing));
}
6083 
6084 
6085 static int
6086 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6087     intptr_t dataptr, int mode)
6088 {
6089 	int e;
6090 	int			recheck_state = 0;
6091 	void			*shared_cookie;
6092 	rsmipc_request_t	request;
6093 	rsmipc_reply_t		reply;
6094 	rsm_permission_t	access;
6095 	adapter_t		*adapter;
6096 	rsm_addr_t		addr = 0;
6097 	rsm_import_share_t	*sharedp;
6098 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6099 
6100 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6101 
6102 	adapter = rsm_getadapter(msg, mode);
6103 	if (adapter == NULL) {
6104 		DBG_PRINTF((category, RSM_ERR,
6105 		    "rsm_connect done:ENODEV adapter=NULL\n"));
6106 		return (RSMERR_CTLR_NOT_PRESENT);
6107 	}
6108 
6109 	if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6110 		rsmka_release_adapter(adapter);
6111 		DBG_PRINTF((category, RSM_ERR,
6112 		    "rsm_connect done:ENODEV loopback\n"));
6113 		return (RSMERR_CTLR_NOT_PRESENT);
6114 	}
6115 
6116 
6117 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6118 	ASSERT(seg->s_state == RSM_STATE_NEW);
6119 
6120 	/*
6121 	 * Translate perm to access
6122 	 */
6123 	if (msg->perm & ~RSM_PERM_RDWR) {
6124 		rsmka_release_adapter(adapter);
6125 		DBG_PRINTF((category, RSM_ERR,
6126 		    "rsm_connect done:EINVAL invalid perms\n"));
6127 		return (RSMERR_BAD_PERMS);
6128 	}
6129 	access = 0;
6130 	if (msg->perm & RSM_PERM_READ)
6131 		access |= RSM_ACCESS_READ;
6132 	if (msg->perm & RSM_PERM_WRITE)
6133 		access |= RSM_ACCESS_WRITE;
6134 
6135 	seg->s_node = msg->nodeid;
6136 
6137 	/*
6138 	 * Adding to the import list locks the segment; release the segment
6139 	 * lock so we can get the reply for the send.
6140 	 */
6141 	e = rsmimport_add(seg, msg->key);
6142 	if (e) {
6143 		rsmka_release_adapter(adapter);
6144 		DBG_PRINTF((category, RSM_ERR,
6145 		    "rsm_connect done:rsmimport_add failed %d\n", e));
6146 		return (e);
6147 	}
6148 	seg->s_state = RSM_STATE_CONNECTING;
6149 
6150 	/*
6151 	 * Set the s_adapter field here so as to have a valid comparison of
6152 	 * the adapter and the s_adapter value during rsmshare_get. For
6153 	 * any error, set s_adapter to NULL before doing a release_adapter
6154 	 */
6155 	seg->s_adapter = adapter;
6156 
6157 	rsmseglock_release(seg);
6158 
6159 	/*
6160 	 * get the pointer to the shared data structure; the
6161 	 * shared data is locked and refcount has been incremented
6162 	 */
6163 	sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6164 
6165 	ASSERT(rsmsharelock_held(seg));
6166 
6167 	do {
6168 		/* flag indicates whether we need to recheck the state */
6169 		recheck_state = 0;
6170 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6171 		    "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6172 		switch (sharedp->rsmsi_state) {
6173 		case RSMSI_STATE_NEW:
6174 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6175 			break;
6176 		case RSMSI_STATE_CONNECTING:
6177 			/* FALLTHRU */
6178 		case RSMSI_STATE_CONN_QUIESCE:
6179 			/* FALLTHRU */
6180 		case RSMSI_STATE_MAP_QUIESCE:
6181 			/* wait for the state to change */
6182 			while ((sharedp->rsmsi_state ==
6183 			    RSMSI_STATE_CONNECTING) ||
6184 			    (sharedp->rsmsi_state ==
6185 			    RSMSI_STATE_CONN_QUIESCE) ||
6186 			    (sharedp->rsmsi_state ==
6187 			    RSMSI_STATE_MAP_QUIESCE)) {
6188 				if (cv_wait_sig(&sharedp->rsmsi_cv,
6189 				    &sharedp->rsmsi_lock) == 0) {
6190 					/* signalled - clean up and return */
6191 					rsmsharelock_release(seg);
6192 					rsmimport_rm(seg);
6193 					seg->s_adapter = NULL;
6194 					rsmka_release_adapter(adapter);
6195 					seg->s_state = RSM_STATE_NEW;
6196 					DBG_PRINTF((category, RSM_ERR,
6197 					    "rsm_connect done: INTERRUPTED\n"));
6198 					return (RSMERR_INTERRUPTED);
6199 				}
6200 			}
6201 			/*
6202 			 * the state changed, loop back and check what it is
6203 			 */
6204 			recheck_state = 1;
6205 			break;
6206 		case RSMSI_STATE_ABORT_CONNECT:
6207 			/* exit the loop and clean up further down */
6208 			break;
6209 		case RSMSI_STATE_CONNECTED:
6210 			/* already connected, good - fall through */
6211 		case RSMSI_STATE_MAPPED:
6212 			/* already mapped, wow - fall through */
6213 			/* access validation etc is done further down */
6214 			break;
6215 		case RSMSI_STATE_DISCONNECTED:
6216 			/* disconnected - so reconnect now */
6217 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6218 			break;
6219 		default:
6220 			ASSERT(0); /* Invalid State */
6221 		}
6222 	} while (recheck_state);
6223 
6224 	if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6225 		/* we are the first to connect */
6226 		rsmsharelock_release(seg);
6227 
6228 		if (msg->nodeid != my_nodeid) {
6229 			addr = get_remote_hwaddr(adapter, msg->nodeid);
6230 
6231 			if ((int64_t)addr < 0) {
6232 				rsmsharelock_acquire(seg);
6233 				rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6234 				    RSMSI_STATE_NEW);
6235 				rsmsharelock_release(seg);
6236 				rsmimport_rm(seg);
6237 				seg->s_adapter = NULL;
6238 				rsmka_release_adapter(adapter);
6239 				seg->s_state = RSM_STATE_NEW;
6240 				DBG_PRINTF((category, RSM_ERR,
6241 				    "rsm_connect done: hwaddr<0\n"));
6242 				return (RSMERR_INTERNAL_ERROR);
6243 			}
6244 		} else {
6245 			addr = adapter->hwaddr;
6246 		}
6247 
6248 		/*
6249 		 * send request to node [src, dest, key, msgid] and get back
6250 		 * [status, msgid, cookie]
6251 		 */
6252 		request.rsmipc_key = msg->key;
6253 		/*
6254 		 * we need the s_mode of the exporter so pass
6255 		 * RSM_ACCESS_TRUSTED
6256 		 */
6257 		request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6258 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6259 		request.rsmipc_adapter_hwaddr = addr;
6260 		request.rsmipc_segment_cookie = sharedp;
6261 
6262 		e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6263 		if (e) {
6264 			rsmsharelock_acquire(seg);
6265 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6266 			    RSMSI_STATE_NEW);
6267 			rsmsharelock_release(seg);
6268 			rsmimport_rm(seg);
6269 			seg->s_adapter = NULL;
6270 			rsmka_release_adapter(adapter);
6271 			seg->s_state = RSM_STATE_NEW;
6272 			DBG_PRINTF((category, RSM_ERR,
6273 			    "rsm_connect done:rsmipc_send failed %d\n", e));
6274 			return (e);
6275 		}
6276 
6277 		if (reply.rsmipc_status != RSM_SUCCESS) {
6278 			rsmsharelock_acquire(seg);
6279 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6280 			    RSMSI_STATE_NEW);
6281 			rsmsharelock_release(seg);
6282 			rsmimport_rm(seg);
6283 			seg->s_adapter = NULL;
6284 			rsmka_release_adapter(adapter);
6285 			seg->s_state = RSM_STATE_NEW;
6286 			DBG_PRINTF((category, RSM_ERR,
6287 			    "rsm_connect done:rsmipc_send reply err %d\n",
6288 			    reply.rsmipc_status));
6289 			return (reply.rsmipc_status);
6290 		}
6291 
6292 		rsmsharelock_acquire(seg);
6293 		/* store the information recvd into the shared data struct */
6294 		sharedp->rsmsi_mode = reply.rsmipc_mode;
6295 		sharedp->rsmsi_uid = reply.rsmipc_uid;
6296 		sharedp->rsmsi_gid = reply.rsmipc_gid;
6297 		sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6298 		sharedp->rsmsi_cookie = sharedp;
6299 	}
6300 
6301 	rsmsharelock_release(seg);
6302 
6303 	/*
6304 	 * Get the segment lock and check for a force disconnect
6305 	 * from the export side which would have changed the state
6306 	 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6307 	 * force disconnect will be held off until the connection
6308 	 * has completed.
6309 	 */
6310 	rsmseglock_acquire(seg);
6311 	rsmsharelock_acquire(seg);
6312 	ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6313 	    seg->s_state == RSM_STATE_ABORT_CONNECT);
6314 
6315 	shared_cookie = sharedp->rsmsi_cookie;
6316 
6317 	if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6318 	    (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6319 		seg->s_state = RSM_STATE_NEW;
6320 		seg->s_adapter = NULL;
6321 		rsmsharelock_release(seg);
6322 		rsmseglock_release(seg);
6323 		rsmimport_rm(seg);
6324 		rsmka_release_adapter(adapter);
6325 
6326 		rsmsharelock_acquire(seg);
6327 		if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6328 			/*
6329 			 * set a flag indicating abort handling has been
6330 			 * done
6331 			 */
6332 			sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6333 			rsmsharelock_release(seg);
6334 			/* send a message to exporter - only once */
6335 			(void) rsm_send_notimporting(msg->nodeid,
6336 			    msg->key, shared_cookie);
6337 			rsmsharelock_acquire(seg);
6338 			/*
6339 			 * wake up any waiting importers and inform that
6340 			 * connection has been aborted
6341 			 */
6342 			cv_broadcast(&sharedp->rsmsi_cv);
6343 		}
6344 		rsmsharelock_release(seg);
6345 
6346 		DBG_PRINTF((category, RSM_ERR,
6347 		    "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6348 		return (RSMERR_INTERRUPTED);
6349 	}
6350 
6351 
6352 	/*
6353 	 * We need to verify that this process has access
6354 	 */
6355 	e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6356 	    access & sharedp->rsmsi_mode,
6357 	    (int)(msg->perm & RSM_PERM_RDWR), cred);
6358 	if (e) {
6359 		rsmsharelock_release(seg);
6360 		seg->s_state = RSM_STATE_NEW;
6361 		seg->s_adapter = NULL;
6362 		rsmseglock_release(seg);
6363 		rsmimport_rm(seg);
6364 		rsmka_release_adapter(adapter);
6365 		/*
6366 		 * No need to lock segment it has been removed
6367 		 * from the hash table
6368 		 */
6369 		rsmsharelock_acquire(seg);
6370 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6371 			rsmsharelock_release(seg);
6372 			/* this is the first importer */
6373 
6374 			(void) rsm_send_notimporting(msg->nodeid, msg->key,
6375 			    shared_cookie);
6376 			rsmsharelock_acquire(seg);
6377 			sharedp->rsmsi_state = RSMSI_STATE_NEW;
6378 			cv_broadcast(&sharedp->rsmsi_cv);
6379 		}
6380 		rsmsharelock_release(seg);
6381 
6382 		DBG_PRINTF((category, RSM_ERR,
6383 		    "rsm_connect done: ipcaccess failed\n"));
6384 		return (RSMERR_PERM_DENIED);
6385 	}
6386 
6387 	/* update state and cookie */
6388 	seg->s_segid = sharedp->rsmsi_segid;
6389 	seg->s_len = sharedp->rsmsi_seglen;
6390 	seg->s_mode = access & sharedp->rsmsi_mode;
6391 	seg->s_pid = ddi_get_pid();
6392 	seg->s_mapinfo = NULL;
6393 
6394 	if (seg->s_node != my_nodeid) {
6395 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6396 			e = adapter->rsmpi_ops->rsm_connect(
6397 			    adapter->rsmpi_handle,
6398 			    addr, seg->s_segid, &sharedp->rsmsi_handle);
6399 
6400 			if (e != RSM_SUCCESS) {
6401 				seg->s_state = RSM_STATE_NEW;
6402 				seg->s_adapter = NULL;
6403 				rsmsharelock_release(seg);
6404 				rsmseglock_release(seg);
6405 				rsmimport_rm(seg);
6406 				rsmka_release_adapter(adapter);
6407 				/*
6408 				 *  inform the exporter to delete this importer
6409 				 */
6410 				(void) rsm_send_notimporting(msg->nodeid,
6411 				    msg->key, shared_cookie);
6412 
6413 				/*
6414 				 * Now inform any waiting importers to
6415 				 * retry connect. This needs to be done
6416 				 * after sending notimporting so that
6417 				 * the notimporting is sent before a waiting
6418 				 * importer sends a segconnect while retrying
6419 				 *
6420 				 * No need to lock segment it has been removed
6421 				 * from the hash table
6422 				 */
6423 
6424 				rsmsharelock_acquire(seg);
6425 				sharedp->rsmsi_state = RSMSI_STATE_NEW;
6426 				cv_broadcast(&sharedp->rsmsi_cv);
6427 				rsmsharelock_release(seg);
6428 
6429 				DBG_PRINTF((category, RSM_ERR,
6430 				    "rsm_connect error %d\n", e));
6431 				if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6432 					return (
6433 					    RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6434 				else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6435 				    (e == RSMERR_UNKNOWN_RSM_ADDR))
6436 					return (RSMERR_REMOTE_NODE_UNREACHABLE);
6437 				else
6438 					return (e);
6439 			}
6440 
6441 		}
6442 		seg->s_handle.in = sharedp->rsmsi_handle;
6443 
6444 	}
6445 
6446 	seg->s_state = RSM_STATE_CONNECT;
6447 
6448 
6449 	seg->s_flags &= ~RSM_IMPORT_DUMMY;	/* clear dummy flag */
6450 	if (bar_va) {
6451 		/* increment generation number on barrier page */
6452 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6453 		/* return user off into barrier page where status will be */
6454 		msg->off = (int)seg->s_hdr.rsmrc_num;
6455 		msg->gnum = bar_va[msg->off]; 	/* gnum race */
6456 	} else {
6457 		msg->off = 0;
6458 		msg->gnum = 0;	/* gnum race */
6459 	}
6460 
6461 	msg->len = (int)sharedp->rsmsi_seglen;
6462 	msg->rnum = seg->s_minor;
6463 	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6464 	rsmsharelock_release(seg);
6465 	rsmseglock_release(seg);
6466 
6467 	/* Return back to user the segment size & perm in case it's needed */
6468 
6469 #ifdef _MULTI_DATAMODEL
6470 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6471 		rsm_ioctlmsg32_t msg32;
6472 
6473 		if (msg->len > UINT_MAX)
6474 			msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6475 		else
6476 			msg32.len = msg->len;
6477 		msg32.off = msg->off;
6478 		msg32.perm = msg->perm;
6479 		msg32.gnum = msg->gnum;
6480 		msg32.rnum = msg->rnum;
6481 
6482 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6483 		    "rsm_connect done\n"));
6484 
6485 		if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6486 		    sizeof (msg32), mode))
6487 			return (RSMERR_BAD_ADDR);
6488 		else
6489 			return (RSM_SUCCESS);
6490 	}
6491 #endif
6492 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6493 
6494 	if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6495 	    mode))
6496 		return (RSMERR_BAD_ADDR);
6497 	else
6498 		return (RSM_SUCCESS);
6499 }
6500 
6501 static int
6502 rsm_unmap(rsmseg_t *seg)
6503 {
6504 	int			err;
6505 	adapter_t		*adapter;
6506 	rsm_import_share_t	*sharedp;
6507 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6508 
6509 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6510 	    "rsm_unmap enter %u\n", seg->s_segid));
6511 
6512 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6513 
6514 	/* assert seg is locked */
6515 	ASSERT(rsmseglock_held(seg));
6516 	ASSERT(seg->s_state != RSM_STATE_MAPPING);
6517 
6518 	if ((seg->s_state != RSM_STATE_ACTIVE) &&
6519 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6520 		/* segment unmap has already been done */
6521 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6522 		return (RSM_SUCCESS);
6523 	}
6524 
6525 	sharedp = seg->s_share;
6526 
6527 	rsmsharelock_acquire(seg);
6528 
6529 	/*
6530 	 *	- shared data struct is in MAPPED or MAP_QUIESCE state
6531 	 */
6532 
6533 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6534 	    sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6535 
6536 	/*
6537 	 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6538 	 * the segment cookie list was NULL; but it is always NULL when
6539 	 * called from rsmmap_unmap and won't be NULL when called for
6540 	 * a force disconnect - so the check for NULL cookie list was removed
6541 	 */
6542 
6543 	ASSERT(sharedp->rsmsi_mapcnt > 0);
6544 
6545 	sharedp->rsmsi_mapcnt--;
6546 
6547 	if (sharedp->rsmsi_mapcnt == 0) {
6548 		if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6549 			/* unmap the shared RSMPI mapping */
6550 			adapter = seg->s_adapter;
6551 			if (seg->s_node != my_nodeid) {
6552 				ASSERT(sharedp->rsmsi_handle != NULL);
6553 				err = adapter->rsmpi_ops->
6554 				    rsm_unmap(sharedp->rsmsi_handle);
6555 				DBG_PRINTF((category, RSM_DEBUG,
6556 				    "rsm_unmap: rsmpi unmap %d\n", err));
6557 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6558 				sharedp->rsmsi_mapinfo = NULL;
6559 			}
6560 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6561 		} else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6562 			sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6563 		}
6564 	}
6565 
6566 	rsmsharelock_release(seg);
6567 
6568 	/*
6569 	 * The s_cookie field is used to store the cookie returned from the
6570 	 * ddi_umem_lock when binding the pages for an export segment. This
6571 	 * is the primary use of the s_cookie field and does not normally
6572 	 * pertain to any importing segment except in the loopback case.
6573 	 * For the loopback case, the import segment and export segment are
6574 	 * on the same node, the s_cookie field of the segment structure for
6575 	 * the importer is initialized to the s_cookie field in the exported
6576 	 * segment during the map operation and is used during the call to
6577 	 * devmap_umem_setup for the import mapping.
6578 	 * Thus, during unmap, we simply need to set s_cookie to NULL to
6579 	 * indicate that the mapping no longer exists.
6580 	 */
6581 	seg->s_cookie = NULL;
6582 
6583 	seg->s_mapinfo = NULL;
6584 
6585 	if (seg->s_state == RSM_STATE_ACTIVE)
6586 		seg->s_state = RSM_STATE_CONNECT;
6587 	else
6588 		seg->s_state = RSM_STATE_CONN_QUIESCE;
6589 
6590 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6591 
6592 	return (RSM_SUCCESS);
6593 }
6594 
6595 /*
6596  * cookie returned here if not null indicates that it is
6597  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6598  * message.
6599  */
6600 static int
6601 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6602 {
6603 	int			e;
6604 	adapter_t		*adapter;
6605 	rsm_import_share_t	*sharedp;
6606 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6607 
6608 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6609 	    "rsm_closeconnection enter\n"));
6610 
6611 	*cookie = (void *)NULL;
6612 
6613 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6614 
6615 	/* assert seg is locked */
6616 	ASSERT(rsmseglock_held(seg));
6617 
6618 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6619 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6620 		    "rsm_closeconnection done: already disconnected\n"));
6621 		return (RSM_SUCCESS);
6622 	}
6623 
6624 	/* wait for all putv/getv ops to get done */
6625 	while (seg->s_rdmacnt > 0) {
6626 		cv_wait(&seg->s_cv, &seg->s_lock);
6627 	}
6628 
6629 	(void) rsm_unmap(seg);
6630 
6631 	ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6632 	    seg->s_state == RSM_STATE_CONN_QUIESCE);
6633 
6634 	adapter = seg->s_adapter;
6635 	sharedp = seg->s_share;
6636 
6637 	ASSERT(sharedp != NULL);
6638 
6639 	rsmsharelock_acquire(seg);
6640 
6641 	/*
6642 	 * Disconnect on adapter
6643 	 *
6644 	 * The current algorithm is stateless, I don't have to contact
6645 	 * server when I go away. He only gives me permissions. Of course,
6646 	 * the adapters will talk to terminate the connect.
6647 	 *
6648 	 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6649 	 */
6650 	if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6651 	    (sharedp->rsmsi_node != my_nodeid)) {
6652 
6653 		if (sharedp->rsmsi_refcnt == 1) {
6654 			/* this is the last importer */
6655 			ASSERT(sharedp->rsmsi_mapcnt == 0);
6656 
6657 			e = adapter->rsmpi_ops->
6658 			    rsm_disconnect(sharedp->rsmsi_handle);
6659 			if (e != RSM_SUCCESS) {
6660 				DBG_PRINTF((category, RSM_DEBUG,
6661 				    "rsm:disconnect failed seg=%x:err=%d\n",
6662 				    seg->s_key, e));
6663 			}
6664 		}
6665 	}
6666 
6667 	seg->s_handle.in = NULL;
6668 
6669 	sharedp->rsmsi_refcnt--;
6670 
6671 	if (sharedp->rsmsi_refcnt == 0) {
6672 		*cookie = (void *)sharedp->rsmsi_cookie;
6673 		sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6674 		sharedp->rsmsi_handle = NULL;
6675 		rsmsharelock_release(seg);
6676 
6677 		/* clean up the shared data structure */
6678 		mutex_destroy(&sharedp->rsmsi_lock);
6679 		cv_destroy(&sharedp->rsmsi_cv);
6680 		kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6681 
6682 	} else {
6683 		rsmsharelock_release(seg);
6684 	}
6685 
6686 	/* increment generation number on barrier page */
6687 	if (bar_va) {
6688 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6689 	}
6690 
6691 	/*
6692 	 * The following needs to be done after any
6693 	 * rsmsharelock calls which use seg->s_share.
6694 	 */
6695 	seg->s_share = NULL;
6696 
6697 	seg->s_state = RSM_STATE_DISCONNECT;
6698 	/* signal anyone waiting in the CONN_QUIESCE state */
6699 	cv_broadcast(&seg->s_cv);
6700 
6701 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6702 	    "rsm_closeconnection done\n"));
6703 
6704 	return (RSM_SUCCESS);
6705 }
6706 
6707 int
6708 rsm_disconnect(rsmseg_t *seg)
6709 {
6710 	rsmipc_request_t	request;
6711 	void			*shared_cookie;
6712 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6713 
6714 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6715 
6716 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6717 
6718 	/* assert seg isn't locked */
6719 	ASSERT(!rsmseglock_held(seg));
6720 
6721 
6722 	/* Remove segment from imported list */
6723 	rsmimport_rm(seg);
6724 
6725 	/* acquire the segment */
6726 	rsmseglock_acquire(seg);
6727 
6728 	/* wait until segment leaves the mapping state */
6729 	while (seg->s_state == RSM_STATE_MAPPING)
6730 		cv_wait(&seg->s_cv, &seg->s_lock);
6731 
6732 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6733 		seg->s_state = RSM_STATE_NEW;
6734 		rsmseglock_release(seg);
6735 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6736 		    "rsm_disconnect done: already disconnected\n"));
6737 		return (RSM_SUCCESS);
6738 	}
6739 
6740 	(void) rsm_closeconnection(seg, &shared_cookie);
6741 
6742 	/* update state */
6743 	seg->s_state = RSM_STATE_NEW;
6744 
6745 	if (shared_cookie != NULL) {
6746 		/*
6747 		 *  This is the last importer so inform the exporting node
6748 		 *  so this import can be deleted from the list of importers.
6749 		 */
6750 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6751 		request.rsmipc_key = seg->s_segid;
6752 		request.rsmipc_segment_cookie = shared_cookie;
6753 		rsmseglock_release(seg);
6754 		(void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6755 	} else {
6756 		rsmseglock_release(seg);
6757 	}
6758 
6759 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6760 
6761 	return (DDI_SUCCESS);
6762 }
6763 
6764 /*ARGSUSED*/
6765 static int
6766 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6767     struct pollhead **phpp)
6768 {
6769 	minor_t		rnum;
6770 	rsmresource_t	*res;
6771 	rsmseg_t 	*seg;
6772 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6773 
6774 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6775 
6776 	/* find minor, no lock */
6777 	rnum = getminor(dev);
6778 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
6779 
6780 	/* poll is supported only for export/import segments */
6781 	if ((res == NULL) || (res == RSMRC_RESERVED) ||
6782 	    (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6783 		return (ENXIO);
6784 	}
6785 
6786 	*reventsp = 0;
6787 
6788 	/*
6789 	 * An exported segment must be in state RSM_STATE_EXPORT; an
6790 	 * imported segment must be in state RSM_STATE_ACTIVE.
6791 	 */
6792 	seg = (rsmseg_t *)res;
6793 
6794 	if (seg->s_pollevent) {
6795 		*reventsp = POLLRDNORM;
6796 	} else if (!anyyet) {
6797 		/* cannot take segment lock here */
6798 		*phpp = &seg->s_poll;
6799 		seg->s_pollflag |= RSM_SEGMENT_POLL;
6800 	}
6801 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6802 	return (0);
6803 }
6804 
6805 
6806 
6807 /* ************************* IOCTL Commands ********************* */
6808 
6809 static rsmseg_t *
6810 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6811     rsm_resource_type_t type)
6812 {
6813 	/* get segment from resource handle */
6814 	rsmseg_t *seg;
6815 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6816 
6817 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6818 
6819 
6820 	if (res != RSMRC_RESERVED) {
6821 		seg = (rsmseg_t *)res;
6822 	} else {
6823 		/* Allocate segment now and bind it */
6824 		seg = rsmseg_alloc(rnum, credp);
6825 
6826 		/*
6827 		 * if DR pre-processing is going on or DR is in progress
6828 		 * then the new export segments should be in the NEW_QSCD state
6829 		 */
6830 		if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6831 			mutex_enter(&rsm_drv_data.drv_lock);
6832 			if ((rsm_drv_data.drv_state ==
6833 			    RSM_DRV_PREDEL_STARTED) ||
6834 			    (rsm_drv_data.drv_state ==
6835 			    RSM_DRV_PREDEL_COMPLETED) ||
6836 			    (rsm_drv_data.drv_state ==
6837 			    RSM_DRV_DR_IN_PROGRESS)) {
6838 				seg->s_state = RSM_STATE_NEW_QUIESCED;
6839 			}
6840 			mutex_exit(&rsm_drv_data.drv_lock);
6841 		}
6842 
6843 		rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6844 	}
6845 
6846 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6847 
6848 	return (seg);
6849 }
6850 
6851 static int
6852 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6853     int mode, cred_t *credp)
6854 {
6855 	int error;
6856 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6857 
6858 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6859 
6860 	arg = arg;
6861 	credp = credp;
6862 
6863 	ASSERT(seg != NULL);
6864 
6865 	switch (cmd) {
6866 	case RSM_IOCTL_BIND:
6867 		error = rsm_bind(seg, msg, arg, mode);
6868 		break;
6869 	case RSM_IOCTL_REBIND:
6870 		error = rsm_rebind(seg, msg);
6871 		break;
6872 	case RSM_IOCTL_UNBIND:
6873 		error = ENOTSUP;
6874 		break;
6875 	case RSM_IOCTL_PUBLISH:
6876 		error = rsm_publish(seg, msg, arg, mode);
6877 		break;
6878 	case RSM_IOCTL_REPUBLISH:
6879 		error = rsm_republish(seg, msg, mode);
6880 		break;
6881 	case RSM_IOCTL_UNPUBLISH:
6882 		error = rsm_unpublish(seg, 1);
6883 		break;
6884 	default:
6885 		error = EINVAL;
6886 		break;
6887 	}
6888 
6889 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6890 	    error));
6891 
6892 	return (error);
6893 }
6894 static int
6895 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6896     int mode, cred_t *credp)
6897 {
6898 	int error;
6899 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6900 
6901 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6902 
6903 	ASSERT(seg);
6904 
6905 	switch (cmd) {
6906 	case RSM_IOCTL_CONNECT:
6907 		error = rsm_connect(seg, msg, credp, arg, mode);
6908 		break;
6909 	default:
6910 		error = EINVAL;
6911 		break;
6912 	}
6913 
6914 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6915 	    error));
6916 	return (error);
6917 }
6918 
6919 static int
6920 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6921     int mode)
6922 {
6923 	int e;
6924 	adapter_t *adapter;
6925 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6926 
6927 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6928 
6929 
6930 	if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6931 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6932 		    "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6933 		return (RSMERR_CONN_ABORTED);
6934 	} else if (seg->s_node == my_nodeid) {
6935 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6936 		    "rsmbar_ioctl done: loopback\n"));
6937 		return (RSM_SUCCESS);
6938 	}
6939 
6940 	adapter = seg->s_adapter;
6941 
6942 	switch (cmd) {
6943 	case RSM_IOCTL_BAR_CHECK:
6944 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6945 		    "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6946 		return (bar_va ? RSM_SUCCESS : EINVAL);
6947 	case RSM_IOCTL_BAR_OPEN:
6948 		e = adapter->rsmpi_ops->
6949 		    rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6950 		break;
6951 	case RSM_IOCTL_BAR_ORDER:
6952 		e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6953 		break;
6954 	case RSM_IOCTL_BAR_CLOSE:
6955 		e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6956 		break;
6957 	default:
6958 		e = EINVAL;
6959 		break;
6960 	}
6961 
6962 	if (e == RSM_SUCCESS) {
6963 #ifdef _MULTI_DATAMODEL
6964 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6965 			rsm_ioctlmsg32_t msg32;
6966 			int i;
6967 
6968 			for (i = 0; i < 4; i++) {
6969 				msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6970 			}
6971 
6972 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6973 			    "rsmbar_ioctl done\n"));
6974 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6975 			    sizeof (msg32), mode))
6976 				return (RSMERR_BAD_ADDR);
6977 			else
6978 				return (RSM_SUCCESS);
6979 		}
6980 #endif
6981 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6982 		    "rsmbar_ioctl done\n"));
6983 		if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6984 		    sizeof (*msg), mode))
6985 			return (RSMERR_BAD_ADDR);
6986 		else
6987 			return (RSM_SUCCESS);
6988 	}
6989 
6990 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6991 	    "rsmbar_ioctl done: error=%d\n", e));
6992 
6993 	return (e);
6994 }
6995 
6996 /*
6997  * Ring the doorbell of the export segment to which this segment is
6998  * connected.
6999  */
7000 static int
7001 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7002 {
7003 	int e = 0;
7004 	rsmipc_request_t request;
7005 
7006 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7007 
7008 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7009 
7010 	request.rsmipc_key = seg->s_segid;
7011 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7012 	request.rsmipc_segment_cookie = NULL;
7013 	e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7014 
7015 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7016 	    "exportbell_ioctl done: %d\n", e));
7017 
7018 	return (e);
7019 }
7020 
7021 /*
7022  * Ring the doorbells of all segments importing this segment
7023  */
7024 static int
7025 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7026 {
7027 	importing_token_t	*token = NULL;
7028 	rsmipc_request_t	request;
7029 	int			index;
7030 
7031 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7032 
7033 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7034 
7035 	ASSERT(seg->s_state != RSM_STATE_NEW &&
7036 	    seg->s_state != RSM_STATE_NEW_QUIESCED);
7037 
7038 	request.rsmipc_key = seg->s_segid;
7039 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7040 
7041 	index = rsmhash(seg->s_segid);
7042 
7043 	token = importer_list.bucket[index];
7044 
7045 	while (token != NULL) {
7046 		if (seg->s_key == token->key) {
7047 			request.rsmipc_segment_cookie =
7048 			    token->import_segment_cookie;
7049 			(void) rsmipc_send(token->importing_node,
7050 			    &request, RSM_NO_REPLY);
7051 		}
7052 		token = token->next;
7053 	}
7054 
7055 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7056 	    "importbell_ioctl done\n"));
7057 	return (RSM_SUCCESS);
7058 }
7059 
7060 static int
7061 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7062     rsm_poll_event_t **eventspp, int mode)
7063 {
7064 	rsm_poll_event_t	*evlist = NULL;
7065 	size_t			evlistsz;
7066 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7067 
7068 #ifdef _MULTI_DATAMODEL
7069 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7070 		int i;
7071 		rsm_consume_event_msg32_t cemsg32 = {0};
7072 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7073 		rsm_poll_event32_t	*evlist32;
7074 		size_t			evlistsz32;
7075 
7076 		/* copyin the ioctl message */
7077 		if (ddi_copyin(arg, (caddr_t)&cemsg32,
7078 		    sizeof (rsm_consume_event_msg32_t), mode)) {
7079 			DBG_PRINTF((category, RSM_ERR,
7080 			    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7081 			return (RSMERR_BAD_ADDR);
7082 		}
7083 		msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7084 		msgp->numents = (int)cemsg32.numents;
7085 
7086 		evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7087 		/*
7088 		 * If numents is large alloc events list on heap otherwise
7089 		 * use the address of array that was passed in.
7090 		 */
7091 		if (msgp->numents > RSM_MAX_POLLFDS) {
7092 			if (msgp->numents > max_segs) { /* validate numents */
7093 				DBG_PRINTF((category, RSM_ERR,
7094 				    "consumeevent_copyin: "
7095 				    "RSMERR_BAD_ARGS_ERRORS\n"));
7096 				return (RSMERR_BAD_ARGS_ERRORS);
7097 			}
7098 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7099 		} else {
7100 			evlist32 = event32;
7101 		}
7102 
7103 		/* copyin the seglist into the rsm_poll_event32_t array */
7104 		if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7105 		    evlistsz32, mode)) {
7106 			if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7107 				kmem_free(evlist32, evlistsz32);
7108 			}
7109 			DBG_PRINTF((category, RSM_ERR,
7110 			    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7111 			return (RSMERR_BAD_ADDR);
7112 		}
7113 
7114 		/* evlist and evlistsz are based on rsm_poll_event_t type */
7115 		evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7116 
7117 		if (msgp->numents > RSM_MAX_POLLFDS) {
7118 			evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7119 			*eventspp = evlist;
7120 		} else {
7121 			evlist = *eventspp;
7122 		}
7123 		/*
7124 		 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7125 		 * array
7126 		 */
7127 		for (i = 0; i < msgp->numents; i++) {
7128 			evlist[i].rnum = evlist32[i].rnum;
7129 			evlist[i].fdsidx = evlist32[i].fdsidx;
7130 			evlist[i].revent = evlist32[i].revent;
7131 		}
7132 		/* free the temp 32-bit event list */
7133 		if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7134 			kmem_free(evlist32, evlistsz32);
7135 		}
7136 
7137 		return (RSM_SUCCESS);
7138 	}
7139 #endif
7140 	/* copyin the ioctl message */
7141 	if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7142 	    mode)) {
7143 		DBG_PRINTF((category, RSM_ERR,
7144 		    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7145 		return (RSMERR_BAD_ADDR);
7146 	}
7147 	/*
7148 	 * If numents is large alloc events list on heap otherwise
7149 	 * use the address of array that was passed in.
7150 	 */
7151 	if (msgp->numents > RSM_MAX_POLLFDS) {
7152 		if (msgp->numents > max_segs) { /* validate numents */
7153 			DBG_PRINTF((category, RSM_ERR,
7154 			    "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7155 			return (RSMERR_BAD_ARGS_ERRORS);
7156 		}
7157 		evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7158 		evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7159 		*eventspp  = evlist;
7160 	}
7161 
7162 	/* copyin the seglist */
7163 	if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7164 	    sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7165 		if (evlist) {
7166 			kmem_free(evlist, evlistsz);
7167 			*eventspp = NULL;
7168 		}
7169 		DBG_PRINTF((category, RSM_ERR,
7170 		    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7171 		return (RSMERR_BAD_ADDR);
7172 	}
7173 
7174 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7175 	    "consumeevent_copyin done\n"));
7176 	return (RSM_SUCCESS);
7177 }
7178 
7179 static int
7180 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7181     rsm_poll_event_t *eventsp, int mode)
7182 {
7183 	size_t			evlistsz;
7184 	int			err = RSM_SUCCESS;
7185 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7186 
7187 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7188 	    "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7189 	    msgp->numents, eventsp));
7190 
7191 #ifdef _MULTI_DATAMODEL
7192 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7193 		int i;
7194 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7195 		rsm_poll_event32_t	*evlist32;
7196 		size_t			evlistsz32;
7197 
7198 		evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7199 		if (msgp->numents > RSM_MAX_POLLFDS) {
7200 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7201 		} else {
7202 			evlist32 = event32;
7203 		}
7204 
7205 		/*
7206 		 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7207 		 * array
7208 		 */
7209 		for (i = 0; i < msgp->numents; i++) {
7210 			evlist32[i].rnum = eventsp[i].rnum;
7211 			evlist32[i].fdsidx = eventsp[i].fdsidx;
7212 			evlist32[i].revent = eventsp[i].revent;
7213 		}
7214 
7215 		if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7216 		    evlistsz32, mode)) {
7217 			err = RSMERR_BAD_ADDR;
7218 		}
7219 
7220 		if (msgp->numents > RSM_MAX_POLLFDS) {
7221 			if (evlist32) {	/* free the temp 32-bit event list */
7222 				kmem_free(evlist32, evlistsz32);
7223 			}
7224 			/*
7225 			 * eventsp and evlistsz are based on rsm_poll_event_t
7226 			 * type
7227 			 */
7228 			evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7229 			/* event list on the heap and needs to be freed here */
7230 			if (eventsp) {
7231 				kmem_free(eventsp, evlistsz);
7232 			}
7233 		}
7234 
7235 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7236 		    "consumeevent_copyout done: err=%d\n", err));
7237 		return (err);
7238 	}
7239 #endif
7240 	evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7241 
7242 	if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7243 	    mode)) {
7244 		err = RSMERR_BAD_ADDR;
7245 	}
7246 
7247 	if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7248 		/* event list on the heap and needs to be freed here */
7249 		kmem_free(eventsp, evlistsz);
7250 	}
7251 
7252 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7253 	    "consumeevent_copyout done: err=%d\n", err));
7254 	return (err);
7255 }
7256 
7257 static int
7258 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7259 {
7260 	int	rc;
7261 	int	i;
7262 	minor_t	rnum;
7263 	rsm_consume_event_msg_t	msg = {0};
7264 	rsmseg_t		*seg;
7265 	rsm_poll_event_t	*event_list;
7266 	rsm_poll_event_t	events[RSM_MAX_POLLFDS];
7267 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7268 
7269 	event_list = events;
7270 
7271 	if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7272 	    RSM_SUCCESS) {
7273 		return (rc);
7274 	}
7275 
7276 	for (i = 0; i < msg.numents; i++) {
7277 		rnum = event_list[i].rnum;
7278 		event_list[i].revent = 0;
7279 		/* get the segment structure */
7280 		seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7281 		if (seg) {
7282 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7283 			    "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7284 			    seg));
7285 			if (seg->s_pollevent) {
7286 				/* consume the event */
7287 				atomic_add_32(&seg->s_pollevent, -1);
7288 				event_list[i].revent = POLLRDNORM;
7289 			}
7290 			rsmseglock_release(seg);
7291 		}
7292 	}
7293 
7294 	if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7295 	    RSM_SUCCESS) {
7296 		return (rc);
7297 	}
7298 
7299 	return (RSM_SUCCESS);
7300 }
7301 
7302 static int
7303 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7304 {
7305 	int size;
7306 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7307 
7308 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7309 
7310 #ifdef _MULTI_DATAMODEL
7311 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7312 		rsmka_iovec32_t	*iovec32, *iovec32_base;
7313 		int i;
7314 
7315 		size = count * sizeof (rsmka_iovec32_t);
7316 		iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7317 		if (ddi_copyin((caddr_t)user_vec,
7318 		    (caddr_t)iovec32, size, mode)) {
7319 			kmem_free(iovec32, size);
7320 			DBG_PRINTF((category, RSM_DEBUG,
7321 			    "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7322 			return (RSMERR_BAD_ADDR);
7323 		}
7324 
7325 		for (i = 0; i < count; i++, iovec++, iovec32++) {
7326 			iovec->io_type = (int)iovec32->io_type;
7327 			if (iovec->io_type == RSM_HANDLE_TYPE)
7328 				iovec->local.segid = (rsm_memseg_id_t)
7329 				    iovec32->local;
7330 			else
7331 				iovec->local.vaddr =
7332 				    (caddr_t)(uintptr_t)iovec32->local;
7333 			iovec->local_offset = (size_t)iovec32->local_offset;
7334 			iovec->remote_offset = (size_t)iovec32->remote_offset;
7335 			iovec->transfer_len = (size_t)iovec32->transfer_len;
7336 
7337 		}
7338 		kmem_free(iovec32_base, size);
7339 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7340 		    "iovec_copyin done\n"));
7341 		return (DDI_SUCCESS);
7342 	}
7343 #endif
7344 
7345 	size = count * sizeof (rsmka_iovec_t);
7346 	if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7347 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7348 		    "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7349 		return (RSMERR_BAD_ADDR);
7350 	}
7351 
7352 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7353 
7354 	return (DDI_SUCCESS);
7355 }
7356 
7357 
7358 static int
7359 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7360 {
7361 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7362 
7363 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7364 
7365 #ifdef _MULTI_DATAMODEL
7366 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7367 		rsmka_scat_gath32_t sg_io32;
7368 
7369 		if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7370 		    mode)) {
7371 			DBG_PRINTF((category, RSM_DEBUG,
7372 			    "sgio_copyin done: returning EFAULT\n"));
7373 			return (RSMERR_BAD_ADDR);
7374 		}
7375 		sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7376 		sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7377 		sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7378 		sg_io->flags = (size_t)sg_io32.flags;
7379 		sg_io->remote_handle = (rsm_memseg_import_handle_t)
7380 		    (uintptr_t)sg_io32.remote_handle;
7381 		sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7382 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7383 		    "sgio_copyin done\n"));
7384 		return (DDI_SUCCESS);
7385 	}
7386 #endif
7387 	if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7388 	    mode)) {
7389 		DBG_PRINTF((category, RSM_DEBUG,
7390 		    "sgio_copyin done: returning EFAULT\n"));
7391 		return (RSMERR_BAD_ADDR);
7392 	}
7393 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7394 	return (DDI_SUCCESS);
7395 }
7396 
7397 static int
7398 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7399 {
7400 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7401 
7402 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7403 	    "sgio_resid_copyout enter\n"));
7404 
7405 #ifdef _MULTI_DATAMODEL
7406 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7407 		rsmka_scat_gath32_t sg_io32;
7408 
7409 		sg_io32.io_residual_count = sg_io->io_residual_count;
7410 		sg_io32.flags = sg_io->flags;
7411 
7412 		if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7413 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7414 		    sizeof (uint32_t), mode)) {
7415 
7416 			DBG_PRINTF((category, RSM_ERR,
7417 			    "sgio_resid_copyout error: rescnt\n"));
7418 			return (RSMERR_BAD_ADDR);
7419 		}
7420 
7421 		if (ddi_copyout((caddr_t)&sg_io32.flags,
7422 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7423 		    sizeof (uint32_t), mode)) {
7424 
7425 			DBG_PRINTF((category, RSM_ERR,
7426 			    "sgio_resid_copyout error: flags\n"));
7427 			return (RSMERR_BAD_ADDR);
7428 		}
7429 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7430 		    "sgio_resid_copyout done\n"));
7431 		return (DDI_SUCCESS);
7432 	}
7433 #endif
7434 	if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7435 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7436 	    sizeof (ulong_t), mode)) {
7437 
7438 		DBG_PRINTF((category, RSM_ERR,
7439 		    "sgio_resid_copyout error:rescnt\n"));
7440 		return (RSMERR_BAD_ADDR);
7441 	}
7442 
7443 	if (ddi_copyout((caddr_t)&sg_io->flags,
7444 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7445 	    sizeof (uint_t), mode)) {
7446 
7447 		DBG_PRINTF((category, RSM_ERR,
7448 		    "sgio_resid_copyout error:flags\n"));
7449 		return (RSMERR_BAD_ADDR);
7450 	}
7451 
7452 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7453 	return (DDI_SUCCESS);
7454 }
7455 
7456 
7457 static int
7458 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7459 {
7460 	rsmka_scat_gath_t	sg_io;
7461 	rsmka_iovec_t		ka_iovec_arr[RSM_MAX_IOVLEN];
7462 	rsmka_iovec_t		*ka_iovec;
7463 	rsmka_iovec_t		*ka_iovec_start;
7464 	rsmpi_scat_gath_t	rsmpi_sg_io;
7465 	rsmpi_iovec_t		iovec_arr[RSM_MAX_IOVLEN];
7466 	rsmpi_iovec_t		*iovec;
7467 	rsmpi_iovec_t		*iovec_start = NULL;
7468 	rsmapi_access_entry_t	*acl;
7469 	rsmresource_t		*res;
7470 	minor_t			rnum;
7471 	rsmseg_t		*im_seg, *ex_seg;
7472 	int			e;
7473 	int			error = 0;
7474 	uint_t			i;
7475 	uint_t			iov_proc = 0; /* num of iovecs processed */
7476 	size_t			size = 0;
7477 	size_t			ka_size;
7478 
7479 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7480 
7481 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7482 
7483 	credp = credp;
7484 
7485 	/*
7486 	 * Copyin the scatter/gather structure  and build new structure
7487 	 * for rsmpi.
7488 	 */
7489 	e = sgio_copyin(arg, &sg_io, mode);
7490 	if (e != DDI_SUCCESS) {
7491 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7492 		    "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7493 		return (e);
7494 	}
7495 
7496 	if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7497 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7498 		    "rsm_iovec_ioctl done: request_count(%d) too large\n",
7499 		    sg_io.io_request_count));
7500 		return (RSMERR_BAD_SGIO);
7501 	}
7502 
7503 	rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7504 	rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7505 	rsmpi_sg_io.io_segflg = 0;
7506 
7507 	/* Allocate memory and copyin io vector array  */
7508 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7509 		ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7510 		ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7511 	} else {
7512 		ka_iovec_start = ka_iovec = ka_iovec_arr;
7513 	}
7514 	e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7515 	    sg_io.io_request_count, mode);
7516 	if (e != DDI_SUCCESS) {
7517 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7518 			kmem_free(ka_iovec, ka_size);
7519 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7520 		    "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7521 		return (e);
7522 	}
7523 
7524 	/* get the import segment descriptor */
7525 	rnum = getminor(dev);
7526 	res = rsmresource_lookup(rnum, RSM_LOCK);
7527 
7528 	/*
7529 	 * The following sequence of locking may (or MAY NOT) cause a
7530 	 * deadlock but this is currently not addressed here since the
7531 	 * implementation will be changed to incorporate the use of
7532 	 * reference counting for both the import and the export segments.
7533 	 */
7534 
7535 	/* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7536 
7537 	im_seg = (rsmseg_t *)res;
7538 
7539 	if (im_seg == NULL) {
7540 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7541 			kmem_free(ka_iovec, ka_size);
7542 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7543 		    "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7544 		return (EINVAL);
7545 	}
7546 	/* putv/getv supported is supported only on import segments */
7547 	if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7548 		rsmseglock_release(im_seg);
7549 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7550 			kmem_free(ka_iovec, ka_size);
7551 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7552 		    "rsm_iovec_ioctl done: not an import segment\n"));
7553 		return (EINVAL);
7554 	}
7555 
7556 	/*
7557 	 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7558 	 * as well as wait for a local DR to complete.
7559 	 */
7560 	while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7561 	    (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7562 	    (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7563 		if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7564 			DBG_PRINTF((category, RSM_DEBUG,
7565 			    "rsm_iovec_ioctl done: cv_wait INTR"));
7566 			rsmseglock_release(im_seg);
7567 			return (RSMERR_INTERRUPTED);
7568 		}
7569 	}
7570 
7571 	if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7572 	    (im_seg->s_state != RSM_STATE_ACTIVE)) {
7573 
7574 		ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7575 		    im_seg->s_state == RSM_STATE_NEW);
7576 
7577 		DBG_PRINTF((category, RSM_DEBUG,
7578 		    "rsm_iovec_ioctl done: im_seg not conn/map"));
7579 		rsmseglock_release(im_seg);
7580 		e = RSMERR_BAD_SGIO;
7581 		goto out;
7582 	}
7583 
7584 	im_seg->s_rdmacnt++;
7585 	rsmseglock_release(im_seg);
7586 
7587 	/*
7588 	 * Allocate and set up the io vector for rsmpi
7589 	 */
7590 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7591 		size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7592 		iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7593 	} else {
7594 		iovec_start = iovec = iovec_arr;
7595 	}
7596 
7597 	rsmpi_sg_io.iovec = iovec;
7598 	for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7599 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7600 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7601 
7602 			if (ex_seg == NULL) {
7603 				e = RSMERR_BAD_SGIO;
7604 				break;
7605 			}
7606 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7607 
7608 			acl = ex_seg->s_acl;
7609 			if (acl[0].ae_permission == 0) {
7610 				struct buf *xbuf;
7611 				dev_t sdev = 0;
7612 
7613 				xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7614 				    0, ex_seg->s_len, B_WRITE,
7615 				    sdev, 0, NULL, DDI_UMEM_SLEEP);
7616 
7617 				ASSERT(xbuf != NULL);
7618 
7619 				iovec->local_mem.ms_type = RSM_MEM_BUF;
7620 				iovec->local_mem.ms_memory.bp = xbuf;
7621 			} else {
7622 				iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7623 				iovec->local_mem.ms_memory.handle =
7624 				    ex_seg->s_handle.out;
7625 			}
7626 			ex_seg->s_rdmacnt++; /* refcnt the handle */
7627 			rsmseglock_release(ex_seg);
7628 		} else {
7629 			iovec->local_mem.ms_type = RSM_MEM_VADDR;
7630 			iovec->local_mem.ms_memory.vr.vaddr =
7631 			    ka_iovec->local.vaddr;
7632 		}
7633 
7634 		iovec->local_offset = ka_iovec->local_offset;
7635 		iovec->remote_handle = im_seg->s_handle.in;
7636 		iovec->remote_offset = ka_iovec->remote_offset;
7637 		iovec->transfer_length = ka_iovec->transfer_len;
7638 		iovec++;
7639 		ka_iovec++;
7640 	}
7641 
7642 	if (iov_proc <  sg_io.io_request_count) {
7643 		/* error while processing handle */
7644 		rsmseglock_acquire(im_seg);
7645 		im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7646 		if (im_seg->s_rdmacnt == 0) {
7647 			cv_broadcast(&im_seg->s_cv);
7648 		}
7649 		rsmseglock_release(im_seg);
7650 		goto out;
7651 	}
7652 
7653 	/* call rsmpi */
7654 	if (cmd == RSM_IOCTL_PUTV)
7655 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7656 		    im_seg->s_adapter->rsmpi_handle,
7657 		    &rsmpi_sg_io);
7658 	else if (cmd == RSM_IOCTL_GETV)
7659 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7660 		    im_seg->s_adapter->rsmpi_handle,
7661 		    &rsmpi_sg_io);
7662 	else {
7663 		e = EINVAL;
7664 		DBG_PRINTF((category, RSM_DEBUG,
7665 		    "iovec_ioctl: bad command = %x\n", cmd));
7666 	}
7667 
7668 
7669 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7670 	    "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7671 
7672 	sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7673 
7674 	/*
7675 	 * Check for implicit signal post flag and do the signal
7676 	 * post if needed
7677 	 */
7678 	if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7679 	    e == RSM_SUCCESS) {
7680 		rsmipc_request_t request;
7681 
7682 		request.rsmipc_key = im_seg->s_segid;
7683 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7684 		request.rsmipc_segment_cookie = NULL;
7685 		e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7686 		/*
7687 		 * Reset the implicit signal post flag to 0 to indicate
7688 		 * that the signal post has been done and need not be
7689 		 * done in the RSMAPI library
7690 		 */
7691 		sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7692 	}
7693 
7694 	rsmseglock_acquire(im_seg);
7695 	im_seg->s_rdmacnt--;
7696 	if (im_seg->s_rdmacnt == 0) {
7697 		cv_broadcast(&im_seg->s_cv);
7698 	}
7699 	rsmseglock_release(im_seg);
7700 	error = sgio_resid_copyout(arg, &sg_io, mode);
7701 out:
7702 	iovec = iovec_start;
7703 	ka_iovec = ka_iovec_start;
7704 	for (i = 0; i < iov_proc; i++) {
7705 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7706 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7707 
7708 			ASSERT(ex_seg != NULL);
7709 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7710 
7711 			ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7712 			if (ex_seg->s_rdmacnt == 0) {
7713 				cv_broadcast(&ex_seg->s_cv);
7714 			}
7715 			rsmseglock_release(ex_seg);
7716 		}
7717 
7718 		ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7719 
7720 		/*
7721 		 * At present there is no dependency on the existence of xbufs
7722 		 * created by ddi_umem_iosetup for each of the iovecs. So we
7723 		 * can these xbufs here.
7724 		 */
7725 		if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7726 			freerbuf(iovec->local_mem.ms_memory.bp);
7727 		}
7728 
7729 		iovec++;
7730 		ka_iovec++;
7731 	}
7732 
7733 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7734 		if (iovec_start)
7735 			kmem_free(iovec_start, size);
7736 		kmem_free(ka_iovec_start, ka_size);
7737 	}
7738 
7739 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7740 	    "rsm_iovec_ioctl done %d\n", e));
7741 	/* if RSMPI call fails return that else return copyout's retval */
7742 	return ((e != RSM_SUCCESS) ? e : error);
7743 
7744 }
7745 
7746 
7747 static int
7748 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7749 {
7750 	adapter_t	*adapter;
7751 	rsm_addr_t	addr;
7752 	rsm_node_id_t	node;
7753 	int		rval = DDI_SUCCESS;
7754 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7755 
7756 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7757 
7758 	adapter =  rsm_getadapter(msg, mode);
7759 	if (adapter == NULL) {
7760 		DBG_PRINTF((category, RSM_DEBUG,
7761 		    "rsmaddr_ioctl done: adapter not found\n"));
7762 		return (RSMERR_CTLR_NOT_PRESENT);
7763 	}
7764 
7765 	switch (cmd) {
7766 	case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7767 		/* returns the hwaddr in msg->hwaddr */
7768 		if (msg->nodeid == my_nodeid) {
7769 			msg->hwaddr = adapter->hwaddr;
7770 		} else {
7771 			addr = get_remote_hwaddr(adapter, msg->nodeid);
7772 			if ((int64_t)addr < 0) {
7773 				rval = RSMERR_INTERNAL_ERROR;
7774 			} else {
7775 				msg->hwaddr = addr;
7776 			}
7777 		}
7778 		break;
7779 	case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7780 		/* returns the nodeid in msg->nodeid */
7781 		if (msg->hwaddr == adapter->hwaddr) {
7782 			msg->nodeid = my_nodeid;
7783 		} else {
7784 			node = get_remote_nodeid(adapter, msg->hwaddr);
7785 			if ((int)node < 0) {
7786 				rval = RSMERR_INTERNAL_ERROR;
7787 			} else {
7788 				msg->nodeid = (rsm_node_id_t)node;
7789 			}
7790 		}
7791 		break;
7792 	default:
7793 		rval = EINVAL;
7794 		break;
7795 	}
7796 
7797 	rsmka_release_adapter(adapter);
7798 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7799 	    "rsmaddr_ioctl done: %d\n", rval));
7800 	return (rval);
7801 }
7802 
7803 static int
7804 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7805 {
7806 	DBG_DEFINE(category,
7807 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7808 
7809 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7810 
7811 #ifdef _MULTI_DATAMODEL
7812 
7813 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7814 		rsm_ioctlmsg32_t msg32;
7815 		int i;
7816 
7817 		if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7818 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7819 			    "rsm_ddi_copyin done: EFAULT\n"));
7820 			return (RSMERR_BAD_ADDR);
7821 		}
7822 		msg->len = msg32.len;
7823 		msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7824 		msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7825 		msg->key = msg32.key;
7826 		msg->acl_len = msg32.acl_len;
7827 		msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7828 		msg->cnum = msg32.cnum;
7829 		msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7830 		msg->cname_len = msg32.cname_len;
7831 		msg->nodeid = msg32.nodeid;
7832 		msg->hwaddr = msg32.hwaddr;
7833 		msg->perm = msg32.perm;
7834 		for (i = 0; i < 4; i++) {
7835 			msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7836 		}
7837 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7838 		    "rsm_ddi_copyin done\n"));
7839 		return (RSM_SUCCESS);
7840 	}
7841 #endif
7842 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7843 	if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7844 		return (RSMERR_BAD_ADDR);
7845 	else
7846 		return (RSM_SUCCESS);
7847 }
7848 
7849 static int
7850 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7851 {
7852 	rsmka_int_controller_attr_t	rsm_cattr;
7853 	DBG_DEFINE(category,
7854 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7855 
7856 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7857 	    "rsmattr_ddi_copyout enter\n"));
7858 	/*
7859 	 * need to copy appropriate data from rsm_controller_attr_t
7860 	 * to rsmka_int_controller_attr_t
7861 	 */
7862 #ifdef	_MULTI_DATAMODEL
7863 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7864 		rsmka_int_controller_attr32_t rsm_cattr32;
7865 
7866 		rsm_cattr32.attr_direct_access_sizes =
7867 		    adapter->rsm_attr.attr_direct_access_sizes;
7868 		rsm_cattr32.attr_atomic_sizes =
7869 		    adapter->rsm_attr.attr_atomic_sizes;
7870 		rsm_cattr32.attr_page_size =
7871 		    adapter->rsm_attr.attr_page_size;
7872 		if (adapter->rsm_attr.attr_max_export_segment_size >
7873 		    UINT_MAX)
7874 			rsm_cattr32.attr_max_export_segment_size =
7875 			    RSM_MAXSZ_PAGE_ALIGNED;
7876 		else
7877 			rsm_cattr32.attr_max_export_segment_size =
7878 			    adapter->rsm_attr.attr_max_export_segment_size;
7879 		if (adapter->rsm_attr.attr_tot_export_segment_size >
7880 		    UINT_MAX)
7881 			rsm_cattr32.attr_tot_export_segment_size =
7882 			    RSM_MAXSZ_PAGE_ALIGNED;
7883 		else
7884 			rsm_cattr32.attr_tot_export_segment_size =
7885 			    adapter->rsm_attr.attr_tot_export_segment_size;
7886 		if (adapter->rsm_attr.attr_max_export_segments >
7887 		    UINT_MAX)
7888 			rsm_cattr32.attr_max_export_segments =
7889 			    UINT_MAX;
7890 		else
7891 			rsm_cattr32.attr_max_export_segments =
7892 			    adapter->rsm_attr.attr_max_export_segments;
7893 		if (adapter->rsm_attr.attr_max_import_map_size >
7894 		    UINT_MAX)
7895 			rsm_cattr32.attr_max_import_map_size =
7896 			    RSM_MAXSZ_PAGE_ALIGNED;
7897 		else
7898 			rsm_cattr32.attr_max_import_map_size =
7899 			    adapter->rsm_attr.attr_max_import_map_size;
7900 		if (adapter->rsm_attr.attr_tot_import_map_size >
7901 		    UINT_MAX)
7902 			rsm_cattr32.attr_tot_import_map_size =
7903 			    RSM_MAXSZ_PAGE_ALIGNED;
7904 		else
7905 			rsm_cattr32.attr_tot_import_map_size =
7906 			    adapter->rsm_attr.attr_tot_import_map_size;
7907 		if (adapter->rsm_attr.attr_max_import_segments >
7908 		    UINT_MAX)
7909 			rsm_cattr32.attr_max_import_segments =
7910 			    UINT_MAX;
7911 		else
7912 			rsm_cattr32.attr_max_import_segments =
7913 			    adapter->rsm_attr.attr_max_import_segments;
7914 		rsm_cattr32.attr_controller_addr =
7915 		    adapter->rsm_attr.attr_controller_addr;
7916 
7917 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7918 		    "rsmattr_ddi_copyout done\n"));
7919 		if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7920 		    sizeof (rsmka_int_controller_attr32_t), mode)) {
7921 			return (RSMERR_BAD_ADDR);
7922 		}
7923 		else
7924 			return (RSM_SUCCESS);
7925 	}
7926 #endif
7927 	rsm_cattr.attr_direct_access_sizes =
7928 	    adapter->rsm_attr.attr_direct_access_sizes;
7929 	rsm_cattr.attr_atomic_sizes =
7930 	    adapter->rsm_attr.attr_atomic_sizes;
7931 	rsm_cattr.attr_page_size =
7932 	    adapter->rsm_attr.attr_page_size;
7933 	rsm_cattr.attr_max_export_segment_size =
7934 	    adapter->rsm_attr.attr_max_export_segment_size;
7935 	rsm_cattr.attr_tot_export_segment_size =
7936 	    adapter->rsm_attr.attr_tot_export_segment_size;
7937 	rsm_cattr.attr_max_export_segments =
7938 	    adapter->rsm_attr.attr_max_export_segments;
7939 	rsm_cattr.attr_max_import_map_size =
7940 	    adapter->rsm_attr.attr_max_import_map_size;
7941 	rsm_cattr.attr_tot_import_map_size =
7942 	    adapter->rsm_attr.attr_tot_import_map_size;
7943 	rsm_cattr.attr_max_import_segments =
7944 	    adapter->rsm_attr.attr_max_import_segments;
7945 	rsm_cattr.attr_controller_addr =
7946 	    adapter->rsm_attr.attr_controller_addr;
7947 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7948 	    "rsmattr_ddi_copyout done\n"));
7949 	if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7950 	    sizeof (rsmka_int_controller_attr_t), mode)) {
7951 		return (RSMERR_BAD_ADDR);
7952 	}
7953 	else
7954 		return (RSM_SUCCESS);
7955 }
7956 
7957 /*ARGSUSED*/
7958 static int
7959 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7960     int *rvalp)
7961 {
7962 	rsmseg_t *seg;
7963 	rsmresource_t	*res;
7964 	minor_t		rnum;
7965 	rsm_ioctlmsg_t msg = {0};
7966 	int error;
7967 	adapter_t *adapter;
7968 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7969 
7970 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7971 
7972 	if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7973 		error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7974 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7975 		    "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7976 		return (error);
7977 	}
7978 
7979 	/* topology cmd does not use the arg common to other cmds */
7980 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7981 		error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7982 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7983 		    "rsm_ioctl done: %d\n", error));
7984 		return (error);
7985 	}
7986 
7987 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7988 		error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7989 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7990 		    "rsm_ioctl done: %d\n", error));
7991 		return (error);
7992 	}
7993 
7994 	/*
7995 	 * try to load arguments
7996 	 */
7997 	if (cmd != RSM_IOCTL_RING_BELL &&
7998 	    rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7999 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8000 		    "rsm_ioctl done: EFAULT\n"));
8001 		return (RSMERR_BAD_ADDR);
8002 	}
8003 
8004 	if (cmd == RSM_IOCTL_ATTR) {
8005 		adapter =  rsm_getadapter(&msg, mode);
8006 		if (adapter == NULL) {
8007 			DBG_PRINTF((category, RSM_DEBUG,
8008 			    "rsm_ioctl done: ENODEV\n"));
8009 			return (RSMERR_CTLR_NOT_PRESENT);
8010 		}
8011 		error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8012 		rsmka_release_adapter(adapter);
8013 		DBG_PRINTF((category, RSM_DEBUG,
8014 		    "rsm_ioctl:after copyout %d\n", error));
8015 		return (error);
8016 	}
8017 
8018 	if (cmd == RSM_IOCTL_BAR_INFO) {
8019 		/* Return library off,len of barrier page */
8020 		msg.off = barrier_offset;
8021 		msg.len = (int)barrier_size;
8022 #ifdef _MULTI_DATAMODEL
8023 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8024 			rsm_ioctlmsg32_t msg32;
8025 
8026 			if (msg.len > UINT_MAX)
8027 				msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8028 			else
8029 				msg32.len = (int32_t)msg.len;
8030 			msg32.off = (int32_t)msg.off;
8031 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8032 			    "rsm_ioctl done\n"));
8033 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8034 			    sizeof (msg32), mode))
8035 				return (RSMERR_BAD_ADDR);
8036 			else
8037 				return (RSM_SUCCESS);
8038 		}
8039 #endif
8040 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8041 		    "rsm_ioctl done\n"));
8042 		if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8043 		    sizeof (msg), mode))
8044 			return (RSMERR_BAD_ADDR);
8045 		else
8046 			return (RSM_SUCCESS);
8047 	}
8048 
8049 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8050 		/* map the nodeid or hwaddr */
8051 		error = rsmaddr_ioctl(cmd, &msg, mode);
8052 		if (error == RSM_SUCCESS) {
8053 #ifdef _MULTI_DATAMODEL
8054 			if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8055 				rsm_ioctlmsg32_t msg32;
8056 
8057 				msg32.hwaddr = (uint64_t)msg.hwaddr;
8058 				msg32.nodeid = (uint32_t)msg.nodeid;
8059 
8060 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8061 				    "rsm_ioctl done\n"));
8062 				if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8063 				    sizeof (msg32), mode))
8064 					return (RSMERR_BAD_ADDR);
8065 				else
8066 					return (RSM_SUCCESS);
8067 			}
8068 #endif
8069 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8070 			    "rsm_ioctl done\n"));
8071 			if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8072 			    sizeof (msg), mode))
8073 				return (RSMERR_BAD_ADDR);
8074 			else
8075 				return (RSM_SUCCESS);
8076 		}
8077 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8078 		    "rsm_ioctl done: %d\n", error));
8079 		return (error);
8080 	}
8081 
8082 	/* Find resource and look it in read mode */
8083 	rnum = getminor(dev);
8084 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
8085 	ASSERT(res != NULL);
8086 
8087 	/*
8088 	 * Find command group
8089 	 */
8090 	switch (RSM_IOCTL_CMDGRP(cmd)) {
8091 	case RSM_IOCTL_EXPORT_SEG:
8092 		/*
8093 		 * Export list is searched during publish, loopback and
8094 		 * remote lookup call.
8095 		 */
8096 		seg = rsmresource_seg(res, rnum, credp,
8097 		    RSM_RESOURCE_EXPORT_SEGMENT);
8098 		if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8099 			error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8100 			    credp);
8101 		} else { /* export ioctl on an import/barrier resource */
8102 			error = RSMERR_BAD_SEG_HNDL;
8103 		}
8104 		break;
8105 	case RSM_IOCTL_IMPORT_SEG:
8106 		/* Import list is searched during remote unmap call. */
8107 		seg = rsmresource_seg(res, rnum, credp,
8108 		    RSM_RESOURCE_IMPORT_SEGMENT);
8109 		if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8110 			error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8111 			    credp);
8112 		} else  { /* import ioctl on an export/barrier resource */
8113 			error = RSMERR_BAD_SEG_HNDL;
8114 		}
8115 		break;
8116 	case RSM_IOCTL_BAR:
8117 		if (res != RSMRC_RESERVED &&
8118 		    res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8119 			error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8120 			    mode);
8121 		} else { /* invalid res value */
8122 			error = RSMERR_BAD_SEG_HNDL;
8123 		}
8124 		break;
8125 	case RSM_IOCTL_BELL:
8126 		if (res != RSMRC_RESERVED) {
8127 			if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8128 				error = exportbell_ioctl((rsmseg_t *)res, cmd);
8129 			else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8130 				error = importbell_ioctl((rsmseg_t *)res, cmd);
8131 			else /* RSM_RESOURCE_BAR */
8132 				error = RSMERR_BAD_SEG_HNDL;
8133 		} else { /* invalid res value */
8134 			error = RSMERR_BAD_SEG_HNDL;
8135 		}
8136 		break;
8137 	default:
8138 		error = EINVAL;
8139 	}
8140 
8141 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8142 	    error));
8143 	return (error);
8144 }
8145 
8146 
8147 /* **************************** Segment Mapping Operations ********* */
8148 static rsm_mapinfo_t *
8149 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8150     size_t *map_len)
8151 {
8152 	rsm_mapinfo_t	*p;
8153 	/*
8154 	 * Find the correct mapinfo structure to use during the mapping
8155 	 * from the seg->s_mapinfo list.
8156 	 * The seg->s_mapinfo list contains in reverse order the mappings
8157 	 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8158 	 * access the correct entry within this list for the mapping
8159 	 * requested.
8160 	 *
8161 	 * The algorithm for selecting a list entry is as follows:
8162 	 *
8163 	 * When start_offset of an entry <= off we have found the entry
8164 	 * we were looking for. Adjust the dev_offset and map_len (needs
8165 	 * to be PAGESIZE aligned).
8166 	 */
8167 	p = seg->s_mapinfo;
8168 	for (; p; p = p->next) {
8169 		if (p->start_offset <= off) {
8170 			*dev_offset = p->dev_offset + off - p->start_offset;
8171 			*map_len = (len > p->individual_len) ?
8172 			    p->individual_len : ptob(btopr(len));
8173 			return (p);
8174 		}
8175 		p = p->next;
8176 	}
8177 
8178 	return (NULL);
8179 }
8180 
8181 static void
8182 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8183 {
8184 	rsm_mapinfo_t *p;
8185 
8186 	while (mapinfo != NULL) {
8187 		p = mapinfo;
8188 		mapinfo = mapinfo->next;
8189 		kmem_free(p, sizeof (*p));
8190 	}
8191 }
8192 
8193 static int
8194 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8195     size_t len, void **pvtp)
8196 {
8197 	rsmcookie_t	*p;
8198 	rsmresource_t	*res;
8199 	rsmseg_t	*seg;
8200 	minor_t rnum;
8201 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8202 
8203 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8204 
8205 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8206 	    "rsmmap_map: dhp = %x\n", dhp));
8207 
8208 	flags = flags;
8209 
8210 	rnum = getminor(dev);
8211 	res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8212 	ASSERT(res != NULL);
8213 
8214 	seg = (rsmseg_t *)res;
8215 
8216 	rsmseglock_acquire(seg);
8217 
8218 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8219 
8220 	/*
8221 	 * Allocate structure and add cookie to segment list
8222 	 */
8223 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8224 
8225 	p->c_dhp = dhp;
8226 	p->c_off = off;
8227 	p->c_len = len;
8228 	p->c_next = seg->s_ckl;
8229 	seg->s_ckl = p;
8230 
8231 	*pvtp = (void *)seg;
8232 
8233 	rsmseglock_release(seg);
8234 
8235 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8236 	return (DDI_SUCCESS);
8237 }
8238 
8239 /*
8240  * Page fault handling is done here. The prerequisite mapping setup
8241  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8242  * ddi_umem_setup
8243  */
8244 static int
8245 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8246     uint_t type, uint_t rw)
8247 {
8248 	int e;
8249 	rsmseg_t *seg = (rsmseg_t *)pvt;
8250 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8251 
8252 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8253 
8254 	rsmseglock_acquire(seg);
8255 
8256 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8257 
8258 	while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8259 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8260 			DBG_PRINTF((category, RSM_DEBUG,
8261 			    "rsmmap_access done: cv_wait INTR"));
8262 			rsmseglock_release(seg);
8263 			return (RSMERR_INTERRUPTED);
8264 		}
8265 	}
8266 
8267 	ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8268 	    seg->s_state == RSM_STATE_ACTIVE);
8269 
8270 	if (seg->s_state == RSM_STATE_DISCONNECT)
8271 		seg->s_flags |= RSM_IMPORT_DUMMY;
8272 
8273 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8274 	    "rsmmap_access: dhp = %x\n", dhp));
8275 
8276 	rsmseglock_release(seg);
8277 
8278 	if (e = devmap_load(dhp, offset, len, type, rw)) {
8279 		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8280 	}
8281 
8282 
8283 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8284 
8285 	return (e);
8286 }
8287 
8288 static int
8289 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8290 	void **newpvt)
8291 {
8292 	rsmseg_t	*seg = (rsmseg_t *)oldpvt;
8293 	rsmcookie_t	*p, *old;
8294 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8295 
8296 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8297 
8298 	/*
8299 	 * Same as map, create an entry to hold cookie and add it to
8300 	 * connect segment list. The oldpvt is a pointer to segment.
8301 	 * Return segment pointer in newpvt.
8302 	 */
8303 	rsmseglock_acquire(seg);
8304 
8305 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8306 
8307 	/*
8308 	 * Find old cookie
8309 	 */
8310 	for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8311 		if (old->c_dhp == dhp) {
8312 			break;
8313 		}
8314 	}
8315 	if (old == NULL) {
8316 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8317 		    "rsmmap_dup done: EINVAL\n"));
8318 		rsmseglock_release(seg);
8319 		return (EINVAL);
8320 	}
8321 
8322 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8323 
8324 	p->c_dhp = new_dhp;
8325 	p->c_off = old->c_off;
8326 	p->c_len = old->c_len;
8327 	p->c_next = seg->s_ckl;
8328 	seg->s_ckl = p;
8329 
8330 	*newpvt = (void *)seg;
8331 
8332 	rsmseglock_release(seg);
8333 
8334 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8335 
8336 	return (DDI_SUCCESS);
8337 }
8338 
8339 static void
8340 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8341 	devmap_cookie_t new_dhp1, void **pvtp1,
8342 	devmap_cookie_t new_dhp2, void **pvtp2)
8343 {
8344 	/*
8345 	 * Remove pvtp structure from segment list.
8346 	 */
8347 	rsmseg_t	*seg = (rsmseg_t *)pvtp;
8348 	int freeflag;
8349 
8350 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8351 
8352 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8353 
8354 	off = off; len = len;
8355 	pvtp1 = pvtp1; pvtp2 = pvtp2;
8356 
8357 	rsmseglock_acquire(seg);
8358 
8359 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8360 
8361 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8362 	    "rsmmap_unmap: dhp = %x\n", dhp));
8363 	/*
8364 	 * We can go ahead and remove the dhps even if we are in
8365 	 * the MAPPING state because the dhps being removed here
8366 	 * belong to a different mmap and we are holding the segment
8367 	 * lock.
8368 	 */
8369 	if (new_dhp1 == NULL && new_dhp2 == NULL) {
8370 		/* find and remove dhp handle */
8371 		rsmcookie_t *tmp, **back = &seg->s_ckl;
8372 
8373 		while (*back != NULL) {
8374 			tmp = *back;
8375 			if (tmp->c_dhp == dhp) {
8376 				*back = tmp->c_next;
8377 				kmem_free(tmp, sizeof (*tmp));
8378 				break;
8379 			}
8380 			back = &tmp->c_next;
8381 		}
8382 	} else {
8383 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8384 		    "rsmmap_unmap:parital unmap"
8385 		    "new_dhp1 %lx, new_dhp2 %lx\n",
8386 		    (size_t)new_dhp1, (size_t)new_dhp2));
8387 	}
8388 
8389 	/*
8390 	 * rsmmap_unmap is called for each mapping cookie on the list.
8391 	 * When the list becomes empty and we are not in the MAPPING
8392 	 * state then unmap in the rsmpi driver.
8393 	 */
8394 	if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8395 		(void) rsm_unmap(seg);
8396 
8397 	if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8398 		freeflag = 1;
8399 	} else {
8400 		freeflag = 0;
8401 	}
8402 
8403 	rsmseglock_release(seg);
8404 
8405 	if (freeflag) {
8406 		/* Free the segment structure */
8407 		rsmseg_free(seg);
8408 	}
8409 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8410 
8411 }
8412 
8413 static struct devmap_callback_ctl rsmmap_ops = {
8414 	DEVMAP_OPS_REV,	/* devmap_ops version number	*/
8415 	rsmmap_map,	/* devmap_ops map routine */
8416 	rsmmap_access,	/* devmap_ops access routine */
8417 	rsmmap_dup,		/* devmap_ops dup routine		*/
8418 	rsmmap_unmap,	/* devmap_ops unmap routine */
8419 };
8420 
8421 static int
8422 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8423     size_t *maplen, uint_t model /*ARGSUSED*/)
8424 {
8425 	struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8426 	int		err;
8427 	uint_t		maxprot;
8428 	minor_t		rnum;
8429 	rsmseg_t	*seg;
8430 	off_t		dev_offset;
8431 	size_t		cur_len;
8432 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8433 
8434 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8435 
8436 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8437 	    "rsm_devmap: off = %lx, len = %lx\n", off, len));
8438 	rnum = getminor(dev);
8439 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8440 	ASSERT(seg != NULL);
8441 
8442 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8443 		if ((off == barrier_offset) &&
8444 		    (len == barrier_size)) {
8445 
8446 			ASSERT(bar_va != NULL && bar_cookie != NULL);
8447 
8448 			/*
8449 			 * The offset argument in devmap_umem_setup represents
8450 			 * the offset within the kernel memory defined by the
8451 			 * cookie. We use this offset as barrier_offset.
8452 			 */
8453 			err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8454 			    barrier_offset, len, PROT_USER|PROT_READ,
8455 			    DEVMAP_DEFAULTS, 0);
8456 
8457 			if (err != 0) {
8458 				DBG_PRINTF((category, RSM_ERR,
8459 				    "rsm_devmap done: %d\n", err));
8460 				return (RSMERR_MAP_FAILED);
8461 			}
8462 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8463 			    "rsm_devmap done: %d\n", err));
8464 
8465 			*maplen = barrier_size;
8466 
8467 			return (err);
8468 		} else {
8469 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8470 			    "rsm_devmap done: %d\n", err));
8471 			return (RSMERR_MAP_FAILED);
8472 		}
8473 	}
8474 
8475 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8476 	ASSERT(seg->s_state == RSM_STATE_MAPPING);
8477 
8478 	/*
8479 	 * Make sure we still have permission for the map operation.
8480 	 */
8481 	maxprot = PROT_USER;
8482 	if (seg->s_mode & RSM_PERM_READ) {
8483 		maxprot |= PROT_READ;
8484 	}
8485 
8486 	if (seg->s_mode & RSM_PERM_WRITE) {
8487 		maxprot |= PROT_WRITE;
8488 	}
8489 
8490 	/*
8491 	 * For each devmap call, rsmmap_map is called. This maintains driver
8492 	 * private information for the mapping. Thus, if there are multiple
8493 	 * devmap calls there will be multiple rsmmap_map calls and for each
8494 	 * call, the mapping information will be stored.
8495 	 * In case of an error during the processing of the devmap call, error
8496 	 * will be returned. This error return causes the caller of rsm_devmap
8497 	 * to undo all the mappings by calling rsmmap_unmap for each one.
8498 	 * rsmmap_unmap will free up the private information for the requested
8499 	 * mapping.
8500 	 */
8501 	if (seg->s_node != my_nodeid) {
8502 		rsm_mapinfo_t *p;
8503 
8504 		p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8505 		if (p == NULL) {
8506 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8507 			    "rsm_devmap: incorrect mapping info\n"));
8508 			return (RSMERR_MAP_FAILED);
8509 		}
8510 		err = devmap_devmem_setup(dhc, p->dip,
8511 		    callbackops, p->dev_register,
8512 		    dev_offset, cur_len, maxprot,
8513 		    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8514 
8515 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8516 		    "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8517 		    "off=%lx,len=%lx\n",
8518 		    p->dip, p->dev_register, dev_offset, off, cur_len));
8519 
8520 		if (err != 0) {
8521 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8522 			    "rsm_devmap: devmap_devmem_setup failed %d\n",
8523 			    err));
8524 			return (RSMERR_MAP_FAILED);
8525 		}
8526 		/* cur_len is always an integral multiple pagesize */
8527 		ASSERT((cur_len & (PAGESIZE-1)) == 0);
8528 		*maplen = cur_len;
8529 		return (err);
8530 
8531 	} else {
8532 		err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8533 		    seg->s_cookie, off, len, maxprot,
8534 		    DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8535 		if (err != 0) {
8536 			DBG_PRINTF((category, RSM_DEBUG,
8537 			    "rsm_devmap: devmap_umem_setup failed %d\n",
8538 			    err));
8539 			return (RSMERR_MAP_FAILED);
8540 		}
8541 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8542 		    "rsm_devmap: loopback done\n"));
8543 
8544 		*maplen = ptob(btopr(len));
8545 
8546 		return (err);
8547 	}
8548 }
8549 
8550 /*
8551  * We can use the devmap framework for mapping device memory to user space by
8552  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8553  * processing calls this entry point and devmap_setup is called within this
8554  * function, which eventually calls rsm_devmap
8555  */
8556 static int
8557 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8558     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8559 {
8560 	int			error = 0;
8561 	int			old_state;
8562 	minor_t			rnum;
8563 	rsmseg_t		*seg, *eseg;
8564 	adapter_t		*adapter;
8565 	rsm_import_share_t	*sharedp;
8566 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8567 
8568 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8569 
8570 	/*
8571 	 * find segment
8572 	 */
8573 	rnum = getminor(dev);
8574 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8575 
8576 	if (seg == NULL) {
8577 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8578 		    "rsm_segmap done: invalid segment\n"));
8579 		return (EINVAL);
8580 	}
8581 
8582 	/*
8583 	 * the user is trying to map a resource that has not been
8584 	 * defined yet. The library uses this to map in the
8585 	 * barrier page.
8586 	 */
8587 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8588 		rsmseglock_release(seg);
8589 
8590 		/*
8591 		 * The mapping for the barrier page is identified
8592 		 * by the special offset barrier_offset
8593 		 */
8594 
8595 		if (off == (off_t)barrier_offset ||
8596 		    len == (off_t)barrier_size) {
8597 			if (bar_cookie == NULL || bar_va == NULL) {
8598 				DBG_PRINTF((category, RSM_DEBUG,
8599 				    "rsm_segmap: bar cookie/va is NULL\n"));
8600 				return (EINVAL);
8601 			}
8602 
8603 			error = devmap_setup(dev, (offset_t)off, as, addrp,
8604 			    (size_t)len, prot, maxprot, flags,  cred);
8605 
8606 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8607 			    "rsm_segmap done: %d\n", error));
8608 			return (error);
8609 		} else {
8610 			DBG_PRINTF((category, RSM_DEBUG,
8611 			    "rsm_segmap: bad offset/length\n"));
8612 			return (EINVAL);
8613 		}
8614 	}
8615 
8616 	/* Make sure you can only map imported segments */
8617 	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8618 		rsmseglock_release(seg);
8619 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8620 		    "rsm_segmap done: not an import segment\n"));
8621 		return (EINVAL);
8622 	}
8623 	/* check means library is broken */
8624 	ASSERT(seg->s_hdr.rsmrc_num == rnum);
8625 
8626 	/* wait for the segment to become unquiesced */
8627 	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8628 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8629 			rsmseglock_release(seg);
8630 			DBG_PRINTF((category, RSM_DEBUG,
8631 			    "rsm_segmap done: cv_wait INTR"));
8632 			return (ENODEV);
8633 		}
8634 	}
8635 
8636 	/* wait until segment leaves the mapping state */
8637 	while (seg->s_state == RSM_STATE_MAPPING)
8638 		cv_wait(&seg->s_cv, &seg->s_lock);
8639 
8640 	/*
8641 	 * we allow multiple maps of the same segment in the KA
8642 	 * and it works because we do an rsmpi map of the whole
8643 	 * segment during the first map and all the device mapping
8644 	 * information needed in rsm_devmap is in the mapinfo list.
8645 	 */
8646 	if ((seg->s_state != RSM_STATE_CONNECT) &&
8647 	    (seg->s_state != RSM_STATE_ACTIVE)) {
8648 		rsmseglock_release(seg);
8649 		DBG_PRINTF((category, RSM_DEBUG,
8650 		    "rsm_segmap done: segment not connected\n"));
8651 		return (ENODEV);
8652 	}
8653 
8654 	/*
8655 	 * Make sure we are not mapping a larger segment than what's
8656 	 * exported
8657 	 */
8658 	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8659 		rsmseglock_release(seg);
8660 		DBG_PRINTF((category, RSM_DEBUG,
8661 		    "rsm_segmap done: off+len>seg size\n"));
8662 		return (ENXIO);
8663 	}
8664 
8665 	/*
8666 	 * Make sure we still have permission for the map operation.
8667 	 */
8668 	maxprot = PROT_USER;
8669 	if (seg->s_mode & RSM_PERM_READ) {
8670 		maxprot |= PROT_READ;
8671 	}
8672 
8673 	if (seg->s_mode & RSM_PERM_WRITE) {
8674 		maxprot |= PROT_WRITE;
8675 	}
8676 
8677 	if ((prot & maxprot) != prot) {
8678 		/* No permission */
8679 		rsmseglock_release(seg);
8680 		DBG_PRINTF((category, RSM_DEBUG,
8681 		    "rsm_segmap done: no permission\n"));
8682 		return (EACCES);
8683 	}
8684 
8685 	old_state = seg->s_state;
8686 
8687 	ASSERT(seg->s_share != NULL);
8688 
8689 	rsmsharelock_acquire(seg);
8690 
8691 	sharedp = seg->s_share;
8692 
8693 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8694 	    "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8695 
8696 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8697 	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8698 		rsmsharelock_release(seg);
8699 		rsmseglock_release(seg);
8700 		DBG_PRINTF((category, RSM_DEBUG,
8701 		    "rsm_segmap done:RSMSI_STATE %d invalid\n",
8702 		    sharedp->rsmsi_state));
8703 		return (ENODEV);
8704 	}
8705 
8706 	/*
8707 	 * Do the map - since we want importers to share mappings
8708 	 * we do the rsmpi map for the whole segment
8709 	 */
8710 	if (seg->s_node != my_nodeid) {
8711 		uint_t dev_register;
8712 		off_t dev_offset;
8713 		dev_info_t *dip;
8714 		size_t tmp_len;
8715 		size_t total_length_mapped = 0;
8716 		size_t length_to_map = seg->s_len;
8717 		off_t tmp_off = 0;
8718 		rsm_mapinfo_t *p;
8719 
8720 		/*
8721 		 * length_to_map = seg->s_len is always an integral
8722 		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8723 		 * list is a multiple of PAGESIZE - RSMPI map ensures this
8724 		 */
8725 
8726 		adapter = seg->s_adapter;
8727 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8728 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8729 
8730 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8731 			error = 0;
8732 			/* map the whole segment */
8733 			while (total_length_mapped < seg->s_len) {
8734 				tmp_len = 0;
8735 
8736 				error = adapter->rsmpi_ops->rsm_map(
8737 				    seg->s_handle.in, tmp_off,
8738 				    length_to_map, &tmp_len,
8739 				    &dip, &dev_register, &dev_offset,
8740 				    NULL, NULL);
8741 
8742 				if (error != 0)
8743 					break;
8744 
8745 				/*
8746 				 * Store the mapping info obtained from rsm_map
8747 				 */
8748 				p = kmem_alloc(sizeof (*p), KM_SLEEP);
8749 				p->dev_register = dev_register;
8750 				p->dev_offset = dev_offset;
8751 				p->dip = dip;
8752 				p->individual_len = tmp_len;
8753 				p->start_offset = tmp_off;
8754 				p->next = sharedp->rsmsi_mapinfo;
8755 				sharedp->rsmsi_mapinfo = p;
8756 
8757 				total_length_mapped += tmp_len;
8758 				length_to_map -= tmp_len;
8759 				tmp_off += tmp_len;
8760 			}
8761 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8762 
8763 			if (error != RSM_SUCCESS) {
8764 				/* Check if this is the the first rsm_map */
8765 				if (sharedp->rsmsi_mapinfo != NULL) {
8766 					/*
8767 					 * A single rsm_unmap undoes
8768 					 * multiple rsm_maps.
8769 					 */
8770 					(void) seg->s_adapter->rsmpi_ops->
8771 					    rsm_unmap(sharedp->rsmsi_handle);
8772 					rsm_free_mapinfo(sharedp->
8773 					    rsmsi_mapinfo);
8774 				}
8775 				sharedp->rsmsi_mapinfo = NULL;
8776 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8777 				rsmsharelock_release(seg);
8778 				rsmseglock_release(seg);
8779 				DBG_PRINTF((category, RSM_DEBUG,
8780 				    "rsm_segmap done: rsmpi map err %d\n",
8781 				    error));
8782 				ASSERT(error != RSMERR_BAD_LENGTH &&
8783 				    error != RSMERR_BAD_MEM_ALIGNMENT &&
8784 				    error != RSMERR_BAD_SEG_HNDL);
8785 				if (error == RSMERR_UNSUPPORTED_OPERATION)
8786 					return (ENOTSUP);
8787 				else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8788 					return (EAGAIN);
8789 				else if (error == RSMERR_CONN_ABORTED)
8790 					return (ENODEV);
8791 				else
8792 					return (error);
8793 			} else {
8794 				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8795 			}
8796 		} else {
8797 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8798 		}
8799 
8800 		sharedp->rsmsi_mapcnt++;
8801 
8802 		rsmsharelock_release(seg);
8803 
8804 		/* move to an intermediate mapping state */
8805 		seg->s_state = RSM_STATE_MAPPING;
8806 		rsmseglock_release(seg);
8807 
8808 		error = devmap_setup(dev, (offset_t)off, as, addrp,
8809 		    len, prot, maxprot, flags, cred);
8810 
8811 		rsmseglock_acquire(seg);
8812 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8813 
8814 		if (error == DDI_SUCCESS) {
8815 			seg->s_state = RSM_STATE_ACTIVE;
8816 		} else {
8817 			rsmsharelock_acquire(seg);
8818 
8819 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8820 
8821 			sharedp->rsmsi_mapcnt--;
8822 			if (sharedp->rsmsi_mapcnt == 0) {
8823 				/* unmap the shared RSMPI mapping */
8824 				ASSERT(sharedp->rsmsi_handle != NULL);
8825 				(void) adapter->rsmpi_ops->
8826 				    rsm_unmap(sharedp->rsmsi_handle);
8827 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8828 				sharedp->rsmsi_mapinfo = NULL;
8829 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8830 			}
8831 
8832 			rsmsharelock_release(seg);
8833 			seg->s_state = old_state;
8834 			DBG_PRINTF((category, RSM_ERR,
8835 			    "rsm: devmap_setup failed %d\n", error));
8836 		}
8837 		cv_broadcast(&seg->s_cv);
8838 		rsmseglock_release(seg);
8839 		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8840 		    error));
8841 		return (error);
8842 	} else {
8843 		/*
8844 		 * For loopback, the export segment mapping cookie (s_cookie)
8845 		 * is also used as the s_cookie value for its import segments
8846 		 * during mapping.
8847 		 * Note that reference counting for s_cookie of the export
8848 		 * segment is not required due to the following:
8849 		 * We never have a case of the export segment being destroyed,
8850 		 * leaving the import segments with a stale value for the
8851 		 * s_cookie field, since a force disconnect is done prior to a
8852 		 * destroy of an export segment. The force disconnect causes
8853 		 * the s_cookie value to be reset to NULL. Also for the
8854 		 * rsm_rebind operation, we change the s_cookie value of the
8855 		 * export segment as well as of all its local (loopback)
8856 		 * importers.
8857 		 */
8858 		DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8859 
8860 		rsmsharelock_release(seg);
8861 		/*
8862 		 * In order to maintain the lock ordering between the export
8863 		 * and import segment locks, we need to acquire the export
8864 		 * segment lock first and only then acquire the import
8865 		 * segment lock.
8866 		 * The above is necessary to avoid any deadlock scenarios
8867 		 * with rsm_rebind which also acquires both the export
8868 		 * and import segment locks in the above mentioned order.
8869 		 * Based on code inspection, there seem to be no other
8870 		 * situations in which both the export and import segment
8871 		 * locks are acquired either in the same or opposite order
8872 		 * as mentioned above.
8873 		 * Thus in order to conform to the above lock order, we
8874 		 * need to change the state of the import segment to
8875 		 * RSM_STATE_MAPPING, release the lock. Once this is done we
8876 		 * can now safely acquire the export segment lock first
8877 		 * followed by the import segment lock which is as per
8878 		 * the lock order mentioned above.
8879 		 */
8880 		/* move to an intermediate mapping state */
8881 		seg->s_state = RSM_STATE_MAPPING;
8882 		rsmseglock_release(seg);
8883 
8884 		eseg = rsmexport_lookup(seg->s_key);
8885 
8886 		if (eseg == NULL) {
8887 			rsmseglock_acquire(seg);
8888 			/*
8889 			 * Revert to old_state and signal any waiters
8890 			 * The shared state is not changed
8891 			 */
8892 
8893 			seg->s_state = old_state;
8894 			cv_broadcast(&seg->s_cv);
8895 			rsmseglock_release(seg);
8896 			DBG_PRINTF((category, RSM_DEBUG,
8897 			    "rsm_segmap done: key %d not found\n", seg->s_key));
8898 			return (ENODEV);
8899 		}
8900 
8901 		rsmsharelock_acquire(seg);
8902 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8903 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8904 
8905 		sharedp->rsmsi_mapcnt++;
8906 		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8907 		rsmsharelock_release(seg);
8908 
8909 		ASSERT(eseg->s_cookie != NULL);
8910 
8911 		/*
8912 		 * It is not required or necessary to acquire the import
8913 		 * segment lock here to change the value of s_cookie since
8914 		 * no one will touch the import segment as long as it is
8915 		 * in the RSM_STATE_MAPPING state.
8916 		 */
8917 		seg->s_cookie = eseg->s_cookie;
8918 
8919 		rsmseglock_release(eseg);
8920 
8921 		error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8922 		    prot, maxprot, flags, cred);
8923 
8924 		rsmseglock_acquire(seg);
8925 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8926 		if (error == 0) {
8927 			seg->s_state = RSM_STATE_ACTIVE;
8928 		} else {
8929 			rsmsharelock_acquire(seg);
8930 
8931 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8932 
8933 			sharedp->rsmsi_mapcnt--;
8934 			if (sharedp->rsmsi_mapcnt == 0) {
8935 				sharedp->rsmsi_mapinfo = NULL;
8936 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8937 			}
8938 			rsmsharelock_release(seg);
8939 			seg->s_state = old_state;
8940 			seg->s_cookie = NULL;
8941 		}
8942 		cv_broadcast(&seg->s_cv);
8943 		rsmseglock_release(seg);
8944 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8945 		    "rsm_segmap done: %d\n", error));
8946 		return (error);
8947 	}
8948 }
8949 
8950 int
8951 rsmka_null_seg_create(
8952     rsm_controller_handle_t argcp,
8953     rsm_memseg_export_handle_t *handle,
8954     size_t size,
8955     uint_t flags,
8956     rsm_memory_local_t *memory,
8957     rsm_resource_callback_t callback,
8958     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8959 {
8960 	return (RSM_SUCCESS);
8961 }
8962 
8963 
8964 int
8965 rsmka_null_seg_destroy(
8966     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
8967 {
8968 	return (RSM_SUCCESS);
8969 }
8970 
8971 
8972 int
8973 rsmka_null_bind(
8974     rsm_memseg_export_handle_t argmemseg,
8975     off_t offset,
8976     rsm_memory_local_t *argmemory,
8977     rsm_resource_callback_t callback,
8978     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8979 {
8980 	return (RSM_SUCCESS);
8981 }
8982 
8983 
8984 int
8985 rsmka_null_unbind(
8986     rsm_memseg_export_handle_t argmemseg,
8987     off_t offset,
8988     size_t length	/*ARGSUSED*/)
8989 {
8990 	return (DDI_SUCCESS);
8991 }
8992 
8993 int
8994 rsmka_null_rebind(
8995     rsm_memseg_export_handle_t argmemseg,
8996     off_t offset,
8997     rsm_memory_local_t *memory,
8998     rsm_resource_callback_t callback,
8999     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9000 {
9001 	return (RSM_SUCCESS);
9002 }
9003 
9004 int
9005 rsmka_null_publish(
9006     rsm_memseg_export_handle_t argmemseg,
9007     rsm_access_entry_t access_list[],
9008     uint_t access_list_length,
9009     rsm_memseg_id_t segment_id,
9010     rsm_resource_callback_t callback,
9011     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9012 {
9013 	return (RSM_SUCCESS);
9014 }
9015 
9016 
9017 int
9018 rsmka_null_republish(
9019     rsm_memseg_export_handle_t memseg,
9020     rsm_access_entry_t access_list[],
9021     uint_t access_list_length,
9022     rsm_resource_callback_t callback,
9023     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9024 {
9025 	return (RSM_SUCCESS);
9026 }
9027 
9028 int
9029 rsmka_null_unpublish(
9030     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
9031 {
9032 	return (RSM_SUCCESS);
9033 }
9034 
9035 
9036 void
9037 rsmka_init_loopback()
9038 {
9039 	rsm_ops_t	*ops = &null_rsmpi_ops;
9040 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9041 
9042 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9043 	    "rsmka_init_loopback enter\n"));
9044 
9045 	/* initialize null ops vector */
9046 	ops->rsm_seg_create = rsmka_null_seg_create;
9047 	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9048 	ops->rsm_bind = rsmka_null_bind;
9049 	ops->rsm_unbind = rsmka_null_unbind;
9050 	ops->rsm_rebind = rsmka_null_rebind;
9051 	ops->rsm_publish = rsmka_null_publish;
9052 	ops->rsm_unpublish = rsmka_null_unpublish;
9053 	ops->rsm_republish = rsmka_null_republish;
9054 
9055 	/* initialize attributes for loopback adapter */
9056 	loopback_attr.attr_name = loopback_str;
9057 	loopback_attr.attr_page_size = 0x8; /* 8K */
9058 
9059 	/* initialize loopback adapter */
9060 	loopback_adapter.rsm_attr = loopback_attr;
9061 	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9062 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9063 	    "rsmka_init_loopback done\n"));
9064 }
9065 
9066 /* ************** DR functions ********************************** */
9067 static void
9068 rsm_quiesce_exp_seg(rsmresource_t *resp)
9069 {
9070 	int		recheck_state;
9071 	rsmseg_t	*segp = (rsmseg_t *)resp;
9072 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9073 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9074 
9075 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9076 	    "%s enter: key=%u\n", function, segp->s_key));
9077 
9078 	rsmseglock_acquire(segp);
9079 	do {
9080 		recheck_state = 0;
9081 		if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9082 		    (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9083 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9084 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9085 			rsmseglock_release(segp);
9086 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9087 			    "%s done:state =%d\n", function,
9088 			    segp->s_state));
9089 			return;
9090 		}
9091 
9092 		if (segp->s_state == RSM_STATE_NEW) {
9093 			segp->s_state = RSM_STATE_NEW_QUIESCED;
9094 			rsmseglock_release(segp);
9095 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9096 			    "%s done:state =%d\n", function,
9097 			    segp->s_state));
9098 			return;
9099 		}
9100 
9101 		if (segp->s_state == RSM_STATE_BIND) {
9102 			/* unbind */
9103 			(void) rsm_unbind_pages(segp);
9104 			segp->s_state = RSM_STATE_BIND_QUIESCED;
9105 			rsmseglock_release(segp);
9106 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9107 			    "%s done:state =%d\n", function,
9108 			    segp->s_state));
9109 			return;
9110 		}
9111 
9112 		if (segp->s_state == RSM_STATE_EXPORT) {
9113 			/*
9114 			 * wait for putv/getv to complete if the segp is
9115 			 * a local memory handle
9116 			 */
9117 			while ((segp->s_state == RSM_STATE_EXPORT) &&
9118 			    (segp->s_rdmacnt != 0)) {
9119 				cv_wait(&segp->s_cv, &segp->s_lock);
9120 			}
9121 
9122 			if (segp->s_state != RSM_STATE_EXPORT) {
9123 				/*
9124 				 * state changed need to see what it
9125 				 * should be changed to.
9126 				 */
9127 				recheck_state = 1;
9128 				continue;
9129 			}
9130 
9131 			segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9132 			rsmseglock_release(segp);
9133 			/*
9134 			 * send SUSPEND messages - currently it will be
9135 			 * done at the end
9136 			 */
9137 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9138 			    "%s done:state =%d\n", function,
9139 			    segp->s_state));
9140 			return;
9141 		}
9142 	} while (recheck_state);
9143 
9144 	rsmseglock_release(segp);
9145 }
9146 
9147 static void
9148 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9149 {
9150 	int			ret;
9151 	rsmseg_t		*segp = (rsmseg_t *)resp;
9152 	rsmapi_access_entry_t	*acl;
9153 	rsm_access_entry_t	*rsmpi_acl;
9154 	int			acl_len;
9155 	int			create_flags = 0;
9156 	struct buf		*xbuf;
9157 	rsm_memory_local_t	mem;
9158 	adapter_t		*adapter;
9159 	dev_t			sdev = 0;
9160 	rsm_resource_callback_t callback_flag;
9161 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9162 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9163 
9164 	rsmseglock_acquire(segp);
9165 
9166 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9167 	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
9168 	    segp->s_state));
9169 
9170 	if ((segp->s_state == RSM_STATE_NEW) ||
9171 	    (segp->s_state == RSM_STATE_BIND) ||
9172 	    (segp->s_state == RSM_STATE_EXPORT)) {
9173 		rsmseglock_release(segp);
9174 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9175 		    function, segp->s_state));
9176 		return;
9177 	}
9178 
9179 	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9180 		segp->s_state = RSM_STATE_NEW;
9181 		cv_broadcast(&segp->s_cv);
9182 		rsmseglock_release(segp);
9183 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9184 		    function, segp->s_state));
9185 		return;
9186 	}
9187 
9188 	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9189 		/* bind the segment */
9190 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9191 		    segp->s_len, segp->s_proc);
9192 		if (ret == RSM_SUCCESS) { /* bind successful */
9193 			segp->s_state = RSM_STATE_BIND;
9194 		} else { /* bind failed - resource unavailable */
9195 			segp->s_state = RSM_STATE_NEW;
9196 		}
9197 		cv_broadcast(&segp->s_cv);
9198 		rsmseglock_release(segp);
9199 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9200 		    "%s done: bind_qscd bind = %d\n", function, ret));
9201 		return;
9202 	}
9203 
9204 	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9205 		/* wait for the segment to move to EXPORT_QUIESCED state */
9206 		cv_wait(&segp->s_cv, &segp->s_lock);
9207 	}
9208 
9209 	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9210 		/* bind the segment */
9211 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9212 		    segp->s_len, segp->s_proc);
9213 
9214 		if (ret != RSM_SUCCESS) {
9215 			/* bind failed - resource unavailable */
9216 			acl_len = segp->s_acl_len;
9217 			acl = segp->s_acl;
9218 			rsmpi_acl = segp->s_acl_in;
9219 			segp->s_acl_len = 0;
9220 			segp->s_acl = NULL;
9221 			segp->s_acl_in = NULL;
9222 			rsmseglock_release(segp);
9223 
9224 			rsmexport_rm(segp);
9225 			rsmacl_free(acl, acl_len);
9226 			rsmpiacl_free(rsmpi_acl, acl_len);
9227 
9228 			rsmseglock_acquire(segp);
9229 			segp->s_state = RSM_STATE_NEW;
9230 			cv_broadcast(&segp->s_cv);
9231 			rsmseglock_release(segp);
9232 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9233 			    "%s done: exp_qscd bind failed = %d\n",
9234 			    function, ret));
9235 			return;
9236 		}
9237 		/*
9238 		 * publish the segment
9239 		 * if  successful
9240 		 *   segp->s_state = RSM_STATE_EXPORT;
9241 		 * else failed
9242 		 *   segp->s_state = RSM_STATE_BIND;
9243 		 */
9244 
9245 		/* check whether it is a local_memory_handle */
9246 		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9247 			if ((segp->s_acl[0].ae_node == my_nodeid) &&
9248 			    (segp->s_acl[0].ae_permission == 0)) {
9249 				segp->s_state = RSM_STATE_EXPORT;
9250 				cv_broadcast(&segp->s_cv);
9251 				rsmseglock_release(segp);
9252 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9253 				    "%s done:exp_qscd\n", function));
9254 				return;
9255 			}
9256 		}
9257 		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9258 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
9259 		ASSERT(xbuf != NULL);
9260 
9261 		mem.ms_type = RSM_MEM_BUF;
9262 		mem.ms_bp = xbuf;
9263 
9264 		adapter = segp->s_adapter;
9265 
9266 		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9267 			create_flags = RSM_ALLOW_UNBIND_REBIND;
9268 		}
9269 
9270 		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9271 			callback_flag  = RSM_RESOURCE_DONTWAIT;
9272 		} else {
9273 			callback_flag  = RSM_RESOURCE_SLEEP;
9274 		}
9275 
9276 		ret = adapter->rsmpi_ops->rsm_seg_create(
9277 		    adapter->rsmpi_handle, &segp->s_handle.out,
9278 		    segp->s_len, create_flags, &mem,
9279 		    callback_flag, NULL);
9280 
9281 		if (ret != RSM_SUCCESS) {
9282 			acl_len = segp->s_acl_len;
9283 			acl = segp->s_acl;
9284 			rsmpi_acl = segp->s_acl_in;
9285 			segp->s_acl_len = 0;
9286 			segp->s_acl = NULL;
9287 			segp->s_acl_in = NULL;
9288 			rsmseglock_release(segp);
9289 
9290 			rsmexport_rm(segp);
9291 			rsmacl_free(acl, acl_len);
9292 			rsmpiacl_free(rsmpi_acl, acl_len);
9293 
9294 			rsmseglock_acquire(segp);
9295 			segp->s_state = RSM_STATE_BIND;
9296 			cv_broadcast(&segp->s_cv);
9297 			rsmseglock_release(segp);
9298 			DBG_PRINTF((category, RSM_ERR,
9299 			    "%s done: exp_qscd create failed = %d\n",
9300 			    function, ret));
9301 			return;
9302 		}
9303 
9304 		ret = adapter->rsmpi_ops->rsm_publish(
9305 		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9306 		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9307 
9308 		if (ret != RSM_SUCCESS) {
9309 			acl_len = segp->s_acl_len;
9310 			acl = segp->s_acl;
9311 			rsmpi_acl = segp->s_acl_in;
9312 			segp->s_acl_len = 0;
9313 			segp->s_acl = NULL;
9314 			segp->s_acl_in = NULL;
9315 			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9316 			rsmseglock_release(segp);
9317 
9318 			rsmexport_rm(segp);
9319 			rsmacl_free(acl, acl_len);
9320 			rsmpiacl_free(rsmpi_acl, acl_len);
9321 
9322 			rsmseglock_acquire(segp);
9323 			segp->s_state = RSM_STATE_BIND;
9324 			cv_broadcast(&segp->s_cv);
9325 			rsmseglock_release(segp);
9326 			DBG_PRINTF((category, RSM_ERR,
9327 			    "%s done: exp_qscd publish failed = %d\n",
9328 			    function, ret));
9329 			return;
9330 		}
9331 
9332 		segp->s_state = RSM_STATE_EXPORT;
9333 		cv_broadcast(&segp->s_cv);
9334 		rsmseglock_release(segp);
9335 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9336 		    function));
9337 		return;
9338 	}
9339 
9340 	rsmseglock_release(segp);
9341 
9342 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9343 }
9344 
9345 static void
9346 rsm_quiesce_imp_seg(rsmresource_t *resp)
9347 {
9348 	rsmseg_t	*segp = (rsmseg_t *)resp;
9349 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9350 	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9351 
9352 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9353 	    "%s enter: key=%u\n", function, segp->s_key));
9354 
9355 	rsmseglock_acquire(segp);
9356 	segp->s_flags |= RSM_DR_INPROGRESS;
9357 
9358 	while (segp->s_rdmacnt != 0) {
9359 		/* wait for the RDMA to complete */
9360 		cv_wait(&segp->s_cv, &segp->s_lock);
9361 	}
9362 
9363 	rsmseglock_release(segp);
9364 
9365 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9366 
9367 }
9368 
9369 static void
9370 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9371 {
9372 	rsmseg_t	*segp = (rsmseg_t *)resp;
9373 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9374 	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9375 
9376 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9377 	    "%s enter: key=%u\n", function, segp->s_key));
9378 
9379 	rsmseglock_acquire(segp);
9380 
9381 	segp->s_flags &= ~RSM_DR_INPROGRESS;
9382 	/* wake up any waiting putv/getv ops */
9383 	cv_broadcast(&segp->s_cv);
9384 
9385 	rsmseglock_release(segp);
9386 
9387 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9388 
9389 
9390 }
9391 
9392 static void
9393 rsm_process_exp_seg(rsmresource_t *resp, int event)
9394 {
9395 	if (event == RSM_DR_QUIESCE)
9396 		rsm_quiesce_exp_seg(resp);
9397 	else /* UNQUIESCE */
9398 		rsm_unquiesce_exp_seg(resp);
9399 }
9400 
9401 static void
9402 rsm_process_imp_seg(rsmresource_t *resp, int event)
9403 {
9404 	if (event == RSM_DR_QUIESCE)
9405 		rsm_quiesce_imp_seg(resp);
9406 	else /* UNQUIESCE */
9407 		rsm_unquiesce_imp_seg(resp);
9408 }
9409 
9410 static void
9411 rsm_dr_process_local_segments(int event)
9412 {
9413 
9414 	int i, j;
9415 	rsmresource_blk_t	*blk;
9416 	rsmresource_t		*p;
9417 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9418 
9419 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9420 	    "rsm_dr_process_local_segments enter\n"));
9421 
9422 	/* iterate through the resource structure */
9423 
9424 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9425 
9426 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9427 		blk = rsm_resource.rsmrc_root[i];
9428 		if (blk != NULL) {
9429 			for (j = 0; j < RSMRC_BLKSZ; j++) {
9430 				p = blk->rsmrcblk_blks[j];
9431 				if ((p != NULL) && (p != RSMRC_RESERVED)) {
9432 					/* valid resource */
9433 					if (p->rsmrc_type ==
9434 					    RSM_RESOURCE_EXPORT_SEGMENT)
9435 						rsm_process_exp_seg(p, event);
9436 					else if (p->rsmrc_type ==
9437 					    RSM_RESOURCE_IMPORT_SEGMENT)
9438 						rsm_process_imp_seg(p, event);
9439 				}
9440 			}
9441 		}
9442 	}
9443 
9444 	rw_exit(&rsm_resource.rsmrc_lock);
9445 
9446 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9447 	    "rsm_dr_process_local_segments done\n"));
9448 }
9449 
9450 /* *************** DR callback functions ************ */
9451 static void
9452 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9453 {
9454 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9455 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9456 	    "rsm_dr_callback_post_add is a no-op\n"));
9457 	/* Noop */
9458 }
9459 
9460 static int
9461 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9462 {
9463 	int	recheck_state = 0;
9464 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9465 
9466 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9467 	    "rsm_dr_callback_pre_del enter\n"));
9468 
9469 	mutex_enter(&rsm_drv_data.drv_lock);
9470 
9471 	do {
9472 		recheck_state = 0;
9473 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9474 		    "rsm_dr_callback_pre_del:state=%d\n",
9475 		    rsm_drv_data.drv_state));
9476 
9477 		switch (rsm_drv_data.drv_state) {
9478 		case RSM_DRV_NEW:
9479 			/*
9480 			 * The state should usually never be RSM_DRV_NEW
9481 			 * since in this state the callbacks have not yet
9482 			 * been registered. So, ASSERT.
9483 			 */
9484 			ASSERT(0);
9485 			return (0);
9486 		case RSM_DRV_REG_PROCESSING:
9487 			/*
9488 			 * The driver is in the process of registering
9489 			 * with the DR framework. So, wait till the
9490 			 * registration process is complete.
9491 			 */
9492 			recheck_state = 1;
9493 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9494 			break;
9495 		case RSM_DRV_UNREG_PROCESSING:
9496 			/*
9497 			 * If the state is RSM_DRV_UNREG_PROCESSING, the
9498 			 * module is in the process of detaching and
9499 			 * unregistering the callbacks from the DR
9500 			 * framework. So, simply return.
9501 			 */
9502 			mutex_exit(&rsm_drv_data.drv_lock);
9503 			DBG_PRINTF((category, RSM_DEBUG,
9504 			    "rsm_dr_callback_pre_del:"
9505 			    "pre-del on NEW/UNREG\n"));
9506 			return (0);
9507 		case RSM_DRV_OK:
9508 			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9509 			break;
9510 		case RSM_DRV_PREDEL_STARTED:
9511 			/* FALLTHRU */
9512 		case RSM_DRV_PREDEL_COMPLETED:
9513 			/* FALLTHRU */
9514 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9515 			recheck_state = 1;
9516 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9517 			break;
9518 		case RSM_DRV_DR_IN_PROGRESS:
9519 			rsm_drv_data.drv_memdel_cnt++;
9520 			mutex_exit(&rsm_drv_data.drv_lock);
9521 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9522 			    "rsm_dr_callback_pre_del done\n"));
9523 			return (0);
9524 			/* break; */
9525 		default:
9526 			ASSERT(0);
9527 			break;
9528 		}
9529 
9530 	} while (recheck_state);
9531 
9532 	rsm_drv_data.drv_memdel_cnt++;
9533 
9534 	mutex_exit(&rsm_drv_data.drv_lock);
9535 
9536 	/* Do all the quiescing stuff here */
9537 	DBG_PRINTF((category, RSM_DEBUG,
9538 	    "rsm_dr_callback_pre_del: quiesce things now\n"));
9539 
9540 	rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9541 
9542 	/*
9543 	 * now that all local segments have been quiesced lets inform
9544 	 * the importers
9545 	 */
9546 	rsm_send_suspend();
9547 
9548 	/*
9549 	 * In response to the suspend message the remote node(s) will process
9550 	 * the segments and send a suspend_complete message. Till all
9551 	 * the nodes send the suspend_complete message we wait in the
9552 	 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9553 	 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9554 	 */
9555 	mutex_enter(&rsm_drv_data.drv_lock);
9556 
9557 	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9558 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9559 	}
9560 
9561 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9562 
9563 	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9564 	cv_broadcast(&rsm_drv_data.drv_cv);
9565 
9566 	mutex_exit(&rsm_drv_data.drv_lock);
9567 
9568 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9569 	    "rsm_dr_callback_pre_del done\n"));
9570 
9571 	return (0);
9572 }
9573 
9574 static void
9575 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9576 {
9577 	int	recheck_state = 0;
9578 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9579 
9580 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9581 	    "rsm_dr_callback_post_del enter\n"));
9582 
9583 	mutex_enter(&rsm_drv_data.drv_lock);
9584 
9585 	do {
9586 		recheck_state = 0;
9587 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9588 		    "rsm_dr_callback_post_del:state=%d\n",
9589 		    rsm_drv_data.drv_state));
9590 
9591 		switch (rsm_drv_data.drv_state) {
9592 		case RSM_DRV_NEW:
9593 			/*
9594 			 * The driver state cannot not be RSM_DRV_NEW
9595 			 * since in this state the callbacks have not
9596 			 * yet been registered.
9597 			 */
9598 			ASSERT(0);
9599 			return;
9600 		case RSM_DRV_REG_PROCESSING:
9601 			/*
9602 			 * The driver is in the process of registering with
9603 			 * the DR framework. Wait till the registration is
9604 			 * complete.
9605 			 */
9606 			recheck_state = 1;
9607 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9608 			break;
9609 		case RSM_DRV_UNREG_PROCESSING:
9610 			/*
9611 			 * RSM_DRV_UNREG_PROCESSING state means the module
9612 			 * is detaching and unregistering the callbacks
9613 			 * from the DR framework. So simply return.
9614 			 */
9615 			/* FALLTHRU */
9616 		case RSM_DRV_OK:
9617 			/*
9618 			 * RSM_DRV_OK means we missed the pre-del
9619 			 * corresponding to this post-del coz we had not
9620 			 * registered yet, so simply return.
9621 			 */
9622 			mutex_exit(&rsm_drv_data.drv_lock);
9623 			DBG_PRINTF((category, RSM_DEBUG,
9624 			    "rsm_dr_callback_post_del:"
9625 			    "post-del on OK/UNREG\n"));
9626 			return;
9627 			/* break; */
9628 		case RSM_DRV_PREDEL_STARTED:
9629 			/* FALLTHRU */
9630 		case RSM_DRV_PREDEL_COMPLETED:
9631 			/* FALLTHRU */
9632 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9633 			recheck_state = 1;
9634 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9635 			break;
9636 		case RSM_DRV_DR_IN_PROGRESS:
9637 			rsm_drv_data.drv_memdel_cnt--;
9638 			if (rsm_drv_data.drv_memdel_cnt > 0) {
9639 				mutex_exit(&rsm_drv_data.drv_lock);
9640 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9641 				    "rsm_dr_callback_post_del done:\n"));
9642 				return;
9643 			}
9644 			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9645 			break;
9646 		default:
9647 			ASSERT(0);
9648 			return;
9649 			/* break; */
9650 		}
9651 	} while (recheck_state);
9652 
9653 	mutex_exit(&rsm_drv_data.drv_lock);
9654 
9655 	/* Do all the unquiescing stuff here */
9656 	DBG_PRINTF((category, RSM_DEBUG,
9657 	    "rsm_dr_callback_post_del: unquiesce things now\n"));
9658 
9659 	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9660 
9661 	/*
9662 	 * now that all local segments have been unquiesced lets inform
9663 	 * the importers
9664 	 */
9665 	rsm_send_resume();
9666 
9667 	mutex_enter(&rsm_drv_data.drv_lock);
9668 
9669 	rsm_drv_data.drv_state = RSM_DRV_OK;
9670 
9671 	cv_broadcast(&rsm_drv_data.drv_cv);
9672 
9673 	mutex_exit(&rsm_drv_data.drv_lock);
9674 
9675 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9676 	    "rsm_dr_callback_post_del done\n"));
9677 
9678 	return;
9679 
9680 }
9681