xref: /titanic_51/usr/src/uts/common/io/rsm/rsm.c (revision fa9e4066f08beec538e775443c5be79dd423fcab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Overview of the RSM Kernel Agent:
31  * ---------------------------------
32  *
33  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
34  * kernel agent is a pseudo device driver which makes use of the RSMPI
35  * interface on behalf of the RSMAPI user library.
36  *
37  * The kernel agent functionality can be categorized into the following
38  * components:
39  * 1. Driver Infrastructure
40  * 2. Export/Import Segment Management
41  * 3. Internal resource allocation/deallocation
42  *
43  * The driver infrastructure includes the basic module loading entry points
44  * like _init, _info, _fini to load, unload and report information about
45  * the driver module. The driver infrastructure also includes the
46  * autoconfiguration entry points namely, attach, detach and getinfo for
47  * the device autoconfiguration.
48  *
49  * The kernel agent is a pseudo character device driver and exports
50  * a cb_ops structure which defines the driver entry points for character
51  * device access. This includes the open and close entry points. The
52  * other entry points provided include ioctl, devmap and segmap and chpoll.
53  * read and write entry points are not used since the device is memory
54  * mapped. Also ddi_prop_op is used for the prop_op entry point.
55  *
56  * The ioctl entry point supports a number of commands, which are used by
57  * the RSMAPI library in order to export and import segments. These
58  * commands include commands for binding and rebinding the physical pages
59  * allocated to the virtual address range, publishing the export segment,
60  * unpublishing and republishing an export segment, creating an
61  * import segment and a virtual connection from this import segment to
62  * an export segment, performing scatter-gather data transfer, barrier
63  * operations.
64  *
65  *
66  * Export and Import segments:
67  * ---------------------------
68  *
69  * In order to create an RSM export segment a process allocates a range in its
70  * virtual address space for the segment using standard Solaris interfaces.
71  * The process then calls RSMAPI, which in turn makes an ioctl call to the
72  * RSM kernel agent for an allocation of physical memory pages and for
73  * creation of the export segment by binding these pages to the virtual
74  * address range. These pages are locked in memory so that remote accesses
75  * are always applied to the correct page. Then the RSM segment is published,
76  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
77  * is assigned to it.
78  *
79  * In order to import a published RSM segment, RSMAPI creates an import
80  * segment and forms a virtual connection across the interconnect to the
81  * export segment, via an ioctl into the kernel agent with the connect
82  * command. The import segment setup is completed by mapping the
83  * local device memory into the importer's virtual address space. The
84  * mapping of the import segment is handled by the segmap/devmap
85  * infrastructure described as follows.
86  *
87  * Segmap and Devmap interfaces:
88  *
89  * The RSM kernel agent allows device memory to be directly accessed by user
90  * threads via memory mapping. In order to do so, the RSM kernel agent
91  * supports the devmap and segmap entry points.
92  *
93  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
94  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
95  * responsible for exporting the device memory to the user applications.
96  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
97  * control is transferred to the devmap_setup call which calls rsm_devmap.
98  *
99  * rsm_devmap validates the user mapping to the device or kernel memory
100  * and passes the information to the system for setting up the mapping. The
101  * actual setting up of the mapping is done by devmap_devmem_setup(for
102  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
103  * registered for device context management via the devmap_devmem_setup
104  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
105  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
106  * is created, a mapping is freed, a mapping is accessed or an existing
107  * mapping is duplicated respectively. These callbacks allow the RSM kernel
108  * agent to maintain state information associated with the mappings.
109  * The state information is mainly in the form of a cookie list for the import
110  * segment for which mapping has been done.
111  *
112  * Forced disconnect of import segments:
113  *
114  * When an exported segment is unpublished, the exporter sends a forced
115  * disconnect message to all its importers. The importer segments are
116  * unloaded and disconnected. This involves unloading the original
117  * mappings and remapping to a preallocated kernel trash page. This is
118  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
119  * preallocated by the kernel agent during attach using ddi_umem_alloc with
120  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
121  * due to unloading of the original mappings.
122  *
123  * Additionally every segment has a mapping generation number associated
124  * with it. This is an entry in the barrier generation page, created
125  * during attach time. This mapping generation number for the import
126  * segments is incremented on a force disconnect to notify the application
127  * of the force disconnect. On this notification, the application needs
128  * to reconnect the segment to establish a new legitimate mapping.
129  *
130  *
131  * Locks used in the kernel agent:
132  * -------------------------------
133  *
134  * The kernel agent uses a variety of mutexes and condition variables for
135  * mutual exclusion of the shared data structures and for synchronization
136  * between the various threads. Some of the locks are described as follows.
137  *
138  * Each resource structure, which represents either an export or an import
139  * segment, has an associated lock: the resource mutex, rsmrc_lock.
140  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
141  * rsmseglock_acquire and rsmseglock_release macros. An additional
142  * lock called the rsmsi_lock is used for the shared import data structure
143  * that is relevant for resources representing import segments. There is
144  * also a condition variable associated with the resource called s_cv. This
145  * is used to wait for events like the segment state change etc.
146  *
147  * The resource structures are allocated from a pool of resource structures,
148  * called rsm_resource. This pool is protected via a reader-writer lock,
149  * called rsmrc_lock.
150  *
151  * There are two separate hash tables, one for the export segments and
152  * one for the import segments. The export segments are inserted into the
153  * export segment hash table only after they have been published and the
154  * import segments are inserted in the import segments list only after they
155  * have successfully connected to an exported segment. These tables are
156  * protected via reader-writer locks.
157  *
158  * Debug Support in the kernel agent:
159  * ----------------------------------
160  *
161  * Debugging support in the kernel agent is provided by the following
162  * macros.
163  *
164  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
165  * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
166  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
167  * on the definition of the category and level. All messages that belong to
168  * the specified category(rsmdbg_category) and are of an equal or greater
169  * severity than the specified level(rsmdbg_level) are logged. The message
170  * is a string which uses the same formatting rules as the strings used in
171  * printf.
172  *
173  * The category defines which component of the kernel agent has logged this
174  * message. There are a number of categories that have been defined such as
175  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
176  * DBG_ADDCATEGORY is used to add in another category to the currently
177  * specified category value so that the component using this new category
178  * can also effectively log debug messages. Thus, the category of a specific
179  * message is some combination of the available categories and we can define
180  * sub-categories if we want a finer level of granularity.
181  *
182  * The level defines the severity of the message. Different level values are
183  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
184  * the least severe(debug level is 0).
185  *
186  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
187  * variable or a string respectively.
188  *
189  *
190  * NOTES:
191  *
192  * Special Fork and Exec Handling:
193  * -------------------------------
194  *
195  * The backing physical pages of an exported segment are always locked down.
196  * Thus, there are two cases in which a process having exported segments
197  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
198  * forks and invokes exit before the duped file descriptors for the export
199  * segments are closed in the child process. The hang is caused because the
200  * address space release algorithm in Solaris VM subsystem is based on a
201  * non-blocking loop which does not terminate while segments are locked
202  * down. In addition to this, Solaris VM subsystem lacks a callback
203  * mechanism to the rsm kernel agent to allow unlocking these export
204  * segment pages.
205  *
206  * In order to circumvent this problem, the kernel agent does the following.
207  * The Solaris VM subsystem keeps memory segments in increasing order of
208  * virtual addresses. Thus a special page(special_exit_offset) is allocated
209  * by the kernel agent and is mmapped into the heap area of the process address
210  * space(the mmap is done by the RSMAPI library). During the mmap processing
211  * of this special page by the devmap infrastructure, a callback(the same
212  * devmap context management callbacks discussed above) is registered for an
213  * unmap.
214  *
215  * As discussed above, this page is processed by the Solaris address space
216  * release code before any of the exported segments pages(which are allocated
217  * from high memory). It is during this processing that the unmap callback gets
218  * called and this callback is responsible for force destroying the exported
219  * segments and thus eliminating the problem of locked pages.
220  *
221  * Flow-control:
222  * ------------
223  *
224  * A credit based flow control algorithm is used for messages whose
225  * processing cannot be done in the interrupt context because it might
226  * involve invoking rsmpi calls, or might take a long time to complete
227  * or might need to allocate resources. The algorithm operates on a per
228  * path basis. To send a message the pathend needs to have a credit and
229  * it consumes one for every message that is flow controlled. On the
230  * receiving pathend the message is put on a msgbuf_queue and a task is
231  * dispatched on the worker thread - recv_taskq where it is processed.
232  * After processing the message, the receiving pathend dequeues the message,
233  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
234  * credits to the sender pathend.
235  *
236  * RSM_DRTEST:
237  * -----------
238  *
239  * This is used to enable DR testing using a test driver on test
240  * platforms which do not support DR.
241  *
242  */
243 
244 #include <sys/types.h>
245 #include <sys/param.h>
246 #include <sys/user.h>
247 #include <sys/buf.h>
248 #include <sys/systm.h>
249 #include <sys/cred.h>
250 #include <sys/vm.h>
251 #include <sys/uio.h>
252 #include <vm/seg.h>
253 #include <vm/page.h>
254 #include <sys/stat.h>
255 
256 #include <sys/time.h>
257 #include <sys/errno.h>
258 
259 #include <sys/file.h>
260 #include <sys/uio.h>
261 #include <sys/proc.h>
262 #include <sys/mman.h>
263 #include <sys/open.h>
264 #include <sys/atomic.h>
265 #include <sys/mem_config.h>
266 
267 
268 #include <sys/ddi.h>
269 #include <sys/devops.h>
270 #include <sys/ddidevmap.h>
271 #include <sys/sunddi.h>
272 #include <sys/esunddi.h>
273 #include <sys/ddi_impldefs.h>
274 
275 #include <sys/kmem.h>
276 #include <sys/conf.h>
277 #include <sys/devops.h>
278 #include <sys/ddi_impldefs.h>
279 
280 #include <sys/modctl.h>
281 
282 #include <sys/policy.h>
283 #include <sys/types.h>
284 #include <sys/conf.h>
285 #include <sys/param.h>
286 
287 #include <sys/taskq.h>
288 
289 #include <sys/rsm/rsm_common.h>
290 #include <sys/rsm/rsmapi_common.h>
291 #include <sys/rsm/rsm.h>
292 #include <rsm_in.h>
293 #include <sys/rsm/rsmka_path_int.h>
294 #include <sys/rsm/rsmpi.h>
295 
296 #include <sys/modctl.h>
297 #include <sys/debug.h>
298 
299 #include <sys/tuneable.h>
300 
301 #ifdef	RSM_DRTEST
302 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
303 		void *arg);
304 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
305 		void *arg);
306 #endif
307 
308 extern void dbg_printf(int category, int level, char *fmt, ...);
309 extern void rsmka_pathmanager_init();
310 extern void rsmka_pathmanager_cleanup();
311 extern void rele_sendq_token();
312 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
313 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
314 extern int rsmka_topology_ioctl(caddr_t, int, int);
315 
316 extern pri_t maxclsyspri;
317 extern work_queue_t work_queue;
318 extern kmutex_t ipc_info_lock;
319 extern kmutex_t ipc_info_cvlock;
320 extern kcondvar_t ipc_info_cv;
321 extern kmutex_t path_hold_cvlock;
322 extern kcondvar_t path_hold_cv;
323 
324 extern kmutex_t rsmka_buf_lock;
325 
326 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
327 extern adapter_t *rsmka_lookup_adapter(char *, int);
328 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
329 extern boolean_t rsmka_do_path_active(path_t *, int);
330 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
331 extern void rsmka_release_adapter(adapter_t *);
332 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
333 extern void rsmka_dequeue_msgbuf(path_t *path);
334 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
335 /* lint -w2 */
336 
337 static int rsm_open(dev_t *, int, int, cred_t *);
338 static int rsm_close(dev_t, int, int, cred_t *);
339 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
340     cred_t *credp, int *rvalp);
341 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
342     uint_t);
343 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
344     uint_t, uint_t, cred_t *);
345 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
346     struct pollhead **phpp);
347 
348 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
349 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
350 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
351 
352 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
353 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
354 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
355 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
356 				rsm_permission_t);
357 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
358 static void rsmacl_free(rsmapi_access_entry_t *, int);
359 static void rsmpiacl_free(rsm_access_entry_t *, int);
360 
361 static int rsm_inc_pgcnt(pgcnt_t);
362 static void rsm_dec_pgcnt(pgcnt_t);
363 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
364 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
365 					size_t *);
366 static void exporter_quiesce();
367 static void rsmseg_suspend(rsmseg_t *, int *);
368 static void rsmsegshare_suspend(rsmseg_t *);
369 static int rsmseg_resume(rsmseg_t *, void **);
370 static int rsmsegshare_resume(rsmseg_t *);
371 
372 static struct cb_ops rsm_cb_ops = {
373 	rsm_open,		/* open */
374 	rsm_close,		/* close */
375 	nodev,			/* strategy */
376 	nodev,			/* print */
377 	nodev,			/* dump */
378 	nodev,			/* read */
379 	nodev,			/* write */
380 	rsm_ioctl,		/* ioctl */
381 	rsm_devmap,		/* devmap */
382 	NULL,			/* mmap */
383 	rsm_segmap,		/* segmap */
384 	rsm_chpoll,		/* poll */
385 	ddi_prop_op,		/* cb_prop_op */
386 	0,			/* streamtab  */
387 	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
388 	0,
389 	0,
390 	0
391 };
392 
393 static struct dev_ops rsm_ops = {
394 	DEVO_REV,		/* devo_rev, */
395 	0,			/* refcnt  */
396 	rsm_info,		/* get_dev_info */
397 	nulldev,		/* identify */
398 	nulldev,		/* probe */
399 	rsm_attach,		/* attach */
400 	rsm_detach,		/* detach */
401 	nodev,			/* reset */
402 	&rsm_cb_ops,		/* driver operations */
403 	(struct bus_ops *)0,	/* bus operations */
404 	0
405 };
406 
407 /*
408  * Module linkage information for the kernel.
409  */
410 
411 static struct modldrv modldrv = {
412 	&mod_driverops, /* Type of module.  This one is a pseudo driver */
413 	"Remote Shared Memory Driver %I%",
414 	&rsm_ops,	/* driver ops */
415 };
416 
417 static struct modlinkage modlinkage = {
418 	MODREV_1,
419 	(void *)&modldrv,
420 	0,
421 	0,
422 	0
423 };
424 
425 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
426 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
427 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
428 
429 static kphysm_setup_vector_t rsm_dr_callback_vec = {
430 	KPHYSM_SETUP_VECTOR_VERSION,
431 	rsm_dr_callback_post_add,
432 	rsm_dr_callback_pre_del,
433 	rsm_dr_callback_post_del
434 };
435 
436 /* This flag can be changed to 0 to help with PIT testing */
437 int rsmka_modunloadok = 1;
438 int no_reply_cnt = 0;
439 
440 uint64_t rsm_ctrlmsg_errcnt = 0;
441 uint64_t rsm_ipcsend_errcnt = 0;
442 
443 #define	MAX_NODES 64
444 
445 static struct rsm_driver_data rsm_drv_data;
446 static struct rsmresource_table rsm_resource;
447 
448 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
449 static void rsmresource_destroy(void);
450 static int rsmresource_alloc(minor_t *);
451 static rsmresource_t *rsmresource_free(minor_t rnum);
452 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
453 static int rsm_unpublish(rsmseg_t *seg, int mode);
454 static int rsm_unbind(rsmseg_t *seg);
455 static uint_t rsmhash(rsm_memseg_id_t key);
456 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
457 static void rsmhash_free(rsmhash_table_t *rhash, int size);
458 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
459 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
460 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
461 					void *cookie);
462 int rsm_disconnect(rsmseg_t *seg);
463 void rsmseg_unload(rsmseg_t *);
464 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
465 
466 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
467     rsm_intr_q_op_t opcode, rsm_addr_t src,
468     void *data, size_t size, rsm_intr_hand_arg_t arg);
469 
470 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
471 
472 rsm_node_id_t my_nodeid;
473 
474 /* cookie, va, offsets and length for the barrier */
475 static rsm_gnum_t		*bar_va;
476 static ddi_umem_cookie_t	bar_cookie;
477 static off_t			barrier_offset;
478 static size_t			barrier_size;
479 static int			max_segs;
480 
481 /* cookie for the trash memory */
482 static ddi_umem_cookie_t	remap_cookie;
483 
484 static rsm_memseg_id_t	rsm_nextavail_segmentid;
485 
486 extern taskq_t *work_taskq;
487 extern char *taskq_name;
488 
489 static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */
490 
491 static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
492 rsmhash_table_t rsm_import_segs;		/* list of imported segs */
493 static rsmhash_table_t rsm_event_queues;	/* list of event queues */
494 
495 static	rsm_ipc_t	rsm_ipc;		/* ipc info */
496 
497 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
498 static list_head_t	rsm_suspend_list;
499 
500 /* list of descriptors for remote importers */
501 static importers_table_t importer_list;
502 
503 kmutex_t rsm_suspend_cvlock;
504 kcondvar_t rsm_suspend_cv;
505 
506 static kmutex_t rsm_lock;
507 
508 adapter_t loopback_adapter;
509 rsm_controller_attr_t loopback_attr;
510 
511 int rsmipc_send_controlmsg(path_t *path, int msgtype);
512 
513 void rsmka_init_loopback();
514 
515 int rsmka_null_seg_create(
516     rsm_controller_handle_t,
517     rsm_memseg_export_handle_t *,
518     size_t,
519     uint_t,
520     rsm_memory_local_t *,
521     rsm_resource_callback_t,
522     rsm_resource_callback_arg_t);
523 
524 int rsmka_null_seg_destroy(
525     rsm_memseg_export_handle_t);
526 
527 int rsmka_null_bind(
528     rsm_memseg_export_handle_t,
529     off_t,
530     rsm_memory_local_t *,
531     rsm_resource_callback_t,
532     rsm_resource_callback_arg_t);
533 
534 int rsmka_null_unbind(
535     rsm_memseg_export_handle_t,
536     off_t,
537     size_t);
538 
539 int rsmka_null_rebind(
540     rsm_memseg_export_handle_t,
541     off_t,
542     rsm_memory_local_t *,
543     rsm_resource_callback_t,
544     rsm_resource_callback_arg_t);
545 
546 int rsmka_null_publish(
547     rsm_memseg_export_handle_t,
548     rsm_access_entry_t [],
549     uint_t,
550     rsm_memseg_id_t,
551     rsm_resource_callback_t,
552     rsm_resource_callback_arg_t);
553 
554 
555 int rsmka_null_republish(
556     rsm_memseg_export_handle_t,
557     rsm_access_entry_t [],
558     uint_t,
559     rsm_resource_callback_t,
560     rsm_resource_callback_arg_t);
561 
562 int rsmka_null_unpublish(
563     rsm_memseg_export_handle_t);
564 
565 rsm_ops_t null_rsmpi_ops;
566 
567 /*
568  * data and locks to keep track of total amount of exported memory
569  */
570 static	pgcnt_t		rsm_pgcnt;
571 static	pgcnt_t		rsm_pgcnt_max;	/* max allowed */
572 static	kmutex_t	rsm_pgcnt_lock;
573 
574 static	int		rsm_enable_dr;
575 
576 static	char		loopback_str[] = "loopback";
577 
578 int		rsm_hash_size;
579 
580 /*
581  * The locking model is as follows:
582  *
583  * Local operations:
584  *		find resource - grab reader lock on resource list
585  *		insert rc     - grab writer lock
586  *		delete rc     - grab writer lock and resource mutex
587  *		read/write    - no lock
588  *
589  * Remote invocations:
590  *		find resource - grab read lock and resource mutex
591  *
592  * State:
593  *		resource state - grab resource mutex
594  */
595 
596 int
597 _init(void)
598 {
599 	int e;
600 
601 	e = mod_install(&modlinkage);
602 	if (e != 0) {
603 		return (e);
604 	}
605 
606 	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
607 
608 	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
609 
610 
611 	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
612 
613 	rsm_hash_size = RSM_HASHSZ;
614 
615 	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
616 
617 	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
618 
619 	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
620 
621 	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
622 	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
623 
624 	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
625 	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
626 
627 	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
628 	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
629 
630 	rsm_ipc.count = RSMIPC_SZ;
631 	rsm_ipc.wanted = 0;
632 	rsm_ipc.sequence = 0;
633 
634 	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
635 
636 	for (e = 0; e < RSMIPC_SZ; e++) {
637 		rsmipc_slot_t *slot = &rsm_ipc.slots[e];
638 
639 		RSMIPC_SET(slot, RSMIPC_FREE);
640 		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
641 		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
642 	}
643 
644 	/*
645 	 * Initialize the suspend message list
646 	 */
647 	rsm_suspend_list.list_head = NULL;
648 	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
649 
650 	/*
651 	 * It is assumed here that configuration data is available
652 	 * during system boot since _init may be called at that time.
653 	 */
654 
655 	rsmka_pathmanager_init();
656 
657 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
658 	    "rsm: _init done\n"));
659 
660 	return (DDI_SUCCESS);
661 
662 }
663 
664 int
665 _info(struct modinfo *modinfop)
666 {
667 
668 	return (mod_info(&modlinkage, modinfop));
669 }
670 
671 int
672 _fini(void)
673 {
674 	int e;
675 
676 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
677 	    "rsm: _fini enter\n"));
678 
679 	/*
680 	 * The rsmka_modunloadok flag is simply used to help with
681 	 * the PIT testing. Make this flag 0 to disallow modunload.
682 	 */
683 	if (rsmka_modunloadok == 0)
684 		return (EBUSY);
685 
686 	/* rsm_detach will be called as a result of mod_remove */
687 	e = mod_remove(&modlinkage);
688 	if (e) {
689 		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
690 		    "Unable to fini RSM %x\n", e));
691 		return (e);
692 	}
693 
694 	rsmka_pathmanager_cleanup();
695 
696 	rw_destroy(&rsm_resource.rsmrc_lock);
697 
698 	rw_destroy(&rsm_export_segs.rsmhash_rw);
699 	rw_destroy(&rsm_import_segs.rsmhash_rw);
700 	rw_destroy(&rsm_event_queues.rsmhash_rw);
701 
702 	mutex_destroy(&importer_list.lock);
703 
704 	mutex_destroy(&rsm_ipc.lock);
705 	cv_destroy(&rsm_ipc.cv);
706 
707 	(void) mutex_destroy(&rsm_suspend_list.list_lock);
708 
709 	(void) mutex_destroy(&rsm_pgcnt_lock);
710 
711 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
712 
713 	return (DDI_SUCCESS);
714 
715 }
716 
717 /*ARGSUSED1*/
718 static int
719 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
720 {
721 	minor_t	rnum;
722 	int	percent;
723 	int	ret;
724 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
725 
726 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
727 
728 	switch (cmd) {
729 	case DDI_ATTACH:
730 		break;
731 	case DDI_RESUME:
732 	default:
733 		DBG_PRINTF((category, RSM_ERR,
734 		    "rsm:rsm_attach - cmd not supported\n"));
735 		return (DDI_FAILURE);
736 	}
737 
738 	if (rsm_dip != NULL) {
739 		DBG_PRINTF((category, RSM_ERR,
740 		    "rsm:rsm_attach - supports only "
741 		    "one instance\n"));
742 		return (DDI_FAILURE);
743 	}
744 
745 	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
746 			    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
747 			    "enable-dynamic-reconfiguration", 1);
748 
749 	mutex_enter(&rsm_drv_data.drv_lock);
750 	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
751 	mutex_exit(&rsm_drv_data.drv_lock);
752 
753 	if (rsm_enable_dr) {
754 #ifdef	RSM_DRTEST
755 		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
756 		    (void *)NULL);
757 #else
758 		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
759 		    (void *)NULL);
760 #endif
761 		if (ret != 0) {
762 			mutex_exit(&rsm_drv_data.drv_lock);
763 			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
764 			    "reconfiguration setup failed\n");
765 			return (DDI_FAILURE);
766 		}
767 	}
768 
769 	mutex_enter(&rsm_drv_data.drv_lock);
770 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
771 	rsm_drv_data.drv_state = RSM_DRV_OK;
772 	cv_broadcast(&rsm_drv_data.drv_cv);
773 	mutex_exit(&rsm_drv_data.drv_lock);
774 
775 	/*
776 	 * page_list_read_lock();
777 	 * xx_setup();
778 	 * page_list_read_unlock();
779 	 */
780 
781 	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
782 			    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
783 			    "segment-hashtable-size", RSM_HASHSZ);
784 	if (rsm_hash_size == 0) {
785 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
786 		    "rsm: segment-hashtable-size in rsm.conf "
787 		    "must be greater than 0, defaulting to 128\n"));
788 		rsm_hash_size = RSM_HASHSZ;
789 	}
790 
791 	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
792 	    rsm_hash_size));
793 
794 	rsm_pgcnt = 0;
795 
796 	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
797 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
798 	    "max-exported-memory", 0);
799 	if (percent < 0) {
800 		DBG_PRINTF((category, RSM_ERR,
801 		    "rsm:rsm_attach not enough memory available to "
802 		    "export, or max-exported-memory set incorrectly.\n"));
803 		return (DDI_FAILURE);
804 	}
805 	/* 0 indicates no fixed upper limit. maxmem is the max	*/
806 	/* available pageable physical mem			*/
807 	rsm_pgcnt_max = (percent*maxmem)/100;
808 
809 	if (rsm_pgcnt_max > 0) {
810 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
811 		    "rsm: Available physical memory = %lu pages, "
812 		    "Max exportable memory = %lu pages",
813 		    maxmem, rsm_pgcnt_max));
814 	}
815 
816 	/*
817 	 * Create minor number
818 	 */
819 	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
820 		DBG_PRINTF((category, RSM_ERR,
821 		    "rsm: rsm_attach - Unable to get "
822 		    "minor number\n"));
823 		return (DDI_FAILURE);
824 	}
825 
826 	ASSERT(rnum == RSM_DRIVER_MINOR);
827 
828 	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
829 	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
830 		DBG_PRINTF((category, RSM_ERR,
831 		    "rsm: rsm_attach - unable to allocate "
832 		    "minor #\n"));
833 		return (DDI_FAILURE);
834 	}
835 
836 	rsm_dip = devi;
837 	/*
838 	 * Allocate the hashtables
839 	 */
840 	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
841 	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
842 
843 	importer_list.bucket = (importing_token_t **)
844 		kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *),
845 		    KM_SLEEP);
846 
847 	/*
848 	 * Allocate a resource struct
849 	 */
850 	{
851 		rsmresource_t *p;
852 
853 		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
854 
855 		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
856 
857 		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
858 	}
859 
860 	/*
861 	 * Based on the rsm.conf property max-segments, determine the maximum
862 	 * number of segments that can be exported/imported. This is then used
863 	 * to determine the size for barrier failure pages.
864 	 */
865 
866 	/* First get the max number of segments from the rsm.conf file */
867 	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
868 			    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
869 			    "max-segments", 0);
870 	if (max_segs == 0) {
871 		/* Use default number of segments */
872 		max_segs = RSM_MAX_NUM_SEG;
873 	}
874 
875 	/*
876 	 * Based on the max number of segments allowed, determine the barrier
877 	 * page size. add 1 to max_segs since the barrier page itself uses
878 	 * a slot
879 	 */
880 	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
881 			    PAGESIZE);
882 
883 	/*
884 	 * allocation of the barrier failure page
885 	 */
886 	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
887 				    DDI_UMEM_SLEEP, &bar_cookie);
888 
889 	/*
890 	 * Set the barrier_offset
891 	 */
892 	barrier_offset = 0;
893 
894 	/*
895 	 * Allocate a trash memory and get a cookie for it. This will be used
896 	 * when remapping segments during force disconnects. Allocate the
897 	 * trash memory with a large size which is page aligned.
898 	 */
899 	(void) ddi_umem_alloc((size_t)TRASHSIZE,
900 		    DDI_UMEM_TRASH, &remap_cookie);
901 
902 	/* initialize user segment id allocation variable */
903 	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
904 
905 	/*
906 	 * initialize the null_rsmpi_ops vector and the loopback adapter
907 	 */
908 	rsmka_init_loopback();
909 
910 
911 	ddi_report_dev(devi);
912 
913 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
914 
915 	return (DDI_SUCCESS);
916 }
917 
918 /*
919  * The call to mod_remove in the _fine routine will cause the system
920  * to call rsm_detach
921  */
922 /*ARGSUSED*/
923 static int
924 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
925 {
926 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
927 
928 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
929 
930 	switch (cmd) {
931 	case DDI_DETACH:
932 		break;
933 	default:
934 		DBG_PRINTF((category, RSM_ERR,
935 		    "rsm:rsm_detach - cmd %x not supported\n",
936 		    cmd));
937 		return (DDI_FAILURE);
938 	}
939 
940 	mutex_enter(&rsm_drv_data.drv_lock);
941 	while (rsm_drv_data.drv_state != RSM_DRV_OK)
942 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
943 	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
944 	mutex_exit(&rsm_drv_data.drv_lock);
945 
946 	/*
947 	 * Unregister the DR callback functions
948 	 */
949 	if (rsm_enable_dr) {
950 #ifdef	RSM_DRTEST
951 		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
952 		    (void *)NULL);
953 #else
954 		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
955 		    (void *)NULL);
956 #endif
957 	}
958 
959 	mutex_enter(&rsm_drv_data.drv_lock);
960 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
961 	rsm_drv_data.drv_state = RSM_DRV_NEW;
962 	mutex_exit(&rsm_drv_data.drv_lock);
963 
964 	ASSERT(rsm_suspend_list.list_head == NULL);
965 
966 	/*
967 	 * Release all resources, seglist, controller, ...
968 	 */
969 
970 	/* remove intersend queues */
971 	/* remove registered services */
972 
973 
974 	ddi_remove_minor_node(dip, DRIVER_NAME);
975 	rsm_dip = NULL;
976 
977 	/*
978 	 * Free minor zero resource
979 	 */
980 	{
981 		rsmresource_t *p;
982 
983 		p = rsmresource_free(RSM_DRIVER_MINOR);
984 		if (p) {
985 			mutex_destroy(&p->rsmrc_lock);
986 			kmem_free((void *)p, sizeof (*p));
987 		}
988 	}
989 
990 	/*
991 	 * Free resource table
992 	 */
993 
994 	rsmresource_destroy();
995 
996 	/*
997 	 * Free the hash tables
998 	 */
999 	rsmhash_free(&rsm_export_segs, rsm_hash_size);
1000 	rsmhash_free(&rsm_import_segs, rsm_hash_size);
1001 
1002 	kmem_free((void *)importer_list.bucket,
1003 	    rsm_hash_size * sizeof (importing_token_t *));
1004 	importer_list.bucket = NULL;
1005 
1006 
1007 	/* free barrier page */
1008 	if (bar_cookie != NULL) {
1009 		ddi_umem_free(bar_cookie);
1010 	}
1011 	bar_va = NULL;
1012 	bar_cookie = NULL;
1013 
1014 	/*
1015 	 * Free the memory allocated for the trash
1016 	 */
1017 	if (remap_cookie != NULL) {
1018 		ddi_umem_free(remap_cookie);
1019 	}
1020 	remap_cookie = NULL;
1021 
1022 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1023 
1024 	return (DDI_SUCCESS);
1025 }
1026 
1027 /*ARGSUSED*/
1028 static int
1029 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1030 {
1031 	register int error;
1032 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1033 
1034 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1035 
1036 	switch (infocmd) {
1037 	case DDI_INFO_DEVT2DEVINFO:
1038 		if (rsm_dip == NULL)
1039 			error = DDI_FAILURE;
1040 		else {
1041 			*result = (void *)rsm_dip;
1042 			error = DDI_SUCCESS;
1043 		}
1044 		break;
1045 	case DDI_INFO_DEVT2INSTANCE:
1046 		*result = (void *)0;
1047 		error = DDI_SUCCESS;
1048 		break;
1049 	default:
1050 		error = DDI_FAILURE;
1051 	}
1052 
1053 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1054 	return (error);
1055 }
1056 
1057 adapter_t *
1058 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1059 {
1060 	adapter_t *adapter;
1061 	char adapter_devname[MAXNAMELEN];
1062 	int instance;
1063 	DBG_DEFINE(category,
1064 	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1065 
1066 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1067 
1068 	instance = msg->cnum;
1069 
1070 	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1071 		return (NULL);
1072 	}
1073 
1074 	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1075 		return (NULL);
1076 
1077 	if (strcmp(adapter_devname, "loopback") == 0)
1078 		return (&loopback_adapter);
1079 
1080 	adapter = rsmka_lookup_adapter(adapter_devname, instance);
1081 
1082 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1083 
1084 	return (adapter);
1085 }
1086 
1087 
1088 /*
1089  * *********************** Resource Number Management ********************
1090  * All resources are stored in a simple hash table. The table is an array
1091  * of pointers to resource blks. Each blk contains:
1092  *	base	- base number of this blk
1093  *	used	- number of used slots in this blk.
1094  *	blks    - array of pointers to resource items.
1095  * An entry in a resource blk is empty if it's NULL.
1096  *
1097  * We start with no resource array. Each time we run out of slots, we
1098  * reallocate a new larger array and copy the pointer to the new array and
1099  * a new resource blk is allocated and added to the hash table.
1100  *
1101  * The resource control block contains:
1102  *      root    - array of pointer of resource blks
1103  *      sz      - current size of array.
1104  *      len     - last valid entry in array.
1105  *
1106  * A search operation based on a resource number is as follows:
1107  *      index = rnum / RESOURCE_BLKSZ;
1108  *      ASSERT(index < resource_block.len);
1109  *      ASSERT(index < resource_block.sz);
1110  *	offset = rnum % RESOURCE_BLKSZ;
1111  *      ASSERT(offset >= resource_block.root[index]->base);
1112  *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1113  *	return resource_block.root[index]->blks[offset];
1114  *
1115  * A resource blk is freed with its used count reachs zero.
1116  */
1117 static int
1118 rsmresource_alloc(minor_t *rnum)
1119 {
1120 
1121 	/* search for available resource slot */
1122 	int i, j, empty = -1;
1123 	rsmresource_blk_t *blk;
1124 
1125 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1126 	    "rsmresource_alloc enter\n"));
1127 
1128 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1129 
1130 	/* Try to find an empty slot */
1131 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1132 		blk = rsm_resource.rsmrc_root[i];
1133 		if (blk != NULL && blk->rsmrcblk_avail > 0) {
1134 			/* found an empty slot in this blk */
1135 			for (j = 0; j < RSMRC_BLKSZ; j++) {
1136 				if (blk->rsmrcblk_blks[j] == NULL) {
1137 					*rnum = (minor_t)
1138 					    (j + (i * RSMRC_BLKSZ));
1139 					/*
1140 					 * obey gen page limits
1141 					 */
1142 					if (*rnum >= max_segs + 1) {
1143 						if (empty < 0) {
1144 							rw_exit(&rsm_resource.
1145 							    rsmrc_lock);
1146 							DBG_PRINTF((
1147 							    RSM_KERNEL_ALL,
1148 							    RSM_ERR,
1149 							    "rsmresource"
1150 							    "_alloc failed:"
1151 							    "not enough res"
1152 							    "%d\n", *rnum));
1153 							return (
1154 RSMERR_INSUFFICIENT_RESOURCES);
1155 						} else {
1156 							/* use empty slot */
1157 							break;
1158 						}
1159 
1160 					}
1161 
1162 					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1163 					blk->rsmrcblk_avail--;
1164 					rw_exit(&rsm_resource.rsmrc_lock);
1165 					DBG_PRINTF((RSM_KERNEL_ALL,
1166 					    RSM_DEBUG_VERBOSE,
1167 					    "rsmresource_alloc done\n"));
1168 					return (RSM_SUCCESS);
1169 				}
1170 			}
1171 		} else if (blk == NULL && empty < 0) {
1172 			/* remember first empty slot */
1173 			empty = i;
1174 		}
1175 	}
1176 
1177 	/* Couldn't find anything, allocate a new blk */
1178 	/*
1179 	 * Do we need to reallocate the root array
1180 	 */
1181 	if (empty < 0) {
1182 		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1183 			/*
1184 			 * Allocate new array and copy current stuff into it
1185 			 */
1186 			rsmresource_blk_t	**p;
1187 			uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1188 								RSMRC_BLKSZ;
1189 			/*
1190 			 * Don't allocate more that max valid rnum
1191 			 */
1192 			if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1193 			    max_segs + 1) {
1194 				rw_exit(&rsm_resource.rsmrc_lock);
1195 				return (RSMERR_INSUFFICIENT_RESOURCES);
1196 			}
1197 
1198 			p = (rsmresource_blk_t **)kmem_zalloc(
1199 			    newsz * sizeof (*p),
1200 			    KM_SLEEP);
1201 
1202 			if (rsm_resource.rsmrc_root) {
1203 				uint_t oldsz;
1204 
1205 				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1206 				    (int)sizeof (*p));
1207 
1208 				/*
1209 				 * Copy old data into new space and
1210 				 * free old stuff
1211 				 */
1212 				bcopy(rsm_resource.rsmrc_root, p, oldsz);
1213 				kmem_free(rsm_resource.rsmrc_root, oldsz);
1214 			}
1215 
1216 			rsm_resource.rsmrc_root = p;
1217 			rsm_resource.rsmrc_sz = (int)newsz;
1218 		}
1219 
1220 		empty = rsm_resource.rsmrc_len;
1221 		rsm_resource.rsmrc_len++;
1222 	}
1223 
1224 	/*
1225 	 * Allocate a new blk
1226 	 */
1227 	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1228 	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1229 	rsm_resource.rsmrc_root[empty] = blk;
1230 	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1231 
1232 	/*
1233 	 * Allocate slot
1234 	 */
1235 
1236 	*rnum = (minor_t)(empty * RSMRC_BLKSZ);
1237 
1238 	/*
1239 	 * watch out not to exceed bounds of barrier page
1240 	 */
1241 	if (*rnum >= max_segs + 1) {
1242 		rw_exit(&rsm_resource.rsmrc_lock);
1243 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1244 		    "rsmresource_alloc failed %d\n", *rnum));
1245 
1246 		return (RSMERR_INSUFFICIENT_RESOURCES);
1247 	}
1248 	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1249 
1250 
1251 	rw_exit(&rsm_resource.rsmrc_lock);
1252 
1253 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1254 	    "rsmresource_alloc done\n"));
1255 
1256 	return (RSM_SUCCESS);
1257 }
1258 
1259 static rsmresource_t *
1260 rsmresource_free(minor_t rnum)
1261 {
1262 
1263 	/* search for available resource slot */
1264 	int i, j;
1265 	rsmresource_blk_t *blk;
1266 	rsmresource_t *p;
1267 
1268 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1269 	    "rsmresource_free enter\n"));
1270 
1271 	i = (int)(rnum / RSMRC_BLKSZ);
1272 	j = (int)(rnum % RSMRC_BLKSZ);
1273 
1274 	if (i >= rsm_resource.rsmrc_len) {
1275 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1276 		    "rsmresource_free done\n"));
1277 		return (NULL);
1278 	}
1279 
1280 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1281 
1282 	ASSERT(rsm_resource.rsmrc_root);
1283 	ASSERT(i < rsm_resource.rsmrc_len);
1284 	ASSERT(i < rsm_resource.rsmrc_sz);
1285 	blk = rsm_resource.rsmrc_root[i];
1286 	if (blk == NULL) {
1287 		rw_exit(&rsm_resource.rsmrc_lock);
1288 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1289 		    "rsmresource_free done\n"));
1290 		return (NULL);
1291 	}
1292 
1293 	ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1294 
1295 	p = blk->rsmrcblk_blks[j];
1296 	if (p == RSMRC_RESERVED) {
1297 		p = NULL;
1298 	}
1299 
1300 	blk->rsmrcblk_blks[j] = NULL;
1301 	blk->rsmrcblk_avail++;
1302 	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1303 		/* free this blk */
1304 		kmem_free(blk, sizeof (*blk));
1305 		rsm_resource.rsmrc_root[i] = NULL;
1306 	}
1307 
1308 	rw_exit(&rsm_resource.rsmrc_lock);
1309 
1310 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1311 	    "rsmresource_free done\n"));
1312 
1313 	return (p);
1314 }
1315 
1316 static rsmresource_t *
1317 rsmresource_lookup(minor_t rnum, int lock)
1318 {
1319 	int i, j;
1320 	rsmresource_blk_t *blk;
1321 	rsmresource_t *p;
1322 
1323 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1324 	    "rsmresource_lookup enter\n"));
1325 
1326 	/* Find resource and lock it in READER mode */
1327 	/* search for available resource slot */
1328 
1329 	i = (int)(rnum / RSMRC_BLKSZ);
1330 	j = (int)(rnum % RSMRC_BLKSZ);
1331 
1332 	if (i >= rsm_resource.rsmrc_len) {
1333 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1334 		    "rsmresource_lookup done\n"));
1335 		return (NULL);
1336 	}
1337 
1338 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1339 
1340 	blk = rsm_resource.rsmrc_root[i];
1341 	if (blk != NULL) {
1342 		ASSERT(i < rsm_resource.rsmrc_len);
1343 		ASSERT(i < rsm_resource.rsmrc_sz);
1344 
1345 		p = blk->rsmrcblk_blks[j];
1346 		if (lock == RSM_LOCK) {
1347 			if (p != RSMRC_RESERVED) {
1348 				mutex_enter(&p->rsmrc_lock);
1349 			} else {
1350 				p = NULL;
1351 			}
1352 		}
1353 	} else {
1354 		p = NULL;
1355 	}
1356 	rw_exit(&rsm_resource.rsmrc_lock);
1357 
1358 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1359 	    "rsmresource_lookup done\n"));
1360 
1361 	return (p);
1362 }
1363 
1364 static void
1365 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1366 {
1367 	/* Find resource and lock it in READER mode */
1368 	/* Caller can upgrade if need be */
1369 	/* search for available resource slot */
1370 	int i, j;
1371 	rsmresource_blk_t *blk;
1372 
1373 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1374 	    "rsmresource_insert enter\n"));
1375 
1376 	i = (int)(rnum / RSMRC_BLKSZ);
1377 	j = (int)(rnum % RSMRC_BLKSZ);
1378 
1379 	p->rsmrc_type = type;
1380 	p->rsmrc_num = rnum;
1381 
1382 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1383 
1384 	ASSERT(rsm_resource.rsmrc_root);
1385 	ASSERT(i < rsm_resource.rsmrc_len);
1386 	ASSERT(i < rsm_resource.rsmrc_sz);
1387 
1388 	blk = rsm_resource.rsmrc_root[i];
1389 	ASSERT(blk);
1390 
1391 	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1392 
1393 	blk->rsmrcblk_blks[j] = p;
1394 
1395 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1396 	    "rsmresource_insert done\n"));
1397 
1398 	rw_exit(&rsm_resource.rsmrc_lock);
1399 }
1400 
1401 static void
1402 rsmresource_destroy()
1403 {
1404 	int i, j;
1405 
1406 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1407 	    "rsmresource_destroy enter\n"));
1408 
1409 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1410 
1411 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1412 		rsmresource_blk_t	*blk;
1413 
1414 		blk = rsm_resource.rsmrc_root[i];
1415 		if (blk == NULL) {
1416 			continue;
1417 		}
1418 		for (j = 0; j < RSMRC_BLKSZ; j++) {
1419 			if (blk->rsmrcblk_blks[j] != NULL) {
1420 				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1421 				    "Not null slot %d, %lx\n", j,
1422 				    (size_t)blk->rsmrcblk_blks[j]));
1423 			}
1424 		}
1425 		kmem_free(blk, sizeof (*blk));
1426 		rsm_resource.rsmrc_root[i] = NULL;
1427 	}
1428 	if (rsm_resource.rsmrc_root) {
1429 		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1430 		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1431 		rsm_resource.rsmrc_root = NULL;
1432 		rsm_resource.rsmrc_len = 0;
1433 		rsm_resource.rsmrc_sz = 0;
1434 	}
1435 
1436 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1437 	    "rsmresource_destroy done\n"));
1438 
1439 	rw_exit(&rsm_resource.rsmrc_lock);
1440 }
1441 
1442 
1443 /* ******************** Generic Key Hash Table Management ********* */
1444 static rsmresource_t *
1445 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1446     rsm_resource_state_t state)
1447 {
1448 	rsmresource_t	*p;
1449 	uint_t		hashval;
1450 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1451 
1452 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1453 
1454 	hashval = rsmhash(key);
1455 
1456 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1457 	    key, hashval));
1458 
1459 	rw_enter(&rhash->rsmhash_rw, RW_READER);
1460 
1461 	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1462 
1463 	for (; p; p = p->rsmrc_next) {
1464 		if (p->rsmrc_key == key) {
1465 			/* acquire resource lock */
1466 			RSMRC_LOCK(p);
1467 			break;
1468 		}
1469 	}
1470 
1471 	rw_exit(&rhash->rsmhash_rw);
1472 
1473 	if (p != NULL && p->rsmrc_state != state) {
1474 		/* state changed, release lock and return null */
1475 		RSMRC_UNLOCK(p);
1476 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1477 		    "rsmhash_lookup done: state changed\n"));
1478 		return (NULL);
1479 	}
1480 
1481 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1482 
1483 	return (p);
1484 }
1485 
1486 static void
1487 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1488 {
1489 	rsmresource_t		*p, **back;
1490 	uint_t			hashval;
1491 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1492 
1493 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1494 
1495 	hashval = rsmhash(rcelm->rsmrc_key);
1496 
1497 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1498 	    rcelm->rsmrc_key, hashval));
1499 
1500 	/*
1501 	 * It's ok not to find the segment.
1502 	 */
1503 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1504 
1505 	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1506 
1507 	for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1508 		if (p == rcelm) {
1509 			*back = rcelm->rsmrc_next;
1510 			break;
1511 		}
1512 	}
1513 
1514 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1515 
1516 	rw_exit(&rhash->rsmhash_rw);
1517 }
1518 
1519 static int
1520 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1521     int dup_check, rsm_resource_state_t state)
1522 {
1523 	rsmresource_t	*p = NULL, **bktp;
1524 	uint_t		hashval;
1525 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1526 
1527 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1528 
1529 	/* lock table */
1530 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1531 
1532 	/*
1533 	 * If the current resource state is other than the state passed in
1534 	 * then the resource is (probably) already on the list. eg. for an
1535 	 * import segment if the state is not RSM_STATE_NEW then it's on the
1536 	 * list already.
1537 	 */
1538 	RSMRC_LOCK(new);
1539 	if (new->rsmrc_state != state) {
1540 		RSMRC_UNLOCK(new);
1541 		rw_exit(&rhash->rsmhash_rw);
1542 		return (RSMERR_BAD_SEG_HNDL);
1543 	}
1544 
1545 	hashval = rsmhash(key);
1546 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1547 
1548 	if (dup_check) {
1549 		/*
1550 		 * Used for checking export segments; don't want to have
1551 		 * the same key used for multiple segments.
1552 		 */
1553 
1554 		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1555 
1556 		for (; p; p = p->rsmrc_next) {
1557 			if (p->rsmrc_key == key) {
1558 				RSMRC_UNLOCK(new);
1559 				break;
1560 			}
1561 		}
1562 	}
1563 
1564 	if (p == NULL) {
1565 		/* Key doesn't exist, add it */
1566 
1567 		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1568 
1569 		new->rsmrc_key = key;
1570 		new->rsmrc_next = *bktp;
1571 		*bktp = new;
1572 	}
1573 
1574 	rw_exit(&rhash->rsmhash_rw);
1575 
1576 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1577 
1578 	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1579 }
1580 
1581 /*
1582  * XOR each byte of the key.
1583  */
1584 static uint_t
1585 rsmhash(rsm_memseg_id_t key)
1586 {
1587 	uint_t	hash = key;
1588 
1589 	hash ^=  (key >> 8);
1590 	hash ^=  (key >> 16);
1591 	hash ^=  (key >> 24);
1592 
1593 	return (hash % rsm_hash_size);
1594 
1595 }
1596 
1597 /*
1598  * generic function to get a specific bucket
1599  */
1600 static void *
1601 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1602 {
1603 
1604 	if (rhash->bucket == NULL)
1605 		return (NULL);
1606 	else
1607 		return ((void *)rhash->bucket[hashval]);
1608 }
1609 
1610 /*
1611  * generic function to get a specific bucket's address
1612  */
1613 static void **
1614 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1615 {
1616 	if (rhash->bucket == NULL)
1617 		return (NULL);
1618 	else
1619 		return ((void **)&(rhash->bucket[hashval]));
1620 }
1621 
1622 /*
1623  * generic function to alloc a hash table
1624  */
1625 static void
1626 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1627 {
1628 	rhash->bucket = (rsmresource_t **)
1629 	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1630 }
1631 
1632 /*
1633  * generic function to free a hash table
1634  */
1635 static void
1636 rsmhash_free(rsmhash_table_t *rhash, int size)
1637 {
1638 
1639 	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1640 	rhash->bucket = NULL;
1641 
1642 }
/* *********************** Exported Segment Key Management ************ */

/*
 * Wrappers around the generic hash routines for the export segment
 * table. rsmexport_add() checks for duplicate keys and expects the
 * segment to be in RSM_STATE_BIND; rsmexport_lookup() only returns
 * segments in RSM_STATE_EXPORT. All macro arguments are parenthesized
 * so that arbitrary expressions can be passed safely.
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading. No duplicate check is done and the segment is
 * expected to be in RSM_STATE_NEW.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1668 /*
1669  *	#define	rsmimport_lookup(key)	\
1670  *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1671  */
1672 
1673 /*
1674  * increase the ref count and make the import segment point to the
1675  * shared data structure. Return a pointer to the share data struct
1676  * and the shared data struct is locked upon return
1677  */
1678 static rsm_import_share_t *
1679 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1680     rsmseg_t *segp)
1681 {
1682 	uint_t		hash;
1683 	rsmresource_t		*p;
1684 	rsm_import_share_t	*shdatap;
1685 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1686 
1687 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1688 
1689 	hash = rsmhash(key);
1690 	/* lock table */
1691 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1692 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1693 	    key, hash));
1694 
1695 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1696 
1697 	for (; p; p = p->rsmrc_next) {
1698 		/*
1699 		 * Look for an entry that is importing the same exporter
1700 		 * with the share data structure allocated.
1701 		 */
1702 		if ((p->rsmrc_key == key) &&
1703 		    (p->rsmrc_node == node) &&
1704 		    (p->rsmrc_adapter == adapter) &&
1705 		    (((rsmseg_t *)p)->s_share != NULL)) {
1706 			shdatap = ((rsmseg_t *)p)->s_share;
1707 			break;
1708 		}
1709 	}
1710 
1711 	if (p == NULL) {
1712 		/* we are the first importer, create the shared data struct */
1713 		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1714 		shdatap->rsmsi_state = RSMSI_STATE_NEW;
1715 		shdatap->rsmsi_segid = key;
1716 		shdatap->rsmsi_node = node;
1717 		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1718 		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1719 	}
1720 
1721 	rsmseglock_acquire(segp);
1722 
1723 	/* we grab the shared lock before returning from this function */
1724 	mutex_enter(&shdatap->rsmsi_lock);
1725 
1726 	shdatap->rsmsi_refcnt++;
1727 	segp->s_share = shdatap;
1728 
1729 	rsmseglock_release(segp);
1730 
1731 	rw_exit(&rsm_import_segs.rsmhash_rw);
1732 
1733 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1734 
1735 	return (shdatap);
1736 }
1737 
1738 /*
1739  * the shared data structure should be locked before calling
1740  * rsmsharecv_signal().
1741  * Change the state and signal any waiting segments.
1742  */
1743 void
1744 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1745 {
1746 	ASSERT(rsmsharelock_held(seg));
1747 
1748 	if (seg->s_share->rsmsi_state == oldstate) {
1749 		seg->s_share->rsmsi_state = newstate;
1750 		cv_broadcast(&seg->s_share->rsmsi_cv);
1751 	}
1752 }
1753 
1754 /*
1755  * Add to the hash table
1756  */
1757 static void
1758 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1759     void *cookie)
1760 {
1761 
1762 	importing_token_t	*head;
1763 	importing_token_t	*new_token;
1764 	int			index;
1765 
1766 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1767 
1768 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1769 
1770 	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1771 	new_token->importing_node = node;
1772 	new_token->key = key;
1773 	new_token->import_segment_cookie = cookie;
1774 	new_token->importing_adapter_hwaddr = hwaddr;
1775 
1776 	index = rsmhash(key);
1777 
1778 	mutex_enter(&importer_list.lock);
1779 
1780 	head = importer_list.bucket[index];
1781 	importer_list.bucket[index] = new_token;
1782 	new_token->next = head;
1783 	mutex_exit(&importer_list.lock);
1784 
1785 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1786 }
1787 
1788 static void
1789 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1790 {
1791 
1792 	importing_token_t	*prev, *token = NULL;
1793 	int			index;
1794 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1795 
1796 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1797 
1798 	index = rsmhash(key);
1799 
1800 	mutex_enter(&importer_list.lock);
1801 
1802 	token = importer_list.bucket[index];
1803 
1804 	prev = token;
1805 	while (token != NULL) {
1806 		if (token->importing_node == node &&
1807 		    token->import_segment_cookie == cookie) {
1808 			if (prev == token)
1809 				importer_list.bucket[index] = token->next;
1810 			else
1811 				prev->next = token->next;
1812 			kmem_free((void *)token, sizeof (*token));
1813 			break;
1814 		} else {
1815 			prev = token;
1816 			token = token->next;
1817 		}
1818 	}
1819 
1820 	mutex_exit(&importer_list.lock);
1821 
1822 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1823 
1824 
1825 }
1826 
1827 /* **************************Segment Structure Management ************* */
1828 
1829 /*
1830  * Free segment structure
1831  */
1832 static void
1833 rsmseg_free(rsmseg_t *seg)
1834 {
1835 
1836 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1837 
1838 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1839 
1840 	/* need to take seglock here to avoid race with rsmmap_unmap() */
1841 	rsmseglock_acquire(seg);
1842 	if (seg->s_ckl != NULL) {
1843 		/* Segment is still busy */
1844 		seg->s_state = RSM_STATE_END;
1845 		rsmseglock_release(seg);
1846 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1847 		    "rsmseg_free done\n"));
1848 		return;
1849 	}
1850 
1851 	rsmseglock_release(seg);
1852 
1853 	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1854 
1855 	/*
1856 	 * If it's an importer decrement the refcount
1857 	 * and if its down to zero free the shared data structure.
1858 	 * This is where failures during rsm_connect() are unrefcounted
1859 	 */
1860 	if (seg->s_share != NULL) {
1861 
1862 		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1863 
1864 		rsmsharelock_acquire(seg);
1865 
1866 		ASSERT(seg->s_share->rsmsi_refcnt > 0);
1867 
1868 		seg->s_share->rsmsi_refcnt--;
1869 
1870 		if (seg->s_share->rsmsi_refcnt == 0) {
1871 			rsmsharelock_release(seg);
1872 			mutex_destroy(&seg->s_share->rsmsi_lock);
1873 			cv_destroy(&seg->s_share->rsmsi_cv);
1874 			kmem_free((void *)(seg->s_share),
1875 			    sizeof (rsm_import_share_t));
1876 		} else {
1877 			rsmsharelock_release(seg);
1878 		}
1879 		/*
1880 		 * The following needs to be done after any
1881 		 * rsmsharelock calls which use seg->s_share.
1882 		 */
1883 		seg->s_share = NULL;
1884 	}
1885 
1886 	cv_destroy(&seg->s_cv);
1887 	mutex_destroy(&seg->s_lock);
1888 	rsmacl_free(seg->s_acl, seg->s_acl_len);
1889 	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1890 	if (seg->s_adapter)
1891 		rsmka_release_adapter(seg->s_adapter);
1892 
1893 	kmem_free((void *)seg, sizeof (*seg));
1894 
1895 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1896 
1897 }
1898 
1899 
1900 static rsmseg_t *
1901 rsmseg_alloc(minor_t num, struct cred *cred)
1902 {
1903 	rsmseg_t	*new;
1904 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1905 
1906 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1907 	/*
1908 	 * allocate memory for new segment. This should be a segkmem cache.
1909 	 */
1910 	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1911 
1912 	new->s_state = RSM_STATE_NEW;
1913 	new->s_minor	= num;
1914 	new->s_acl_len	= 0;
1915 	new->s_cookie = NULL;
1916 	new->s_adapter = NULL;
1917 
1918 	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1919 	/* we don't have a key yet, will set at export/connect */
1920 	new->s_uid  = crgetuid(cred);
1921 	new->s_gid  = crgetgid(cred);
1922 
1923 	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1924 	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1925 
1926 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1927 
1928 	return (new);
1929 }
1930 
1931 /* ******************************** Driver Open/Close/Poll *************** */
1932 
1933 /*ARGSUSED1*/
1934 static int
1935 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1936 {
1937 	minor_t rnum;
1938 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1939 
1940 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1941 	/*
1942 	 * Char only
1943 	 */
1944 	if (otyp != OTYP_CHR) {
1945 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1946 		return (EINVAL);
1947 	}
1948 
1949 	/*
1950 	 * Only zero can be opened, clones are used for resources.
1951 	 */
1952 	if (getminor(*devp) != RSM_DRIVER_MINOR) {
1953 		DBG_PRINTF((category, RSM_ERR,
1954 		    "rsm_open: bad minor %d\n", getminor(*devp)));
1955 		return (ENODEV);
1956 	}
1957 
1958 	if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1959 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1960 		return (EPERM);
1961 	}
1962 
1963 	if (!(flag & FWRITE)) {
1964 		/*
1965 		 * The library function _rsm_librsm_init calls open for
1966 		 * /dev/rsm with flag set to O_RDONLY.  We want a valid
1967 		 * file descriptor to be returned for minor device zero.
1968 		 */
1969 
1970 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1971 		    "rsm_open RDONLY done\n"));
1972 		return (DDI_SUCCESS);
1973 	}
1974 
1975 	/*
1976 	 * - allocate new minor number and segment.
1977 	 * - add segment to list of all segments.
1978 	 * - set minordev data to segment
1979 	 * - update devp argument to new device
1980 	 * - update s_cred to cred; make sure you do crhold(cred);
1981 	 */
1982 
1983 	/* allocate a new resource number */
1984 	if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1985 		/*
1986 		 * We will bind this minor to a specific resource in first
1987 		 * ioctl
1988 		 */
1989 		*devp = makedevice(getmajor(*devp), rnum);
1990 	} else {
1991 		return (EAGAIN);
1992 	}
1993 
1994 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1995 	return (DDI_SUCCESS);
1996 }
1997 
1998 static void
1999 rsmseg_close(rsmseg_t *seg, int force_flag)
2000 {
2001 	int e = RSM_SUCCESS;
2002 
2003 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2004 
2005 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2006 
2007 	rsmseglock_acquire(seg);
2008 	if (!force_flag && (seg->s_hdr.rsmrc_type ==
2009 	    RSM_RESOURCE_EXPORT_SEGMENT)) {
2010 		/*
2011 		 * If we are processing rsm_close wait for force_destroy
2012 		 * processing to complete since force_destroy processing
2013 		 * needs to finish first before we can free the segment.
2014 		 * force_destroy is only for export segments
2015 		 */
2016 		while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2017 			cv_wait(&seg->s_cv, &seg->s_lock);
2018 		}
2019 	}
2020 	rsmseglock_release(seg);
2021 
2022 	/* It's ok to read the state without a lock */
2023 	switch (seg->s_state) {
2024 	case RSM_STATE_EXPORT:
2025 	case RSM_STATE_EXPORT_QUIESCING:
2026 	case RSM_STATE_EXPORT_QUIESCED:
2027 		e = rsm_unpublish(seg, 1);
2028 		/* FALLTHRU */
2029 	case RSM_STATE_BIND_QUIESCED:
2030 		/* FALLTHRU */
2031 	case RSM_STATE_BIND:
2032 		e = rsm_unbind(seg);
2033 		if (e != RSM_SUCCESS && force_flag == 1)
2034 			return;
2035 		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2036 		/* FALLTHRU */
2037 	case RSM_STATE_NEW_QUIESCED:
2038 		rsmseglock_acquire(seg);
2039 		seg->s_state = RSM_STATE_NEW;
2040 		cv_broadcast(&seg->s_cv);
2041 		rsmseglock_release(seg);
2042 		break;
2043 	case RSM_STATE_NEW:
2044 		break;
2045 	case RSM_STATE_ZOMBIE:
2046 		/*
2047 		 * Segments in this state have been removed off the
2048 		 * exported segments list and have been unpublished
2049 		 * and unbind. These segments have been removed during
2050 		 * a callback to the rsm_export_force_destroy, which
2051 		 * is called for the purpose of unlocking these
2052 		 * exported memory segments when a process exits but
2053 		 * leaves the segments locked down since rsm_close is
2054 		 * is not called for the segments. This can happen
2055 		 * when a process calls fork or exec and then exits.
2056 		 * Once the segments are in the ZOMBIE state, all that
2057 		 * remains is to destroy them when rsm_close is called.
2058 		 * This is done here. Thus, for such segments the
2059 		 * the state is changed to new so that later in this
2060 		 * function rsmseg_free is called.
2061 		 */
2062 		rsmseglock_acquire(seg);
2063 		seg->s_state = RSM_STATE_NEW;
2064 		rsmseglock_release(seg);
2065 		break;
2066 	case RSM_STATE_MAP_QUIESCE:
2067 	case RSM_STATE_ACTIVE:
2068 		/* Disconnect will handle the unmap */
2069 	case RSM_STATE_CONN_QUIESCE:
2070 	case RSM_STATE_CONNECT:
2071 	case RSM_STATE_DISCONNECT:
2072 		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2073 		(void) rsm_disconnect(seg);
2074 		break;
2075 	case RSM_STATE_MAPPING:
2076 		/*FALLTHRU*/
2077 	case RSM_STATE_END:
2078 		DBG_PRINTF((category, RSM_ERR,
2079 		    "Invalid segment state %d in rsm_close\n", seg->s_state));
2080 		break;
2081 	default:
2082 		DBG_PRINTF((category, RSM_ERR,
2083 		    "Invalid segment state %d in rsm_close\n", seg->s_state));
2084 		break;
2085 	}
2086 
2087 	/*
2088 	 * check state.
2089 	 * - make sure you do crfree(s_cred);
2090 	 * release segment and minor number
2091 	 */
2092 	ASSERT(seg->s_state == RSM_STATE_NEW);
2093 
2094 	/*
2095 	 * The export_force_destroy callback is created to unlock
2096 	 * the exported segments of a process
2097 	 * when the process does a fork or exec and then exits calls this
2098 	 * function with the force flag set to 1 which indicates that the
2099 	 * segment state must be converted to ZOMBIE. This state means that the
2100 	 * segments still exist and have been unlocked and most importantly the
2101 	 * only operation allowed is to destroy them on an rsm_close.
2102 	 */
2103 	if (force_flag) {
2104 		rsmseglock_acquire(seg);
2105 		seg->s_state = RSM_STATE_ZOMBIE;
2106 		rsmseglock_release(seg);
2107 	} else {
2108 		rsmseg_free(seg);
2109 	}
2110 
2111 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2112 }
2113 
2114 static int
2115 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2116 {
2117 	minor_t	rnum = getminor(dev);
2118 	rsmresource_t *res;
2119 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2120 
2121 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2122 
2123 	flag = flag; cred = cred;
2124 
2125 	if (otyp != OTYP_CHR)
2126 		return (EINVAL);
2127 
2128 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2129 
2130 	/*
2131 	 * At this point we are the last reference to the resource.
2132 	 * Free resource number from resource table.
2133 	 * It's ok to remove number before we free the segment.
2134 	 * We need to lock the resource to protect against remote calls.
2135 	 */
2136 	if (rnum == RSM_DRIVER_MINOR ||
2137 	    (res = rsmresource_free(rnum)) == NULL) {
2138 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2139 		return (DDI_SUCCESS);
2140 	}
2141 
2142 	switch (res->rsmrc_type) {
2143 	case RSM_RESOURCE_EXPORT_SEGMENT:
2144 	case RSM_RESOURCE_IMPORT_SEGMENT:
2145 		rsmseg_close((rsmseg_t *)res, 0);
2146 		break;
2147 	case RSM_RESOURCE_BAR:
2148 		DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2149 		break;
2150 	default:
2151 		break;
2152 	}
2153 
2154 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2155 
2156 	return (DDI_SUCCESS);
2157 }
2158 
2159 /*
2160  * rsm_inc_pgcnt
2161  *
2162  * Description: increment rsm page counter.
2163  *
2164  * Parameters:	pgcnt_t	pnum;	number of pages to be used
2165  *
2166  * Returns:	RSM_SUCCESS	if memory limit not exceeded
2167  *		ENOSPC		if memory limit exceeded. In this case, the
2168  *				page counter remains unchanged.
2169  *
2170  */
2171 static int
2172 rsm_inc_pgcnt(pgcnt_t pnum)
2173 {
2174 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2175 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2176 		return (RSM_SUCCESS);
2177 	}
2178 
2179 	mutex_enter(&rsm_pgcnt_lock);
2180 
2181 	if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2182 		/* ensure that limits have not been exceeded */
2183 		mutex_exit(&rsm_pgcnt_lock);
2184 		return (RSMERR_INSUFFICIENT_MEM);
2185 	}
2186 
2187 	rsm_pgcnt += pnum;
2188 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2189 	    rsm_pgcnt));
2190 	mutex_exit(&rsm_pgcnt_lock);
2191 
2192 	return (RSM_SUCCESS);
2193 }
2194 
2195 /*
2196  * rsm_dec_pgcnt
2197  *
2198  * Description:	decrement rsm page counter.
2199  *
2200  * Parameters:	pgcnt_t	pnum;	number of pages freed
2201  *
2202  */
2203 static void
2204 rsm_dec_pgcnt(pgcnt_t pnum)
2205 {
2206 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2207 
2208 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2209 		return;
2210 	}
2211 
2212 	mutex_enter(&rsm_pgcnt_lock);
2213 	ASSERT(rsm_pgcnt >= pnum);
2214 	rsm_pgcnt -= pnum;
2215 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2216 	    rsm_pgcnt));
2217 	mutex_exit(&rsm_pgcnt_lock);
2218 }
2219 
2220 static struct umem_callback_ops rsm_as_ops = {
2221 	UMEM_CALLBACK_VERSION, /* version number */
2222 	rsm_export_force_destroy,
2223 };
2224 
2225 static int
2226 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2227     proc_t *procp)
2228 {
2229 	int error = RSM_SUCCESS;
2230 	ulong_t pnum;
2231 	struct umem_callback_ops *callbackops = &rsm_as_ops;
2232 
2233 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2234 
2235 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2236 
2237 	/*
2238 	 * Make sure vaddr and len are aligned on a page boundary
2239 	 */
2240 	if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2241 		return (RSMERR_BAD_ADDR);
2242 	}
2243 
2244 	if (len & (PAGESIZE - 1)) {
2245 		return (RSMERR_BAD_LENGTH);
2246 	}
2247 
2248 	/*
2249 	 * Find number of pages
2250 	 */
2251 	pnum = btopr(len);
2252 	error = rsm_inc_pgcnt(pnum);
2253 	if (error != RSM_SUCCESS) {
2254 		DBG_PRINTF((category, RSM_ERR,
2255 		    "rsm_bind_pages:mem limit exceeded\n"));
2256 		return (RSMERR_INSUFFICIENT_MEM);
2257 	}
2258 
2259 	error = umem_lockmemory(vaddr, len,
2260 	    DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2261 	    cookie,
2262 	    callbackops, procp);
2263 
2264 	if (error) {
2265 		rsm_dec_pgcnt(pnum);
2266 		DBG_PRINTF((category, RSM_ERR,
2267 		    "rsm_bind_pages:ddi_umem_lock failed\n"));
2268 		/*
2269 		 * ddi_umem_lock, in the case of failure, returns one of
2270 		 * the following three errors. These are translated into
2271 		 * the RSMERR namespace and returned.
2272 		 */
2273 		if (error == EFAULT)
2274 			return (RSMERR_BAD_ADDR);
2275 		else if (error == EACCES)
2276 			return (RSMERR_PERM_DENIED);
2277 		else
2278 			return (RSMERR_INSUFFICIENT_MEM);
2279 	}
2280 
2281 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2282 
2283 	return (error);
2284 
2285 }
2286 
2287 static int
2288 rsm_unbind_pages(rsmseg_t *seg)
2289 {
2290 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2291 
2292 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2293 
2294 	ASSERT(rsmseglock_held(seg));
2295 
2296 	if (seg->s_cookie != NULL) {
2297 		/* unlock address range */
2298 		ddi_umem_unlock(seg->s_cookie);
2299 		rsm_dec_pgcnt(btopr(seg->s_len));
2300 		seg->s_cookie = NULL;
2301 	}
2302 
2303 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2304 
2305 	return (RSM_SUCCESS);
2306 }
2307 
2308 
2309 static int
2310 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2311 {
2312 	int e;
2313 	adapter_t *adapter;
2314 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2315 
2316 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2317 
2318 	adapter = rsm_getadapter(msg, mode);
2319 	if (adapter == NULL) {
2320 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2321 		    "rsm_bind done:no adapter\n"));
2322 		return (RSMERR_CTLR_NOT_PRESENT);
2323 	}
2324 
2325 	/* lock address range */
2326 	if (msg->vaddr == NULL) {
2327 		rsmka_release_adapter(adapter);
2328 		DBG_PRINTF((category, RSM_ERR,
2329 		    "rsm: rsm_bind done: invalid vaddr\n"));
2330 		return (RSMERR_BAD_ADDR);
2331 	}
2332 	if (msg->len <= 0) {
2333 		rsmka_release_adapter(adapter);
2334 		DBG_PRINTF((category, RSM_ERR,
2335 		    "rsm_bind: invalid length\n"));
2336 		return (RSMERR_BAD_LENGTH);
2337 	}
2338 
2339 	/* Lock segment */
2340 	rsmseglock_acquire(seg);
2341 
2342 	while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2343 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2344 			DBG_PRINTF((category, RSM_DEBUG,
2345 			    "rsm_bind done: cv_wait INTERRUPTED"));
2346 			rsmka_release_adapter(adapter);
2347 			rsmseglock_release(seg);
2348 			return (RSMERR_INTERRUPTED);
2349 		}
2350 	}
2351 
2352 	ASSERT(seg->s_state == RSM_STATE_NEW);
2353 
2354 	ASSERT(seg->s_cookie == NULL);
2355 
2356 	e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2357 	if (e == RSM_SUCCESS) {
2358 		seg->s_flags |= RSM_USER_MEMORY;
2359 		if (msg->perm & RSM_ALLOW_REBIND) {
2360 			seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2361 		}
2362 		if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2363 			seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2364 		}
2365 		seg->s_region.r_vaddr = msg->vaddr;
2366 		/*
2367 		 * Set the s_pid value in the segment structure. This is used
2368 		 * to identify exported segments belonging to a particular
2369 		 * process so that when the process exits, these segments can
2370 		 * be unlocked forcefully even if rsm_close is not called on
2371 		 * process exit since there maybe other processes referencing
2372 		 * them (for example on a fork or exec).
2373 		 * The s_pid value is also used to authenticate the process
2374 		 * doing a publish or unpublish on the export segment. Only
2375 		 * the creator of the export segment has a right to do a
2376 		 * publish or unpublish and unbind on the segment.
2377 		 */
2378 		seg->s_pid = ddi_get_pid();
2379 		seg->s_len = msg->len;
2380 		seg->s_state = RSM_STATE_BIND;
2381 		seg->s_adapter = adapter;
2382 		seg->s_proc = curproc;
2383 	} else {
2384 		rsmka_release_adapter(adapter);
2385 		DBG_PRINTF((category, RSM_WARNING,
2386 		    "unable to lock down pages\n"));
2387 	}
2388 
2389 	msg->rnum = seg->s_minor;
2390 	/* Unlock segment */
2391 	rsmseglock_release(seg);
2392 
2393 	if (e == RSM_SUCCESS) {
2394 		/* copyout the resource number */
2395 #ifdef _MULTI_DATAMODEL
2396 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2397 			rsm_ioctlmsg32_t msg32;
2398 
2399 			msg32.rnum = msg->rnum;
2400 			if (ddi_copyout((caddr_t)&msg32.rnum,
2401 			    (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2402 			    sizeof (minor_t), mode)) {
2403 				rsmka_release_adapter(adapter);
2404 				e = RSMERR_BAD_ADDR;
2405 			}
2406 		}
2407 #endif
2408 		if (ddi_copyout((caddr_t)&msg->rnum,
2409 		    (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2410 		    sizeof (minor_t), mode)) {
2411 			rsmka_release_adapter(adapter);
2412 			e = RSMERR_BAD_ADDR;
2413 		}
2414 	}
2415 
2416 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2417 
2418 	return (e);
2419 }
2420 
2421 static void
2422 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2423     rsm_memseg_id_t ex_segid,
2424     ddi_umem_cookie_t cookie)
2425 
2426 {
2427 	rsmresource_t	*p = NULL;
2428 	rsmhash_table_t *rhash = &rsm_import_segs;
2429 	uint_t		index;
2430 
2431 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2432 	    "rsm_remap_local_importers enter\n"));
2433 
2434 	index = rsmhash(ex_segid);
2435 
2436 	rw_enter(&rhash->rsmhash_rw, RW_READER);
2437 
2438 	p = rsmhash_getbkt(rhash, index);
2439 
2440 	for (; p; p = p->rsmrc_next) {
2441 		rsmseg_t *seg = (rsmseg_t *)p;
2442 		rsmseglock_acquire(seg);
2443 		/*
2444 		 * Change the s_cookie value of only the local importers
2445 		 * which have been mapped (in state RSM_STATE_ACTIVE).
2446 		 * Note that there is no need to change the s_cookie value
2447 		 * if the imported segment is in RSM_STATE_MAPPING since
2448 		 * eventually the s_cookie will be updated via the mapping
2449 		 * functionality.
2450 		 */
2451 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2452 		    (seg->s_state == RSM_STATE_ACTIVE)) {
2453 			seg->s_cookie = cookie;
2454 		}
2455 		rsmseglock_release(seg);
2456 	}
2457 	rw_exit(&rhash->rsmhash_rw);
2458 
2459 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2460 	    "rsm_remap_local_importers done\n"));
2461 }
2462 
2463 static int
2464 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2465 {
2466 	int e;
2467 	adapter_t *adapter;
2468 	ddi_umem_cookie_t cookie;
2469 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2470 
2471 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2472 
2473 	/* Check for permissions to rebind */
2474 	if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2475 		return (RSMERR_REBIND_NOT_ALLOWED);
2476 	}
2477 
2478 	if (seg->s_pid != ddi_get_pid() &&
2479 	    ddi_get_pid() != 0) {
2480 		DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2481 		return (RSMERR_NOT_CREATOR);
2482 	}
2483 
2484 	/*
2485 	 * We will not be allowing partial rebind and hence length passed
2486 	 * in must be same as segment length
2487 	 */
2488 	if (msg->vaddr == NULL) {
2489 		DBG_PRINTF((category, RSM_ERR,
2490 		    "rsm_rebind done: null msg->vaddr\n"));
2491 		return (RSMERR_BAD_ADDR);
2492 	}
2493 	if (msg->len != seg->s_len) {
2494 		DBG_PRINTF((category, RSM_ERR,
2495 		    "rsm_rebind: invalid length\n"));
2496 		return (RSMERR_BAD_LENGTH);
2497 	}
2498 
2499 	/* Lock segment */
2500 	rsmseglock_acquire(seg);
2501 
2502 	while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2503 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2504 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2505 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2506 			rsmseglock_release(seg);
2507 			DBG_PRINTF((category, RSM_DEBUG,
2508 			    "rsm_rebind done: cv_wait INTERRUPTED"));
2509 			return (RSMERR_INTERRUPTED);
2510 		}
2511 	}
2512 
2513 	/* verify segment state */
2514 	if ((seg->s_state != RSM_STATE_BIND) &&
2515 	    (seg->s_state != RSM_STATE_EXPORT)) {
2516 		/* Unlock segment */
2517 		rsmseglock_release(seg);
2518 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2519 		    "rsm_rebind done: invalid state\n"));
2520 		return (RSMERR_BAD_SEG_HNDL);
2521 	}
2522 
2523 	ASSERT(seg->s_cookie != NULL);
2524 
2525 	if (msg->vaddr == seg->s_region.r_vaddr) {
2526 		rsmseglock_release(seg);
2527 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2528 		return (RSM_SUCCESS);
2529 	}
2530 
2531 	e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2532 	if (e == RSM_SUCCESS) {
2533 		struct buf *xbuf;
2534 		dev_t sdev = 0;
2535 		rsm_memory_local_t mem;
2536 
2537 		xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2538 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
2539 		ASSERT(xbuf != NULL);
2540 
2541 		mem.ms_type = RSM_MEM_BUF;
2542 		mem.ms_bp = xbuf;
2543 
2544 		adapter = seg->s_adapter;
2545 		e = adapter->rsmpi_ops->rsm_rebind(
2546 		    seg->s_handle.out, 0, &mem,
2547 		    RSM_RESOURCE_DONTWAIT, NULL);
2548 
2549 		if (e == RSM_SUCCESS) {
2550 			/*
2551 			 * unbind the older pages, and unload local importers;
2552 			 * but don't disconnect importers
2553 			 */
2554 			(void) rsm_unbind_pages(seg);
2555 			seg->s_cookie = cookie;
2556 			seg->s_region.r_vaddr = msg->vaddr;
2557 			rsm_remap_local_importers(my_nodeid, seg->s_segid,
2558 			    cookie);
2559 		} else {
2560 			/*
2561 			 * Unbind the pages associated with "cookie" by the
2562 			 * rsm_bind_pages calls prior to this. This is
2563 			 * similar to what is done in the rsm_unbind_pages
2564 			 * routine for the seg->s_cookie.
2565 			 */
2566 			ddi_umem_unlock(cookie);
2567 			rsm_dec_pgcnt(btopr(msg->len));
2568 			DBG_PRINTF((category, RSM_ERR,
2569 			    "rsm_rebind failed with %d\n", e));
2570 		}
2571 		/*
2572 		 * At present there is no dependency on the existence of xbuf.
2573 		 * So we can free it here. If in the future this changes, it can
2574 		 * be freed sometime during the segment destroy.
2575 		 */
2576 		freerbuf(xbuf);
2577 	}
2578 
2579 	/* Unlock segment */
2580 	rsmseglock_release(seg);
2581 
2582 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2583 
2584 	return (e);
2585 }
2586 
2587 static int
2588 rsm_unbind(rsmseg_t *seg)
2589 {
2590 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2591 
2592 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2593 
2594 	rsmseglock_acquire(seg);
2595 
2596 	/* verify segment state */
2597 	if ((seg->s_state != RSM_STATE_BIND) &&
2598 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2599 		rsmseglock_release(seg);
2600 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2601 		    "rsm_unbind: invalid state\n"));
2602 		return (RSMERR_BAD_SEG_HNDL);
2603 	}
2604 
2605 	/* unlock current range */
2606 	(void) rsm_unbind_pages(seg);
2607 
2608 	if (seg->s_state == RSM_STATE_BIND) {
2609 		seg->s_state = RSM_STATE_NEW;
2610 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2611 		seg->s_state = RSM_STATE_NEW_QUIESCED;
2612 	}
2613 
2614 	rsmseglock_release(seg);
2615 
2616 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2617 
2618 	return (RSM_SUCCESS);
2619 }
2620 
2621 /* **************************** Exporter Access List Management ******* */
2622 static void
2623 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2624 {
2625 	int	acl_sz;
2626 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2627 
2628 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2629 
2630 	/* acl could be NULL */
2631 
2632 	if (acl != NULL && acl_len > 0) {
2633 		acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2634 		kmem_free((void *)acl, acl_sz);
2635 	}
2636 
2637 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2638 }
2639 
2640 static void
2641 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2642 {
2643 	int	acl_sz;
2644 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2645 
2646 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2647 
2648 	if (acl != NULL && acl_len > 0) {
2649 		acl_sz = acl_len * sizeof (rsm_access_entry_t);
2650 		kmem_free((void *)acl, acl_sz);
2651 	}
2652 
2653 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2654 
2655 }
2656 
2657 static int
2658 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2659     rsmapi_access_entry_t **list, int *len, int loopback)
2660 {
2661 	rsmapi_access_entry_t *acl;
2662 	int	acl_len;
2663 	int i;
2664 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2665 
2666 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2667 
2668 	*len = 0;
2669 	*list = NULL;
2670 
2671 	acl_len = msg->acl_len;
2672 	if ((loopback && acl_len > 1) || (acl_len < 0) ||
2673 	    (acl_len > MAX_NODES)) {
2674 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2675 		    "rsmacl_build done: acl invalid\n"));
2676 		return (RSMERR_BAD_ACL);
2677 	}
2678 
2679 	if (acl_len > 0 && acl_len <= MAX_NODES) {
2680 		size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2681 
2682 		acl = kmem_alloc(acl_size, KM_SLEEP);
2683 
2684 		if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2685 		    acl_size, mode)) {
2686 			kmem_free((void *) acl, acl_size);
2687 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2688 			    "rsmacl_build done: BAD_ADDR\n"));
2689 			return (RSMERR_BAD_ADDR);
2690 		}
2691 
2692 		/*
2693 		 * Verify access list
2694 		 */
2695 		for (i = 0; i < acl_len; i++) {
2696 			if (acl[i].ae_node > MAX_NODES ||
2697 			    (loopback && (acl[i].ae_node != my_nodeid)) ||
2698 			    acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2699 				/* invalid entry */
2700 				kmem_free((void *) acl, acl_size);
2701 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2702 				    "rsmacl_build done: EINVAL\n"));
2703 				return (RSMERR_BAD_ACL);
2704 			}
2705 		}
2706 
2707 		*len = acl_len;
2708 		*list = acl;
2709 	}
2710 
2711 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2712 
2713 	return (DDI_SUCCESS);
2714 }
2715 
2716 static int
2717 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2718     int acl_len, adapter_t *adapter)
2719 {
2720 	rsm_access_entry_t *acl;
2721 	rsm_addr_t hwaddr;
2722 	int i;
2723 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2724 
2725 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2726 
2727 	if (src != NULL) {
2728 		size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2729 		acl = kmem_alloc(acl_size, KM_SLEEP);
2730 
2731 		/*
2732 		 * translate access list
2733 		 */
2734 		for (i = 0; i < acl_len; i++) {
2735 			if (src[i].ae_node == my_nodeid) {
2736 				acl[i].ae_addr = adapter->hwaddr;
2737 			} else {
2738 				hwaddr = get_remote_hwaddr(adapter,
2739 				    src[i].ae_node);
2740 				if ((int64_t)hwaddr < 0) {
2741 					/* invalid hwaddr */
2742 					kmem_free((void *) acl, acl_size);
2743 					DBG_PRINTF((category,
2744 					    RSM_DEBUG_VERBOSE,
2745 					    "rsmpiacl_create done:"
2746 					    "EINVAL hwaddr\n"));
2747 					return (RSMERR_INTERNAL_ERROR);
2748 				}
2749 				acl[i].ae_addr = hwaddr;
2750 			}
2751 			/* rsmpi understands only RSM_PERM_XXXX */
2752 			acl[i].ae_permission =
2753 			    src[i].ae_permission & RSM_PERM_RDWR;
2754 		}
2755 		*dest = acl;
2756 	} else {
2757 		*dest = NULL;
2758 	}
2759 
2760 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2761 
2762 	return (RSM_SUCCESS);
2763 }
2764 
2765 static int
2766 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2767     rsmipc_reply_t *reply)
2768 {
2769 
2770 	int		i;
2771 	rsmseg_t	*seg;
2772 	rsm_memseg_id_t key = req->rsmipc_key;
2773 	rsm_permission_t perm = req->rsmipc_perm;
2774 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2775 
2776 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2777 	    "rsmsegacl_validate enter\n"));
2778 
2779 	/*
2780 	 * Find segment and grab its lock. The reason why we grab the segment
2781 	 * lock in side the search is to avoid the race when the segment is
2782 	 * being deleted and we already have a pointer to it.
2783 	 */
2784 	seg = rsmexport_lookup(key);
2785 	if (!seg) {
2786 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2787 		    "rsmsegacl_validate done: %u ENXIO\n", key));
2788 		return (RSMERR_SEG_NOT_PUBLISHED);
2789 	}
2790 
2791 	ASSERT(rsmseglock_held(seg));
2792 	ASSERT(seg->s_state == RSM_STATE_EXPORT);
2793 
2794 	/*
2795 	 * We implement a 2-level protection scheme.
2796 	 * First, we check if local/remote host has access rights.
2797 	 * Second, we check if the user has access rights.
2798 	 *
2799 	 * This routine only validates the rnode access_list
2800 	 */
2801 	if (seg->s_acl_len > 0) {
2802 		/*
2803 		 * Check host access list
2804 		 */
2805 		ASSERT(seg->s_acl != NULL);
2806 		for (i = 0; i < seg->s_acl_len; i++) {
2807 			if (seg->s_acl[i].ae_node == rnode) {
2808 			    perm &= seg->s_acl[i].ae_permission;
2809 			    goto found;
2810 			}
2811 		}
2812 		/* rnode is not found in the list */
2813 		rsmseglock_release(seg);
2814 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2815 		    "rsmsegacl_validate done: EPERM\n"));
2816 		return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2817 	} else {
2818 		/* use default owner creation umask */
2819 		perm &= seg->s_mode;
2820 	}
2821 
2822 found:
2823 	/* update perm for this node */
2824 	reply->rsmipc_mode = perm;
2825 	reply->rsmipc_uid = seg->s_uid;
2826 	reply->rsmipc_gid = seg->s_gid;
2827 	reply->rsmipc_segid = seg->s_segid;
2828 	reply->rsmipc_seglen = seg->s_len;
2829 
2830 	/*
2831 	 * Perm of requesting node is valid; source will validate user
2832 	 */
2833 	rsmseglock_release(seg);
2834 
2835 	/*
2836 	 * Add the importer to the list right away, if connect fails
2837 	 * the importer will ask the exporter to remove it.
2838 	 */
2839 	importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2840 	    req->rsmipc_segment_cookie);
2841 
2842 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2843 
2844 	return (RSM_SUCCESS);
2845 }
2846 
2847 
2848 /* ************************** Exporter Calls ************************* */
2849 
2850 static int
2851 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2852 {
2853 	int			e;
2854 	int			acl_len;
2855 	rsmapi_access_entry_t	*acl;
2856 	rsm_access_entry_t	*rsmpi_acl;
2857 	rsm_memory_local_t	mem;
2858 	struct buf		*xbuf;
2859 	dev_t 			sdev = 0;
2860 	adapter_t		*adapter;
2861 	rsm_memseg_id_t		segment_id = 0;
2862 	int			loopback_flag = 0;
2863 	int			create_flags = 0;
2864 	rsm_resource_callback_t	callback_flag;
2865 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2866 
2867 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2868 
2869 	if (seg->s_adapter == &loopback_adapter)
2870 		loopback_flag = 1;
2871 
2872 	if (seg->s_pid != ddi_get_pid() &&
2873 	    ddi_get_pid() != 0) {
2874 		DBG_PRINTF((category, RSM_ERR,
2875 		    "rsm_publish: Not creator\n"));
2876 		return (RSMERR_NOT_CREATOR);
2877 	}
2878 
2879 	/*
2880 	 * Get per node access list
2881 	 */
2882 	e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2883 	if (e != DDI_SUCCESS) {
2884 		DBG_PRINTF((category, RSM_ERR,
2885 		    "rsm_publish done: rsmacl_build failed\n"));
2886 		return (e);
2887 	}
2888 
2889 	/*
2890 	 * The application provided msg->key is used for resolving a
2891 	 * segment id according to the following:
2892 	 *    key = 0   		Kernel Agent selects the segment id
2893 	 *    key <= RSM_DLPI_ID_END	Reserved for system usage except
2894 	 *				RSMLIB range
2895 	 *    key < RSM_USER_APP_ID_BASE segment id = key
2896 	 *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2897 	 *
2898 	 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2899 	 * overflows to zero after 0x80000000 allocations.
2900 	 * An algorithm is needed which allows reinitialization and provides
2901 	 * for reallocation after overflow.  For now, ENOMEM is returned
2902 	 * once the overflow condition has occurred.
2903 	 */
2904 	if (msg->key == 0) {
2905 		mutex_enter(&rsm_lock);
2906 		segment_id = rsm_nextavail_segmentid;
2907 		if (segment_id != 0) {
2908 			rsm_nextavail_segmentid++;
2909 			mutex_exit(&rsm_lock);
2910 		} else {
2911 			mutex_exit(&rsm_lock);
2912 			DBG_PRINTF((category, RSM_ERR,
2913 			    "rsm_publish done: no more keys avlbl\n"));
2914 			return (RSMERR_INSUFFICIENT_RESOURCES);
2915 		}
2916 	} else	if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2917 		/* range reserved for internal use by base/ndi libraries */
2918 		segment_id = msg->key;
2919 	else	if (msg->key <= RSM_DLPI_ID_END)
2920 		return (RSMERR_RESERVED_SEGID);
2921 	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2922 		segment_id = msg->key;
2923 	else {
2924 		DBG_PRINTF((category, RSM_ERR,
2925 		    "rsm_publish done: invalid key %u\n", msg->key));
2926 		return (RSMERR_RESERVED_SEGID);
2927 	}
2928 
2929 	/* Add key to exportlist; The segment lock is held on success */
2930 	e = rsmexport_add(seg, segment_id);
2931 	if (e) {
2932 		rsmacl_free(acl, acl_len);
2933 		DBG_PRINTF((category, RSM_ERR,
2934 		    "rsm_publish done: export_add failed: %d\n", e));
2935 		return (e);
2936 	}
2937 
2938 	seg->s_segid = segment_id;
2939 
2940 	if ((seg->s_state != RSM_STATE_BIND) &&
2941 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2942 		/* state changed since then, free acl and return */
2943 		rsmseglock_release(seg);
2944 		rsmexport_rm(seg);
2945 		rsmacl_free(acl, acl_len);
2946 		DBG_PRINTF((category, RSM_ERR,
2947 		    "rsm_publish done: segment in wrong state: %d\n",
2948 		    seg->s_state));
2949 		return (RSMERR_BAD_SEG_HNDL);
2950 	}
2951 
2952 	/*
2953 	 * If this is for a local memory handle and permissions are zero,
2954 	 * then the surrogate segment is very large and we want to skip
2955 	 * allocation of DVMA space.
2956 	 *
2957 	 * Careful!  If the user didn't use an ACL list, acl will be a NULL
2958 	 * pointer.  Check that before dereferencing it.
2959 	 */
2960 	if (acl != (rsmapi_access_entry_t *)NULL) {
2961 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2962 			goto skipdriver;
2963 	}
2964 
2965 	/* create segment  */
2966 	xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2967 	    sdev, 0, NULL, DDI_UMEM_SLEEP);
2968 	ASSERT(xbuf != NULL);
2969 
2970 	mem.ms_type = RSM_MEM_BUF;
2971 	mem.ms_bp = xbuf;
2972 
2973 	/* This call includes a bind operations */
2974 
2975 	adapter = seg->s_adapter;
2976 	/*
2977 	 * create a acl list with hwaddr for RSMPI publish
2978 	 */
2979 	e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2980 
2981 	if (e != RSM_SUCCESS) {
2982 		rsmseglock_release(seg);
2983 		rsmexport_rm(seg);
2984 		rsmacl_free(acl, acl_len);
2985 		freerbuf(xbuf);
2986 		DBG_PRINTF((category, RSM_ERR,
2987 		    "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2988 		return (e);
2989 	}
2990 
2991 	if (seg->s_state == RSM_STATE_BIND) {
2992 		/* create segment  */
2993 
2994 		/* This call includes a bind operations */
2995 
2996 		if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2997 			create_flags = RSM_ALLOW_UNBIND_REBIND;
2998 		}
2999 
3000 		if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
3001 			callback_flag  = RSM_RESOURCE_DONTWAIT;
3002 		} else {
3003 			callback_flag  = RSM_RESOURCE_SLEEP;
3004 		}
3005 
3006 		e = adapter->rsmpi_ops->rsm_seg_create(
3007 		    adapter->rsmpi_handle,
3008 		    &seg->s_handle.out, seg->s_len,
3009 		    create_flags, &mem,
3010 		    callback_flag, NULL);
3011 		/*
3012 		 * At present there is no dependency on the existence of xbuf.
3013 		 * So we can free it here. If in the future this changes, it can
3014 		 * be freed sometime during the segment destroy.
3015 		 */
3016 		freerbuf(xbuf);
3017 
3018 		if (e != RSM_SUCCESS) {
3019 			rsmseglock_release(seg);
3020 			rsmexport_rm(seg);
3021 			rsmacl_free(acl, acl_len);
3022 			rsmpiacl_free(rsmpi_acl, acl_len);
3023 			DBG_PRINTF((category, RSM_ERR,
3024 			    "rsm_publish done: export_create failed: %d\n", e));
3025 			/*
3026 			 * The following assertion ensures that the two errors
3027 			 * related to the length and its alignment do not occur
3028 			 * since they have been checked during export_create
3029 			 */
3030 			ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3031 			    e != RSMERR_BAD_LENGTH);
3032 			if (e == RSMERR_NOT_MEM)
3033 				e = RSMERR_INSUFFICIENT_MEM;
3034 
3035 			return (e);
3036 		}
3037 		/* export segment, this should create an IMMU mapping */
3038 		e = adapter->rsmpi_ops->rsm_publish(
3039 		    seg->s_handle.out,
3040 		    rsmpi_acl, acl_len,
3041 		    seg->s_segid,
3042 		    RSM_RESOURCE_DONTWAIT, NULL);
3043 
3044 		if (e != RSM_SUCCESS) {
3045 			adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3046 			rsmseglock_release(seg);
3047 			rsmexport_rm(seg);
3048 			rsmacl_free(acl, acl_len);
3049 			rsmpiacl_free(rsmpi_acl, acl_len);
3050 			DBG_PRINTF((category, RSM_ERR,
3051 			    "rsm_publish done: export_publish failed: %d\n",
3052 			    e));
3053 			return (e);
3054 		}
3055 	}
3056 
3057 	seg->s_acl_in = rsmpi_acl;
3058 
3059 skipdriver:
3060 	/* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3061 	seg->s_acl_len	= acl_len;
3062 	seg->s_acl	= acl;
3063 
3064 	if (seg->s_state == RSM_STATE_BIND) {
3065 		seg->s_state = RSM_STATE_EXPORT;
3066 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3067 		seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3068 		cv_broadcast(&seg->s_cv);
3069 	}
3070 
3071 	rsmseglock_release(seg);
3072 
3073 	/*
3074 	 * If the segment id was solicited, then return it in
3075 	 * the original incoming message.
3076 	 */
3077 	if (msg->key == 0) {
3078 		msg->key = segment_id;
3079 #ifdef _MULTI_DATAMODEL
3080 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3081 			rsm_ioctlmsg32_t msg32;
3082 
3083 			msg32.key = msg->key;
3084 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3085 			    "rsm_publish done\n"));
3086 			return (ddi_copyout((caddr_t)&msg32,
3087 			    (caddr_t)dataptr, sizeof (msg32), mode));
3088 		}
3089 #endif
3090 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3091 		    "rsm_publish done\n"));
3092 		return (ddi_copyout((caddr_t)msg,
3093 		    (caddr_t)dataptr, sizeof (*msg), mode));
3094 	}
3095 
3096 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3097 	return (DDI_SUCCESS);
3098 }
3099 
3100 /*
3101  * This function modifies the access control list of an already published
3102  * segment.  There is no effect on import segments which are already
3103  * connected.
3104  */
3105 static int
3106 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3107 {
3108 	rsmapi_access_entry_t	*new_acl, *old_acl, *tmp_acl;
3109 	rsm_access_entry_t	*rsmpi_new_acl, *rsmpi_old_acl;
3110 	int			new_acl_len, old_acl_len, tmp_acl_len;
3111 	int			e, i;
3112 	adapter_t		*adapter;
3113 	int			loopback_flag = 0;
3114 	rsm_memseg_id_t		key;
3115 	rsm_permission_t	permission;
3116 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3117 
3118 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3119 
3120 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3121 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3122 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3123 		return (RSMERR_SEG_NOT_PUBLISHED);
3124 
3125 	if (seg->s_pid != ddi_get_pid() &&
3126 	    ddi_get_pid() != 0) {
3127 		DBG_PRINTF((category, RSM_ERR,
3128 		    "rsm_republish: Not owner\n"));
3129 		return (RSMERR_NOT_CREATOR);
3130 	}
3131 
3132 	if (seg->s_adapter == &loopback_adapter)
3133 		loopback_flag = 1;
3134 
3135 	/*
3136 	 * Build new list first
3137 	 */
3138 	e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3139 	if (e) {
3140 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3141 		    "rsm_republish done: rsmacl_build failed %d", e));
3142 		return (e);
3143 	}
3144 
3145 	/* Lock segment */
3146 	rsmseglock_acquire(seg);
3147 	/*
3148 	 * a republish is in progress - REPUBLISH message is being
3149 	 * sent to the importers so wait for it to complete OR
3150 	 * wait till DR completes
3151 	 */
3152 	while (((seg->s_state == RSM_STATE_EXPORT) &&
3153 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3154 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3155 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3156 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3157 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3158 			    "rsm_republish done: cv_wait  INTERRUPTED"));
3159 			rsmseglock_release(seg);
3160 			rsmacl_free(new_acl, new_acl_len);
3161 			return (RSMERR_INTERRUPTED);
3162 		}
3163 	}
3164 
3165 	/* recheck if state is valid */
3166 	if (seg->s_state != RSM_STATE_EXPORT) {
3167 		rsmseglock_release(seg);
3168 		rsmacl_free(new_acl, new_acl_len);
3169 		return (RSMERR_SEG_NOT_PUBLISHED);
3170 	}
3171 
3172 	key = seg->s_key;
3173 	old_acl = seg->s_acl;
3174 	old_acl_len = seg->s_acl_len;
3175 
3176 	seg->s_acl = new_acl;
3177 	seg->s_acl_len = new_acl_len;
3178 
3179 	/*
3180 	 * This call will only be meaningful if and when the interconnect
3181 	 * layer makes use of the access list
3182 	 */
3183 	adapter = seg->s_adapter;
3184 	/*
3185 	 * create a acl list with hwaddr for RSMPI publish
3186 	 */
3187 	e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3188 
3189 	if (e != RSM_SUCCESS) {
3190 		seg->s_acl = old_acl;
3191 		seg->s_acl_len = old_acl_len;
3192 		rsmseglock_release(seg);
3193 		rsmacl_free(new_acl, new_acl_len);
3194 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3195 		    "rsm_republish done: rsmpiacl_create failed %d", e));
3196 		return (e);
3197 	}
3198 	rsmpi_old_acl = seg->s_acl_in;
3199 	seg->s_acl_in = rsmpi_new_acl;
3200 
3201 	e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3202 	    seg->s_acl_in, seg->s_acl_len,
3203 	    RSM_RESOURCE_DONTWAIT, NULL);
3204 
3205 	if (e != RSM_SUCCESS) {
3206 		seg->s_acl = old_acl;
3207 		seg->s_acl_in = rsmpi_old_acl;
3208 		seg->s_acl_len = old_acl_len;
3209 		rsmseglock_release(seg);
3210 		rsmacl_free(new_acl, new_acl_len);
3211 		rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3212 
3213 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3214 		    "rsm_republish done: rsmpi republish failed %d\n", e));
3215 		return (e);
3216 	}
3217 
3218 	/* create a tmp copy of the new acl */
3219 	tmp_acl_len = new_acl_len;
3220 	if (tmp_acl_len > 0) {
3221 		tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3222 		for (i = 0; i < tmp_acl_len; i++) {
3223 			tmp_acl[i].ae_node = new_acl[i].ae_node;
3224 			tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3225 		}
3226 		/*
3227 		 * The default permission of a node which was in the old
3228 		 * ACL but not in the new ACL is 0 ie no access.
3229 		 */
3230 		permission = 0;
3231 	} else {
3232 		/*
3233 		 * NULL acl means all importers can connect and
3234 		 * default permission will be owner creation umask
3235 		 */
3236 		tmp_acl = NULL;
3237 		permission = seg->s_mode;
3238 	}
3239 
3240 	/* make other republishers to wait for republish to complete */
3241 	seg->s_flags |= RSM_REPUBLISH_WAIT;
3242 
3243 	rsmseglock_release(seg);
3244 
3245 	/* send the new perms to the importing nodes */
3246 	rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3247 
3248 	rsmseglock_acquire(seg);
3249 	seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3250 	/* wake up any one waiting for republish to complete */
3251 	cv_broadcast(&seg->s_cv);
3252 	rsmseglock_release(seg);
3253 
3254 	rsmacl_free(tmp_acl, tmp_acl_len);
3255 	rsmacl_free(old_acl, old_acl_len);
3256 	rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3257 
3258 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3259 	return (DDI_SUCCESS);
3260 }
3261 
3262 static int
3263 rsm_unpublish(rsmseg_t *seg, int mode)
3264 {
3265 	rsmapi_access_entry_t	*acl;
3266 	rsm_access_entry_t	*rsmpi_acl;
3267 	int			acl_len;
3268 	int			e;
3269 	clock_t			ticks;
3270 	adapter_t *adapter;
3271 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3272 
3273 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3274 
3275 	if (seg->s_pid != ddi_get_pid() &&
3276 	    ddi_get_pid() != 0) {
3277 		DBG_PRINTF((category, RSM_ERR,
3278 		    "rsm_unpublish: Not creator\n"));
3279 		return (RSMERR_NOT_CREATOR);
3280 	}
3281 
3282 	rsmseglock_acquire(seg);
3283 	/*
3284 	 * wait for QUIESCING to complete here before rsmexport_rm
3285 	 * is called because the SUSPEND_COMPLETE mesg which changes
3286 	 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3287 	 * signals the cv_wait needs to find it in the hashtable.
3288 	 */
3289 	while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3290 	    ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3291 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3292 			rsmseglock_release(seg);
3293 			DBG_PRINTF((category, RSM_ERR,
3294 			    "rsm_unpublish done: cv_wait INTR qscing"
3295 			    "getv/putv in progress"));
3296 			return (RSMERR_INTERRUPTED);
3297 		}
3298 	}
3299 
3300 	/* verify segment state */
3301 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3302 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3303 		rsmseglock_release(seg);
3304 		DBG_PRINTF((category, RSM_ERR,
3305 		    "rsm_unpublish done: bad state %x\n", seg->s_state));
3306 		return (RSMERR_SEG_NOT_PUBLISHED);
3307 	}
3308 
3309 	rsmseglock_release(seg);
3310 
3311 	rsmexport_rm(seg);
3312 
3313 	rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3314 
3315 	rsmseglock_acquire(seg);
3316 	/*
3317 	 * wait for republish to complete
3318 	 */
3319 	while ((seg->s_state == RSM_STATE_EXPORT) &&
3320 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3321 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3322 			DBG_PRINTF((category, RSM_ERR,
3323 			    "rsm_unpublish done: cv_wait INTR repubing"));
3324 			rsmseglock_release(seg);
3325 			return (RSMERR_INTERRUPTED);
3326 		}
3327 	}
3328 
3329 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3330 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3331 		DBG_PRINTF((category, RSM_ERR,
3332 		    "rsm_unpublish done: invalid state"));
3333 		rsmseglock_release(seg);
3334 		return (RSMERR_SEG_NOT_PUBLISHED);
3335 	}
3336 
3337 	/*
3338 	 * check for putv/get surrogate segment which was not published
3339 	 * to the driver.
3340 	 *
3341 	 * Be certain to see if there is an ACL first!  If this segment was
3342 	 * not published with an ACL, acl will be a null pointer.  Check
3343 	 * that before dereferencing it.
3344 	 */
3345 	acl = seg->s_acl;
3346 	if (acl != (rsmapi_access_entry_t *)NULL) {
3347 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3348 			goto bypass;
3349 	}
3350 
3351 	/* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3352 	if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3353 		goto bypass;
3354 
3355 	adapter = seg->s_adapter;
3356 	for (;;) {
3357 		if (seg->s_state != RSM_STATE_EXPORT) {
3358 			rsmseglock_release(seg);
3359 			DBG_PRINTF((category, RSM_ERR,
3360 			    "rsm_unpublish done: bad state %x\n",
3361 			    seg->s_state));
3362 			return (RSMERR_SEG_NOT_PUBLISHED);
3363 		}
3364 
3365 		/* unpublish from adapter */
3366 		e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3367 
3368 		if (e == RSM_SUCCESS) {
3369 			break;
3370 		}
3371 
3372 		if (e == RSMERR_SEG_IN_USE && mode == 1) {
3373 			/*
3374 			 * wait for unpublish to succeed, it's busy.
3375 			 */
3376 			seg->s_flags |= RSM_EXPORT_WAIT;
3377 
3378 			/* wait for a max of 1 ms - this is an empirical */
3379 			/* value that was found by some minimal testing  */
3380 			/* can be fine tuned when we have better numbers */
3381 			/* A long term fix would be to send cv_signal	 */
3382 			/* from the intr callback routine		 */
3383 			(void) drv_getparm(LBOLT, &ticks);
3384 			ticks += drv_usectohz(1000);
3385 			/* currently nobody signals this wait		*/
3386 			(void) cv_timedwait(&seg->s_cv, &seg->s_lock, ticks);
3387 
3388 			DBG_PRINTF((category, RSM_ERR,
3389 			    "rsm_unpublish: SEG_IN_USE\n"));
3390 
3391 			seg->s_flags &= ~RSM_EXPORT_WAIT;
3392 		} else {
3393 			if (mode == 1) {
3394 				DBG_PRINTF((category, RSM_ERR,
3395 				    "rsm:rsmpi unpublish err %x\n", e));
3396 				seg->s_state = RSM_STATE_BIND;
3397 			}
3398 			rsmseglock_release(seg);
3399 			return (e);
3400 		}
3401 	}
3402 
3403 	/* Free segment */
3404 	e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3405 
3406 	if (e != RSM_SUCCESS) {
3407 		DBG_PRINTF((category, RSM_ERR,
3408 		    "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3409 		    seg->s_key, e));
3410 	}
3411 
3412 bypass:
3413 	acl = seg->s_acl;
3414 	rsmpi_acl = seg->s_acl_in;
3415 	acl_len = seg->s_acl_len;
3416 
3417 	seg->s_acl = NULL;
3418 	seg->s_acl_in = NULL;
3419 	seg->s_acl_len = 0;
3420 
3421 	if (seg->s_state == RSM_STATE_EXPORT) {
3422 		seg->s_state = RSM_STATE_BIND;
3423 	} else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3424 		seg->s_state = RSM_STATE_BIND_QUIESCED;
3425 		cv_broadcast(&seg->s_cv);
3426 	}
3427 
3428 	rsmseglock_release(seg);
3429 
3430 	rsmacl_free(acl, acl_len);
3431 	rsmpiacl_free(rsmpi_acl, acl_len);
3432 
3433 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3434 
3435 	return (DDI_SUCCESS);
3436 }
3437 
3438 /*
3439  * Called from rsm_unpublish to force an unload and disconnection of all
3440  * importers of the unpublished segment.
3441  *
3442  * First build the list of segments requiring a force disconnect, then
3443  * send a request for each.
3444  */
3445 static void
3446 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3447     rsm_node_id_t ex_nodeid)
3448 {
3449 	rsmipc_request_t 	request;
3450 	importing_token_t	*prev_token, *token, *tmp_token, *tokp;
3451 	importing_token_t	*force_disconnect_list = NULL;
3452 	int			index;
3453 
3454 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3455 	    "rsm_send_importer_disconnects enter\n"));
3456 
3457 	index = rsmhash(ex_segid);
3458 
3459 	mutex_enter(&importer_list.lock);
3460 
3461 	prev_token = NULL;
3462 	token = importer_list.bucket[index];
3463 
3464 	while (token != NULL) {
3465 		if (token->key == ex_segid) {
3466 			/*
3467 			 * take it off the importer list and add it
3468 			 * to the force disconnect list.
3469 			 */
3470 			if (prev_token == NULL)
3471 				importer_list.bucket[index] = token->next;
3472 			else
3473 				prev_token->next = token->next;
3474 			tmp_token = token;
3475 			token = token->next;
3476 			if (force_disconnect_list == NULL) {
3477 				force_disconnect_list = tmp_token;
3478 				tmp_token->next = NULL;
3479 			} else {
3480 				tokp = force_disconnect_list;
3481 				/*
3482 				 * make sure that the tmp_token's node
3483 				 * is not already on the force disconnect
3484 				 * list.
3485 				 */
3486 				while (tokp != NULL) {
3487 					if (tokp->importing_node ==
3488 					    tmp_token->importing_node) {
3489 						break;
3490 					}
3491 					tokp = tokp->next;
3492 				}
3493 				if (tokp == NULL) {
3494 					tmp_token->next =
3495 					    force_disconnect_list;
3496 					force_disconnect_list = tmp_token;
3497 				} else {
3498 					kmem_free((void *)tmp_token,
3499 					    sizeof (*token));
3500 				}
3501 			}
3502 
3503 		} else {
3504 			prev_token = token;
3505 			token = token->next;
3506 		}
3507 	}
3508 	mutex_exit(&importer_list.lock);
3509 
3510 	token = force_disconnect_list;
3511 	while (token != NULL) {
3512 		if (token->importing_node == my_nodeid) {
3513 			rsm_force_unload(ex_nodeid, ex_segid,
3514 			    DISCONNECT);
3515 		} else {
3516 			request.rsmipc_hdr.rsmipc_type =
3517 			    RSMIPC_MSG_DISCONNECT;
3518 			request.rsmipc_key = token->key;
3519 			(void) rsmipc_send(token->importing_node,
3520 				    &request,
3521 				    RSM_NO_REPLY);
3522 		}
3523 		tmp_token = token;
3524 		token = token->next;
3525 		kmem_free((void *)tmp_token, sizeof (*token));
3526 	}
3527 
3528 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3529 			"rsm_send_importer_disconnects done\n"));
3530 }
3531 
3532 /*
3533  * This function is used as a callback for unlocking the pages locked
3534  * down by a process which then does a fork or an exec.
3535  * It marks the export segments corresponding to umem cookie given by
3536  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3537  * destroyed later when an rsm_close occurs).
3538  */
3539 static void
3540 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3541 {
3542 	rsmresource_blk_t *blk;
3543 	rsmresource_t *p;
3544 	rsmseg_t *eseg = NULL;
3545 	int i, j;
3546 	int found = 0;
3547 
3548 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3549 	    "rsm_export_force_destroy enter\n"));
3550 
3551 	/*
3552 	 * Walk the resource list and locate the export segment (either
3553 	 * in the BIND or the EXPORT state) which corresponds to the
3554 	 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3555 	 * Change the state to ZOMBIE by calling rsmseg_close with the
3556 	 * force_flag argument (the second argument) set to 1. Also,
3557 	 * unpublish and unbind the segment, but don't free it. Free it
3558 	 * only on a rsm_close call for the segment.
3559 	 */
3560 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3561 
3562 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3563 		blk = rsm_resource.rsmrc_root[i];
3564 		if (blk == NULL) {
3565 			continue;
3566 		}
3567 
3568 		for (j = 0; j < RSMRC_BLKSZ; j++) {
3569 			p = blk->rsmrcblk_blks[j];
3570 			if ((p != NULL) && (p != RSMRC_RESERVED) &&
3571 			    (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3572 				eseg = (rsmseg_t *)p;
3573 				if (eseg->s_cookie != ck)
3574 					continue; /* continue searching */
3575 				/*
3576 				 * Found the segment, set flag to indicate
3577 				 * force destroy processing is in progress
3578 				 */
3579 				rsmseglock_acquire(eseg);
3580 				eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3581 				rsmseglock_release(eseg);
3582 				found = 1;
3583 				break;
3584 			}
3585 		}
3586 
3587 		if (found)
3588 			break;
3589 	}
3590 
3591 	rw_exit(&rsm_resource.rsmrc_lock);
3592 
3593 	if (found) {
3594 		ASSERT(eseg != NULL);
3595 		/* call rsmseg_close with force flag set to 1 */
3596 		rsmseg_close(eseg, 1);
3597 		/*
3598 		 * force destroy processing done, clear flag and signal any
3599 		 * thread waiting in rsmseg_close.
3600 		 */
3601 		rsmseglock_acquire(eseg);
3602 		eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3603 		cv_broadcast(&eseg->s_cv);
3604 		rsmseglock_release(eseg);
3605 	}
3606 
3607 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3608 	    "rsm_export_force_destroy done\n"));
3609 }
3610 
3611 /* ******************************* Remote Calls *********************** */
3612 static void
3613 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3614 {
3615 	rsmipc_reply_t reply;
3616 	DBG_DEFINE(category,
3617 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3618 
3619 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3620 	    "rsm_intr_segconnect enter\n"));
3621 
3622 	reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3623 
3624 	reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3625 	reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3626 
3627 	(void) rsmipc_send(src, NULL, &reply);
3628 
3629 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3630 	    "rsm_intr_segconnect done\n"));
3631 }
3632 
3633 
3634 /*
3635  * When an exported segment is unpublished the exporter sends an ipc
3636  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3637  * calls this function.  The import list is scanned; segments which match the
3638  * exported segment id are unloaded and disconnected.
3639  *
3640  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3641  *
3642  */
3643 static void
3644 rsm_force_unload(rsm_node_id_t src_nodeid,
3645     rsm_memseg_id_t ex_segid,
3646     boolean_t disconnect_flag)
3647 
3648 {
3649 	rsmresource_t	*p = NULL;
3650 	rsmhash_table_t *rhash = &rsm_import_segs;
3651 	uint_t		index;
3652 	DBG_DEFINE(category,
3653 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3654 
3655 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3656 
3657 	index = rsmhash(ex_segid);
3658 
3659 	rw_enter(&rhash->rsmhash_rw, RW_READER);
3660 
3661 	p = rsmhash_getbkt(rhash, index);
3662 
3663 	for (; p; p = p->rsmrc_next) {
3664 		rsmseg_t *seg = (rsmseg_t *)p;
3665 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3666 			/*
3667 			 * In order to make rsmseg_unload and rsm_force_unload
3668 			 * thread safe, acquire the segment lock here.
3669 			 * rsmseg_unload is responsible for releasing the lock.
3670 			 * rsmseg_unload releases the lock just before a call
3671 			 * to rsmipc_send or in case of an early exit which
3672 			 * occurs if the segment was in the state
3673 			 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3674 			 */
3675 			rsmseglock_acquire(seg);
3676 			if (disconnect_flag)
3677 				seg->s_flags |= RSM_FORCE_DISCONNECT;
3678 			rsmseg_unload(seg);
3679 		}
3680 	}
3681 	rw_exit(&rhash->rsmhash_rw);
3682 
3683 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3684 }
3685 
3686 static void
3687 rsm_intr_reply(rsmipc_msghdr_t *msg)
3688 {
3689 	/*
3690 	 * Find slot for cookie in reply.
3691 	 * Match sequence with sequence in cookie
3692 	 * If no match; return
3693 	 * Try to grap lock of slot, if locked return
3694 	 * copy data into reply slot area
3695 	 * signal waiter
3696 	 */
3697 	rsmipc_slot_t 	*slot;
3698 	rsmipc_cookie_t	*cookie;
3699 	void *data = (void *) msg;
3700 	size_t size = sizeof (rsmipc_reply_t);
3701 	DBG_DEFINE(category,
3702 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3703 
3704 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3705 
3706 	cookie = &msg->rsmipc_cookie;
3707 	if (cookie->ic.index >= RSMIPC_SZ) {
3708 		DBG_PRINTF((category, RSM_ERR,
3709 		    "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3710 		return;
3711 	}
3712 
3713 	ASSERT(cookie->ic.index < RSMIPC_SZ);
3714 	slot = &rsm_ipc.slots[cookie->ic.index];
3715 	mutex_enter(&slot->rsmipc_lock);
3716 	if (slot->rsmipc_cookie.value == cookie->value) {
3717 		/* found a match */
3718 		if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3719 			bcopy(data, slot->rsmipc_data, size);
3720 			RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3721 			cv_signal(&slot->rsmipc_cv);
3722 		}
3723 	} else {
3724 		DBG_PRINTF((category, RSM_DEBUG,
3725 		    "rsm: rsm_intr_reply mismatched reply %d\n",
3726 		    cookie->ic.index));
3727 	}
3728 	mutex_exit(&slot->rsmipc_lock);
3729 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3730 }
3731 
3732 /*
3733  * This function gets dispatched on the worker thread when we receive
3734  * the SQREADY message. This function sends the SQREADY_ACK message.
3735  */
3736 static void
3737 rsm_sqready_ack_deferred(void *arg)
3738 {
3739 	path_t	*path = (path_t *)arg;
3740 	DBG_DEFINE(category,
3741 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3742 
3743 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3744 	    "rsm_sqready_ack_deferred enter\n"));
3745 
3746 	mutex_enter(&path->mutex);
3747 
3748 	/*
3749 	 * If path is not active no point in sending the ACK
3750 	 * because the whole SQREADY protocol will again start
3751 	 * when the path becomes active.
3752 	 */
3753 	if (path->state != RSMKA_PATH_ACTIVE) {
3754 		/*
3755 		 * decrement the path refcnt incremented in rsm_proc_sqready
3756 		 */
3757 		PATH_RELE_NOLOCK(path);
3758 		mutex_exit(&path->mutex);
3759 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3760 		    "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3761 		return;
3762 	}
3763 
3764 	/* send an SQREADY_ACK message */
3765 	(void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3766 
3767 	/* initialize credits to the max level */
3768 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3769 
3770 	/* wake up any send that is waiting for credits */
3771 	cv_broadcast(&path->sendq_token.sendq_cv);
3772 
3773 	/*
3774 	 * decrement the path refcnt since we incremented it in
3775 	 * rsm_proc_sqready
3776 	 */
3777 	PATH_RELE_NOLOCK(path);
3778 
3779 	mutex_exit(&path->mutex);
3780 
3781 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3782 	    "rsm_sqready_ack_deferred done\n"));
3783 }
3784 
3785 /*
3786  * Process the SQREADY message
3787  */
3788 static void
3789 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3790     rsm_intr_hand_arg_t arg)
3791 {
3792 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3793 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3794 	path_t			*path;
3795 	DBG_DEFINE(category,
3796 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3797 
3798 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3799 
3800 	/* look up the path - incr the path refcnt */
3801 	path = rsm_find_path(hdlr_argp->adapter_name,
3802 	    hdlr_argp->adapter_instance, src_hwaddr);
3803 
3804 	/*
3805 	 * No path exists or path is not active - drop the message
3806 	 */
3807 	if (path == NULL) {
3808 		DBG_PRINTF((category, RSM_DEBUG,
3809 		    "rsm_proc_sqready done: msg dropped no path\n"));
3810 		return;
3811 	}
3812 
3813 	mutex_exit(&path->mutex);
3814 
3815 	/* drain any tasks from the previous incarnation */
3816 	taskq_wait(path->recv_taskq);
3817 
3818 	mutex_enter(&path->mutex);
3819 	/*
3820 	 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3821 	 * in the meanwhile we received an SQREADY message, blindly reset
3822 	 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3823 	 * and forget about the SQREADY that we sent.
3824 	 */
3825 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3826 
3827 	if (path->state != RSMKA_PATH_ACTIVE) {
3828 		/* decr refcnt and drop the mutex */
3829 		PATH_RELE_NOLOCK(path);
3830 		mutex_exit(&path->mutex);
3831 		DBG_PRINTF((category, RSM_DEBUG,
3832 		    "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3833 		return;
3834 	}
3835 
3836 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3837 	    " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3838 
3839 	/*
3840 	 * The sender's local incarnation number is our remote incarnation
3841 	 * number save it in the path data structure
3842 	 */
3843 	path->remote_incn = msg->rsmipc_local_incn;
3844 	path->sendq_token.msgbuf_avail = 0;
3845 	path->procmsg_cnt = 0;
3846 
3847 	/*
3848 	 * path is active - dispatch task to send SQREADY_ACK - remember
3849 	 * RSMPI calls can't be done in interrupt context
3850 	 *
3851 	 * We can use the recv_taskq to send because the remote endpoint
3852 	 * cannot start sending messages till it receives SQREADY_ACK hence
3853 	 * at this point there are no tasks on recv_taskq.
3854 	 *
3855 	 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3856 	 */
3857 	(void) taskq_dispatch(path->recv_taskq,
3858 	    rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3859 
3860 	mutex_exit(&path->mutex);
3861 
3862 
3863 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3864 }
3865 
3866 /*
3867  * Process the SQREADY_ACK message
3868  */
3869 static void
3870 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3871     rsm_intr_hand_arg_t arg)
3872 {
3873 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3874 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3875 	path_t			*path;
3876 	DBG_DEFINE(category,
3877 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3878 
3879 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3880 	    "rsm_proc_sqready_ack enter\n"));
3881 
3882 	/* look up the path - incr the path refcnt */
3883 	path = rsm_find_path(hdlr_argp->adapter_name,
3884 	    hdlr_argp->adapter_instance, src_hwaddr);
3885 
3886 	/*
3887 	 * drop the message if - no path exists or path is not active
3888 	 * or if its not waiting for SQREADY_ACK message
3889 	 */
3890 	if (path == NULL) {
3891 		DBG_PRINTF((category, RSM_DEBUG,
3892 		    "rsm_proc_sqready_ack done: msg dropped no path\n"));
3893 		return;
3894 	}
3895 
3896 	if ((path->state != RSMKA_PATH_ACTIVE) ||
3897 	    !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3898 		/* decrement the refcnt */
3899 		PATH_RELE_NOLOCK(path);
3900 		mutex_exit(&path->mutex);
3901 		DBG_PRINTF((category, RSM_DEBUG,
3902 		    "rsm_proc_sqready_ack done: msg dropped\n"));
3903 		return;
3904 	}
3905 
3906 	/*
3907 	 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3908 	 * sent, if not drop it.
3909 	 */
3910 	if (path->local_incn != msghdr->rsmipc_incn) {
3911 		/* decrement the refcnt */
3912 		PATH_RELE_NOLOCK(path);
3913 		mutex_exit(&path->mutex);
3914 		DBG_PRINTF((category, RSM_DEBUG,
3915 		    "rsm_proc_sqready_ack done: msg old incn %lld\n",
3916 		    msghdr->rsmipc_incn));
3917 		return;
3918 	}
3919 
3920 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3921 		" src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3922 
3923 	/*
3924 	 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3925 	 */
3926 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3927 
3928 	/* save the remote sendq incn number */
3929 	path->remote_incn = msg->rsmipc_local_incn;
3930 
3931 	/* initialize credits to the max level */
3932 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3933 
3934 	/* wake up any send that is waiting for credits */
3935 	cv_broadcast(&path->sendq_token.sendq_cv);
3936 
3937 	/* decrement the refcnt */
3938 	PATH_RELE_NOLOCK(path);
3939 
3940 	mutex_exit(&path->mutex);
3941 
3942 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3943 	    "rsm_proc_sqready_ack done\n"));
3944 }
3945 
3946 /*
3947  * process the RSMIPC_MSG_CREDIT message
3948  */
3949 static void
3950 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3951     rsm_intr_hand_arg_t arg)
3952 {
3953 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3954 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3955 	path_t			*path;
3956 	DBG_DEFINE(category,
3957 	RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3958 
3959 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3960 
3961 	/* look up the path - incr the path refcnt */
3962 	path = rsm_find_path(hdlr_argp->adapter_name,
3963 	    hdlr_argp->adapter_instance, src_hwaddr);
3964 
3965 	if (path == NULL) {
3966 		DBG_PRINTF((category, RSM_DEBUG,
3967 		    "rsm_add_credits enter: path not found\n"));
3968 		return;
3969 	}
3970 
3971 	/* the path is not active - discard credits */
3972 	if (path->state != RSMKA_PATH_ACTIVE) {
3973 		PATH_RELE_NOLOCK(path);
3974 		mutex_exit(&path->mutex);
3975 		DBG_PRINTF((category, RSM_DEBUG,
3976 		    "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3977 		return;
3978 	}
3979 
3980 	/*
3981 	 * Check if these credits are for current incarnation of the path.
3982 	 */
3983 	if (path->local_incn != msghdr->rsmipc_incn) {
3984 		/* decrement the refcnt */
3985 		PATH_RELE_NOLOCK(path);
3986 		mutex_exit(&path->mutex);
3987 		DBG_PRINTF((category, RSM_DEBUG,
3988 		    "rsm_add_credits enter: old incn %lld\n",
3989 		    msghdr->rsmipc_incn));
3990 		return;
3991 	}
3992 
3993 	DBG_PRINTF((category, RSM_DEBUG,
3994 	    "rsm_add_credits:path=%lx new-creds=%d "
3995 	    "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3996 	    path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
3997 	    src_hwaddr));
3998 
3999 
4000 	/* add credits to the path's sendq */
4001 	path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4002 
4003 	ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4004 
4005 	/* wake up any send that is waiting for credits */
4006 	cv_broadcast(&path->sendq_token.sendq_cv);
4007 
4008 	/* decrement the refcnt */
4009 	PATH_RELE_NOLOCK(path);
4010 
4011 	mutex_exit(&path->mutex);
4012 
4013 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4014 }
4015 
4016 static void
4017 rsm_intr_event(rsmipc_request_t *msg)
4018 {
4019 	rsmseg_t	*seg;
4020 	rsmresource_t	*p;
4021 	rsm_node_id_t	src_node;
4022 	DBG_DEFINE(category,
4023 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4024 
4025 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4026 
4027 	src_node = msg->rsmipc_hdr.rsmipc_src;
4028 
4029 	if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4030 		/* This is for an import segment */
4031 		uint_t hashval = rsmhash(msg->rsmipc_key);
4032 
4033 		rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4034 
4035 		p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4036 
4037 		for (; p; p = p->rsmrc_next) {
4038 			if ((p->rsmrc_key == msg->rsmipc_key) &&
4039 			    (p->rsmrc_node == src_node)) {
4040 				seg = (rsmseg_t *)p;
4041 				rsmseglock_acquire(seg);
4042 
4043 				atomic_add_32(&seg->s_pollevent, 1);
4044 
4045 				if (seg->s_pollflag & RSM_SEGMENT_POLL)
4046 					pollwakeup(&seg->s_poll, POLLRDNORM);
4047 
4048 				rsmseglock_release(seg);
4049 			}
4050 		}
4051 
4052 		rw_exit(&rsm_import_segs.rsmhash_rw);
4053 	} else {
4054 		/* This is for an export segment */
4055 		seg = rsmexport_lookup(msg->rsmipc_key);
4056 		if (!seg) {
4057 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4058 			    "rsm_intr_event done: exp seg not found\n"));
4059 			return;
4060 		}
4061 
4062 		ASSERT(rsmseglock_held(seg));
4063 
4064 		atomic_add_32(&seg->s_pollevent, 1);
4065 
4066 		/*
4067 		 * We must hold the segment lock here, or else the segment
4068 		 * can be freed while pollwakeup is using it. This implies
4069 		 * that we MUST NOT grab the segment lock during rsm_chpoll,
4070 		 * as outlined in the chpoll(2) man page.
4071 		 */
4072 		if (seg->s_pollflag & RSM_SEGMENT_POLL)
4073 			pollwakeup(&seg->s_poll, POLLRDNORM);
4074 
4075 		rsmseglock_release(seg);
4076 	}
4077 
4078 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4079 }
4080 
4081 /*
4082  * The exporter did a republish and changed the ACL - this change is only
4083  * visible to new importers.
4084  */
4085 static void
4086 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4087     rsm_permission_t perm)
4088 {
4089 
4090 	rsmresource_t	*p;
4091 	rsmseg_t	*seg;
4092 	uint_t		hashval = rsmhash(key);
4093 	DBG_DEFINE(category,
4094 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4095 
4096 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4097 
4098 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4099 
4100 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4101 
4102 	for (; p; p = p->rsmrc_next) {
4103 		/*
4104 		 * find the importer and update the permission in the shared
4105 		 * data structure. Any new importers will use the new perms
4106 		 */
4107 		if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4108 			seg = (rsmseg_t *)p;
4109 
4110 			rsmseglock_acquire(seg);
4111 			rsmsharelock_acquire(seg);
4112 			seg->s_share->rsmsi_mode = perm;
4113 			rsmsharelock_release(seg);
4114 			rsmseglock_release(seg);
4115 
4116 			break;
4117 		}
4118 	}
4119 
4120 	rw_exit(&rsm_import_segs.rsmhash_rw);
4121 
4122 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4123 }
4124 
4125 void
4126 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4127 {
4128 	int		done = 1; /* indicate all SUSPENDS have been acked */
4129 	list_element_t	*elem;
4130 	DBG_DEFINE(category,
4131 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4132 
4133 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4134 	    "rsm_suspend_complete enter\n"));
4135 
4136 	mutex_enter(&rsm_suspend_list.list_lock);
4137 
4138 	if (rsm_suspend_list.list_head == NULL) {
4139 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4140 		    "rsm_suspend_complete done: suspend_list is empty\n"));
4141 		mutex_exit(&rsm_suspend_list.list_lock);
4142 		return;
4143 	}
4144 
4145 	elem = rsm_suspend_list.list_head;
4146 	while (elem != NULL) {
4147 		if (elem->nodeid == src_node) {
4148 			/* clear the pending flag for the node */
4149 			elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4150 			elem->flags |= flag;
4151 		}
4152 
4153 		if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4154 			done = 0; /* still some nodes have not yet ACKED */
4155 
4156 		elem = elem->next;
4157 	}
4158 
4159 	mutex_exit(&rsm_suspend_list.list_lock);
4160 
4161 	if (!done) {
4162 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4163 		    "rsm_suspend_complete done: acks pending\n"));
4164 		return;
4165 	}
4166 	/*
4167 	 * Now that we are done with suspending all the remote importers
4168 	 * time to quiesce the local exporters
4169 	 */
4170 	exporter_quiesce();
4171 
4172 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4173 	    "rsm_suspend_complete done\n"));
4174 }
4175 
4176 static void
4177 exporter_quiesce()
4178 {
4179 	int		i, e;
4180 	rsmresource_t	*current;
4181 	rsmseg_t	*seg;
4182 	adapter_t	*adapter;
4183 	DBG_DEFINE(category,
4184 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4185 
4186 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4187 	/*
4188 	 * The importers send a SUSPEND_COMPLETE to the exporter node
4189 	 *	Unpublish, unbind the export segment and
4190 	 *	move the segments to the EXPORT_QUIESCED state
4191 	 */
4192 
4193 	rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4194 
4195 	for (i = 0; i < rsm_hash_size; i++) {
4196 		current = rsm_export_segs.bucket[i];
4197 		while (current != NULL) {
4198 			seg = (rsmseg_t *)current;
4199 			rsmseglock_acquire(seg);
4200 			if (current->rsmrc_state ==
4201 			    RSM_STATE_EXPORT_QUIESCING) {
4202 				adapter = seg->s_adapter;
4203 				/*
4204 				 * some local memory handles are not published
4205 				 * check if it was published
4206 				 */
4207 				if ((seg->s_acl == NULL) ||
4208 				    (seg->s_acl[0].ae_node != my_nodeid) ||
4209 				    (seg->s_acl[0].ae_permission != 0)) {
4210 
4211 					e = adapter->rsmpi_ops->rsm_unpublish(
4212 					    seg->s_handle.out);
4213 					DBG_PRINTF((category, RSM_DEBUG,
4214 					    "exporter_quiesce:unpub %d\n", e));
4215 
4216 					e = adapter->rsmpi_ops->rsm_seg_destroy(
4217 					    seg->s_handle.out);
4218 
4219 					DBG_PRINTF((category, RSM_DEBUG,
4220 					    "exporter_quiesce:destroy %d\n",
4221 					    e));
4222 				}
4223 
4224 				(void) rsm_unbind_pages(seg);
4225 				seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4226 				cv_broadcast(&seg->s_cv);
4227 			}
4228 			rsmseglock_release(seg);
4229 			current = current->rsmrc_next;
4230 		}
4231 	}
4232 	rw_exit(&rsm_export_segs.rsmhash_rw);
4233 
4234 	/*
4235 	 * All the local segments we are done with the pre-del processing
4236 	 * - time to move to PREDEL_COMPLETED.
4237 	 */
4238 
4239 	mutex_enter(&rsm_drv_data.drv_lock);
4240 
4241 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4242 
4243 	rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4244 
4245 	cv_broadcast(&rsm_drv_data.drv_cv);
4246 
4247 	mutex_exit(&rsm_drv_data.drv_lock);
4248 
4249 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4250 }
4251 
4252 static void
4253 importer_suspend(rsm_node_id_t src_node)
4254 {
4255 	int		i;
4256 	int		susp_flg; /* true means already suspended */
4257 	int		num_importers;
4258 	rsmresource_t	*p = NULL, *curp;
4259 	rsmhash_table_t *rhash = &rsm_import_segs;
4260 	rsmseg_t	*seg;
4261 	rsmipc_request_t request;
4262 	DBG_DEFINE(category,
4263 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4264 
4265 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4266 
4267 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4268 	for (i = 0; i < rsm_hash_size; i++) {
4269 		p = rhash->bucket[i];
4270 
4271 		/*
4272 		 * Suspend all importers with same <node, key> pair.
4273 		 * After the last one of the shared importers has been
4274 		 * suspended - suspend the shared mappings/connection.
4275 		 */
4276 		for (; p; p = p->rsmrc_next) {
4277 			rsmseg_t *first = (rsmseg_t *)p;
4278 			if ((first->s_node != src_node) ||
4279 			    (first->s_state == RSM_STATE_DISCONNECT))
4280 				continue; /* go to next entry */
4281 			/*
4282 			 * search the rest of the bucket for
4283 			 * other siblings (imprtrs with the same key)
4284 			 * of "first" and suspend them.
4285 			 * All importers with same key fall in
4286 			 * the same bucket.
4287 			 */
4288 			num_importers = 0;
4289 			for (curp = p; curp; curp = curp->rsmrc_next) {
4290 				seg = (rsmseg_t *)curp;
4291 
4292 				rsmseglock_acquire(seg);
4293 
4294 				if ((seg->s_node != first->s_node) ||
4295 				    (seg->s_key != first->s_key) ||
4296 				    (seg->s_state == RSM_STATE_DISCONNECT)) {
4297 					/*
4298 					 * either not a peer segment or its a
4299 					 * disconnected segment - skip it
4300 					 */
4301 					rsmseglock_release(seg);
4302 					continue;
4303 				}
4304 
4305 				rsmseg_suspend(seg, &susp_flg);
4306 
4307 				if (susp_flg) { /* seg already suspended */
4308 					rsmseglock_release(seg);
4309 					break; /* the inner for loop */
4310 				}
4311 
4312 				num_importers++;
4313 				rsmsharelock_acquire(seg);
4314 				/*
4315 				 * we've processed all importers that are
4316 				 * siblings of "first"
4317 				 */
4318 				if (num_importers ==
4319 				    seg->s_share->rsmsi_refcnt) {
4320 					rsmsharelock_release(seg);
4321 					rsmseglock_release(seg);
4322 					break;
4323 				}
4324 				rsmsharelock_release(seg);
4325 				rsmseglock_release(seg);
4326 			}
4327 
4328 			/*
4329 			 * All the importers with the same key and
4330 			 * nodeid as "first" have been suspended.
4331 			 * Now suspend the shared connect/mapping.
4332 			 * This is done only once.
4333 			 */
4334 			if (!susp_flg) {
4335 				rsmsegshare_suspend(seg);
4336 			}
4337 		}
4338 	}
4339 
4340 	rw_exit(&rhash->rsmhash_rw);
4341 
4342 	/* send an ACK for SUSPEND message */
4343 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4344 	(void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4345 
4346 
4347 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4348 
4349 }
4350 
4351 static void
4352 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4353 {
4354 	int		recheck_state;
4355 	rsmcookie_t	*hdl;
4356 	DBG_DEFINE(category,
4357 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4358 
4359 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4360 	    "rsmseg_suspend enter: key=%u\n", seg->s_key));
4361 
4362 	*susp_flg = 0;
4363 
4364 	ASSERT(rsmseglock_held(seg));
4365 	/* wait if putv/getv is in progress */
4366 	while (seg->s_rdmacnt > 0)
4367 		cv_wait(&seg->s_cv, &seg->s_lock);
4368 
4369 	do {
4370 		recheck_state = 0;
4371 
4372 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4373 		    "rsmseg_suspend:segment %x state=%d\n",
4374 		    seg->s_key, seg->s_state));
4375 
4376 		switch (seg->s_state) {
4377 		case RSM_STATE_NEW:
4378 			/* not a valid state */
4379 			break;
4380 		case RSM_STATE_CONNECTING:
4381 			seg->s_state = RSM_STATE_ABORT_CONNECT;
4382 			break;
4383 		case RSM_STATE_ABORT_CONNECT:
4384 			break;
4385 		case RSM_STATE_CONNECT:
4386 			seg->s_handle.in = NULL;
4387 			seg->s_state = RSM_STATE_CONN_QUIESCE;
4388 			break;
4389 		case RSM_STATE_MAPPING:
4390 			/* wait until segment leaves the mapping state */
4391 			while (seg->s_state == RSM_STATE_MAPPING)
4392 				cv_wait(&seg->s_cv, &seg->s_lock);
4393 			recheck_state = 1;
4394 			break;
4395 		case RSM_STATE_ACTIVE:
4396 			/* unload the mappings */
4397 			if (seg->s_ckl != NULL) {
4398 				hdl = seg->s_ckl;
4399 				for (; hdl != NULL; hdl = hdl->c_next) {
4400 					(void) devmap_unload(hdl->c_dhp,
4401 						    hdl->c_off, hdl->c_len);
4402 				}
4403 			}
4404 			seg->s_mapinfo = NULL;
4405 			seg->s_state = RSM_STATE_MAP_QUIESCE;
4406 			break;
4407 		case RSM_STATE_CONN_QUIESCE:
4408 			/* FALLTHRU */
4409 		case RSM_STATE_MAP_QUIESCE:
4410 			/* rsmseg_suspend already done for seg */
4411 			*susp_flg = 1;
4412 			break;
4413 		case RSM_STATE_DISCONNECT:
4414 			break;
4415 		default:
4416 			ASSERT(0); /* invalid state */
4417 		}
4418 	} while (recheck_state);
4419 
4420 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4421 }
4422 
4423 static void
4424 rsmsegshare_suspend(rsmseg_t *seg)
4425 {
4426 	int			e;
4427 	adapter_t		*adapter;
4428 	rsm_import_share_t	*sharedp;
4429 	DBG_DEFINE(category,
4430 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4431 
4432 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4433 	    "rsmsegshare_suspend enter\n"));
4434 
4435 	rsmseglock_acquire(seg);
4436 	rsmsharelock_acquire(seg);
4437 
4438 	sharedp = seg->s_share;
4439 	adapter = seg->s_adapter;
4440 	switch (sharedp->rsmsi_state) {
4441 	case RSMSI_STATE_NEW:
4442 		break;
4443 	case RSMSI_STATE_CONNECTING:
4444 		sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4445 		break;
4446 	case RSMSI_STATE_ABORT_CONNECT:
4447 		break;
4448 	case RSMSI_STATE_CONNECTED:
4449 		/* do the rsmpi disconnect */
4450 		if (sharedp->rsmsi_node != my_nodeid) {
4451 			e = adapter->rsmpi_ops->
4452 			    rsm_disconnect(sharedp->rsmsi_handle);
4453 
4454 			DBG_PRINTF((category, RSM_DEBUG,
4455 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4456 			    sharedp->rsmsi_segid, e));
4457 		}
4458 
4459 		sharedp->rsmsi_handle = NULL;
4460 
4461 		sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4462 		break;
4463 	case RSMSI_STATE_CONN_QUIESCE:
4464 		break;
4465 	case RSMSI_STATE_MAPPED:
4466 		/* do the rsmpi unmap and disconnect */
4467 		if (sharedp->rsmsi_node != my_nodeid) {
4468 			e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4469 
4470 			DBG_PRINTF((category, RSM_DEBUG,
4471 			    "rsmshare_suspend: rsmpi unmap %d\n", e));
4472 
4473 			e = adapter->rsmpi_ops->
4474 			    rsm_disconnect(sharedp->rsmsi_handle);
4475 			DBG_PRINTF((category, RSM_DEBUG,
4476 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4477 			    sharedp->rsmsi_segid, e));
4478 		}
4479 
4480 		sharedp->rsmsi_handle = NULL;
4481 
4482 		sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4483 		break;
4484 	case RSMSI_STATE_MAP_QUIESCE:
4485 		break;
4486 	case RSMSI_STATE_DISCONNECTED:
4487 		break;
4488 	default:
4489 		ASSERT(0); /* invalid state */
4490 	}
4491 
4492 	rsmsharelock_release(seg);
4493 	rsmseglock_release(seg);
4494 
4495 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4496 	    "rsmsegshare_suspend done\n"));
4497 }
4498 
4499 /*
4500  * This should get called on receiving a RESUME message or from
4501  * the pathmanger if the node undergoing DR dies.
4502  */
4503 static void
4504 importer_resume(rsm_node_id_t src_node)
4505 {
4506 	int		i;
4507 	rsmresource_t	*p = NULL;
4508 	rsmhash_table_t *rhash = &rsm_import_segs;
4509 	void		*cookie;
4510 	DBG_DEFINE(category,
4511 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4512 
4513 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4514 
4515 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4516 
4517 	for (i = 0; i < rsm_hash_size; i++) {
4518 		p = rhash->bucket[i];
4519 
4520 		for (; p; p = p->rsmrc_next) {
4521 			rsmseg_t *seg = (rsmseg_t *)p;
4522 
4523 			rsmseglock_acquire(seg);
4524 
4525 			/* process only importers of node undergoing DR */
4526 			if (seg->s_node != src_node) {
4527 				rsmseglock_release(seg);
4528 				continue;
4529 			}
4530 
4531 			if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4532 				rsmipc_request_t	request;
4533 				/*
4534 				 * rsmpi map/connect failed
4535 				 * inform the exporter so that it can
4536 				 * remove the importer.
4537 				 */
4538 				request.rsmipc_hdr.rsmipc_type =
4539 				    RSMIPC_MSG_NOTIMPORTING;
4540 				request.rsmipc_key = seg->s_segid;
4541 				request.rsmipc_segment_cookie = cookie;
4542 				rsmseglock_release(seg);
4543 				(void) rsmipc_send(seg->s_node, &request,
4544 					    RSM_NO_REPLY);
4545 			} else {
4546 				rsmseglock_release(seg);
4547 			}
4548 		}
4549 	}
4550 
4551 	rw_exit(&rhash->rsmhash_rw);
4552 
4553 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4554 }
4555 
4556 static int
4557 rsmseg_resume(rsmseg_t *seg, void **cookie)
4558 {
4559 	int			e;
4560 	int			retc;
4561 	off_t			dev_offset;
4562 	size_t			maplen;
4563 	uint_t			maxprot;
4564 	rsm_mapinfo_t		*p;
4565 	rsmcookie_t		*hdl;
4566 	rsm_import_share_t	*sharedp;
4567 	DBG_DEFINE(category,
4568 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4569 
4570 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4571 	    "rsmseg_resume enter: key=%u\n", seg->s_key));
4572 
4573 	*cookie = NULL;
4574 
4575 	ASSERT(rsmseglock_held(seg));
4576 
4577 	if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4578 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4579 		return (RSM_SUCCESS);
4580 	}
4581 
4582 	sharedp = seg->s_share;
4583 
4584 	rsmsharelock_acquire(seg);
4585 
4586 	/* resume the shared connection and/or mapping */
4587 	retc = rsmsegshare_resume(seg);
4588 
4589 	if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4590 		/* shared state can either be connected or mapped */
4591 		if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4592 		    (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4593 			ASSERT(retc == RSM_SUCCESS);
4594 			seg->s_handle.in = sharedp->rsmsi_handle;
4595 			rsmsharelock_release(seg);
4596 			seg->s_state = RSM_STATE_CONNECT;
4597 
4598 		} else { /* error in rsmpi connect during resume */
4599 			seg->s_handle.in = NULL;
4600 			seg->s_state = RSM_STATE_DISCONNECT;
4601 
4602 			sharedp->rsmsi_refcnt--;
4603 			cookie = (void *)sharedp->rsmsi_cookie;
4604 
4605 			if (sharedp->rsmsi_refcnt == 0) {
4606 				ASSERT(sharedp->rsmsi_mapcnt == 0);
4607 				rsmsharelock_release(seg);
4608 
4609 				/* clean up the shared data structure */
4610 				mutex_destroy(&sharedp->rsmsi_lock);
4611 				cv_destroy(&sharedp->rsmsi_cv);
4612 				kmem_free((void *)(sharedp),
4613 				    sizeof (rsm_import_share_t));
4614 
4615 			} else {
4616 				rsmsharelock_release(seg);
4617 			}
4618 			/*
4619 			 * The following needs to be done after any
4620 			 * rsmsharelock calls which use seg->s_share.
4621 			 */
4622 			seg->s_share = NULL;
4623 		}
4624 
4625 		/* signal any waiting segment */
4626 		cv_broadcast(&seg->s_cv);
4627 
4628 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4629 		    "rsmseg_resume done:state=%d\n", seg->s_state));
4630 		return (retc);
4631 	}
4632 
4633 	ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4634 
4635 	/* Setup protections for remap */
4636 	maxprot = PROT_USER;
4637 	if (seg->s_mode & RSM_PERM_READ) {
4638 		maxprot |= PROT_READ;
4639 	}
4640 	if (seg->s_mode & RSM_PERM_WRITE) {
4641 		maxprot |= PROT_WRITE;
4642 	}
4643 
4644 	if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4645 		/* error in rsmpi connect or map during resume */
4646 
4647 		/* remap to trash page */
4648 		ASSERT(seg->s_ckl != NULL);
4649 
4650 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4651 			e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4652 			    remap_cookie, hdl->c_off, hdl->c_len,
4653 			    maxprot, 0, NULL);
4654 
4655 			DBG_PRINTF((category, RSM_ERR,
4656 			    "rsmseg_resume:remap=%d\n", e));
4657 		}
4658 
4659 		seg->s_handle.in = NULL;
4660 		seg->s_state = RSM_STATE_DISCONNECT;
4661 
4662 		sharedp->rsmsi_refcnt--;
4663 
4664 		sharedp->rsmsi_mapcnt--;
4665 		seg->s_mapinfo = NULL;
4666 
4667 		if (sharedp->rsmsi_refcnt == 0) {
4668 			ASSERT(sharedp->rsmsi_mapcnt == 0);
4669 			rsmsharelock_release(seg);
4670 
4671 			/* clean up the shared data structure */
4672 			mutex_destroy(&sharedp->rsmsi_lock);
4673 			cv_destroy(&sharedp->rsmsi_cv);
4674 			kmem_free((void *)(sharedp),
4675 			    sizeof (rsm_import_share_t));
4676 
4677 		} else {
4678 			rsmsharelock_release(seg);
4679 		}
4680 		/*
4681 		 * The following needs to be done after any
4682 		 * rsmsharelock calls which use seg->s_share.
4683 		 */
4684 		seg->s_share = NULL;
4685 
4686 		/* signal any waiting segment */
4687 		cv_broadcast(&seg->s_cv);
4688 
4689 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4690 		    "rsmseg_resume done:seg=%x,err=%d\n",
4691 		    seg->s_key, retc));
4692 		return (retc);
4693 
4694 	}
4695 
4696 	seg->s_handle.in = sharedp->rsmsi_handle;
4697 
4698 	if (seg->s_node == my_nodeid) { /* loopback */
4699 		ASSERT(seg->s_mapinfo == NULL);
4700 
4701 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4702 			e = devmap_umem_remap(hdl->c_dhp,
4703 			    rsm_dip, seg->s_cookie,
4704 			    hdl->c_off, hdl->c_len,
4705 			    maxprot, 0, NULL);
4706 
4707 			DBG_PRINTF((category, RSM_ERR,
4708 			    "rsmseg_resume:remap=%d\n", e));
4709 		}
4710 	} else { /* remote exporter */
4711 		/* remap to the new rsmpi maps */
4712 		seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4713 
4714 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4715 			p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4716 			    &dev_offset, &maplen);
4717 			e = devmap_devmem_remap(hdl->c_dhp,
4718 			    p->dip, p->dev_register, dev_offset,
4719 			    maplen, maxprot, 0, NULL);
4720 
4721 			DBG_PRINTF((category, RSM_ERR,
4722 			    "rsmseg_resume:remap=%d\n", e));
4723 		}
4724 	}
4725 
4726 	rsmsharelock_release(seg);
4727 
4728 	seg->s_state = RSM_STATE_ACTIVE;
4729 	cv_broadcast(&seg->s_cv);
4730 
4731 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4732 
4733 	return (retc);
4734 }
4735 
4736 static int
4737 rsmsegshare_resume(rsmseg_t *seg)
4738 {
4739 	int			e = RSM_SUCCESS;
4740 	adapter_t		*adapter;
4741 	rsm_import_share_t	*sharedp;
4742 	DBG_DEFINE(category,
4743 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4744 
4745 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4746 
4747 	ASSERT(rsmseglock_held(seg));
4748 	ASSERT(rsmsharelock_held(seg));
4749 
4750 	sharedp = seg->s_share;
4751 
4752 	/*
4753 	 * If we are not in a xxxx_QUIESCE state that means shared
4754 	 * connect/mapping processing has been already been done
4755 	 * so return success.
4756 	 */
4757 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4758 	    (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4759 		return (RSM_SUCCESS);
4760 	}
4761 
4762 	adapter = seg->s_adapter;
4763 
4764 	if (sharedp->rsmsi_node != my_nodeid) {
4765 		rsm_addr_t	hwaddr;
4766 		hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4767 
4768 		e = adapter->rsmpi_ops->rsm_connect(
4769 		    adapter->rsmpi_handle, hwaddr,
4770 		    sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4771 
4772 		DBG_PRINTF((category, RSM_DEBUG,
4773 		    "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4774 		    sharedp->rsmsi_segid, e));
4775 
4776 		if (e != RSM_SUCCESS) {
4777 			/* when do we send the NOT_IMPORTING message */
4778 			sharedp->rsmsi_handle = NULL;
4779 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4780 			/* signal any waiting segment */
4781 			cv_broadcast(&sharedp->rsmsi_cv);
4782 			return (e);
4783 		}
4784 	}
4785 
4786 	if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4787 		sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4788 		/* signal any waiting segment */
4789 		cv_broadcast(&sharedp->rsmsi_cv);
4790 		return (e);
4791 	}
4792 
4793 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4794 
4795 	/* do the rsmpi map of the whole segment here */
4796 	if (sharedp->rsmsi_node != my_nodeid) {
4797 		size_t mapped_len;
4798 		rsm_mapinfo_t *p;
4799 
4800 		/*
4801 		 * We need to do rsmpi maps with <off, lens> identical to
4802 		 * the old mapinfo list because the segment mapping handles
4803 		 * dhp and such need the fragmentation of rsmpi maps to be
4804 		 * identical to what it was during the mmap of the segment
4805 		 */
4806 		p = sharedp->rsmsi_mapinfo;
4807 
4808 		while (p != NULL) {
4809 			mapped_len = 0;
4810 
4811 			e = adapter->rsmpi_ops->rsm_map(
4812 			    sharedp->rsmsi_handle, p->start_offset,
4813 			    p->individual_len, &mapped_len,
4814 			    &p->dip, &p->dev_register, &p->dev_offset,
4815 			    NULL, NULL);
4816 
4817 			if (e != 0) {
4818 				DBG_PRINTF((category, RSM_ERR,
4819 				    "rsmsegshare_resume: rsmpi map err=%d\n",
4820 				    e));
4821 				break;
4822 			}
4823 
4824 			if (mapped_len != p->individual_len) {
4825 				DBG_PRINTF((category, RSM_ERR,
4826 				    "rsmsegshare_resume: rsmpi maplen"
4827 				    "< reqlen=%lx\n", mapped_len));
4828 				e = RSMERR_BAD_LENGTH;
4829 				break;
4830 			}
4831 
4832 			p = p->next;
4833 
4834 		}
4835 
4836 
4837 		if (e != RSM_SUCCESS) { /* rsmpi map failed */
4838 			int	err;
4839 			/* Check if this is the first rsm_map */
4840 			if (p != sharedp->rsmsi_mapinfo) {
4841 				/*
4842 				 * A single rsm_unmap undoes multiple rsm_maps.
4843 				 */
4844 				(void) seg->s_adapter->rsmpi_ops->
4845 				    rsm_unmap(sharedp->rsmsi_handle);
4846 			}
4847 
4848 			rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4849 			sharedp->rsmsi_mapinfo = NULL;
4850 
4851 			err = adapter->rsmpi_ops->
4852 				    rsm_disconnect(sharedp->rsmsi_handle);
4853 
4854 			DBG_PRINTF((category, RSM_DEBUG,
4855 			    "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4856 			    sharedp->rsmsi_segid, err));
4857 
4858 			sharedp->rsmsi_handle = NULL;
4859 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4860 
4861 			/* signal the waiting segments */
4862 			cv_broadcast(&sharedp->rsmsi_cv);
4863 			DBG_PRINTF((category, RSM_DEBUG,
4864 			    "rsmsegshare_resume done: rsmpi map err\n"));
4865 			return (e);
4866 		}
4867 	}
4868 
4869 	sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4870 
4871 	/* signal any waiting segment */
4872 	cv_broadcast(&sharedp->rsmsi_cv);
4873 
4874 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4875 
4876 	return (e);
4877 }
4878 
4879 /*
4880  * this is the routine that gets called by recv_taskq which is the
4881  * thread that processes messages that are flow-controlled.
4882  */
4883 static void
4884 rsm_intr_proc_deferred(void *arg)
4885 {
4886 	path_t			*path = (path_t *)arg;
4887 	rsmipc_request_t	*msg;
4888 	rsmipc_msghdr_t		*msghdr;
4889 	rsm_node_id_t		src_node;
4890 	msgbuf_elem_t		*head;
4891 	int			e;
4892 	DBG_DEFINE(category,
4893 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4894 
4895 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4896 	    "rsm_intr_proc_deferred enter\n"));
4897 
4898 	mutex_enter(&path->mutex);
4899 
4900 	/* use the head of the msgbuf_queue */
4901 	head = rsmka_gethead_msgbuf(path);
4902 
4903 	mutex_exit(&path->mutex);
4904 
4905 	msg = (rsmipc_request_t *)&(head->msg);
4906 	msghdr = (rsmipc_msghdr_t *)msg;
4907 
4908 	src_node = msghdr->rsmipc_src;
4909 
4910 	/*
4911 	 * messages that need to send a reply should check the message version
4912 	 * before processing the message. And all messages that need to
4913 	 * send a reply should be processed here by the worker thread.
4914 	 */
4915 	switch (msghdr->rsmipc_type) {
4916 	case RSMIPC_MSG_SEGCONNECT:
4917 		if (msghdr->rsmipc_version != RSM_VERSION) {
4918 			rsmipc_reply_t reply;
4919 			reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4920 			reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4921 			reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4922 			(void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4923 		} else {
4924 			rsm_intr_segconnect(src_node, msg);
4925 		}
4926 		break;
4927 	case RSMIPC_MSG_DISCONNECT:
4928 		rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4929 		break;
4930 	case RSMIPC_MSG_SUSPEND:
4931 		importer_suspend(src_node);
4932 		break;
4933 	case RSMIPC_MSG_SUSPEND_DONE:
4934 		rsm_suspend_complete(src_node, 0);
4935 		break;
4936 	case RSMIPC_MSG_RESUME:
4937 		importer_resume(src_node);
4938 		break;
4939 	default:
4940 		ASSERT(0);
4941 	}
4942 
4943 	mutex_enter(&path->mutex);
4944 
4945 	rsmka_dequeue_msgbuf(path);
4946 
4947 	/* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4948 	if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4949 		path->procmsg_cnt++;
4950 
4951 	ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4952 
4953 	/* No need to send credits if path is going down */
4954 	if ((path->state == RSMKA_PATH_ACTIVE) &&
4955 	    (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4956 		/*
4957 		 * send credits and reset procmsg_cnt if success otherwise
4958 		 * credits will be sent after processing the next message
4959 		 */
4960 		e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4961 		if (e == 0)
4962 			path->procmsg_cnt = 0;
4963 		else
4964 			DBG_PRINTF((category, RSM_ERR,
4965 			    "rsm_intr_proc_deferred:send credits err=%d\n", e));
4966 	}
4967 
4968 	/*
4969 	 * decrement the path refcnt since we incremented it in
4970 	 * rsm_intr_callback_dispatch
4971 	 */
4972 	PATH_RELE_NOLOCK(path);
4973 
4974 	mutex_exit(&path->mutex);
4975 
4976 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4977 	    "rsm_intr_proc_deferred done\n"));
4978 }
4979 
4980 /*
4981  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4982  */
4983 static void
4984 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4985     rsm_intr_hand_arg_t arg)
4986 {
4987 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
4988 	path_t			*path;
4989 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4990 	DBG_DEFINE(category,
4991 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4992 
4993 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4994 	    "rsm_intr_callback_dispatch enter\n"));
4995 	ASSERT(data && hdlr_argp);
4996 
4997 	/* look up the path - incr the path refcnt */
4998 	path = rsm_find_path(hdlr_argp->adapter_name,
4999 	    hdlr_argp->adapter_instance, src_hwaddr);
5000 
5001 	/* the path has been removed - drop this message */
5002 	if (path == NULL) {
5003 		DBG_PRINTF((category, RSM_DEBUG,
5004 		    "rsm_intr_callback_dispatch done: msg dropped\n"));
5005 		return;
5006 	}
5007 	/* the path is not active - don't accept new messages */
5008 	if (path->state != RSMKA_PATH_ACTIVE) {
5009 		PATH_RELE_NOLOCK(path);
5010 		mutex_exit(&path->mutex);
5011 		DBG_PRINTF((category, RSM_DEBUG,
5012 		    "rsm_intr_callback_dispatch done: msg dropped"
5013 		    " path=%lx !ACTIVE\n", path));
5014 		return;
5015 	}
5016 
5017 	/*
5018 	 * Check if this message was sent to an older incarnation
5019 	 * of the path/sendq.
5020 	 */
5021 	if (path->local_incn != msghdr->rsmipc_incn) {
5022 		/* decrement the refcnt */
5023 		PATH_RELE_NOLOCK(path);
5024 		mutex_exit(&path->mutex);
5025 		DBG_PRINTF((category, RSM_DEBUG,
5026 		    "rsm_intr_callback_dispatch done: old incn %lld\n",
5027 		    msghdr->rsmipc_incn));
5028 		return;
5029 	}
5030 
5031 	/* copy and enqueue msg on the path's msgbuf queue */
5032 	rsmka_enqueue_msgbuf(path, data);
5033 
5034 	/*
5035 	 * schedule task to process messages - ignore retval from
5036 	 * task_dispatch because we sender cannot send more than
5037 	 * what receiver can handle.
5038 	 */
5039 	(void) taskq_dispatch(path->recv_taskq,
5040 	    rsm_intr_proc_deferred, path, KM_NOSLEEP);
5041 
5042 	mutex_exit(&path->mutex);
5043 
5044 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5045 	    "rsm_intr_callback_dispatch done\n"));
5046 }
5047 
5048 /*
5049  * This procedure is called from rsm_srv_func when a remote node creates a
5050  * a send queue.  This event is used as a hint that an  earlier failed
5051  * attempt to create a send queue to that remote node may now succeed and
5052  * should be retried.  Indication of an earlier failed attempt is provided
5053  * by the RSMKA_SQCREATE_PENDING flag.
5054  */
5055 static void
5056 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5057 {
5058 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
5059 	path_t			*path;
5060 	DBG_DEFINE(category,
5061 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5062 
5063 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5064 	    "rsm_sqcreateop_callback enter\n"));
5065 
5066 	/* look up the path - incr the path refcnt */
5067 	path = rsm_find_path(hdlr_argp->adapter_name,
5068 	    hdlr_argp->adapter_instance, src_hwaddr);
5069 
5070 	if (path == NULL) {
5071 		DBG_PRINTF((category, RSM_DEBUG,
5072 		    "rsm_sqcreateop_callback done: no path\n"));
5073 		return;
5074 	}
5075 
5076 	if ((path->state == RSMKA_PATH_UP) &&
5077 	    (path->flags & RSMKA_SQCREATE_PENDING)) {
5078 		/*
5079 		 * previous attempt to create sendq had failed, retry
5080 		 * it and move to RSMKA_PATH_ACTIVE state if successful.
5081 		 * the refcnt will be decremented in the do_deferred_work
5082 		 */
5083 		(void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5084 	} else {
5085 		/* decrement the refcnt */
5086 		PATH_RELE_NOLOCK(path);
5087 	}
5088 	mutex_exit(&path->mutex);
5089 
5090 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5091 	    "rsm_sqcreateop_callback done\n"));
5092 }
5093 
5094 static void
5095 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5096 {
5097 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5098 	rsmipc_request_t *msg = (rsmipc_request_t *)data;
5099 	rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5100 	rsm_node_id_t src_node;
5101 	DBG_DEFINE(category,
5102 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5103 
5104 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5105 	    "src=%d, type=%d\n", msghdr->rsmipc_src,
5106 	    msghdr->rsmipc_type));
5107 
5108 	/*
5109 	 * Check for the version number in the msg header. If it is not
5110 	 * RSM_VERSION, drop the message. In the future, we need to manage
5111 	 * incompatible version numbers in some way
5112 	 */
5113 	if (msghdr->rsmipc_version != RSM_VERSION) {
5114 		DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5115 		/*
5116 		 * Drop requests that don't have a reply right here
5117 		 * Request with reply will send a BAD_VERSION reply
5118 		 * when they get processed by the worker thread.
5119 		 */
5120 		if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5121 			return;
5122 		}
5123 
5124 	}
5125 
5126 	src_node = msghdr->rsmipc_src;
5127 
5128 	switch (msghdr->rsmipc_type) {
5129 	case RSMIPC_MSG_SEGCONNECT:
5130 	case RSMIPC_MSG_DISCONNECT:
5131 	case RSMIPC_MSG_SUSPEND:
5132 	case RSMIPC_MSG_SUSPEND_DONE:
5133 	case RSMIPC_MSG_RESUME:
5134 		/*
5135 		 * These message types are handled by a worker thread using
5136 		 * the flow-control algorithm.
5137 		 * Any message processing that does one or more of the
5138 		 * following should be handled in a worker thread.
5139 		 *	- allocates resources and might sleep
5140 		 *	- makes RSMPI calls down to the interconnect driver
5141 		 *	this by defn include requests with reply.
5142 		 *	- takes a long duration of time
5143 		 */
5144 		rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5145 		break;
5146 	case RSMIPC_MSG_NOTIMPORTING:
5147 		importer_list_rm(src_node, msg->rsmipc_key,
5148 		    msg->rsmipc_segment_cookie);
5149 		break;
5150 	case RSMIPC_MSG_SQREADY:
5151 		rsm_proc_sqready(data, src_hwaddr, arg);
5152 		break;
5153 	case RSMIPC_MSG_SQREADY_ACK:
5154 		rsm_proc_sqready_ack(data, src_hwaddr, arg);
5155 		break;
5156 	case RSMIPC_MSG_CREDIT:
5157 		rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5158 		break;
5159 	case RSMIPC_MSG_REPLY:
5160 		rsm_intr_reply(msghdr);
5161 		break;
5162 	case RSMIPC_MSG_BELL:
5163 		rsm_intr_event(msg);
5164 		break;
5165 	case RSMIPC_MSG_IMPORTING:
5166 		importer_list_add(src_node, msg->rsmipc_key,
5167 		    msg->rsmipc_adapter_hwaddr,
5168 		    msg->rsmipc_segment_cookie);
5169 		break;
5170 	case RSMIPC_MSG_REPUBLISH:
5171 		importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5172 		break;
5173 	default:
5174 		DBG_PRINTF((category, RSM_DEBUG,
5175 		    "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5176 		    (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5177 	}
5178 
5179 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5180 
5181 }
5182 
5183 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5184     rsm_intr_q_op_t opcode, rsm_addr_t src,
5185     void *data, size_t size, rsm_intr_hand_arg_t arg)
5186 {
5187 	DBG_DEFINE(category,
5188 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5189 
5190 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5191 
5192 	switch (opcode) {
5193 	case RSM_INTR_Q_OP_CREATE:
5194 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5195 		rsm_sqcreateop_callback(src, arg);
5196 		break;
5197 	case RSM_INTR_Q_OP_DESTROY:
5198 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5199 		break;
5200 	case RSM_INTR_Q_OP_RECEIVE:
5201 		rsm_intr_callback(data, src, arg);
5202 		break;
5203 	default:
5204 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5205 		    "rsm_srv_func: unknown opcode = %x\n", opcode));
5206 	}
5207 
5208 	chd = chd;
5209 	size = size;
5210 
5211 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5212 
5213 	return (RSM_INTR_HAND_CLAIMED);
5214 }
5215 
5216 /* *************************** IPC slots ************************* */
5217 static rsmipc_slot_t *
5218 rsmipc_alloc()
5219 {
5220 	int i;
5221 	rsmipc_slot_t *slot;
5222 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5223 
5224 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5225 
5226 	/* try to find a free slot, if not wait */
5227 	mutex_enter(&rsm_ipc.lock);
5228 
5229 	while (rsm_ipc.count == 0) {
5230 		rsm_ipc.wanted = 1;
5231 		cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5232 	}
5233 
5234 	/* An empty slot is available, find it */
5235 	slot = &rsm_ipc.slots[0];
5236 	for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5237 		if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5238 			RSMIPC_CLEAR(slot, RSMIPC_FREE);
5239 			break;
5240 		}
5241 	}
5242 
5243 	ASSERT(i < RSMIPC_SZ);
5244 	rsm_ipc.count--;	/* one less is available */
5245 	rsm_ipc.sequence++; /* new sequence */
5246 
5247 	slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5248 	slot->rsmipc_cookie.ic.index = (uint_t)i;
5249 
5250 	mutex_exit(&rsm_ipc.lock);
5251 
5252 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5253 
5254 	return (slot);
5255 }
5256 
5257 static void
5258 rsmipc_free(rsmipc_slot_t *slot)
5259 {
5260 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5261 
5262 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5263 
5264 	ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5265 	ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5266 
5267 	mutex_enter(&rsm_ipc.lock);
5268 
5269 	RSMIPC_SET(slot, RSMIPC_FREE);
5270 
5271 	slot->rsmipc_cookie.ic.sequence = 0;
5272 
5273 	mutex_exit(&slot->rsmipc_lock);
5274 	rsm_ipc.count++;
5275 	ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5276 	if (rsm_ipc.wanted) {
5277 		rsm_ipc.wanted = 0;
5278 		cv_broadcast(&rsm_ipc.cv);
5279 	}
5280 
5281 	mutex_exit(&rsm_ipc.lock);
5282 
5283 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5284 }
5285 
5286 static int
5287 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5288 {
5289 	int		e = 0;
5290 	int		credit_check = 0;
5291 	int		retry_cnt = 0;
5292 	int		min_retry_cnt = 10;
5293 	clock_t		ticks;
5294 	rsm_send_t	is;
5295 	rsmipc_slot_t	*rslot;
5296 	adapter_t	*adapter;
5297 	path_t		*path;
5298 	sendq_token_t	*sendq_token;
5299 	sendq_token_t	*used_sendq_token = NULL;
5300 	rsm_send_q_handle_t	ipc_handle;
5301 	DBG_DEFINE(category,
5302 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5303 
5304 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5305 	    dest));
5306 
5307 	/*
5308 	 * Check if this is a local case
5309 	 */
5310 	if (dest == my_nodeid) {
5311 		switch (req->rsmipc_hdr.rsmipc_type) {
5312 		case RSMIPC_MSG_SEGCONNECT:
5313 			reply->rsmipc_status = (short)rsmsegacl_validate(
5314 							    req, dest, reply);
5315 			break;
5316 		case RSMIPC_MSG_BELL:
5317 			req->rsmipc_hdr.rsmipc_src = dest;
5318 			rsm_intr_event(req);
5319 			break;
5320 		case RSMIPC_MSG_IMPORTING:
5321 			importer_list_add(dest, req->rsmipc_key,
5322 			    req->rsmipc_adapter_hwaddr,
5323 			    req->rsmipc_segment_cookie);
5324 			break;
5325 		case RSMIPC_MSG_NOTIMPORTING:
5326 			importer_list_rm(dest, req->rsmipc_key,
5327 			    req->rsmipc_segment_cookie);
5328 			break;
5329 		case RSMIPC_MSG_REPUBLISH:
5330 			importer_update(dest, req->rsmipc_key,
5331 			    req->rsmipc_perm);
5332 			break;
5333 		case RSMIPC_MSG_SUSPEND:
5334 			importer_suspend(dest);
5335 			break;
5336 		case RSMIPC_MSG_SUSPEND_DONE:
5337 			rsm_suspend_complete(dest, 0);
5338 			break;
5339 		case RSMIPC_MSG_RESUME:
5340 			importer_resume(dest);
5341 			break;
5342 		default:
5343 			ASSERT(0);
5344 		}
5345 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5346 		    "rsmipc_send done\n"));
5347 		return (0);
5348 	}
5349 
5350 	if (dest >= MAX_NODES) {
5351 		DBG_PRINTF((category, RSM_ERR,
5352 		    "rsm: rsmipc_send bad node number %x\n", dest));
5353 		return (RSMERR_REMOTE_NODE_UNREACHABLE);
5354 	}
5355 
5356 	/*
5357 	 * Oh boy! we are going remote.
5358 	 */
5359 
5360 	/*
5361 	 * identify if we need to have credits to send this message
5362 	 * - only selected requests are flow controlled
5363 	 */
5364 	if (req != NULL) {
5365 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5366 		    "rsmipc_send:request type=%d\n",
5367 		    req->rsmipc_hdr.rsmipc_type));
5368 
5369 		switch (req->rsmipc_hdr.rsmipc_type) {
5370 		case RSMIPC_MSG_SEGCONNECT:
5371 		case RSMIPC_MSG_DISCONNECT:
5372 		case RSMIPC_MSG_IMPORTING:
5373 		case RSMIPC_MSG_SUSPEND:
5374 		case RSMIPC_MSG_SUSPEND_DONE:
5375 		case RSMIPC_MSG_RESUME:
5376 			credit_check = 1;
5377 			break;
5378 		default:
5379 			credit_check = 0;
5380 		}
5381 	}
5382 
5383 again:
5384 	if (retry_cnt++ == min_retry_cnt) {
5385 		/* backoff before further retries for 10ms */
5386 		delay(drv_usectohz(10000));
5387 		retry_cnt = 0; /* reset retry_cnt */
5388 	}
5389 	sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5390 	if (sendq_token == NULL) {
5391 		DBG_PRINTF((category, RSM_ERR,
5392 		    "rsm: rsmipc_send no device to reach node %d\n", dest));
5393 		return (RSMERR_REMOTE_NODE_UNREACHABLE);
5394 	}
5395 
5396 	if ((sendq_token == used_sendq_token) &&
5397 	    ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5398 		(e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5399 		rele_sendq_token(sendq_token);
5400 		DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5401 		return (RSMERR_CONN_ABORTED);
5402 	} else
5403 		used_sendq_token = sendq_token;
5404 
5405 /* lint -save -e413 */
5406 	path = SQ_TOKEN_TO_PATH(sendq_token);
5407 	adapter = path->local_adapter;
5408 /* lint -restore */
5409 	ipc_handle = sendq_token->rsmpi_sendq_handle;
5410 
5411 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5412 	    "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5413 
5414 	if (reply == NULL) {
5415 		/* Send request without ack */
5416 		/*
5417 		 * Set the rsmipc_version number in the msghdr for KA
5418 		 * communication versioning
5419 		 */
5420 		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5421 		req->rsmipc_hdr.rsmipc_src = my_nodeid;
5422 		/*
5423 		 * remote endpoints incn should match the value in our
5424 		 * path's remote_incn field. No need to grab any lock
5425 		 * since we have refcnted the path in rsmka_get_sendq_token
5426 		 */
5427 		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5428 
5429 		is.is_data = (void *)req;
5430 		is.is_size = sizeof (*req);
5431 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5432 		is.is_wait = 0;
5433 
5434 		if (credit_check) {
5435 			mutex_enter(&path->mutex);
5436 			/*
5437 			 * wait till we recv credits or path goes down. If path
5438 			 * goes down rsm_send will fail and we handle the error
5439 			 * then
5440 			 */
5441 			while ((sendq_token->msgbuf_avail == 0) &&
5442 			    (path->state == RSMKA_PATH_ACTIVE)) {
5443 				e = cv_wait_sig(&sendq_token->sendq_cv,
5444 				    &path->mutex);
5445 				if (e == 0) {
5446 					mutex_exit(&path->mutex);
5447 					no_reply_cnt++;
5448 					rele_sendq_token(sendq_token);
5449 					DBG_PRINTF((category, RSM_DEBUG,
5450 					    "rsmipc_send done: "
5451 					    "cv_wait INTERRUPTED"));
5452 					return (RSMERR_INTERRUPTED);
5453 				}
5454 			}
5455 
5456 			/*
5457 			 * path is not active retry on another path.
5458 			 */
5459 			if (path->state != RSMKA_PATH_ACTIVE) {
5460 				mutex_exit(&path->mutex);
5461 				rele_sendq_token(sendq_token);
5462 				e = RSMERR_CONN_ABORTED;
5463 				DBG_PRINTF((category, RSM_ERR,
5464 				    "rsm: rsmipc_send: path !ACTIVE"));
5465 				goto again;
5466 			}
5467 
5468 			ASSERT(sendq_token->msgbuf_avail > 0);
5469 
5470 			/*
5471 			 * reserve a msgbuf
5472 			 */
5473 			sendq_token->msgbuf_avail--;
5474 
5475 			mutex_exit(&path->mutex);
5476 
5477 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5478 			    NULL);
5479 
5480 			if (e != RSM_SUCCESS) {
5481 				mutex_enter(&path->mutex);
5482 				/*
5483 				 * release the reserved msgbuf since
5484 				 * the send failed
5485 				 */
5486 				sendq_token->msgbuf_avail++;
5487 				cv_broadcast(&sendq_token->sendq_cv);
5488 				mutex_exit(&path->mutex);
5489 			}
5490 		} else
5491 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5492 			    NULL);
5493 
5494 		no_reply_cnt++;
5495 		rele_sendq_token(sendq_token);
5496 		if (e != RSM_SUCCESS) {
5497 			DBG_PRINTF((category, RSM_ERR,
5498 			    "rsm: rsmipc_send no reply send"
5499 			    " err = %d no reply count = %d\n",
5500 			    e, no_reply_cnt));
5501 			ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5502 			    e != RSMERR_BAD_BARRIER_HNDL);
5503 			atomic_add_64(&rsm_ipcsend_errcnt, 1);
5504 			goto again;
5505 		} else {
5506 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5507 			    "rsmipc_send done\n"));
5508 			return (e);
5509 		}
5510 
5511 	}
5512 
5513 	if (req == NULL) {
5514 		/* Send reply - No flow control is done for reply */
5515 		/*
5516 		 * Set the version in the msg header for KA communication
5517 		 * versioning
5518 		 */
5519 		reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5520 		reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5521 		/* incn number is not used for reply msgs currently */
5522 		reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5523 
5524 		is.is_data = (void *)reply;
5525 		is.is_size = sizeof (*reply);
5526 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5527 		is.is_wait = 0;
5528 		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5529 		rele_sendq_token(sendq_token);
5530 		if (e != RSM_SUCCESS) {
5531 			DBG_PRINTF((category, RSM_ERR,
5532 			    "rsm: rsmipc_send reply send"
5533 			    " err = %d\n", e));
5534 			atomic_add_64(&rsm_ipcsend_errcnt, 1);
5535 			goto again;
5536 		} else {
5537 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5538 			    "rsmipc_send done\n"));
5539 			return (e);
5540 		}
5541 	}
5542 
5543 	/* Reply needed */
5544 	rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5545 
5546 	mutex_enter(&rslot->rsmipc_lock);
5547 
5548 	rslot->rsmipc_data = (void *)reply;
5549 	RSMIPC_SET(rslot, RSMIPC_PENDING);
5550 
5551 	while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5552 		/*
5553 		 * Set the rsmipc_version number in the msghdr for KA
5554 		 * communication versioning
5555 		 */
5556 		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5557 		req->rsmipc_hdr.rsmipc_src = my_nodeid;
5558 		req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5559 		/*
5560 		 * remote endpoints incn should match the value in our
5561 		 * path's remote_incn field. No need to grab any lock
5562 		 * since we have refcnted the path in rsmka_get_sendq_token
5563 		 */
5564 		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5565 
5566 		is.is_data = (void *)req;
5567 		is.is_size = sizeof (*req);
5568 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5569 		is.is_wait = 0;
5570 		if (credit_check) {
5571 
5572 			mutex_enter(&path->mutex);
5573 			/*
5574 			 * wait till we recv credits or path goes down. If path
5575 			 * goes down rsm_send will fail and we handle the error
5576 			 * then.
5577 			 */
5578 			while ((sendq_token->msgbuf_avail == 0) &&
5579 			    (path->state == RSMKA_PATH_ACTIVE)) {
5580 				e = cv_wait_sig(&sendq_token->sendq_cv,
5581 				    &path->mutex);
5582 				if (e == 0) {
5583 					mutex_exit(&path->mutex);
5584 					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5585 					rsmipc_free(rslot);
5586 					rele_sendq_token(sendq_token);
5587 					DBG_PRINTF((category, RSM_DEBUG,
5588 					    "rsmipc_send done: "
5589 					    "cv_wait INTERRUPTED"));
5590 					return (RSMERR_INTERRUPTED);
5591 				}
5592 			}
5593 
5594 			/*
5595 			 * path is not active retry on another path.
5596 			 */
5597 			if (path->state != RSMKA_PATH_ACTIVE) {
5598 				mutex_exit(&path->mutex);
5599 				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5600 				rsmipc_free(rslot);
5601 				rele_sendq_token(sendq_token);
5602 				e = RSMERR_CONN_ABORTED;
5603 				DBG_PRINTF((category, RSM_ERR,
5604 				    "rsm: rsmipc_send: path !ACTIVE"));
5605 				goto again;
5606 			}
5607 
5608 			ASSERT(sendq_token->msgbuf_avail > 0);
5609 
5610 			/*
5611 			 * reserve a msgbuf
5612 			 */
5613 			sendq_token->msgbuf_avail--;
5614 
5615 			mutex_exit(&path->mutex);
5616 
5617 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5618 			    NULL);
5619 
5620 			if (e != RSM_SUCCESS) {
5621 				mutex_enter(&path->mutex);
5622 				/*
5623 				 * release the reserved msgbuf since
5624 				 * the send failed
5625 				 */
5626 				sendq_token->msgbuf_avail++;
5627 				cv_broadcast(&sendq_token->sendq_cv);
5628 				mutex_exit(&path->mutex);
5629 			}
5630 		} else
5631 			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5632 			    NULL);
5633 
5634 		if (e != RSM_SUCCESS) {
5635 			DBG_PRINTF((category, RSM_ERR,
5636 			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
5637 			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5638 			rsmipc_free(rslot);
5639 			rele_sendq_token(sendq_token);
5640 			atomic_add_64(&rsm_ipcsend_errcnt, 1);
5641 			goto again;
5642 		}
5643 
5644 		/* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5645 		(void) drv_getparm(LBOLT, &ticks);
5646 		ticks += drv_usectohz(5000000);
5647 		e = cv_timedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5648 			ticks);
5649 		if (e < 0) {
5650 			/* timed out - retry */
5651 			e = RSMERR_TIMEOUT;
5652 		} else if (e == 0) {
5653 			/* signalled - return error */
5654 			e = RSMERR_INTERRUPTED;
5655 			break;
5656 		} else {
5657 			e = RSM_SUCCESS;
5658 		}
5659 	}
5660 
5661 	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5662 	rsmipc_free(rslot);
5663 	rele_sendq_token(sendq_token);
5664 
5665 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5666 	return (e);
5667 }
5668 
5669 static int
5670 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5671 {
5672 	rsmipc_request_t request;
5673 
5674 	/*
5675 	 *  inform the exporter to delete this importer
5676 	 */
5677 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5678 	request.rsmipc_key = segid;
5679 	request.rsmipc_segment_cookie = cookie;
5680 	return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5681 }
5682 
5683 static void
5684 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t	*acl,
5685     int acl_len, rsm_permission_t default_permission)
5686 {
5687 	int			i;
5688 	importing_token_t	*token;
5689 	rsmipc_request_t	request;
5690 	republish_token_t	*republish_list = NULL;
5691 	republish_token_t	*rp;
5692 	rsm_permission_t	permission;
5693 	int			index;
5694 
5695 	/*
5696 	 * send the new access mode to all the nodes that have imported
5697 	 * this segment.
5698 	 * If the new acl does not have a node that was present in
5699 	 * the old acl a access permission of 0 is sent.
5700 	 */
5701 
5702 	index = rsmhash(segid);
5703 
5704 	/*
5705 	 * create a list of node/permissions to send the republish message
5706 	 */
5707 	mutex_enter(&importer_list.lock);
5708 
5709 	token = importer_list.bucket[index];
5710 	while (token != NULL) {
5711 		if (segid == token->key) {
5712 			permission = default_permission;
5713 
5714 			for (i = 0; i < acl_len; i++) {
5715 				if (token->importing_node == acl[i].ae_node) {
5716 					permission = acl[i].ae_permission;
5717 					break;
5718 				}
5719 			}
5720 			rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5721 
5722 			rp->key = segid;
5723 			rp->importing_node = token->importing_node;
5724 			rp->permission = permission;
5725 			rp->next = republish_list;
5726 			republish_list = rp;
5727 		}
5728 		token = token->next;
5729 	}
5730 
5731 	mutex_exit(&importer_list.lock);
5732 
5733 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5734 	request.rsmipc_key = segid;
5735 
5736 	while (republish_list != NULL) {
5737 		request.rsmipc_perm = republish_list->permission;
5738 		(void) rsmipc_send(republish_list->importing_node,
5739 		    &request, RSM_NO_REPLY);
5740 		rp = republish_list;
5741 		republish_list = republish_list->next;
5742 		kmem_free(rp, sizeof (republish_token_t));
5743 	}
5744 }
5745 
5746 static void
5747 rsm_send_suspend()
5748 {
5749 	int			i, e;
5750 	rsmipc_request_t 	request;
5751 	list_element_t		*tokp;
5752 	list_element_t		*head = NULL;
5753 	importing_token_t	*token;
5754 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5755 	    "rsm_send_suspend enter\n"));
5756 
5757 	/*
5758 	 * create a list of node to send the suspend message
5759 	 *
5760 	 * Currently the whole importer list is scanned and we obtain
5761 	 * all the nodes - this basically gets all nodes that at least
5762 	 * import one segment from the local node.
5763 	 *
5764 	 * no need to grab the rsm_suspend_list lock here since we are
5765 	 * single threaded when suspend is called.
5766 	 */
5767 
5768 	mutex_enter(&importer_list.lock);
5769 	for (i = 0; i < rsm_hash_size; i++) {
5770 
5771 		token = importer_list.bucket[i];
5772 
5773 		while (token != NULL) {
5774 
5775 			tokp = head;
5776 
5777 			/*
5778 			 * make sure that the token's node
5779 			 * is not already on the suspend list
5780 			 */
5781 			while (tokp != NULL) {
5782 				if (tokp->nodeid == token->importing_node) {
5783 					break;
5784 				}
5785 				tokp = tokp->next;
5786 			}
5787 
5788 			if (tokp == NULL) { /* not in suspend list */
5789 				tokp = kmem_zalloc(sizeof (list_element_t),
5790 						KM_SLEEP);
5791 				tokp->nodeid = token->importing_node;
5792 				tokp->next = head;
5793 				head = tokp;
5794 			}
5795 
5796 			token = token->next;
5797 		}
5798 	}
5799 	mutex_exit(&importer_list.lock);
5800 
5801 	if (head == NULL) { /* no importers so go ahead and quiesce segments */
5802 		exporter_quiesce();
5803 		return;
5804 	}
5805 
5806 	mutex_enter(&rsm_suspend_list.list_lock);
5807 	ASSERT(rsm_suspend_list.list_head == NULL);
5808 	/*
5809 	 * update the suspend list righaway so that if a node dies the
5810 	 * pathmanager can set the NODE dead flag
5811 	 */
5812 	rsm_suspend_list.list_head = head;
5813 	mutex_exit(&rsm_suspend_list.list_lock);
5814 
5815 	tokp = head;
5816 
5817 	while (tokp != NULL) {
5818 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5819 		e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5820 		/*
5821 		 * Error in rsmipc_send currently happens due to inaccessibility
5822 		 * of the remote node.
5823 		 */
5824 		if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5825 			tokp->flags |= RSM_SUSPEND_ACKPENDING;
5826 		}
5827 
5828 		tokp = tokp->next;
5829 	}
5830 
5831 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5832 	    "rsm_send_suspend done\n"));
5833 
5834 }
5835 
5836 static void
5837 rsm_send_resume()
5838 {
5839 	rsmipc_request_t 	request;
5840 	list_element_t		*elem, *head;
5841 
5842 	/*
5843 	 * save the suspend list so that we know where to send
5844 	 * the resume messages and make the suspend list head
5845 	 * NULL.
5846 	 */
5847 	mutex_enter(&rsm_suspend_list.list_lock);
5848 	head = rsm_suspend_list.list_head;
5849 	rsm_suspend_list.list_head = NULL;
5850 	mutex_exit(&rsm_suspend_list.list_lock);
5851 
5852 	while (head != NULL) {
5853 		elem = head;
5854 		head = head->next;
5855 
5856 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5857 
5858 		(void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5859 
5860 		kmem_free((void *)elem, sizeof (list_element_t));
5861 
5862 	}
5863 
5864 }
5865 
5866 /*
5867  * This function takes path and sends a message using the sendq
5868  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5869  * and RSMIPC_MSG_CREDIT are sent using this function.
5870  */
5871 int
5872 rsmipc_send_controlmsg(path_t *path, int msgtype)
5873 {
5874 	int			e;
5875 	int			retry_cnt = 0;
5876 	int			min_retry_cnt = 10;
5877 	clock_t			timeout;
5878 	adapter_t		*adapter;
5879 	rsm_send_t		is;
5880 	rsm_send_q_handle_t	ipc_handle;
5881 	rsmipc_controlmsg_t	msg;
5882 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5883 
5884 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5885 	    "rsmipc_send_controlmsg enter\n"));
5886 
5887 	ASSERT(MUTEX_HELD(&path->mutex));
5888 
5889 	adapter = path->local_adapter;
5890 
5891 	DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5892 	    "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5893 	    my_nodeid, adapter->hwaddr, path->remote_node,
5894 	    path->remote_hwaddr, path->procmsg_cnt));
5895 
5896 	if (path->state != RSMKA_PATH_ACTIVE) {
5897 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5898 		    "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5899 		return (1);
5900 	}
5901 
5902 	ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5903 
5904 	msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5905 	msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5906 	msg.rsmipc_hdr.rsmipc_type = msgtype;
5907 	msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5908 
5909 	if (msgtype == RSMIPC_MSG_CREDIT)
5910 		msg.rsmipc_credits = path->procmsg_cnt;
5911 
5912 	msg.rsmipc_local_incn = path->local_incn;
5913 
5914 	msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5915 	/* incr the sendq, path refcnt */
5916 	PATH_HOLD_NOLOCK(path);
5917 	SENDQ_TOKEN_HOLD(path);
5918 
5919 	do {
5920 		/* drop the path lock before doing the rsm_send */
5921 		mutex_exit(&path->mutex);
5922 
5923 		is.is_data = (void *)&msg;
5924 		is.is_size = sizeof (msg);
5925 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5926 		is.is_wait = 0;
5927 
5928 		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5929 
5930 		ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5931 		    e != RSMERR_BAD_BARRIER_HNDL);
5932 
5933 		mutex_enter(&path->mutex);
5934 
5935 		if (e == RSM_SUCCESS) {
5936 			break;
5937 		}
5938 		/* error counter for statistics */
5939 		atomic_add_64(&rsm_ctrlmsg_errcnt, 1);
5940 
5941 		DBG_PRINTF((category, RSM_ERR,
5942 		    "rsmipc_send_controlmsg:rsm_send error=%d", e));
5943 
5944 		if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5945 			timeout  = ddi_get_lbolt() + drv_usectohz(10000);
5946 			(void) cv_timedwait(&path->sendq_token.sendq_cv,
5947 			    &path->mutex, timeout);
5948 			retry_cnt = 0;
5949 		}
5950 	} while (path->state == RSMKA_PATH_ACTIVE);
5951 
5952 	/* decrement the sendq,path refcnt that we incr before rsm_send */
5953 	SENDQ_TOKEN_RELE(path);
5954 	PATH_RELE_NOLOCK(path);
5955 
5956 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5957 	    "rsmipc_send_controlmsg done=%d", e));
5958 	return (e);
5959 }
5960 
5961 /*
5962  * Called from rsm_force_unload and path_importer_disconnect. The memory
5963  * mapping for the imported segment is removed and the segment is
5964  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5965  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5966  * and FALSE from rsm_rebind.
5967  *
5968  * When subsequent accesses cause page faulting, the dummy page is mapped
5969  * to resolve the fault, and the mapping generation number is incremented
5970  * so that the application can be notified on a close barrier operation.
5971  *
5972  * It is important to note that the caller of rsmseg_unload is responsible for
5973  * acquiring the segment lock before making a call to rsmseg_unload. This is
5974  * required to make the caller and rsmseg_unload thread safe. The segment lock
5975  * will be released by the rsmseg_unload function.
5976  */
5977 void
5978 rsmseg_unload(rsmseg_t *im_seg)
5979 {
5980 	rsmcookie_t		*hdl;
5981 	void			*shared_cookie;
5982 	rsmipc_request_t	request;
5983 	uint_t			maxprot;
5984 
5985 	DBG_DEFINE(category,
5986 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5987 
5988 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5989 
5990 	ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5991 
5992 	/* wait until segment leaves the mapping state */
5993 	while (im_seg->s_state == RSM_STATE_MAPPING)
5994 		cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5995 	/*
5996 	 * An unload is only necessary if the segment is connected. However,
5997 	 * if the segment was on the import list in state RSM_STATE_CONNECTING
5998 	 * then a connection was in progress. Change to RSM_STATE_NEW
5999 	 * here to cause an early exit from the connection process.
6000 	 */
6001 	if (im_seg->s_state == RSM_STATE_NEW) {
6002 		rsmseglock_release(im_seg);
6003 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6004 		    "rsmseg_unload done: RSM_STATE_NEW\n"));
6005 		return;
6006 	} else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6007 		im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6008 		rsmsharelock_acquire(im_seg);
6009 		im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6010 		rsmsharelock_release(im_seg);
6011 		rsmseglock_release(im_seg);
6012 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6013 		    "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6014 		return;
6015 	}
6016 
6017 	if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6018 		if (im_seg->s_ckl != NULL) {
6019 			int e;
6020 			/* Setup protections for remap */
6021 			maxprot = PROT_USER;
6022 			if (im_seg->s_mode & RSM_PERM_READ) {
6023 				maxprot |= PROT_READ;
6024 			}
6025 			if (im_seg->s_mode & RSM_PERM_WRITE) {
6026 				maxprot |= PROT_WRITE;
6027 			}
6028 			hdl = im_seg->s_ckl;
6029 			for (; hdl != NULL; hdl = hdl->c_next) {
6030 				e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6031 				    remap_cookie,
6032 				    hdl->c_off, hdl->c_len,
6033 				    maxprot, 0, NULL);
6034 
6035 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6036 				    "remap returns %d\n", e));
6037 			}
6038 		}
6039 
6040 		(void) rsm_closeconnection(im_seg, &shared_cookie);
6041 
6042 		if (shared_cookie != NULL) {
6043 			/*
6044 			 * inform the exporting node so this import
6045 			 * can be deleted from the list of importers.
6046 			 */
6047 			request.rsmipc_hdr.rsmipc_type =
6048 			    RSMIPC_MSG_NOTIMPORTING;
6049 			request.rsmipc_key = im_seg->s_segid;
6050 			request.rsmipc_segment_cookie = shared_cookie;
6051 			rsmseglock_release(im_seg);
6052 			(void) rsmipc_send(im_seg->s_node, &request,
6053 			    RSM_NO_REPLY);
6054 		} else {
6055 			rsmseglock_release(im_seg);
6056 		}
6057 	}
6058 	else
6059 		rsmseglock_release(im_seg);
6060 
6061 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6062 
6063 }
6064 
6065 /* ****************************** Importer Calls ************************ */
6066 
6067 static int
6068 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6069 {
6070 	int shifts = 0;
6071 
6072 	if (crgetuid(cr) != owner) {
6073 		shifts += 3;
6074 		if (!groupmember(group, cr))
6075 			shifts += 3;
6076 	}
6077 
6078 	mode &= ~(perm << shifts);
6079 
6080 	if (mode == 0)
6081 		return (0);
6082 
6083 	return (secpolicy_rsm_access(cr, owner, mode));
6084 }
6085 
6086 
6087 static int
6088 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6089     intptr_t dataptr, int mode)
6090 {
6091 	int e;
6092 	int			recheck_state = 0;
6093 	void			*shared_cookie;
6094 	rsmipc_request_t	request;
6095 	rsmipc_reply_t		reply;
6096 	rsm_permission_t	access;
6097 	adapter_t		*adapter;
6098 	rsm_addr_t		addr = 0;
6099 	rsm_import_share_t	*sharedp;
6100 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6101 
6102 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6103 
6104 	adapter = rsm_getadapter(msg, mode);
6105 	if (adapter == NULL) {
6106 		DBG_PRINTF((category, RSM_ERR,
6107 		    "rsm_connect done:ENODEV adapter=NULL\n"));
6108 		return (RSMERR_CTLR_NOT_PRESENT);
6109 	}
6110 
6111 	if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6112 		rsmka_release_adapter(adapter);
6113 		DBG_PRINTF((category, RSM_ERR,
6114 		    "rsm_connect done:ENODEV loopback\n"));
6115 		return (RSMERR_CTLR_NOT_PRESENT);
6116 	}
6117 
6118 
6119 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6120 	ASSERT(seg->s_state == RSM_STATE_NEW);
6121 
6122 	/*
6123 	 * Translate perm to access
6124 	 */
6125 	if (msg->perm & ~RSM_PERM_RDWR) {
6126 		rsmka_release_adapter(adapter);
6127 		DBG_PRINTF((category, RSM_ERR,
6128 		    "rsm_connect done:EINVAL invalid perms\n"));
6129 		return (RSMERR_BAD_PERMS);
6130 	}
6131 	access = 0;
6132 	if (msg->perm & RSM_PERM_READ)
6133 		access |= RSM_ACCESS_READ;
6134 	if (msg->perm & RSM_PERM_WRITE)
6135 		access |= RSM_ACCESS_WRITE;
6136 
6137 	seg->s_node = msg->nodeid;
6138 
6139 	/*
6140 	 * Adding to the import list locks the segment; release the segment
6141 	 * lock so we can get the reply for the send.
6142 	 */
6143 	e = rsmimport_add(seg, msg->key);
6144 	if (e) {
6145 		rsmka_release_adapter(adapter);
6146 		DBG_PRINTF((category, RSM_ERR,
6147 		    "rsm_connect done:rsmimport_add failed %d\n", e));
6148 		return (e);
6149 	}
6150 	seg->s_state = RSM_STATE_CONNECTING;
6151 
6152 	/*
6153 	 * Set the s_adapter field here so as to have a valid comparison of
6154 	 * the adapter and the s_adapter value during rsmshare_get. For
6155 	 * any error, set s_adapter to NULL before doing a release_adapter
6156 	 */
6157 	seg->s_adapter = adapter;
6158 
6159 	rsmseglock_release(seg);
6160 
6161 	/*
6162 	 * get the pointer to the shared data structure; the
6163 	 * shared data is locked and refcount has been incremented
6164 	 */
6165 	sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6166 
6167 	ASSERT(rsmsharelock_held(seg));
6168 
6169 	do {
6170 		/* flag indicates whether we need to recheck the state */
6171 		recheck_state = 0;
6172 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6173 		    "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6174 		switch (sharedp->rsmsi_state) {
6175 		case RSMSI_STATE_NEW:
6176 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6177 			break;
6178 		case RSMSI_STATE_CONNECTING:
6179 			/* FALLTHRU */
6180 		case RSMSI_STATE_CONN_QUIESCE:
6181 			/* FALLTHRU */
6182 		case RSMSI_STATE_MAP_QUIESCE:
6183 			/* wait for the state to change */
6184 			while ((sharedp->rsmsi_state ==
6185 			    RSMSI_STATE_CONNECTING) ||
6186 			    (sharedp->rsmsi_state ==
6187 			    RSMSI_STATE_CONN_QUIESCE) ||
6188 			    (sharedp->rsmsi_state ==
6189 			    RSMSI_STATE_MAP_QUIESCE)) {
6190 				if (cv_wait_sig(&sharedp->rsmsi_cv,
6191 				    &sharedp->rsmsi_lock) == 0) {
6192 					/* signalled - clean up and return */
6193 					rsmsharelock_release(seg);
6194 					rsmimport_rm(seg);
6195 					seg->s_adapter = NULL;
6196 					rsmka_release_adapter(adapter);
6197 					seg->s_state = RSM_STATE_NEW;
6198 					DBG_PRINTF((category, RSM_ERR,
6199 					    "rsm_connect done: INTERRUPTED\n"));
6200 					return (RSMERR_INTERRUPTED);
6201 				}
6202 			}
6203 			/*
6204 			 * the state changed, loop back and check what it is
6205 			 */
6206 			recheck_state = 1;
6207 			break;
6208 		case RSMSI_STATE_ABORT_CONNECT:
6209 			/* exit the loop and clean up further down */
6210 			break;
6211 		case RSMSI_STATE_CONNECTED:
6212 			/* already connected, good - fall through */
6213 		case RSMSI_STATE_MAPPED:
6214 			/* already mapped, wow - fall through */
6215 			/* access validation etc is done further down */
6216 			break;
6217 		case RSMSI_STATE_DISCONNECTED:
6218 			/* disconnected - so reconnect now */
6219 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6220 			break;
6221 		default:
6222 			ASSERT(0); /* Invalid State */
6223 		}
6224 	} while (recheck_state);
6225 
6226 	if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6227 		/* we are the first to connect */
6228 		rsmsharelock_release(seg);
6229 
6230 		if (msg->nodeid != my_nodeid) {
6231 			addr = get_remote_hwaddr(adapter, msg->nodeid);
6232 
6233 			if ((int64_t)addr < 0) {
6234 				rsmsharelock_acquire(seg);
6235 				rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6236 				    RSMSI_STATE_NEW);
6237 				rsmsharelock_release(seg);
6238 				rsmimport_rm(seg);
6239 				seg->s_adapter = NULL;
6240 				rsmka_release_adapter(adapter);
6241 				seg->s_state = RSM_STATE_NEW;
6242 				DBG_PRINTF((category, RSM_ERR,
6243 				    "rsm_connect done: hwaddr<0\n"));
6244 				return (RSMERR_INTERNAL_ERROR);
6245 			}
6246 		} else {
6247 			addr = adapter->hwaddr;
6248 		}
6249 
6250 		/*
6251 		 * send request to node [src, dest, key, msgid] and get back
6252 		 * [status, msgid, cookie]
6253 		 */
6254 		request.rsmipc_key = msg->key;
6255 		/*
6256 		 * we need the s_mode of the exporter so pass
6257 		 * RSM_ACCESS_TRUSTED
6258 		 */
6259 		request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6260 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6261 		request.rsmipc_adapter_hwaddr = addr;
6262 		request.rsmipc_segment_cookie = sharedp;
6263 
6264 		e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6265 		if (e) {
6266 			rsmsharelock_acquire(seg);
6267 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6268 			    RSMSI_STATE_NEW);
6269 			rsmsharelock_release(seg);
6270 			rsmimport_rm(seg);
6271 			seg->s_adapter = NULL;
6272 			rsmka_release_adapter(adapter);
6273 			seg->s_state = RSM_STATE_NEW;
6274 			DBG_PRINTF((category, RSM_ERR,
6275 			    "rsm_connect done:rsmipc_send failed %d\n", e));
6276 			return (e);
6277 		}
6278 
6279 		if (reply.rsmipc_status != RSM_SUCCESS) {
6280 			rsmsharelock_acquire(seg);
6281 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6282 			    RSMSI_STATE_NEW);
6283 			rsmsharelock_release(seg);
6284 			rsmimport_rm(seg);
6285 			seg->s_adapter = NULL;
6286 			rsmka_release_adapter(adapter);
6287 			seg->s_state = RSM_STATE_NEW;
6288 			DBG_PRINTF((category, RSM_ERR,
6289 			    "rsm_connect done:rsmipc_send reply err %d\n",
6290 			    reply.rsmipc_status));
6291 			return (reply.rsmipc_status);
6292 		}
6293 
6294 		rsmsharelock_acquire(seg);
6295 		/* store the information recvd into the shared data struct */
6296 		sharedp->rsmsi_mode = reply.rsmipc_mode;
6297 		sharedp->rsmsi_uid = reply.rsmipc_uid;
6298 		sharedp->rsmsi_gid = reply.rsmipc_gid;
6299 		sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6300 		sharedp->rsmsi_cookie = sharedp;
6301 	}
6302 
6303 	rsmsharelock_release(seg);
6304 
6305 	/*
6306 	 * Get the segment lock and check for a force disconnect
6307 	 * from the export side which would have changed the state
6308 	 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6309 	 * force disconnect will be held off until the connection
6310 	 * has completed.
6311 	 */
6312 	rsmseglock_acquire(seg);
6313 	rsmsharelock_acquire(seg);
6314 	ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6315 	    seg->s_state == RSM_STATE_ABORT_CONNECT);
6316 
6317 	shared_cookie = sharedp->rsmsi_cookie;
6318 
6319 	if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6320 	    (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6321 		seg->s_state = RSM_STATE_NEW;
6322 		seg->s_adapter = NULL;
6323 		rsmsharelock_release(seg);
6324 		rsmseglock_release(seg);
6325 		rsmimport_rm(seg);
6326 		rsmka_release_adapter(adapter);
6327 
6328 		rsmsharelock_acquire(seg);
6329 		if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6330 			/*
6331 			 * set a flag indicating abort handling has been
6332 			 * done
6333 			 */
6334 			sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6335 			rsmsharelock_release(seg);
6336 			/* send a message to exporter - only once */
6337 			(void) rsm_send_notimporting(msg->nodeid,
6338 			    msg->key, shared_cookie);
6339 			rsmsharelock_acquire(seg);
6340 			/*
6341 			 * wake up any waiting importers and inform that
6342 			 * connection has been aborted
6343 			 */
6344 			cv_broadcast(&sharedp->rsmsi_cv);
6345 		}
6346 		rsmsharelock_release(seg);
6347 
6348 		DBG_PRINTF((category, RSM_ERR,
6349 		    "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6350 		return (RSMERR_INTERRUPTED);
6351 	}
6352 
6353 
6354 	/*
6355 	 * We need to verify that this process has access
6356 	 */
6357 	e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6358 			    access & sharedp->rsmsi_mode,
6359 			    (int)(msg->perm & RSM_PERM_RDWR), cred);
6360 	if (e) {
6361 		rsmsharelock_release(seg);
6362 		seg->s_state = RSM_STATE_NEW;
6363 		seg->s_adapter = NULL;
6364 		rsmseglock_release(seg);
6365 		rsmimport_rm(seg);
6366 		rsmka_release_adapter(adapter);
6367 		/*
6368 		 * No need to lock segment it has been removed
6369 		 * from the hash table
6370 		 */
6371 		rsmsharelock_acquire(seg);
6372 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6373 			rsmsharelock_release(seg);
6374 			/* this is the first importer */
6375 
6376 			(void) rsm_send_notimporting(msg->nodeid, msg->key,
6377 			    shared_cookie);
6378 			rsmsharelock_acquire(seg);
6379 			sharedp->rsmsi_state = RSMSI_STATE_NEW;
6380 			cv_broadcast(&sharedp->rsmsi_cv);
6381 		}
6382 		rsmsharelock_release(seg);
6383 
6384 		DBG_PRINTF((category, RSM_ERR,
6385 		    "rsm_connect done: ipcaccess failed\n"));
6386 		return (RSMERR_PERM_DENIED);
6387 	}
6388 
6389 	/* update state and cookie */
6390 	seg->s_segid = sharedp->rsmsi_segid;
6391 	seg->s_len = sharedp->rsmsi_seglen;
6392 	seg->s_mode = access & sharedp->rsmsi_mode;
6393 	seg->s_pid = ddi_get_pid();
6394 	seg->s_mapinfo = NULL;
6395 
6396 	if (seg->s_node != my_nodeid) {
6397 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6398 			e = adapter->rsmpi_ops->rsm_connect(
6399 			    adapter->rsmpi_handle,
6400 			    addr, seg->s_segid, &sharedp->rsmsi_handle);
6401 
6402 			if (e != RSM_SUCCESS) {
6403 				seg->s_state = RSM_STATE_NEW;
6404 				seg->s_adapter = NULL;
6405 				rsmsharelock_release(seg);
6406 				rsmseglock_release(seg);
6407 				rsmimport_rm(seg);
6408 				rsmka_release_adapter(adapter);
6409 				/*
6410 				 *  inform the exporter to delete this importer
6411 				 */
6412 				(void) rsm_send_notimporting(msg->nodeid,
6413 				    msg->key, shared_cookie);
6414 
6415 				/*
6416 				 * Now inform any waiting importers to
6417 				 * retry connect. This needs to be done
6418 				 * after sending notimporting so that
6419 				 * the notimporting is sent before a waiting
6420 				 * importer sends a segconnect while retrying
6421 				 *
6422 				 * No need to lock segment it has been removed
6423 				 * from the hash table
6424 				 */
6425 
6426 				rsmsharelock_acquire(seg);
6427 				sharedp->rsmsi_state = RSMSI_STATE_NEW;
6428 				cv_broadcast(&sharedp->rsmsi_cv);
6429 				rsmsharelock_release(seg);
6430 
6431 				DBG_PRINTF((category, RSM_ERR,
6432 				    "rsm_connect error %d\n", e));
6433 				if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6434 					return (
6435 					    RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6436 				else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6437 					(e == RSMERR_UNKNOWN_RSM_ADDR))
6438 					return (RSMERR_REMOTE_NODE_UNREACHABLE);
6439 				else
6440 					return (e);
6441 			}
6442 
6443 		}
6444 		seg->s_handle.in = sharedp->rsmsi_handle;
6445 
6446 	}
6447 
6448 	seg->s_state = RSM_STATE_CONNECT;
6449 
6450 
6451 	seg->s_flags &= ~RSM_IMPORT_DUMMY;	/* clear dummy flag */
6452 	if (bar_va) {
6453 		/* increment generation number on barrier page */
6454 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6455 		/* return user off into barrier page where status will be */
6456 		msg->off = (int)seg->s_hdr.rsmrc_num;
6457 		msg->gnum = bar_va[msg->off]; 	/* gnum race */
6458 	} else {
6459 		msg->off = 0;
6460 		msg->gnum = 0;	/* gnum race */
6461 	}
6462 
6463 	msg->len = (int)sharedp->rsmsi_seglen;
6464 	msg->rnum = seg->s_minor;
6465 	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6466 	rsmsharelock_release(seg);
6467 	rsmseglock_release(seg);
6468 
6469 	/* Return back to user the segment size & perm in case it's needed */
6470 
6471 #ifdef _MULTI_DATAMODEL
6472 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6473 		rsm_ioctlmsg32_t msg32;
6474 
6475 		if (msg->len > UINT_MAX)
6476 			msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6477 		else
6478 			msg32.len = msg->len;
6479 		msg32.off = msg->off;
6480 		msg32.perm = msg->perm;
6481 		msg32.gnum = msg->gnum;
6482 		msg32.rnum = msg->rnum;
6483 
6484 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6485 		    "rsm_connect done\n"));
6486 
6487 		if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6488 		    sizeof (msg32), mode))
6489 			return (RSMERR_BAD_ADDR);
6490 		else
6491 			return (RSM_SUCCESS);
6492 	}
6493 #endif
6494 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6495 
6496 	if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6497 	    mode))
6498 		return (RSMERR_BAD_ADDR);
6499 	else
6500 		return (RSM_SUCCESS);
6501 }
6502 
6503 static int
6504 rsm_unmap(rsmseg_t *seg)
6505 {
6506 	int			err;
6507 	adapter_t		*adapter;
6508 	rsm_import_share_t	*sharedp;
6509 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6510 
6511 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6512 	    "rsm_unmap enter %u\n", seg->s_segid));
6513 
6514 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6515 
6516 	/* assert seg is locked */
6517 	ASSERT(rsmseglock_held(seg));
6518 	ASSERT(seg->s_state != RSM_STATE_MAPPING);
6519 
6520 	if ((seg->s_state != RSM_STATE_ACTIVE) &&
6521 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6522 		/* segment unmap has already been done */
6523 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6524 		return (RSM_SUCCESS);
6525 	}
6526 
6527 	sharedp = seg->s_share;
6528 
6529 	rsmsharelock_acquire(seg);
6530 
6531 	/*
6532 	 *	- shared data struct is in MAPPED or MAP_QUIESCE state
6533 	 */
6534 
6535 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6536 	    sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6537 
6538 	/*
6539 	 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6540 	 * the segment cookie list was NULL; but it is always NULL when
6541 	 * called from rsmmap_unmap and won't be NULL when called for
6542 	 * a force disconnect - so the check for NULL cookie list was removed
6543 	 */
6544 
6545 	ASSERT(sharedp->rsmsi_mapcnt > 0);
6546 
6547 	sharedp->rsmsi_mapcnt--;
6548 
6549 	if (sharedp->rsmsi_mapcnt == 0) {
6550 		if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6551 			/* unmap the shared RSMPI mapping */
6552 			adapter = seg->s_adapter;
6553 			if (seg->s_node != my_nodeid) {
6554 				ASSERT(sharedp->rsmsi_handle != NULL);
6555 				err = adapter->rsmpi_ops->
6556 				    rsm_unmap(sharedp->rsmsi_handle);
6557 				DBG_PRINTF((category, RSM_DEBUG,
6558 				    "rsm_unmap: rsmpi unmap %d\n", err));
6559 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6560 				sharedp->rsmsi_mapinfo = NULL;
6561 			}
6562 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6563 		} else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6564 			sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6565 		}
6566 	}
6567 
6568 	rsmsharelock_release(seg);
6569 
6570 	/*
6571 	 * The s_cookie field is used to store the cookie returned from the
6572 	 * ddi_umem_lock when binding the pages for an export segment. This
6573 	 * is the primary use of the s_cookie field and does not normally
6574 	 * pertain to any importing segment except in the loopback case.
6575 	 * For the loopback case, the import segment and export segment are
6576 	 * on the same node, the s_cookie field of the segment structure for
6577 	 * the importer is initialized to the s_cookie field in the exported
6578 	 * segment during the map operation and is used during the call to
6579 	 * devmap_umem_setup for the import mapping.
6580 	 * Thus, during unmap, we simply need to set s_cookie to NULL to
6581 	 * indicate that the mapping no longer exists.
6582 	 */
6583 	seg->s_cookie = NULL;
6584 
6585 	seg->s_mapinfo = NULL;
6586 
6587 	if (seg->s_state == RSM_STATE_ACTIVE)
6588 		seg->s_state = RSM_STATE_CONNECT;
6589 	else
6590 		seg->s_state = RSM_STATE_CONN_QUIESCE;
6591 
6592 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6593 
6594 	return (RSM_SUCCESS);
6595 }
6596 
6597 /*
6598  * cookie returned here if not null indicates that it is
6599  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6600  * message.
6601  */
6602 static int
6603 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6604 {
6605 	int			e;
6606 	adapter_t		*adapter;
6607 	rsm_import_share_t	*sharedp;
6608 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6609 
6610 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6611 					"rsm_closeconnection enter\n"));
6612 
6613 	*cookie = (void *)NULL;
6614 
6615 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6616 
6617 	/* assert seg is locked */
6618 	ASSERT(rsmseglock_held(seg));
6619 
6620 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6621 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6622 		    "rsm_closeconnection done: already disconnected\n"));
6623 		return (RSM_SUCCESS);
6624 	}
6625 
6626 	/* wait for all putv/getv ops to get done */
6627 	while (seg->s_rdmacnt > 0) {
6628 		cv_wait(&seg->s_cv, &seg->s_lock);
6629 	}
6630 
6631 	(void) rsm_unmap(seg);
6632 
6633 	ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6634 	    seg->s_state == RSM_STATE_CONN_QUIESCE);
6635 
6636 	adapter = seg->s_adapter;
6637 	sharedp = seg->s_share;
6638 
6639 	ASSERT(sharedp != NULL);
6640 
6641 	rsmsharelock_acquire(seg);
6642 
6643 	/*
6644 	 * Disconnect on adapter
6645 	 *
6646 	 * The current algorithm is stateless, I don't have to contact
6647 	 * server when I go away. He only gives me permissions. Of course,
6648 	 * the adapters will talk to terminate the connect.
6649 	 *
6650 	 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6651 	 */
6652 	if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6653 	    (sharedp->rsmsi_node != my_nodeid)) {
6654 
6655 		if (sharedp->rsmsi_refcnt == 1) {
6656 			/* this is the last importer */
6657 			ASSERT(sharedp->rsmsi_mapcnt == 0);
6658 
6659 			e = adapter->rsmpi_ops->
6660 			    rsm_disconnect(sharedp->rsmsi_handle);
6661 			if (e != RSM_SUCCESS) {
6662 				DBG_PRINTF((category, RSM_DEBUG,
6663 				    "rsm:disconnect failed seg=%x:err=%d\n",
6664 				    seg->s_key, e));
6665 			}
6666 		}
6667 	}
6668 
6669 	seg->s_handle.in = NULL;
6670 
6671 	sharedp->rsmsi_refcnt--;
6672 
6673 	if (sharedp->rsmsi_refcnt == 0) {
6674 		*cookie = (void *)sharedp->rsmsi_cookie;
6675 		sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6676 		sharedp->rsmsi_handle = NULL;
6677 		rsmsharelock_release(seg);
6678 
6679 		/* clean up the shared data structure */
6680 		mutex_destroy(&sharedp->rsmsi_lock);
6681 		cv_destroy(&sharedp->rsmsi_cv);
6682 		kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6683 
6684 	} else {
6685 		rsmsharelock_release(seg);
6686 	}
6687 
6688 	/* increment generation number on barrier page */
6689 	if (bar_va) {
6690 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6691 	}
6692 
6693 	/*
6694 	 * The following needs to be done after any
6695 	 * rsmsharelock calls which use seg->s_share.
6696 	 */
6697 	seg->s_share = NULL;
6698 
6699 	seg->s_state = RSM_STATE_DISCONNECT;
6700 	/* signal anyone waiting in the CONN_QUIESCE state */
6701 	cv_broadcast(&seg->s_cv);
6702 
6703 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6704 	    "rsm_closeconnection done\n"));
6705 
6706 	return (RSM_SUCCESS);
6707 }
6708 
6709 int
6710 rsm_disconnect(rsmseg_t *seg)
6711 {
6712 	rsmipc_request_t	request;
6713 	void			*shared_cookie;
6714 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6715 
6716 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6717 
6718 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6719 
6720 	/* assert seg isn't locked */
6721 	ASSERT(!rsmseglock_held(seg));
6722 
6723 
6724 	/* Remove segment from imported list */
6725 	rsmimport_rm(seg);
6726 
6727 	/* acquire the segment */
6728 	rsmseglock_acquire(seg);
6729 
6730 	/* wait until segment leaves the mapping state */
6731 	while (seg->s_state == RSM_STATE_MAPPING)
6732 		cv_wait(&seg->s_cv, &seg->s_lock);
6733 
6734 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6735 		seg->s_state = RSM_STATE_NEW;
6736 		rsmseglock_release(seg);
6737 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6738 		    "rsm_disconnect done: already disconnected\n"));
6739 		return (RSM_SUCCESS);
6740 	}
6741 
6742 	(void) rsm_closeconnection(seg, &shared_cookie);
6743 
6744 	/* update state */
6745 	seg->s_state = RSM_STATE_NEW;
6746 
6747 	if (shared_cookie != NULL) {
6748 		/*
6749 		 *  This is the last importer so inform the exporting node
6750 		 *  so this import can be deleted from the list of importers.
6751 		 */
6752 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6753 		request.rsmipc_key = seg->s_segid;
6754 		request.rsmipc_segment_cookie = shared_cookie;
6755 		rsmseglock_release(seg);
6756 		(void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6757 	} else {
6758 		rsmseglock_release(seg);
6759 	}
6760 
6761 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6762 
6763 	return (DDI_SUCCESS);
6764 }
6765 
6766 /*ARGSUSED*/
6767 static int
6768 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6769     struct pollhead **phpp)
6770 {
6771 	minor_t		rnum;
6772 	rsmresource_t	*res;
6773 	rsmseg_t 	*seg;
6774 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6775 
6776 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6777 
6778 	/* find minor, no lock */
6779 	rnum = getminor(dev);
6780 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
6781 
6782 	/* poll is supported only for export/import segments */
6783 	if ((res == NULL) || (res == RSMRC_RESERVED) ||
6784 	    (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6785 		return (ENXIO);
6786 	}
6787 
6788 	*reventsp = 0;
6789 
6790 	/*
6791 	 * An exported segment must be in state RSM_STATE_EXPORT; an
6792 	 * imported segment must be in state RSM_STATE_ACTIVE.
6793 	 */
6794 	seg = (rsmseg_t *)res;
6795 
6796 	if (seg->s_pollevent) {
6797 		*reventsp = POLLRDNORM;
6798 	} else if (!anyyet) {
6799 		/* cannot take segment lock here */
6800 		*phpp = &seg->s_poll;
6801 		seg->s_pollflag |= RSM_SEGMENT_POLL;
6802 	}
6803 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6804 	return (0);
6805 }
6806 
6807 
6808 
6809 /* ************************* IOCTL Commands ********************* */
6810 
6811 static rsmseg_t *
6812 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6813     rsm_resource_type_t type)
6814 {
6815 	/* get segment from resource handle */
6816 	rsmseg_t *seg;
6817 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6818 
6819 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6820 
6821 
6822 	if (res != RSMRC_RESERVED) {
6823 		seg = (rsmseg_t *)res;
6824 	} else {
6825 		/* Allocate segment now and bind it */
6826 		seg = rsmseg_alloc(rnum, credp);
6827 
6828 		/*
6829 		 * if DR pre-processing is going on or DR is in progress
6830 		 * then the new export segments should be in the NEW_QSCD state
6831 		 */
6832 		if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6833 			mutex_enter(&rsm_drv_data.drv_lock);
6834 			if ((rsm_drv_data.drv_state ==
6835 			    RSM_DRV_PREDEL_STARTED) ||
6836 			    (rsm_drv_data.drv_state ==
6837 			    RSM_DRV_PREDEL_COMPLETED) ||
6838 			    (rsm_drv_data.drv_state ==
6839 			    RSM_DRV_DR_IN_PROGRESS)) {
6840 				seg->s_state = RSM_STATE_NEW_QUIESCED;
6841 			}
6842 			mutex_exit(&rsm_drv_data.drv_lock);
6843 		}
6844 
6845 		rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6846 	}
6847 
6848 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6849 
6850 	return (seg);
6851 }
6852 
6853 static int
6854 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6855     int mode, cred_t *credp)
6856 {
6857 	int error;
6858 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6859 
6860 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6861 
6862 	arg = arg;
6863 	credp = credp;
6864 
6865 	ASSERT(seg != NULL);
6866 
6867 	switch (cmd) {
6868 	case RSM_IOCTL_BIND:
6869 		error = rsm_bind(seg, msg, arg, mode);
6870 		break;
6871 	case RSM_IOCTL_REBIND:
6872 		error = rsm_rebind(seg, msg);
6873 		break;
6874 	case RSM_IOCTL_UNBIND:
6875 		error = ENOTSUP;
6876 		break;
6877 	case RSM_IOCTL_PUBLISH:
6878 		error = rsm_publish(seg, msg, arg, mode);
6879 		break;
6880 	case RSM_IOCTL_REPUBLISH:
6881 		error = rsm_republish(seg, msg, mode);
6882 		break;
6883 	case RSM_IOCTL_UNPUBLISH:
6884 		error = rsm_unpublish(seg, 1);
6885 		break;
6886 	default:
6887 		error = EINVAL;
6888 		break;
6889 	}
6890 
6891 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6892 	    error));
6893 
6894 	return (error);
6895 }
6896 static int
6897 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6898     int mode, cred_t *credp)
6899 {
6900 	int error;
6901 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6902 
6903 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6904 
6905 	ASSERT(seg);
6906 
6907 	switch (cmd) {
6908 	case RSM_IOCTL_CONNECT:
6909 		error = rsm_connect(seg, msg, credp, arg, mode);
6910 		break;
6911 	default:
6912 		error = EINVAL;
6913 		break;
6914 	}
6915 
6916 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6917 	    error));
6918 	return (error);
6919 }
6920 
6921 static int
6922 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6923     int mode)
6924 {
6925 	int e;
6926 	adapter_t *adapter;
6927 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6928 
6929 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6930 
6931 
6932 	if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6933 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6934 		    "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6935 		return (RSMERR_CONN_ABORTED);
6936 	} else if (seg->s_node == my_nodeid) {
6937 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6938 		    "rsmbar_ioctl done: loopback\n"));
6939 		return (RSM_SUCCESS);
6940 	}
6941 
6942 	adapter = seg->s_adapter;
6943 
6944 	switch (cmd) {
6945 	case RSM_IOCTL_BAR_CHECK:
6946 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6947 		    "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6948 		return (bar_va ? RSM_SUCCESS : EINVAL);
6949 	case RSM_IOCTL_BAR_OPEN:
6950 		e = adapter->rsmpi_ops->
6951 		    rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6952 		break;
6953 	case RSM_IOCTL_BAR_ORDER:
6954 		e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6955 		break;
6956 	case RSM_IOCTL_BAR_CLOSE:
6957 		e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6958 		break;
6959 	default:
6960 		e = EINVAL;
6961 		break;
6962 	}
6963 
6964 	if (e == RSM_SUCCESS) {
6965 #ifdef _MULTI_DATAMODEL
6966 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6967 			rsm_ioctlmsg32_t msg32;
6968 			int i;
6969 
6970 			for (i = 0; i < 4; i++) {
6971 				msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6972 			}
6973 
6974 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6975 			    "rsmbar_ioctl done\n"));
6976 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6977 			    sizeof (msg32), mode))
6978 				return (RSMERR_BAD_ADDR);
6979 			else
6980 				return (RSM_SUCCESS);
6981 		}
6982 #endif
6983 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6984 		    "rsmbar_ioctl done\n"));
6985 		if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6986 		    sizeof (*msg), mode))
6987 			return (RSMERR_BAD_ADDR);
6988 		else
6989 			return (RSM_SUCCESS);
6990 	}
6991 
6992 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6993 	    "rsmbar_ioctl done: error=%d\n", e));
6994 
6995 	return (e);
6996 }
6997 
6998 /*
6999  * Ring the doorbell of the export segment to which this segment is
7000  * connected.
7001  */
7002 static int
7003 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7004 {
7005 	int e = 0;
7006 	rsmipc_request_t request;
7007 
7008 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7009 
7010 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7011 
7012 	request.rsmipc_key = seg->s_segid;
7013 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7014 	request.rsmipc_segment_cookie = NULL;
7015 	e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7016 
7017 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7018 	    "exportbell_ioctl done: %d\n", e));
7019 
7020 	return (e);
7021 }
7022 
7023 /*
7024  * Ring the doorbells of all segments importing this segment
7025  */
7026 static int
7027 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7028 {
7029 	importing_token_t	*token = NULL;
7030 	rsmipc_request_t	request;
7031 	int			index;
7032 
7033 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7034 
7035 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7036 
7037 	ASSERT(seg->s_state != RSM_STATE_NEW &&
7038 	    seg->s_state != RSM_STATE_NEW_QUIESCED);
7039 
7040 	request.rsmipc_key = seg->s_segid;
7041 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7042 
7043 	index = rsmhash(seg->s_segid);
7044 
7045 	token = importer_list.bucket[index];
7046 
7047 	while (token != NULL) {
7048 		if (seg->s_key == token->key) {
7049 			request.rsmipc_segment_cookie =
7050 			    token->import_segment_cookie;
7051 			(void) rsmipc_send(token->importing_node,
7052 				    &request, RSM_NO_REPLY);
7053 		}
7054 		token = token->next;
7055 	}
7056 
7057 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7058 	    "importbell_ioctl done\n"));
7059 	return (RSM_SUCCESS);
7060 }
7061 
7062 static int
7063 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7064     rsm_poll_event_t **eventspp, int mode)
7065 {
7066 	rsm_poll_event_t	*evlist = NULL;
7067 	size_t			evlistsz;
7068 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7069 
7070 #ifdef _MULTI_DATAMODEL
7071 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7072 		int i;
7073 		rsm_consume_event_msg32_t cemsg32 = {0};
7074 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7075 		rsm_poll_event32_t	*evlist32;
7076 		size_t			evlistsz32;
7077 
7078 		/* copyin the ioctl message */
7079 		if (ddi_copyin(arg, (caddr_t)&cemsg32,
7080 		    sizeof (rsm_consume_event_msg32_t), mode)) {
7081 			DBG_PRINTF((category, RSM_ERR,
7082 			    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7083 			return (RSMERR_BAD_ADDR);
7084 		}
7085 		msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7086 		msgp->numents = (int)cemsg32.numents;
7087 
7088 		evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7089 		/*
7090 		 * If numents is large alloc events list on heap otherwise
7091 		 * use the address of array that was passed in.
7092 		 */
7093 		if (msgp->numents > RSM_MAX_POLLFDS) {
7094 			if (msgp->numents > max_segs) { /* validate numents */
7095 				DBG_PRINTF((category, RSM_ERR,
7096 				    "consumeevent_copyin: "
7097 				    "RSMERR_BAD_ARGS_ERRORS\n"));
7098 				return (RSMERR_BAD_ARGS_ERRORS);
7099 			}
7100 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7101 		} else {
7102 			evlist32 = event32;
7103 		}
7104 
7105 		/* copyin the seglist into the rsm_poll_event32_t array */
7106 		if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7107 		    evlistsz32, mode)) {
7108 			if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7109 				kmem_free(evlist32, evlistsz32);
7110 			}
7111 			DBG_PRINTF((category, RSM_ERR,
7112 			    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7113 			return (RSMERR_BAD_ADDR);
7114 		}
7115 
7116 		/* evlist and evlistsz are based on rsm_poll_event_t type */
7117 		evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7118 
7119 		if (msgp->numents > RSM_MAX_POLLFDS) {
7120 			evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7121 			*eventspp = evlist;
7122 		} else {
7123 			evlist = *eventspp;
7124 		}
7125 		/*
7126 		 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7127 		 * array
7128 		 */
7129 		for (i = 0; i < msgp->numents; i++) {
7130 			evlist[i].rnum = evlist32[i].rnum;
7131 			evlist[i].fdsidx = evlist32[i].fdsidx;
7132 			evlist[i].revent = evlist32[i].revent;
7133 		}
7134 		/* free the temp 32-bit event list */
7135 		if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7136 			kmem_free(evlist32, evlistsz32);
7137 		}
7138 
7139 		return (RSM_SUCCESS);
7140 	}
7141 #endif
7142 	/* copyin the ioctl message */
7143 	if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7144 	    mode)) {
7145 		DBG_PRINTF((category, RSM_ERR,
7146 		    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7147 		return (RSMERR_BAD_ADDR);
7148 	}
7149 	/*
7150 	 * If numents is large alloc events list on heap otherwise
7151 	 * use the address of array that was passed in.
7152 	 */
7153 	if (msgp->numents > RSM_MAX_POLLFDS) {
7154 		if (msgp->numents > max_segs) { /* validate numents */
7155 			DBG_PRINTF((category, RSM_ERR,
7156 			    "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7157 			return (RSMERR_BAD_ARGS_ERRORS);
7158 		}
7159 		evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7160 		evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7161 		*eventspp  = evlist;
7162 	}
7163 
7164 	/* copyin the seglist */
7165 	if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7166 	    sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7167 		if (evlist) {
7168 			kmem_free(evlist, evlistsz);
7169 			*eventspp = NULL;
7170 		}
7171 		DBG_PRINTF((category, RSM_ERR,
7172 		    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7173 		return (RSMERR_BAD_ADDR);
7174 	}
7175 
7176 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7177 	    "consumeevent_copyin done\n"));
7178 	return (RSM_SUCCESS);
7179 }
7180 
7181 static int
7182 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7183     rsm_poll_event_t *eventsp, int mode)
7184 {
7185 	size_t			evlistsz;
7186 	int			err = RSM_SUCCESS;
7187 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7188 
7189 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7190 	    "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7191 	    msgp->numents, eventsp));
7192 
7193 #ifdef _MULTI_DATAMODEL
7194 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7195 		int i;
7196 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7197 		rsm_poll_event32_t	*evlist32;
7198 		size_t			evlistsz32;
7199 
7200 		evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7201 		if (msgp->numents > RSM_MAX_POLLFDS) {
7202 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7203 		} else {
7204 			evlist32 = event32;
7205 		}
7206 
7207 		/*
7208 		 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7209 		 * array
7210 		 */
7211 		for (i = 0; i < msgp->numents; i++) {
7212 			evlist32[i].rnum = eventsp[i].rnum;
7213 			evlist32[i].fdsidx = eventsp[i].fdsidx;
7214 			evlist32[i].revent = eventsp[i].revent;
7215 		}
7216 
7217 		if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7218 		    evlistsz32, mode)) {
7219 			err = RSMERR_BAD_ADDR;
7220 		}
7221 
7222 		if (msgp->numents > RSM_MAX_POLLFDS) {
7223 			if (evlist32) {	/* free the temp 32-bit event list */
7224 				kmem_free(evlist32, evlistsz32);
7225 			}
7226 			/*
7227 			 * eventsp and evlistsz are based on rsm_poll_event_t
7228 			 * type
7229 			 */
7230 			evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7231 			/* event list on the heap and needs to be freed here */
7232 			if (eventsp) {
7233 				kmem_free(eventsp, evlistsz);
7234 			}
7235 		}
7236 
7237 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7238 		    "consumeevent_copyout done: err=%d\n", err));
7239 		return (err);
7240 	}
7241 #endif
7242 	evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7243 
7244 	if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7245 	    mode)) {
7246 		err = RSMERR_BAD_ADDR;
7247 	}
7248 
7249 	if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7250 		/* event list on the heap and needs to be freed here */
7251 		kmem_free(eventsp, evlistsz);
7252 	}
7253 
7254 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7255 	    "consumeevent_copyout done: err=%d\n", err));
7256 	return (err);
7257 }
7258 
7259 static int
7260 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7261 {
7262 	int	rc;
7263 	int	i;
7264 	minor_t	rnum;
7265 	rsm_consume_event_msg_t	msg = {0};
7266 	rsmseg_t		*seg;
7267 	rsm_poll_event_t	*event_list;
7268 	rsm_poll_event_t	events[RSM_MAX_POLLFDS];
7269 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7270 
7271 	event_list = events;
7272 
7273 	if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7274 	    RSM_SUCCESS) {
7275 		return (rc);
7276 	}
7277 
7278 	for (i = 0; i < msg.numents; i++) {
7279 		rnum = event_list[i].rnum;
7280 		event_list[i].revent = 0;
7281 		/* get the segment structure */
7282 		seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7283 		if (seg) {
7284 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7285 			    "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7286 			    seg));
7287 			if (seg->s_pollevent) {
7288 				/* consume the event */
7289 				atomic_add_32(&seg->s_pollevent, -1);
7290 				event_list[i].revent = POLLRDNORM;
7291 			}
7292 			rsmseglock_release(seg);
7293 		}
7294 	}
7295 
7296 	if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7297 	    RSM_SUCCESS) {
7298 		return (rc);
7299 	}
7300 
7301 	return (RSM_SUCCESS);
7302 }
7303 
7304 static int
7305 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7306 {
7307 	int size;
7308 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7309 
7310 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7311 
7312 #ifdef _MULTI_DATAMODEL
7313 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7314 		rsmka_iovec32_t	*iovec32, *iovec32_base;
7315 		int i;
7316 
7317 		size = count * sizeof (rsmka_iovec32_t);
7318 		iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7319 		if (ddi_copyin((caddr_t)user_vec,
7320 		    (caddr_t)iovec32, size, mode)) {
7321 			kmem_free(iovec32, size);
7322 			DBG_PRINTF((category, RSM_DEBUG,
7323 			    "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7324 			return (RSMERR_BAD_ADDR);
7325 		}
7326 
7327 		for (i = 0; i < count; i++, iovec++, iovec32++) {
7328 			iovec->io_type = (int)iovec32->io_type;
7329 			if (iovec->io_type == RSM_HANDLE_TYPE)
7330 				iovec->local.segid = (rsm_memseg_id_t)
7331 							iovec32->local;
7332 			else
7333 				iovec->local.vaddr =
7334 				    (caddr_t)(uintptr_t)iovec32->local;
7335 			iovec->local_offset = (size_t)iovec32->local_offset;
7336 			iovec->remote_offset = (size_t)iovec32->remote_offset;
7337 			iovec->transfer_len = (size_t)iovec32->transfer_len;
7338 
7339 		}
7340 		kmem_free(iovec32_base, size);
7341 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7342 		    "iovec_copyin done\n"));
7343 		return (DDI_SUCCESS);
7344 	}
7345 #endif
7346 
7347 	size = count * sizeof (rsmka_iovec_t);
7348 	if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7349 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7350 		    "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7351 		return (RSMERR_BAD_ADDR);
7352 	}
7353 
7354 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7355 
7356 	return (DDI_SUCCESS);
7357 }
7358 
7359 
7360 static int
7361 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7362 {
7363 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7364 
7365 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7366 
7367 #ifdef _MULTI_DATAMODEL
7368 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7369 		rsmka_scat_gath32_t sg_io32;
7370 
7371 		if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7372 		    mode)) {
7373 			DBG_PRINTF((category, RSM_DEBUG,
7374 			    "sgio_copyin done: returning EFAULT\n"));
7375 			return (RSMERR_BAD_ADDR);
7376 		}
7377 		sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7378 		sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7379 		sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7380 		sg_io->flags = (size_t)sg_io32.flags;
7381 		sg_io->remote_handle = (rsm_memseg_import_handle_t)
7382 		    (uintptr_t)sg_io32.remote_handle;
7383 		sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7384 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7385 		    "sgio_copyin done\n"));
7386 		return (DDI_SUCCESS);
7387 	}
7388 #endif
7389 	if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7390 	    mode)) {
7391 		DBG_PRINTF((category, RSM_DEBUG,
7392 		    "sgio_copyin done: returning EFAULT\n"));
7393 		return (RSMERR_BAD_ADDR);
7394 	}
7395 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7396 	return (DDI_SUCCESS);
7397 }
7398 
7399 static int
7400 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7401 {
7402 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7403 
7404 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7405 	    "sgio_resid_copyout enter\n"));
7406 
7407 #ifdef _MULTI_DATAMODEL
7408 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7409 		rsmka_scat_gath32_t sg_io32;
7410 
7411 		sg_io32.io_residual_count = sg_io->io_residual_count;
7412 		sg_io32.flags = sg_io->flags;
7413 
7414 		if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7415 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7416 		    sizeof (uint32_t), mode)) {
7417 
7418 			DBG_PRINTF((category, RSM_ERR,
7419 			    "sgio_resid_copyout error: rescnt\n"));
7420 			return (RSMERR_BAD_ADDR);
7421 		}
7422 
7423 		if (ddi_copyout((caddr_t)&sg_io32.flags,
7424 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7425 		    sizeof (uint32_t), mode)) {
7426 
7427 			DBG_PRINTF((category, RSM_ERR,
7428 			    "sgio_resid_copyout error: flags\n"));
7429 			return (RSMERR_BAD_ADDR);
7430 		}
7431 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7432 		    "sgio_resid_copyout done\n"));
7433 		return (DDI_SUCCESS);
7434 	}
7435 #endif
7436 	if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7437 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7438 	    sizeof (ulong_t), mode)) {
7439 
7440 		DBG_PRINTF((category, RSM_ERR,
7441 		    "sgio_resid_copyout error:rescnt\n"));
7442 		return (RSMERR_BAD_ADDR);
7443 	}
7444 
7445 	if (ddi_copyout((caddr_t)&sg_io->flags,
7446 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7447 	    sizeof (uint_t), mode)) {
7448 
7449 		DBG_PRINTF((category, RSM_ERR,
7450 		    "sgio_resid_copyout error:flags\n"));
7451 		return (RSMERR_BAD_ADDR);
7452 	}
7453 
7454 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7455 	return (DDI_SUCCESS);
7456 }
7457 
7458 
7459 static int
7460 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7461 {
7462 	rsmka_scat_gath_t	sg_io;
7463 	rsmka_iovec_t		ka_iovec_arr[RSM_MAX_IOVLEN];
7464 	rsmka_iovec_t		*ka_iovec;
7465 	rsmka_iovec_t		*ka_iovec_start;
7466 	rsmpi_scat_gath_t	rsmpi_sg_io;
7467 	rsmpi_iovec_t		iovec_arr[RSM_MAX_IOVLEN];
7468 	rsmpi_iovec_t		*iovec;
7469 	rsmpi_iovec_t		*iovec_start = NULL;
7470 	rsmapi_access_entry_t	*acl;
7471 	rsmresource_t		*res;
7472 	minor_t			rnum;
7473 	rsmseg_t		*im_seg, *ex_seg;
7474 	int			e;
7475 	int			error = 0;
7476 	uint_t			i;
7477 	uint_t			iov_proc = 0; /* num of iovecs processed */
7478 	size_t			size = 0;
7479 	size_t			ka_size;
7480 
7481 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7482 
7483 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7484 
7485 	credp = credp;
7486 
7487 	/*
7488 	 * Copyin the scatter/gather structure  and build new structure
7489 	 * for rsmpi.
7490 	 */
7491 	e = sgio_copyin(arg, &sg_io, mode);
7492 	if (e != DDI_SUCCESS) {
7493 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7494 		    "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7495 		return (e);
7496 	}
7497 
7498 	if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7499 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7500 		    "rsm_iovec_ioctl done: request_count(%d) too large\n",
7501 		    sg_io.io_request_count));
7502 		return (RSMERR_BAD_SGIO);
7503 	}
7504 
7505 	rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7506 	rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7507 	rsmpi_sg_io.io_segflg = 0;
7508 
7509 	/* Allocate memory and copyin io vector array  */
7510 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7511 		ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7512 		ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7513 	} else {
7514 		ka_iovec_start = ka_iovec = ka_iovec_arr;
7515 	}
7516 	e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7517 	    sg_io.io_request_count, mode);
7518 	if (e != DDI_SUCCESS) {
7519 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7520 			kmem_free(ka_iovec, ka_size);
7521 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7522 		    "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7523 		return (e);
7524 	}
7525 
7526 	/* get the import segment descriptor */
7527 	rnum = getminor(dev);
7528 	res = rsmresource_lookup(rnum, RSM_LOCK);
7529 
7530 	/*
7531 	 * The following sequence of locking may (or MAY NOT) cause a
7532 	 * deadlock but this is currently not addressed here since the
7533 	 * implementation will be changed to incorporate the use of
7534 	 * reference counting for both the import and the export segments.
7535 	 */
7536 
7537 	/* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7538 
7539 	im_seg = (rsmseg_t *)res;
7540 
7541 	if (im_seg == NULL) {
7542 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7543 			kmem_free(ka_iovec, ka_size);
7544 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7545 		    "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7546 		return (EINVAL);
7547 	}
7548 	/* putv/getv supported is supported only on import segments */
7549 	if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7550 		rsmseglock_release(im_seg);
7551 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7552 			kmem_free(ka_iovec, ka_size);
7553 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7554 		    "rsm_iovec_ioctl done: not an import segment\n"));
7555 		return (EINVAL);
7556 	}
7557 
7558 	/*
7559 	 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7560 	 * as well as wait for a local DR to complete.
7561 	 */
7562 	while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7563 	    (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7564 	    (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7565 		if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7566 			DBG_PRINTF((category, RSM_DEBUG,
7567 			    "rsm_iovec_ioctl done: cv_wait INTR"));
7568 			rsmseglock_release(im_seg);
7569 			return (RSMERR_INTERRUPTED);
7570 		}
7571 	}
7572 
7573 	if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7574 	    (im_seg->s_state != RSM_STATE_ACTIVE)) {
7575 
7576 		ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7577 		    im_seg->s_state == RSM_STATE_NEW);
7578 
7579 		DBG_PRINTF((category, RSM_DEBUG,
7580 		    "rsm_iovec_ioctl done: im_seg not conn/map"));
7581 		rsmseglock_release(im_seg);
7582 		e = RSMERR_BAD_SGIO;
7583 		goto out;
7584 	}
7585 
7586 	im_seg->s_rdmacnt++;
7587 	rsmseglock_release(im_seg);
7588 
7589 	/*
7590 	 * Allocate and set up the io vector for rsmpi
7591 	 */
7592 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7593 		size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7594 		iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7595 	} else {
7596 		iovec_start = iovec = iovec_arr;
7597 	}
7598 
7599 	rsmpi_sg_io.iovec = iovec;
7600 	for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7601 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7602 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7603 
7604 			if (ex_seg == NULL) {
7605 				e = RSMERR_BAD_SGIO;
7606 				break;
7607 			}
7608 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7609 
7610 			acl = ex_seg->s_acl;
7611 			if (acl[0].ae_permission == 0) {
7612 				struct buf *xbuf;
7613 				dev_t sdev = 0;
7614 
7615 				xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7616 				    0, ex_seg->s_len, B_WRITE,
7617 				    sdev, 0, NULL, DDI_UMEM_SLEEP);
7618 
7619 				ASSERT(xbuf != NULL);
7620 
7621 				iovec->local_mem.ms_type = RSM_MEM_BUF;
7622 				iovec->local_mem.ms_memory.bp = xbuf;
7623 			} else {
7624 				iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7625 				iovec->local_mem.ms_memory.handle =
7626 					ex_seg->s_handle.out;
7627 			}
7628 			ex_seg->s_rdmacnt++; /* refcnt the handle */
7629 			rsmseglock_release(ex_seg);
7630 		} else {
7631 			iovec->local_mem.ms_type = RSM_MEM_VADDR;
7632 			iovec->local_mem.ms_memory.vr.vaddr =
7633 			    ka_iovec->local.vaddr;
7634 		}
7635 
7636 		iovec->local_offset = ka_iovec->local_offset;
7637 		iovec->remote_handle = im_seg->s_handle.in;
7638 		iovec->remote_offset = ka_iovec->remote_offset;
7639 		iovec->transfer_length = ka_iovec->transfer_len;
7640 		iovec++;
7641 		ka_iovec++;
7642 	}
7643 
7644 	if (iov_proc <  sg_io.io_request_count) {
7645 		/* error while processing handle */
7646 		rsmseglock_acquire(im_seg);
7647 		im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7648 		if (im_seg->s_rdmacnt == 0) {
7649 			cv_broadcast(&im_seg->s_cv);
7650 		}
7651 		rsmseglock_release(im_seg);
7652 		goto out;
7653 	}
7654 
7655 	/* call rsmpi */
7656 	if (cmd == RSM_IOCTL_PUTV)
7657 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7658 		    im_seg->s_adapter->rsmpi_handle,
7659 		    &rsmpi_sg_io);
7660 	else if (cmd == RSM_IOCTL_GETV)
7661 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7662 		    im_seg->s_adapter->rsmpi_handle,
7663 		    &rsmpi_sg_io);
7664 	else {
7665 		e = EINVAL;
7666 		DBG_PRINTF((category, RSM_DEBUG,
7667 		    "iovec_ioctl: bad command = %x\n", cmd));
7668 	}
7669 
7670 
7671 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7672 	    "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7673 
7674 	sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7675 
7676 	/*
7677 	 * Check for implicit signal post flag and do the signal
7678 	 * post if needed
7679 	 */
7680 	if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7681 	    e == RSM_SUCCESS) {
7682 		rsmipc_request_t request;
7683 
7684 		request.rsmipc_key = im_seg->s_segid;
7685 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7686 		request.rsmipc_segment_cookie = NULL;
7687 		e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7688 		/*
7689 		 * Reset the implicit signal post flag to 0 to indicate
7690 		 * that the signal post has been done and need not be
7691 		 * done in the RSMAPI library
7692 		 */
7693 		sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7694 	}
7695 
7696 	rsmseglock_acquire(im_seg);
7697 	im_seg->s_rdmacnt--;
7698 	if (im_seg->s_rdmacnt == 0) {
7699 		cv_broadcast(&im_seg->s_cv);
7700 	}
7701 	rsmseglock_release(im_seg);
7702 	error = sgio_resid_copyout(arg, &sg_io, mode);
7703 out:
7704 	iovec = iovec_start;
7705 	ka_iovec = ka_iovec_start;
7706 	for (i = 0; i < iov_proc; i++) {
7707 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7708 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7709 
7710 			ASSERT(ex_seg != NULL);
7711 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7712 
7713 			ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7714 			if (ex_seg->s_rdmacnt == 0) {
7715 				cv_broadcast(&ex_seg->s_cv);
7716 			}
7717 			rsmseglock_release(ex_seg);
7718 		}
7719 
7720 		ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7721 
7722 		/*
7723 		 * At present there is no dependency on the existence of xbufs
7724 		 * created by ddi_umem_iosetup for each of the iovecs. So we
7725 		 * can these xbufs here.
7726 		 */
7727 		if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7728 			freerbuf(iovec->local_mem.ms_memory.bp);
7729 		}
7730 
7731 		iovec++;
7732 		ka_iovec++;
7733 	}
7734 
7735 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7736 		if (iovec_start)
7737 			kmem_free(iovec_start, size);
7738 		kmem_free(ka_iovec_start, ka_size);
7739 	}
7740 
7741 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7742 	    "rsm_iovec_ioctl done %d\n", e));
7743 	/* if RSMPI call fails return that else return copyout's retval */
7744 	return ((e != RSM_SUCCESS) ? e : error);
7745 
7746 }
7747 
7748 
7749 static int
7750 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7751 {
7752 	adapter_t	*adapter;
7753 	rsm_addr_t	addr;
7754 	rsm_node_id_t	node;
7755 	int		rval = DDI_SUCCESS;
7756 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7757 
7758 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7759 
7760 	adapter =  rsm_getadapter(msg, mode);
7761 	if (adapter == NULL) {
7762 		DBG_PRINTF((category, RSM_DEBUG,
7763 		    "rsmaddr_ioctl done: adapter not found\n"));
7764 		return (RSMERR_CTLR_NOT_PRESENT);
7765 	}
7766 
7767 	switch (cmd) {
7768 	case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7769 		/* returns the hwaddr in msg->hwaddr */
7770 		if (msg->nodeid == my_nodeid) {
7771 			msg->hwaddr = adapter->hwaddr;
7772 		} else {
7773 			addr = get_remote_hwaddr(adapter, msg->nodeid);
7774 			if ((int64_t)addr < 0) {
7775 				rval = RSMERR_INTERNAL_ERROR;
7776 			} else {
7777 				msg->hwaddr = addr;
7778 			}
7779 		}
7780 		break;
7781 	case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7782 		/* returns the nodeid in msg->nodeid */
7783 		if (msg->hwaddr == adapter->hwaddr) {
7784 			msg->nodeid = my_nodeid;
7785 		} else {
7786 			node = get_remote_nodeid(adapter, msg->hwaddr);
7787 			if ((int)node < 0) {
7788 				rval = RSMERR_INTERNAL_ERROR;
7789 			} else {
7790 				msg->nodeid = (rsm_node_id_t)node;
7791 			}
7792 		}
7793 		break;
7794 	default:
7795 		rval = EINVAL;
7796 		break;
7797 	}
7798 
7799 	rsmka_release_adapter(adapter);
7800 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7801 	    "rsmaddr_ioctl done: %d\n", rval));
7802 	return (rval);
7803 }
7804 
7805 static int
7806 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7807 {
7808 	DBG_DEFINE(category,
7809 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7810 
7811 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7812 
7813 #ifdef _MULTI_DATAMODEL
7814 
7815 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7816 		rsm_ioctlmsg32_t msg32;
7817 		int i;
7818 
7819 		if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7820 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7821 			    "rsm_ddi_copyin done: EFAULT\n"));
7822 			return (RSMERR_BAD_ADDR);
7823 		}
7824 		msg->len = msg32.len;
7825 		msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7826 		msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7827 		msg->key = msg32.key;
7828 		msg->acl_len = msg32.acl_len;
7829 		msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7830 		msg->cnum = msg32.cnum;
7831 		msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7832 		msg->cname_len = msg32.cname_len;
7833 		msg->nodeid = msg32.nodeid;
7834 		msg->hwaddr = msg32.hwaddr;
7835 		msg->perm = msg32.perm;
7836 		for (i = 0; i < 4; i++) {
7837 			msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7838 		}
7839 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7840 		    "rsm_ddi_copyin done\n"));
7841 		return (RSM_SUCCESS);
7842 	}
7843 #endif
7844 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7845 	if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7846 		return (RSMERR_BAD_ADDR);
7847 	else
7848 		return (RSM_SUCCESS);
7849 }
7850 
7851 static int
7852 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7853 {
7854 	rsmka_int_controller_attr_t	rsm_cattr;
7855 	DBG_DEFINE(category,
7856 		RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7857 
7858 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7859 	    "rsmattr_ddi_copyout enter\n"));
7860 	/*
7861 	 * need to copy appropriate data from rsm_controller_attr_t
7862 	 * to rsmka_int_controller_attr_t
7863 	 */
7864 #ifdef	_MULTI_DATAMODEL
7865 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7866 		rsmka_int_controller_attr32_t rsm_cattr32;
7867 
7868 		rsm_cattr32.attr_direct_access_sizes =
7869 		    adapter->rsm_attr.attr_direct_access_sizes;
7870 		rsm_cattr32.attr_atomic_sizes =
7871 		    adapter->rsm_attr.attr_atomic_sizes;
7872 		rsm_cattr32.attr_page_size =
7873 		    adapter->rsm_attr.attr_page_size;
7874 		if (adapter->rsm_attr.attr_max_export_segment_size >
7875 		    UINT_MAX)
7876 			rsm_cattr32.attr_max_export_segment_size =
7877 			    RSM_MAXSZ_PAGE_ALIGNED;
7878 		else
7879 			rsm_cattr32.attr_max_export_segment_size =
7880 			    adapter->rsm_attr.attr_max_export_segment_size;
7881 		if (adapter->rsm_attr.attr_tot_export_segment_size >
7882 		    UINT_MAX)
7883 			rsm_cattr32.attr_tot_export_segment_size =
7884 			    RSM_MAXSZ_PAGE_ALIGNED;
7885 		else
7886 			rsm_cattr32.attr_tot_export_segment_size =
7887 			    adapter->rsm_attr.attr_tot_export_segment_size;
7888 		if (adapter->rsm_attr.attr_max_export_segments >
7889 		    UINT_MAX)
7890 			rsm_cattr32.attr_max_export_segments =
7891 			    UINT_MAX;
7892 		else
7893 			rsm_cattr32.attr_max_export_segments =
7894 			    adapter->rsm_attr.attr_max_export_segments;
7895 		if (adapter->rsm_attr.attr_max_import_map_size >
7896 		    UINT_MAX)
7897 			rsm_cattr32.attr_max_import_map_size =
7898 			    RSM_MAXSZ_PAGE_ALIGNED;
7899 		else
7900 			rsm_cattr32.attr_max_import_map_size =
7901 			    adapter->rsm_attr.attr_max_import_map_size;
7902 		if (adapter->rsm_attr.attr_tot_import_map_size >
7903 		    UINT_MAX)
7904 			rsm_cattr32.attr_tot_import_map_size =
7905 			    RSM_MAXSZ_PAGE_ALIGNED;
7906 		else
7907 			rsm_cattr32.attr_tot_import_map_size =
7908 			    adapter->rsm_attr.attr_tot_import_map_size;
7909 		if (adapter->rsm_attr.attr_max_import_segments >
7910 		    UINT_MAX)
7911 			rsm_cattr32.attr_max_import_segments =
7912 			    UINT_MAX;
7913 		else
7914 			rsm_cattr32.attr_max_import_segments =
7915 			    adapter->rsm_attr.attr_max_import_segments;
7916 		rsm_cattr32.attr_controller_addr =
7917 		    adapter->rsm_attr.attr_controller_addr;
7918 
7919 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7920 		    "rsmattr_ddi_copyout done\n"));
7921 		if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7922 		    sizeof (rsmka_int_controller_attr32_t), mode)) {
7923 			return (RSMERR_BAD_ADDR);
7924 		}
7925 		else
7926 			return (RSM_SUCCESS);
7927 	}
7928 #endif
7929 	rsm_cattr.attr_direct_access_sizes =
7930 	    adapter->rsm_attr.attr_direct_access_sizes;
7931 	rsm_cattr.attr_atomic_sizes =
7932 	    adapter->rsm_attr.attr_atomic_sizes;
7933 	rsm_cattr.attr_page_size =
7934 	    adapter->rsm_attr.attr_page_size;
7935 	rsm_cattr.attr_max_export_segment_size =
7936 	    adapter->rsm_attr.attr_max_export_segment_size;
7937 	rsm_cattr.attr_tot_export_segment_size =
7938 	    adapter->rsm_attr.attr_tot_export_segment_size;
7939 	rsm_cattr.attr_max_export_segments =
7940 	    adapter->rsm_attr.attr_max_export_segments;
7941 	rsm_cattr.attr_max_import_map_size =
7942 	    adapter->rsm_attr.attr_max_import_map_size;
7943 	rsm_cattr.attr_tot_import_map_size =
7944 	    adapter->rsm_attr.attr_tot_import_map_size;
7945 	rsm_cattr.attr_max_import_segments =
7946 	    adapter->rsm_attr.attr_max_import_segments;
7947 	rsm_cattr.attr_controller_addr =
7948 	    adapter->rsm_attr.attr_controller_addr;
7949 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7950 	    "rsmattr_ddi_copyout done\n"));
7951 	if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7952 	    sizeof (rsmka_int_controller_attr_t), mode)) {
7953 		return (RSMERR_BAD_ADDR);
7954 	}
7955 	else
7956 		return (RSM_SUCCESS);
7957 }
7958 
7959 /*ARGSUSED*/
7960 static int
7961 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7962     int *rvalp)
7963 {
7964 	rsmseg_t *seg;
7965 	rsmresource_t	*res;
7966 	minor_t		rnum;
7967 	rsm_ioctlmsg_t msg = {0};
7968 	int error;
7969 	adapter_t *adapter;
7970 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7971 
7972 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7973 
7974 	if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7975 		error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7976 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7977 		    "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7978 		return (error);
7979 	}
7980 
7981 	/* topology cmd does not use the arg common to other cmds */
7982 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7983 		error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7984 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7985 		    "rsm_ioctl done: %d\n", error));
7986 		return (error);
7987 	}
7988 
7989 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7990 		error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7991 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7992 		    "rsm_ioctl done: %d\n", error));
7993 		return (error);
7994 	}
7995 
7996 	/*
7997 	 * try to load arguments
7998 	 */
7999 	if (cmd != RSM_IOCTL_RING_BELL &&
8000 	    rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
8001 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8002 		    "rsm_ioctl done: EFAULT\n"));
8003 		return (RSMERR_BAD_ADDR);
8004 	}
8005 
8006 	if (cmd == RSM_IOCTL_ATTR) {
8007 		adapter =  rsm_getadapter(&msg, mode);
8008 		if (adapter == NULL) {
8009 			DBG_PRINTF((category, RSM_DEBUG,
8010 			    "rsm_ioctl done: ENODEV\n"));
8011 			return (RSMERR_CTLR_NOT_PRESENT);
8012 		}
8013 		error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8014 		rsmka_release_adapter(adapter);
8015 		DBG_PRINTF((category, RSM_DEBUG,
8016 		    "rsm_ioctl:after copyout %d\n", error));
8017 		return (error);
8018 	}
8019 
8020 	if (cmd == RSM_IOCTL_BAR_INFO) {
8021 		/* Return library off,len of barrier page */
8022 		msg.off = barrier_offset;
8023 		msg.len = (int)barrier_size;
8024 #ifdef _MULTI_DATAMODEL
8025 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8026 			rsm_ioctlmsg32_t msg32;
8027 
8028 			if (msg.len > UINT_MAX)
8029 				msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8030 			else
8031 				msg32.len = (int32_t)msg.len;
8032 			msg32.off = (int32_t)msg.off;
8033 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8034 			    "rsm_ioctl done\n"));
8035 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8036 			    sizeof (msg32), mode))
8037 				return (RSMERR_BAD_ADDR);
8038 			else
8039 				return (RSM_SUCCESS);
8040 		}
8041 #endif
8042 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8043 		    "rsm_ioctl done\n"));
8044 		if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8045 		    sizeof (msg), mode))
8046 			return (RSMERR_BAD_ADDR);
8047 		else
8048 			return (RSM_SUCCESS);
8049 	}
8050 
8051 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8052 		/* map the nodeid or hwaddr */
8053 		error = rsmaddr_ioctl(cmd, &msg, mode);
8054 		if (error == RSM_SUCCESS) {
8055 #ifdef _MULTI_DATAMODEL
8056 			if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8057 				rsm_ioctlmsg32_t msg32;
8058 
8059 				msg32.hwaddr = (uint64_t)msg.hwaddr;
8060 				msg32.nodeid = (uint32_t)msg.nodeid;
8061 
8062 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8063 				    "rsm_ioctl done\n"));
8064 				if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8065 				    sizeof (msg32), mode))
8066 					return (RSMERR_BAD_ADDR);
8067 				else
8068 					return (RSM_SUCCESS);
8069 			}
8070 #endif
8071 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8072 			    "rsm_ioctl done\n"));
8073 			if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8074 			    sizeof (msg), mode))
8075 				return (RSMERR_BAD_ADDR);
8076 			else
8077 				return (RSM_SUCCESS);
8078 		}
8079 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8080 		    "rsm_ioctl done: %d\n", error));
8081 		return (error);
8082 	}
8083 
8084 	/* Find resource and look it in read mode */
8085 	rnum = getminor(dev);
8086 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
8087 	ASSERT(res != NULL);
8088 
8089 	/*
8090 	 * Find command group
8091 	 */
8092 	switch (RSM_IOCTL_CMDGRP(cmd)) {
8093 	case RSM_IOCTL_EXPORT_SEG:
8094 		/*
8095 		 * Export list is searched during publish, loopback and
8096 		 * remote lookup call.
8097 		 */
8098 		seg = rsmresource_seg(res, rnum, credp,
8099 		    RSM_RESOURCE_EXPORT_SEGMENT);
8100 		if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8101 			error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8102 			    credp);
8103 		} else { /* export ioctl on an import/barrier resource */
8104 			error = RSMERR_BAD_SEG_HNDL;
8105 		}
8106 		break;
8107 	case RSM_IOCTL_IMPORT_SEG:
8108 		/* Import list is searched during remote unmap call. */
8109 		seg = rsmresource_seg(res, rnum, credp,
8110 		    RSM_RESOURCE_IMPORT_SEGMENT);
8111 		if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8112 			error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8113 			    credp);
8114 		} else  { /* import ioctl on an export/barrier resource */
8115 			error = RSMERR_BAD_SEG_HNDL;
8116 		}
8117 		break;
8118 	case RSM_IOCTL_BAR:
8119 		if (res != RSMRC_RESERVED &&
8120 		    res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8121 			error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8122 			    mode);
8123 		} else { /* invalid res value */
8124 			error = RSMERR_BAD_SEG_HNDL;
8125 		}
8126 		break;
8127 	case RSM_IOCTL_BELL:
8128 		if (res != RSMRC_RESERVED) {
8129 			if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8130 				error = exportbell_ioctl((rsmseg_t *)res, cmd);
8131 			else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8132 				error = importbell_ioctl((rsmseg_t *)res, cmd);
8133 			else /* RSM_RESOURCE_BAR */
8134 				error = RSMERR_BAD_SEG_HNDL;
8135 		} else { /* invalid res value */
8136 			error = RSMERR_BAD_SEG_HNDL;
8137 		}
8138 		break;
8139 	default:
8140 		error = EINVAL;
8141 	}
8142 
8143 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8144 	    error));
8145 	return (error);
8146 }
8147 
8148 
8149 /* **************************** Segment Mapping Operations ********* */
8150 static rsm_mapinfo_t *
8151 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8152     size_t *map_len)
8153 {
8154 	rsm_mapinfo_t	*p;
8155 	/*
8156 	 * Find the correct mapinfo structure to use during the mapping
8157 	 * from the seg->s_mapinfo list.
8158 	 * The seg->s_mapinfo list contains in reverse order the mappings
8159 	 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8160 	 * access the correct entry within this list for the mapping
8161 	 * requested.
8162 	 *
8163 	 * The algorithm for selecting a list entry is as follows:
8164 	 *
8165 	 * When start_offset of an entry <= off we have found the entry
8166 	 * we were looking for. Adjust the dev_offset and map_len (needs
8167 	 * to be PAGESIZE aligned).
8168 	 */
8169 	p = seg->s_mapinfo;
8170 	for (; p; p = p->next) {
8171 		if (p->start_offset <= off) {
8172 			*dev_offset = p->dev_offset + off - p->start_offset;
8173 			*map_len = (len > p->individual_len) ?
8174 			    p->individual_len : ptob(btopr(len));
8175 			return (p);
8176 		}
8177 		p = p->next;
8178 	}
8179 
8180 	return (NULL);
8181 }
8182 
8183 static void
8184 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8185 {
8186 	rsm_mapinfo_t *p;
8187 
8188 	while (mapinfo != NULL) {
8189 		p = mapinfo;
8190 		mapinfo = mapinfo->next;
8191 		kmem_free(p, sizeof (*p));
8192 	}
8193 }
8194 
8195 static int
8196 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8197     size_t len, void **pvtp)
8198 {
8199 	rsmcookie_t	*p;
8200 	rsmresource_t	*res;
8201 	rsmseg_t	*seg;
8202 	minor_t rnum;
8203 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8204 
8205 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8206 
8207 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8208 	    "rsmmap_map: dhp = %x\n", dhp));
8209 
8210 	flags = flags;
8211 
8212 	rnum = getminor(dev);
8213 	res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8214 	ASSERT(res != NULL);
8215 
8216 	seg = (rsmseg_t *)res;
8217 
8218 	rsmseglock_acquire(seg);
8219 
8220 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8221 
8222 	/*
8223 	 * Allocate structure and add cookie to segment list
8224 	 */
8225 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8226 
8227 	p->c_dhp = dhp;
8228 	p->c_off = off;
8229 	p->c_len = len;
8230 	p->c_next = seg->s_ckl;
8231 	seg->s_ckl = p;
8232 
8233 	*pvtp = (void *)seg;
8234 
8235 	rsmseglock_release(seg);
8236 
8237 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8238 	return (DDI_SUCCESS);
8239 }
8240 
8241 /*
8242  * Page fault handling is done here. The prerequisite mapping setup
8243  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8244  * ddi_umem_setup
8245  */
8246 static int
8247 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8248     uint_t type, uint_t rw)
8249 {
8250 	int e;
8251 	rsmseg_t *seg = (rsmseg_t *)pvt;
8252 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8253 
8254 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8255 
8256 	rsmseglock_acquire(seg);
8257 
8258 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8259 
8260 	while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8261 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8262 			DBG_PRINTF((category, RSM_DEBUG,
8263 			    "rsmmap_access done: cv_wait INTR"));
8264 			rsmseglock_release(seg);
8265 			return (RSMERR_INTERRUPTED);
8266 		}
8267 	}
8268 
8269 	ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8270 	    seg->s_state == RSM_STATE_ACTIVE);
8271 
8272 	if (seg->s_state == RSM_STATE_DISCONNECT)
8273 		seg->s_flags |= RSM_IMPORT_DUMMY;
8274 
8275 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8276 	    "rsmmap_access: dhp = %x\n", dhp));
8277 
8278 	rsmseglock_release(seg);
8279 
8280 	if (e = devmap_load(dhp, offset, len, type, rw)) {
8281 		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8282 	}
8283 
8284 
8285 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8286 
8287 	return (e);
8288 }
8289 
8290 static int
8291 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8292 	void **newpvt)
8293 {
8294 	rsmseg_t	*seg = (rsmseg_t *)oldpvt;
8295 	rsmcookie_t	*p, *old;
8296 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8297 
8298 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8299 
8300 	/*
8301 	 * Same as map, create an entry to hold cookie and add it to
8302 	 * connect segment list. The oldpvt is a pointer to segment.
8303 	 * Return segment pointer in newpvt.
8304 	 */
8305 	rsmseglock_acquire(seg);
8306 
8307 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8308 
8309 	/*
8310 	 * Find old cookie
8311 	 */
8312 	for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8313 		if (old->c_dhp == dhp) {
8314 			break;
8315 		}
8316 	}
8317 	if (old == NULL) {
8318 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8319 		    "rsmmap_dup done: EINVAL\n"));
8320 		rsmseglock_release(seg);
8321 		return (EINVAL);
8322 	}
8323 
8324 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8325 
8326 	p->c_dhp = new_dhp;
8327 	p->c_off = old->c_off;
8328 	p->c_len = old->c_len;
8329 	p->c_next = seg->s_ckl;
8330 	seg->s_ckl = p;
8331 
8332 	*newpvt = (void *)seg;
8333 
8334 	rsmseglock_release(seg);
8335 
8336 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8337 
8338 	return (DDI_SUCCESS);
8339 }
8340 
8341 static void
8342 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8343 	devmap_cookie_t new_dhp1, void **pvtp1,
8344 	devmap_cookie_t new_dhp2, void **pvtp2)
8345 {
8346 	/*
8347 	 * Remove pvtp structure from segment list.
8348 	 */
8349 	rsmseg_t	*seg = (rsmseg_t *)pvtp;
8350 	int freeflag;
8351 
8352 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8353 
8354 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8355 
8356 	off = off; len = len;
8357 	pvtp1 = pvtp1; pvtp2 = pvtp2;
8358 
8359 	rsmseglock_acquire(seg);
8360 
8361 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8362 
8363 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8364 	    "rsmmap_unmap: dhp = %x\n", dhp));
8365 	/*
8366 	 * We can go ahead and remove the dhps even if we are in
8367 	 * the MAPPING state because the dhps being removed here
8368 	 * belong to a different mmap and we are holding the segment
8369 	 * lock.
8370 	 */
8371 	if (new_dhp1 == NULL && new_dhp2 == NULL) {
8372 		/* find and remove dhp handle */
8373 		rsmcookie_t *tmp, **back = &seg->s_ckl;
8374 
8375 		while (*back != NULL) {
8376 			tmp = *back;
8377 			if (tmp->c_dhp == dhp) {
8378 				*back = tmp->c_next;
8379 				kmem_free(tmp, sizeof (*tmp));
8380 				break;
8381 			}
8382 			back = &tmp->c_next;
8383 		}
8384 	} else {
8385 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8386 		    "rsmmap_unmap:parital unmap"
8387 		    "new_dhp1 %lx, new_dhp2 %lx\n",
8388 		    (size_t)new_dhp1, (size_t)new_dhp2));
8389 	}
8390 
8391 	/*
8392 	 * rsmmap_unmap is called for each mapping cookie on the list.
8393 	 * When the list becomes empty and we are not in the MAPPING
8394 	 * state then unmap in the rsmpi driver.
8395 	 */
8396 	if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8397 		(void) rsm_unmap(seg);
8398 
8399 	if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8400 		freeflag = 1;
8401 	} else {
8402 		freeflag = 0;
8403 	}
8404 
8405 	rsmseglock_release(seg);
8406 
8407 	if (freeflag) {
8408 		/* Free the segment structure */
8409 		rsmseg_free(seg);
8410 	}
8411 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8412 
8413 }
8414 
8415 static struct devmap_callback_ctl rsmmap_ops = {
8416 	DEVMAP_OPS_REV,	/* devmap_ops version number	*/
8417 	rsmmap_map,	/* devmap_ops map routine */
8418 	rsmmap_access,	/* devmap_ops access routine */
8419 	rsmmap_dup,		/* devmap_ops dup routine		*/
8420 	rsmmap_unmap,	/* devmap_ops unmap routine */
8421 };
8422 
8423 static int
8424 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8425     size_t *maplen, uint_t model /*ARGSUSED*/)
8426 {
8427 	struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8428 	int		err;
8429 	uint_t		maxprot;
8430 	minor_t		rnum;
8431 	rsmseg_t	*seg;
8432 	off_t		dev_offset;
8433 	size_t		cur_len;
8434 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8435 
8436 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8437 
8438 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8439 	    "rsm_devmap: off = %lx, len = %lx\n", off, len));
8440 	rnum = getminor(dev);
8441 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8442 	ASSERT(seg != NULL);
8443 
8444 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8445 		if ((off == barrier_offset) &&
8446 		    (len == barrier_size)) {
8447 
8448 			ASSERT(bar_va != NULL && bar_cookie != NULL);
8449 
8450 			/*
8451 			 * The offset argument in devmap_umem_setup represents
8452 			 * the offset within the kernel memory defined by the
8453 			 * cookie. We use this offset as barrier_offset.
8454 			 */
8455 			err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8456 			    barrier_offset, len, PROT_USER|PROT_READ,
8457 			    DEVMAP_DEFAULTS, 0);
8458 
8459 			if (err != 0) {
8460 				DBG_PRINTF((category, RSM_ERR,
8461 				    "rsm_devmap done: %d\n", err));
8462 				return (RSMERR_MAP_FAILED);
8463 			}
8464 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8465 			    "rsm_devmap done: %d\n", err));
8466 
8467 			*maplen = barrier_size;
8468 
8469 			return (err);
8470 		} else {
8471 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8472 			    "rsm_devmap done: %d\n", err));
8473 			return (RSMERR_MAP_FAILED);
8474 		}
8475 	}
8476 
8477 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8478 	ASSERT(seg->s_state == RSM_STATE_MAPPING);
8479 
8480 	/*
8481 	 * Make sure we still have permission for the map operation.
8482 	 */
8483 	maxprot = PROT_USER;
8484 	if (seg->s_mode & RSM_PERM_READ) {
8485 		maxprot |= PROT_READ;
8486 	}
8487 
8488 	if (seg->s_mode & RSM_PERM_WRITE) {
8489 		maxprot |= PROT_WRITE;
8490 	}
8491 
8492 	/*
8493 	 * For each devmap call, rsmmap_map is called. This maintains driver
8494 	 * private information for the mapping. Thus, if there are multiple
8495 	 * devmap calls there will be multiple rsmmap_map calls and for each
8496 	 * call, the mapping information will be stored.
8497 	 * In case of an error during the processing of the devmap call, error
8498 	 * will be returned. This error return causes the caller of rsm_devmap
8499 	 * to undo all the mappings by calling rsmmap_unmap for each one.
8500 	 * rsmmap_unmap will free up the private information for the requested
8501 	 * mapping.
8502 	 */
8503 	if (seg->s_node != my_nodeid) {
8504 		rsm_mapinfo_t *p;
8505 
8506 		p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8507 		if (p == NULL) {
8508 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8509 			    "rsm_devmap: incorrect mapping info\n"));
8510 			return (RSMERR_MAP_FAILED);
8511 		}
8512 		err = devmap_devmem_setup(dhc, p->dip,
8513 		    callbackops, p->dev_register,
8514 		    dev_offset, cur_len, maxprot,
8515 		    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8516 
8517 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8518 		    "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8519 		    "off=%lx,len=%lx\n",
8520 		    p->dip, p->dev_register, dev_offset, off, cur_len));
8521 
8522 		if (err != 0) {
8523 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8524 			    "rsm_devmap: devmap_devmem_setup failed %d\n",
8525 			    err));
8526 			return (RSMERR_MAP_FAILED);
8527 		}
8528 		/* cur_len is always an integral multiple pagesize */
8529 		ASSERT((cur_len & (PAGESIZE-1)) == 0);
8530 		*maplen = cur_len;
8531 		return (err);
8532 
8533 	} else {
8534 		err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8535 		    seg->s_cookie, off, len, maxprot,
8536 		    DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8537 		if (err != 0) {
8538 			DBG_PRINTF((category, RSM_DEBUG,
8539 			    "rsm_devmap: devmap_umem_setup failed %d\n",
8540 				err));
8541 			return (RSMERR_MAP_FAILED);
8542 		}
8543 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8544 		    "rsm_devmap: loopback done\n"));
8545 
8546 		*maplen = ptob(btopr(len));
8547 
8548 		return (err);
8549 	}
8550 }
8551 
8552 /*
8553  * We can use the devmap framework for mapping device memory to user space by
8554  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8555  * processing calls this entry point and devmap_setup is called within this
8556  * function, which eventually calls rsm_devmap
8557  */
8558 static int
8559 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8560     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8561 {
8562 	int			error = 0;
8563 	int			old_state;
8564 	minor_t			rnum;
8565 	rsmseg_t		*seg, *eseg;
8566 	adapter_t		*adapter;
8567 	rsm_import_share_t	*sharedp;
8568 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8569 
8570 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8571 
8572 	/*
8573 	 * find segment
8574 	 */
8575 	rnum = getminor(dev);
8576 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8577 
8578 	if (seg == NULL) {
8579 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8580 		    "rsm_segmap done: invalid segment\n"));
8581 		return (EINVAL);
8582 	}
8583 
8584 	/*
8585 	 * the user is trying to map a resource that has not been
8586 	 * defined yet. The library uses this to map in the
8587 	 * barrier page.
8588 	 */
8589 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8590 		rsmseglock_release(seg);
8591 
8592 		/*
8593 		 * The mapping for the barrier page is identified
8594 		 * by the special offset barrier_offset
8595 		 */
8596 
8597 		if (off == (off_t)barrier_offset ||
8598 		    len == (off_t)barrier_size) {
8599 			if (bar_cookie == NULL || bar_va == NULL) {
8600 				DBG_PRINTF((category, RSM_DEBUG,
8601 				    "rsm_segmap: bar cookie/va is NULL\n"));
8602 				return (EINVAL);
8603 			}
8604 
8605 			error = devmap_setup(dev, (offset_t)off, as, addrp,
8606 			    (size_t)len, prot, maxprot, flags,  cred);
8607 
8608 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8609 			    "rsm_segmap done: %d\n", error));
8610 			return (error);
8611 		} else {
8612 			DBG_PRINTF((category, RSM_DEBUG,
8613 			    "rsm_segmap: bad offset/length\n"));
8614 			return (EINVAL);
8615 		}
8616 	}
8617 
8618 	/* Make sure you can only map imported segments */
8619 	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8620 		rsmseglock_release(seg);
8621 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8622 		    "rsm_segmap done: not an import segment\n"));
8623 		return (EINVAL);
8624 	}
8625 	/* check means library is broken */
8626 	ASSERT(seg->s_hdr.rsmrc_num == rnum);
8627 
8628 	/* wait for the segment to become unquiesced */
8629 	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8630 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8631 			rsmseglock_release(seg);
8632 			DBG_PRINTF((category, RSM_DEBUG,
8633 			    "rsm_segmap done: cv_wait INTR"));
8634 			return (ENODEV);
8635 		}
8636 	}
8637 
8638 	/* wait until segment leaves the mapping state */
8639 	while (seg->s_state == RSM_STATE_MAPPING)
8640 		cv_wait(&seg->s_cv, &seg->s_lock);
8641 
8642 	/*
8643 	 * we allow multiple maps of the same segment in the KA
8644 	 * and it works because we do an rsmpi map of the whole
8645 	 * segment during the first map and all the device mapping
8646 	 * information needed in rsm_devmap is in the mapinfo list.
8647 	 */
8648 	if ((seg->s_state != RSM_STATE_CONNECT) &&
8649 	    (seg->s_state != RSM_STATE_ACTIVE)) {
8650 		rsmseglock_release(seg);
8651 		DBG_PRINTF((category, RSM_DEBUG,
8652 		    "rsm_segmap done: segment not connected\n"));
8653 		return (ENODEV);
8654 	}
8655 
8656 	/*
8657 	 * Make sure we are not mapping a larger segment than what's
8658 	 * exported
8659 	 */
8660 	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8661 		rsmseglock_release(seg);
8662 		DBG_PRINTF((category, RSM_DEBUG,
8663 		    "rsm_segmap done: off+len>seg size\n"));
8664 		return (ENXIO);
8665 	}
8666 
8667 	/*
8668 	 * Make sure we still have permission for the map operation.
8669 	 */
8670 	maxprot = PROT_USER;
8671 	if (seg->s_mode & RSM_PERM_READ) {
8672 		maxprot |= PROT_READ;
8673 	}
8674 
8675 	if (seg->s_mode & RSM_PERM_WRITE) {
8676 		maxprot |= PROT_WRITE;
8677 	}
8678 
8679 	if ((prot & maxprot) != prot) {
8680 		/* No permission */
8681 		rsmseglock_release(seg);
8682 		DBG_PRINTF((category, RSM_DEBUG,
8683 		    "rsm_segmap done: no permission\n"));
8684 		return (EACCES);
8685 	}
8686 
8687 	old_state = seg->s_state;
8688 
8689 	ASSERT(seg->s_share != NULL);
8690 
8691 	rsmsharelock_acquire(seg);
8692 
8693 	sharedp = seg->s_share;
8694 
8695 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8696 	    "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8697 
8698 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8699 	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8700 		rsmsharelock_release(seg);
8701 		rsmseglock_release(seg);
8702 		DBG_PRINTF((category, RSM_DEBUG,
8703 		    "rsm_segmap done:RSMSI_STATE %d invalid\n",
8704 		    sharedp->rsmsi_state));
8705 		return (ENODEV);
8706 	}
8707 
8708 	/*
8709 	 * Do the map - since we want importers to share mappings
8710 	 * we do the rsmpi map for the whole segment
8711 	 */
8712 	if (seg->s_node != my_nodeid) {
8713 		uint_t dev_register;
8714 		off_t dev_offset;
8715 		dev_info_t *dip;
8716 		size_t tmp_len;
8717 		size_t total_length_mapped = 0;
8718 		size_t length_to_map = seg->s_len;
8719 		off_t tmp_off = 0;
8720 		rsm_mapinfo_t *p;
8721 
8722 		/*
8723 		 * length_to_map = seg->s_len is always an integral
8724 		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8725 		 * list is a multiple of PAGESIZE - RSMPI map ensures this
8726 		 */
8727 
8728 		adapter = seg->s_adapter;
8729 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8730 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8731 
8732 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8733 			error = 0;
8734 			/* map the whole segment */
8735 			while (total_length_mapped < seg->s_len) {
8736 				tmp_len = 0;
8737 
8738 				error = adapter->rsmpi_ops->rsm_map(
8739 				    seg->s_handle.in, tmp_off,
8740 				    length_to_map, &tmp_len,
8741 				    &dip, &dev_register, &dev_offset,
8742 				    NULL, NULL);
8743 
8744 				if (error != 0)
8745 					break;
8746 
8747 				/*
8748 				 * Store the mapping info obtained from rsm_map
8749 				 */
8750 				p = kmem_alloc(sizeof (*p), KM_SLEEP);
8751 				p->dev_register = dev_register;
8752 				p->dev_offset = dev_offset;
8753 				p->dip = dip;
8754 				p->individual_len = tmp_len;
8755 				p->start_offset = tmp_off;
8756 				p->next = sharedp->rsmsi_mapinfo;
8757 				sharedp->rsmsi_mapinfo = p;
8758 
8759 				total_length_mapped += tmp_len;
8760 				length_to_map -= tmp_len;
8761 				tmp_off += tmp_len;
8762 			}
8763 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8764 
8765 			if (error != RSM_SUCCESS) {
8766 				/* Check if this is the the first rsm_map */
8767 				if (sharedp->rsmsi_mapinfo != NULL) {
8768 					/*
8769 					 * A single rsm_unmap undoes
8770 					 * multiple rsm_maps.
8771 					 */
8772 					(void) seg->s_adapter->rsmpi_ops->
8773 					    rsm_unmap(sharedp->rsmsi_handle);
8774 					rsm_free_mapinfo(sharedp->
8775 					    rsmsi_mapinfo);
8776 				}
8777 				sharedp->rsmsi_mapinfo = NULL;
8778 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8779 				rsmsharelock_release(seg);
8780 				rsmseglock_release(seg);
8781 				DBG_PRINTF((category, RSM_DEBUG,
8782 				    "rsm_segmap done: rsmpi map err %d\n",
8783 				    error));
8784 				ASSERT(error != RSMERR_BAD_LENGTH &&
8785 				    error != RSMERR_BAD_MEM_ALIGNMENT &&
8786 				    error != RSMERR_BAD_SEG_HNDL);
8787 				if (error == RSMERR_UNSUPPORTED_OPERATION)
8788 					return (ENOTSUP);
8789 				else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8790 					return (EAGAIN);
8791 				else if (error == RSMERR_CONN_ABORTED)
8792 					return (ENODEV);
8793 				else
8794 					return (error);
8795 			} else {
8796 				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8797 			}
8798 		} else {
8799 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8800 		}
8801 
8802 		sharedp->rsmsi_mapcnt++;
8803 
8804 		rsmsharelock_release(seg);
8805 
8806 		/* move to an intermediate mapping state */
8807 		seg->s_state = RSM_STATE_MAPPING;
8808 		rsmseglock_release(seg);
8809 
8810 		error = devmap_setup(dev, (offset_t)off, as, addrp,
8811 		    len, prot, maxprot, flags, cred);
8812 
8813 		rsmseglock_acquire(seg);
8814 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8815 
8816 		if (error == DDI_SUCCESS) {
8817 			seg->s_state = RSM_STATE_ACTIVE;
8818 		} else {
8819 			rsmsharelock_acquire(seg);
8820 
8821 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8822 
8823 			sharedp->rsmsi_mapcnt--;
8824 			if (sharedp->rsmsi_mapcnt == 0) {
8825 				/* unmap the shared RSMPI mapping */
8826 				ASSERT(sharedp->rsmsi_handle != NULL);
8827 				(void) adapter->rsmpi_ops->
8828 					    rsm_unmap(sharedp->rsmsi_handle);
8829 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8830 				sharedp->rsmsi_mapinfo = NULL;
8831 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8832 			}
8833 
8834 			rsmsharelock_release(seg);
8835 			seg->s_state = old_state;
8836 			DBG_PRINTF((category, RSM_ERR,
8837 			    "rsm: devmap_setup failed %d\n", error));
8838 		}
8839 		cv_broadcast(&seg->s_cv);
8840 		rsmseglock_release(seg);
8841 		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8842 		    error));
8843 		return (error);
8844 	} else {
8845 		/*
8846 		 * For loopback, the export segment mapping cookie (s_cookie)
8847 		 * is also used as the s_cookie value for its import segments
8848 		 * during mapping.
8849 		 * Note that reference counting for s_cookie of the export
8850 		 * segment is not required due to the following:
8851 		 * We never have a case of the export segment being destroyed,
8852 		 * leaving the import segments with a stale value for the
8853 		 * s_cookie field, since a force disconnect is done prior to a
8854 		 * destroy of an export segment. The force disconnect causes
8855 		 * the s_cookie value to be reset to NULL. Also for the
8856 		 * rsm_rebind operation, we change the s_cookie value of the
8857 		 * export segment as well as of all its local (loopback)
8858 		 * importers.
8859 		 */
8860 		DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8861 
8862 		rsmsharelock_release(seg);
8863 		/*
8864 		 * In order to maintain the lock ordering between the export
8865 		 * and import segment locks, we need to acquire the export
8866 		 * segment lock first and only then acquire the import
8867 		 * segment lock.
8868 		 * The above is necessary to avoid any deadlock scenarios
8869 		 * with rsm_rebind which also acquires both the export
8870 		 * and import segment locks in the above mentioned order.
8871 		 * Based on code inspection, there seem to be no other
8872 		 * situations in which both the export and import segment
8873 		 * locks are acquired either in the same or opposite order
8874 		 * as mentioned above.
8875 		 * Thus in order to conform to the above lock order, we
8876 		 * need to change the state of the import segment to
8877 		 * RSM_STATE_MAPPING, release the lock. Once this is done we
8878 		 * can now safely acquire the export segment lock first
8879 		 * followed by the import segment lock which is as per
8880 		 * the lock order mentioned above.
8881 		 */
8882 		/* move to an intermediate mapping state */
8883 		seg->s_state = RSM_STATE_MAPPING;
8884 		rsmseglock_release(seg);
8885 
8886 		eseg = rsmexport_lookup(seg->s_key);
8887 
8888 		if (eseg == NULL) {
8889 			rsmseglock_acquire(seg);
8890 			/*
8891 			 * Revert to old_state and signal any waiters
8892 			 * The shared state is not changed
8893 			 */
8894 
8895 			seg->s_state = old_state;
8896 			cv_broadcast(&seg->s_cv);
8897 			rsmseglock_release(seg);
8898 			DBG_PRINTF((category, RSM_DEBUG,
8899 			    "rsm_segmap done: key %d not found\n", seg->s_key));
8900 			return (ENODEV);
8901 		}
8902 
8903 		rsmsharelock_acquire(seg);
8904 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8905 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8906 
8907 		sharedp->rsmsi_mapcnt++;
8908 		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8909 		rsmsharelock_release(seg);
8910 
8911 		ASSERT(eseg->s_cookie != NULL);
8912 
8913 		/*
8914 		 * It is not required or necessary to acquire the import
8915 		 * segment lock here to change the value of s_cookie since
8916 		 * no one will touch the import segment as long as it is
8917 		 * in the RSM_STATE_MAPPING state.
8918 		 */
8919 		seg->s_cookie = eseg->s_cookie;
8920 
8921 		rsmseglock_release(eseg);
8922 
8923 		error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8924 		    prot, maxprot, flags, cred);
8925 
8926 		rsmseglock_acquire(seg);
8927 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8928 		if (error == 0) {
8929 			seg->s_state = RSM_STATE_ACTIVE;
8930 		} else {
8931 			rsmsharelock_acquire(seg);
8932 
8933 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8934 
8935 			sharedp->rsmsi_mapcnt--;
8936 			if (sharedp->rsmsi_mapcnt == 0) {
8937 				sharedp->rsmsi_mapinfo = NULL;
8938 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8939 			}
8940 			rsmsharelock_release(seg);
8941 			seg->s_state = old_state;
8942 			seg->s_cookie = NULL;
8943 		}
8944 		cv_broadcast(&seg->s_cv);
8945 		rsmseglock_release(seg);
8946 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8947 		    "rsm_segmap done: %d\n", error));
8948 		return (error);
8949 	}
8950 }
8951 
8952 int
8953 rsmka_null_seg_create(
8954     rsm_controller_handle_t argcp,
8955     rsm_memseg_export_handle_t *handle,
8956     size_t size,
8957     uint_t flags,
8958     rsm_memory_local_t *memory,
8959     rsm_resource_callback_t callback,
8960     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8961 {
8962 	return (RSM_SUCCESS);
8963 }
8964 
8965 
8966 int
8967 rsmka_null_seg_destroy(
8968     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
8969 {
8970 	return (RSM_SUCCESS);
8971 }
8972 
8973 
8974 int
8975 rsmka_null_bind(
8976     rsm_memseg_export_handle_t argmemseg,
8977     off_t offset,
8978     rsm_memory_local_t *argmemory,
8979     rsm_resource_callback_t callback,
8980     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8981 {
8982 	return (RSM_SUCCESS);
8983 }
8984 
8985 
8986 int
8987 rsmka_null_unbind(
8988     rsm_memseg_export_handle_t argmemseg,
8989     off_t offset,
8990     size_t length	/*ARGSUSED*/)
8991 {
8992 	return (DDI_SUCCESS);
8993 }
8994 
8995 int
8996 rsmka_null_rebind(
8997     rsm_memseg_export_handle_t argmemseg,
8998     off_t offset,
8999     rsm_memory_local_t *memory,
9000     rsm_resource_callback_t callback,
9001     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9002 {
9003 	return (RSM_SUCCESS);
9004 }
9005 
9006 int
9007 rsmka_null_publish(
9008     rsm_memseg_export_handle_t argmemseg,
9009     rsm_access_entry_t access_list[],
9010     uint_t access_list_length,
9011     rsm_memseg_id_t segment_id,
9012     rsm_resource_callback_t callback,
9013     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9014 {
9015 	return (RSM_SUCCESS);
9016 }
9017 
9018 
9019 int
9020 rsmka_null_republish(
9021     rsm_memseg_export_handle_t memseg,
9022     rsm_access_entry_t access_list[],
9023     uint_t access_list_length,
9024     rsm_resource_callback_t callback,
9025     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9026 {
9027 	return (RSM_SUCCESS);
9028 }
9029 
9030 int
9031 rsmka_null_unpublish(
9032     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
9033 {
9034 	return (RSM_SUCCESS);
9035 }
9036 
9037 
9038 void
9039 rsmka_init_loopback()
9040 {
9041 	rsm_ops_t	*ops = &null_rsmpi_ops;
9042 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9043 
9044 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9045 	    "rsmka_init_loopback enter\n"));
9046 
9047 	/* initialize null ops vector */
9048 	ops->rsm_seg_create = rsmka_null_seg_create;
9049 	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9050 	ops->rsm_bind = rsmka_null_bind;
9051 	ops->rsm_unbind = rsmka_null_unbind;
9052 	ops->rsm_rebind = rsmka_null_rebind;
9053 	ops->rsm_publish = rsmka_null_publish;
9054 	ops->rsm_unpublish = rsmka_null_unpublish;
9055 	ops->rsm_republish = rsmka_null_republish;
9056 
9057 	/* initialize attributes for loopback adapter */
9058 	loopback_attr.attr_name = loopback_str;
9059 	loopback_attr.attr_page_size = 0x8; /* 8K */
9060 
9061 	/* initialize loopback adapter */
9062 	loopback_adapter.rsm_attr = loopback_attr;
9063 	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9064 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9065 	    "rsmka_init_loopback done\n"));
9066 }
9067 
9068 /* ************** DR functions ********************************** */
9069 static void
9070 rsm_quiesce_exp_seg(rsmresource_t *resp)
9071 {
9072 	int		recheck_state;
9073 	rsmseg_t	*segp = (rsmseg_t *)resp;
9074 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9075 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9076 
9077 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9078 	    "%s enter: key=%u\n", function, segp->s_key));
9079 
9080 	rsmseglock_acquire(segp);
9081 	do {
9082 		recheck_state = 0;
9083 		if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9084 		    (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9085 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9086 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9087 			rsmseglock_release(segp);
9088 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9089 			    "%s done:state =%d\n", function,
9090 			    segp->s_state));
9091 			return;
9092 		}
9093 
9094 		if (segp->s_state == RSM_STATE_NEW) {
9095 			segp->s_state = RSM_STATE_NEW_QUIESCED;
9096 			rsmseglock_release(segp);
9097 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9098 			    "%s done:state =%d\n", function,
9099 			    segp->s_state));
9100 			return;
9101 		}
9102 
9103 		if (segp->s_state == RSM_STATE_BIND) {
9104 			/* unbind */
9105 			(void) rsm_unbind_pages(segp);
9106 			segp->s_state = RSM_STATE_BIND_QUIESCED;
9107 			rsmseglock_release(segp);
9108 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9109 			    "%s done:state =%d\n", function,
9110 			    segp->s_state));
9111 			return;
9112 		}
9113 
9114 		if (segp->s_state == RSM_STATE_EXPORT) {
9115 			/*
9116 			 * wait for putv/getv to complete if the segp is
9117 			 * a local memory handle
9118 			 */
9119 			while ((segp->s_state == RSM_STATE_EXPORT) &&
9120 			    (segp->s_rdmacnt != 0)) {
9121 				cv_wait(&segp->s_cv, &segp->s_lock);
9122 			}
9123 
9124 			if (segp->s_state != RSM_STATE_EXPORT) {
9125 				/*
9126 				 * state changed need to see what it
9127 				 * should be changed to.
9128 				 */
9129 				recheck_state = 1;
9130 				continue;
9131 			}
9132 
9133 			segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9134 			rsmseglock_release(segp);
9135 			/*
9136 			 * send SUSPEND messages - currently it will be
9137 			 * done at the end
9138 			 */
9139 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9140 			    "%s done:state =%d\n", function,
9141 			    segp->s_state));
9142 			return;
9143 		}
9144 	} while (recheck_state);
9145 
9146 	rsmseglock_release(segp);
9147 }
9148 
9149 static void
9150 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9151 {
9152 	int			ret;
9153 	rsmseg_t		*segp = (rsmseg_t *)resp;
9154 	rsmapi_access_entry_t	*acl;
9155 	rsm_access_entry_t	*rsmpi_acl;
9156 	int			acl_len;
9157 	int			create_flags = 0;
9158 	struct buf		*xbuf;
9159 	rsm_memory_local_t	mem;
9160 	adapter_t		*adapter;
9161 	dev_t			sdev = 0;
9162 	rsm_resource_callback_t callback_flag;
9163 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9164 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9165 
9166 	rsmseglock_acquire(segp);
9167 
9168 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9169 	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
9170 	    segp->s_state));
9171 
9172 	if ((segp->s_state == RSM_STATE_NEW) ||
9173 	    (segp->s_state == RSM_STATE_BIND) ||
9174 	    (segp->s_state == RSM_STATE_EXPORT)) {
9175 		rsmseglock_release(segp);
9176 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9177 		    function, segp->s_state));
9178 		return;
9179 	}
9180 
9181 	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9182 		segp->s_state = RSM_STATE_NEW;
9183 		cv_broadcast(&segp->s_cv);
9184 		rsmseglock_release(segp);
9185 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9186 		    function, segp->s_state));
9187 		return;
9188 	}
9189 
9190 	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9191 		/* bind the segment */
9192 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9193 		    segp->s_len, segp->s_proc);
9194 		if (ret == RSM_SUCCESS) { /* bind successful */
9195 			segp->s_state = RSM_STATE_BIND;
9196 		} else { /* bind failed - resource unavailable */
9197 			segp->s_state = RSM_STATE_NEW;
9198 		}
9199 		cv_broadcast(&segp->s_cv);
9200 		rsmseglock_release(segp);
9201 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9202 		    "%s done: bind_qscd bind = %d\n", function, ret));
9203 		return;
9204 	}
9205 
9206 	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9207 		/* wait for the segment to move to EXPORT_QUIESCED state */
9208 		cv_wait(&segp->s_cv, &segp->s_lock);
9209 	}
9210 
9211 	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9212 		/* bind the segment */
9213 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9214 		    segp->s_len, segp->s_proc);
9215 
9216 		if (ret != RSM_SUCCESS) {
9217 			/* bind failed - resource unavailable */
9218 			acl_len = segp->s_acl_len;
9219 			acl = segp->s_acl;
9220 			rsmpi_acl = segp->s_acl_in;
9221 			segp->s_acl_len = 0;
9222 			segp->s_acl = NULL;
9223 			segp->s_acl_in = NULL;
9224 			rsmseglock_release(segp);
9225 
9226 			rsmexport_rm(segp);
9227 			rsmacl_free(acl, acl_len);
9228 			rsmpiacl_free(rsmpi_acl, acl_len);
9229 
9230 			rsmseglock_acquire(segp);
9231 			segp->s_state = RSM_STATE_NEW;
9232 			cv_broadcast(&segp->s_cv);
9233 			rsmseglock_release(segp);
9234 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9235 			    "%s done: exp_qscd bind failed = %d\n",
9236 			    function, ret));
9237 			return;
9238 		}
9239 		/*
9240 		 * publish the segment
9241 		 * if  successful
9242 		 *   segp->s_state = RSM_STATE_EXPORT;
9243 		 * else failed
9244 		 *   segp->s_state = RSM_STATE_BIND;
9245 		 */
9246 
9247 		/* check whether it is a local_memory_handle */
9248 		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9249 			if ((segp->s_acl[0].ae_node == my_nodeid) &&
9250 			    (segp->s_acl[0].ae_permission == 0)) {
9251 				segp->s_state = RSM_STATE_EXPORT;
9252 				cv_broadcast(&segp->s_cv);
9253 				rsmseglock_release(segp);
9254 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9255 				    "%s done:exp_qscd\n", function));
9256 				return;
9257 			}
9258 		}
9259 		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9260 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
9261 		ASSERT(xbuf != NULL);
9262 
9263 		mem.ms_type = RSM_MEM_BUF;
9264 		mem.ms_bp = xbuf;
9265 
9266 		adapter = segp->s_adapter;
9267 
9268 		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9269 			create_flags = RSM_ALLOW_UNBIND_REBIND;
9270 		}
9271 
9272 		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9273 			callback_flag  = RSM_RESOURCE_DONTWAIT;
9274 		} else {
9275 			callback_flag  = RSM_RESOURCE_SLEEP;
9276 		}
9277 
9278 		ret = adapter->rsmpi_ops->rsm_seg_create(
9279 		    adapter->rsmpi_handle, &segp->s_handle.out,
9280 		    segp->s_len, create_flags, &mem,
9281 		    callback_flag, NULL);
9282 
9283 		if (ret != RSM_SUCCESS) {
9284 			acl_len = segp->s_acl_len;
9285 			acl = segp->s_acl;
9286 			rsmpi_acl = segp->s_acl_in;
9287 			segp->s_acl_len = 0;
9288 			segp->s_acl = NULL;
9289 			segp->s_acl_in = NULL;
9290 			rsmseglock_release(segp);
9291 
9292 			rsmexport_rm(segp);
9293 			rsmacl_free(acl, acl_len);
9294 			rsmpiacl_free(rsmpi_acl, acl_len);
9295 
9296 			rsmseglock_acquire(segp);
9297 			segp->s_state = RSM_STATE_BIND;
9298 			cv_broadcast(&segp->s_cv);
9299 			rsmseglock_release(segp);
9300 			DBG_PRINTF((category, RSM_ERR,
9301 			    "%s done: exp_qscd create failed = %d\n",
9302 			    function, ret));
9303 			return;
9304 		}
9305 
9306 		ret = adapter->rsmpi_ops->rsm_publish(
9307 		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9308 		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9309 
9310 		if (ret != RSM_SUCCESS) {
9311 			acl_len = segp->s_acl_len;
9312 			acl = segp->s_acl;
9313 			rsmpi_acl = segp->s_acl_in;
9314 			segp->s_acl_len = 0;
9315 			segp->s_acl = NULL;
9316 			segp->s_acl_in = NULL;
9317 			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9318 			rsmseglock_release(segp);
9319 
9320 			rsmexport_rm(segp);
9321 			rsmacl_free(acl, acl_len);
9322 			rsmpiacl_free(rsmpi_acl, acl_len);
9323 
9324 			rsmseglock_acquire(segp);
9325 			segp->s_state = RSM_STATE_BIND;
9326 			cv_broadcast(&segp->s_cv);
9327 			rsmseglock_release(segp);
9328 			DBG_PRINTF((category, RSM_ERR,
9329 			    "%s done: exp_qscd publish failed = %d\n",
9330 			    function, ret));
9331 			return;
9332 		}
9333 
9334 		segp->s_state = RSM_STATE_EXPORT;
9335 		cv_broadcast(&segp->s_cv);
9336 		rsmseglock_release(segp);
9337 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9338 		    function));
9339 		return;
9340 	}
9341 
9342 	rsmseglock_release(segp);
9343 
9344 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9345 }
9346 
9347 static void
9348 rsm_quiesce_imp_seg(rsmresource_t *resp)
9349 {
9350 	rsmseg_t	*segp = (rsmseg_t *)resp;
9351 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9352 	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9353 
9354 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9355 	    "%s enter: key=%u\n", function, segp->s_key));
9356 
9357 	rsmseglock_acquire(segp);
9358 	segp->s_flags |= RSM_DR_INPROGRESS;
9359 
9360 	while (segp->s_rdmacnt != 0) {
9361 		/* wait for the RDMA to complete */
9362 		cv_wait(&segp->s_cv, &segp->s_lock);
9363 	}
9364 
9365 	rsmseglock_release(segp);
9366 
9367 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9368 
9369 }
9370 
9371 static void
9372 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9373 {
9374 	rsmseg_t	*segp = (rsmseg_t *)resp;
9375 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9376 	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9377 
9378 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9379 	    "%s enter: key=%u\n", function, segp->s_key));
9380 
9381 	rsmseglock_acquire(segp);
9382 
9383 	segp->s_flags &= ~RSM_DR_INPROGRESS;
9384 	/* wake up any waiting putv/getv ops */
9385 	cv_broadcast(&segp->s_cv);
9386 
9387 	rsmseglock_release(segp);
9388 
9389 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9390 
9391 
9392 }
9393 
9394 static void
9395 rsm_process_exp_seg(rsmresource_t *resp, int event)
9396 {
9397 	if (event == RSM_DR_QUIESCE)
9398 		rsm_quiesce_exp_seg(resp);
9399 	else /* UNQUIESCE */
9400 		rsm_unquiesce_exp_seg(resp);
9401 }
9402 
9403 static void
9404 rsm_process_imp_seg(rsmresource_t *resp, int event)
9405 {
9406 	if (event == RSM_DR_QUIESCE)
9407 		rsm_quiesce_imp_seg(resp);
9408 	else /* UNQUIESCE */
9409 		rsm_unquiesce_imp_seg(resp);
9410 }
9411 
9412 static void
9413 rsm_dr_process_local_segments(int event)
9414 {
9415 
9416 	int i, j;
9417 	rsmresource_blk_t	*blk;
9418 	rsmresource_t		*p;
9419 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9420 
9421 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9422 	    "rsm_dr_process_local_segments enter\n"));
9423 
9424 	/* iterate through the resource structure */
9425 
9426 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9427 
9428 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9429 		blk = rsm_resource.rsmrc_root[i];
9430 		if (blk != NULL) {
9431 			for (j = 0; j < RSMRC_BLKSZ; j++) {
9432 				p = blk->rsmrcblk_blks[j];
9433 				if ((p != NULL) && (p != RSMRC_RESERVED)) {
9434 					/* valid resource */
9435 					if (p->rsmrc_type ==
9436 					    RSM_RESOURCE_EXPORT_SEGMENT)
9437 						rsm_process_exp_seg(p, event);
9438 					else if (p->rsmrc_type ==
9439 					    RSM_RESOURCE_IMPORT_SEGMENT)
9440 						rsm_process_imp_seg(p, event);
9441 				}
9442 			}
9443 		}
9444 	}
9445 
9446 	rw_exit(&rsm_resource.rsmrc_lock);
9447 
9448 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9449 	    "rsm_dr_process_local_segments done\n"));
9450 }
9451 
9452 /* *************** DR callback functions ************ */
9453 static void
9454 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9455 {
9456 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9457 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9458 	    "rsm_dr_callback_post_add is a no-op\n"));
9459 	/* Noop */
9460 }
9461 
9462 static int
9463 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9464 {
9465 	int	recheck_state = 0;
9466 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9467 
9468 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9469 	    "rsm_dr_callback_pre_del enter\n"));
9470 
9471 	mutex_enter(&rsm_drv_data.drv_lock);
9472 
9473 	do {
9474 		recheck_state = 0;
9475 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9476 		    "rsm_dr_callback_pre_del:state=%d\n",
9477 		    rsm_drv_data.drv_state));
9478 
9479 		switch (rsm_drv_data.drv_state) {
9480 		case RSM_DRV_NEW:
9481 			/*
9482 			 * The state should usually never be RSM_DRV_NEW
9483 			 * since in this state the callbacks have not yet
9484 			 * been registered. So, ASSERT.
9485 			 */
9486 			ASSERT(0);
9487 			return (0);
9488 		case RSM_DRV_REG_PROCESSING:
9489 			/*
9490 			 * The driver is in the process of registering
9491 			 * with the DR framework. So, wait till the
9492 			 * registration process is complete.
9493 			 */
9494 			recheck_state = 1;
9495 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9496 			break;
9497 		case RSM_DRV_UNREG_PROCESSING:
9498 			/*
9499 			 * If the state is RSM_DRV_UNREG_PROCESSING, the
9500 			 * module is in the process of detaching and
9501 			 * unregistering the callbacks from the DR
9502 			 * framework. So, simply return.
9503 			 */
9504 			mutex_exit(&rsm_drv_data.drv_lock);
9505 			DBG_PRINTF((category, RSM_DEBUG,
9506 			    "rsm_dr_callback_pre_del:"
9507 			    "pre-del on NEW/UNREG\n"));
9508 			return (0);
9509 		case RSM_DRV_OK:
9510 			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9511 			break;
9512 		case RSM_DRV_PREDEL_STARTED:
9513 			/* FALLTHRU */
9514 		case RSM_DRV_PREDEL_COMPLETED:
9515 			/* FALLTHRU */
9516 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9517 			recheck_state = 1;
9518 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9519 			break;
9520 		case RSM_DRV_DR_IN_PROGRESS:
9521 			rsm_drv_data.drv_memdel_cnt++;
9522 			mutex_exit(&rsm_drv_data.drv_lock);
9523 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9524 			    "rsm_dr_callback_pre_del done\n"));
9525 			return (0);
9526 			/* break; */
9527 		default:
9528 			ASSERT(0);
9529 			break;
9530 		}
9531 
9532 	} while (recheck_state);
9533 
9534 	rsm_drv_data.drv_memdel_cnt++;
9535 
9536 	mutex_exit(&rsm_drv_data.drv_lock);
9537 
9538 	/* Do all the quiescing stuff here */
9539 	DBG_PRINTF((category, RSM_DEBUG,
9540 	    "rsm_dr_callback_pre_del: quiesce things now\n"));
9541 
9542 	rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9543 
9544 	/*
9545 	 * now that all local segments have been quiesced lets inform
9546 	 * the importers
9547 	 */
9548 	rsm_send_suspend();
9549 
9550 	/*
9551 	 * In response to the suspend message the remote node(s) will process
9552 	 * the segments and send a suspend_complete message. Till all
9553 	 * the nodes send the suspend_complete message we wait in the
9554 	 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9555 	 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9556 	 */
9557 	mutex_enter(&rsm_drv_data.drv_lock);
9558 
9559 	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9560 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9561 	}
9562 
9563 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9564 
9565 	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9566 	cv_broadcast(&rsm_drv_data.drv_cv);
9567 
9568 	mutex_exit(&rsm_drv_data.drv_lock);
9569 
9570 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9571 	    "rsm_dr_callback_pre_del done\n"));
9572 
9573 	return (0);
9574 }
9575 
9576 static void
9577 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9578 {
9579 	int	recheck_state = 0;
9580 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9581 
9582 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9583 	    "rsm_dr_callback_post_del enter\n"));
9584 
9585 	mutex_enter(&rsm_drv_data.drv_lock);
9586 
9587 	do {
9588 		recheck_state = 0;
9589 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9590 		    "rsm_dr_callback_post_del:state=%d\n",
9591 		    rsm_drv_data.drv_state));
9592 
9593 		switch (rsm_drv_data.drv_state) {
9594 		case RSM_DRV_NEW:
9595 			/*
9596 			 * The driver state cannot not be RSM_DRV_NEW
9597 			 * since in this state the callbacks have not
9598 			 * yet been registered.
9599 			 */
9600 			ASSERT(0);
9601 			return;
9602 		case RSM_DRV_REG_PROCESSING:
9603 			/*
9604 			 * The driver is in the process of registering with
9605 			 * the DR framework. Wait till the registration is
9606 			 * complete.
9607 			 */
9608 			recheck_state = 1;
9609 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9610 			break;
9611 		case RSM_DRV_UNREG_PROCESSING:
9612 			/*
9613 			 * RSM_DRV_UNREG_PROCESSING state means the module
9614 			 * is detaching and unregistering the callbacks
9615 			 * from the DR framework. So simply return.
9616 			 */
9617 			/* FALLTHRU */
9618 		case RSM_DRV_OK:
9619 			/*
9620 			 * RSM_DRV_OK means we missed the pre-del
9621 			 * corresponding to this post-del coz we had not
9622 			 * registered yet, so simply return.
9623 			 */
9624 			mutex_exit(&rsm_drv_data.drv_lock);
9625 			DBG_PRINTF((category, RSM_DEBUG,
9626 			    "rsm_dr_callback_post_del:"
9627 			    "post-del on OK/UNREG\n"));
9628 			return;
9629 			/* break; */
9630 		case RSM_DRV_PREDEL_STARTED:
9631 			/* FALLTHRU */
9632 		case RSM_DRV_PREDEL_COMPLETED:
9633 			/* FALLTHRU */
9634 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9635 			recheck_state = 1;
9636 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9637 			break;
9638 		case RSM_DRV_DR_IN_PROGRESS:
9639 			rsm_drv_data.drv_memdel_cnt--;
9640 			if (rsm_drv_data.drv_memdel_cnt > 0) {
9641 				mutex_exit(&rsm_drv_data.drv_lock);
9642 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9643 				    "rsm_dr_callback_post_del done:\n"));
9644 				return;
9645 			}
9646 			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9647 			break;
9648 		default:
9649 			ASSERT(0);
9650 			return;
9651 			/* break; */
9652 		}
9653 	} while (recheck_state);
9654 
9655 	mutex_exit(&rsm_drv_data.drv_lock);
9656 
9657 	/* Do all the unquiescing stuff here */
9658 	DBG_PRINTF((category, RSM_DEBUG,
9659 	    "rsm_dr_callback_post_del: unquiesce things now\n"));
9660 
9661 	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9662 
9663 	/*
9664 	 * now that all local segments have been unquiesced lets inform
9665 	 * the importers
9666 	 */
9667 	rsm_send_resume();
9668 
9669 	mutex_enter(&rsm_drv_data.drv_lock);
9670 
9671 	rsm_drv_data.drv_state = RSM_DRV_OK;
9672 
9673 	cv_broadcast(&rsm_drv_data.drv_cv);
9674 
9675 	mutex_exit(&rsm_drv_data.drv_lock);
9676 
9677 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9678 	    "rsm_dr_callback_post_del done\n"));
9679 
9680 	return;
9681 
9682 }
9683