xref: /titanic_52/usr/src/uts/common/io/rsm/rsm.c (revision 922d2c76afbee21520ffa2088c4e60dcb80d3945)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Overview of the RSM Kernel Agent:
30  * ---------------------------------
31  *
32  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
33  * kernel agent is a pseudo device driver which makes use of the RSMPI
34  * interface on behalf of the RSMAPI user library.
35  *
36  * The kernel agent functionality can be categorized into the following
37  * components:
38  * 1. Driver Infrastructure
39  * 2. Export/Import Segment Management
40  * 3. Internal resource allocation/deallocation
41  *
42  * The driver infrastructure includes the basic module loading entry points
43  * like _init, _info, _fini to load, unload and report information about
44  * the driver module. The driver infrastructure also includes the
45  * autoconfiguration entry points namely, attach, detach and getinfo for
46  * the device autoconfiguration.
47  *
48  * The kernel agent is a pseudo character device driver and exports
49  * a cb_ops structure which defines the driver entry points for character
50  * device access. This includes the open and close entry points. The
51  * other entry points provided include ioctl, devmap and segmap and chpoll.
52  * read and write entry points are not used since the device is memory
53  * mapped. Also ddi_prop_op is used for the prop_op entry point.
54  *
55  * The ioctl entry point supports a number of commands, which are used by
56  * the RSMAPI library in order to export and import segments. These
57  * commands include commands for binding and rebinding the physical pages
58  * allocated to the virtual address range, publishing the export segment,
59  * unpublishing and republishing an export segment, creating an
60  * import segment and a virtual connection from this import segment to
61  * an export segment, performing scatter-gather data transfer, barrier
62  * operations.
63  *
64  *
65  * Export and Import segments:
66  * ---------------------------
67  *
68  * In order to create an RSM export segment a process allocates a range in its
69  * virtual address space for the segment using standard Solaris interfaces.
70  * The process then calls RSMAPI, which in turn makes an ioctl call to the
71  * RSM kernel agent for an allocation of physical memory pages and for
72  * creation of the export segment by binding these pages to the virtual
73  * address range. These pages are locked in memory so that remote accesses
74  * are always applied to the correct page. Then the RSM segment is published,
75  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
76  * is assigned to it.
77  *
78  * In order to import a published RSM segment, RSMAPI creates an import
79  * segment and forms a virtual connection across the interconnect to the
80  * export segment, via an ioctl into the kernel agent with the connect
81  * command. The import segment setup is completed by mapping the
82  * local device memory into the importer's virtual address space. The
83  * mapping of the import segment is handled by the segmap/devmap
84  * infrastructure described as follows.
85  *
86  * Segmap and Devmap interfaces:
87  *
88  * The RSM kernel agent allows device memory to be directly accessed by user
89  * threads via memory mapping. In order to do so, the RSM kernel agent
90  * supports the devmap and segmap entry points.
91  *
92  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
93  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
94  * responsible for exporting the device memory to the user applications.
95  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
96  * control is transferred to the devmap_setup call which calls rsm_devmap.
97  *
98  * rsm_devmap validates the user mapping to the device or kernel memory
99  * and passes the information to the system for setting up the mapping. The
100  * actual setting up of the mapping is done by devmap_devmem_setup(for
101  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
102  * registered for device context management via the devmap_devmem_setup
103  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
104  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
105  * is created, a mapping is freed, a mapping is accessed or an existing
106  * mapping is duplicated respectively. These callbacks allow the RSM kernel
107  * agent to maintain state information associated with the mappings.
108  * The state information is mainly in the form of a cookie list for the import
109  * segment for which mapping has been done.
110  *
111  * Forced disconnect of import segments:
112  *
113  * When an exported segment is unpublished, the exporter sends a forced
114  * disconnect message to all its importers. The importer segments are
115  * unloaded and disconnected. This involves unloading the original
116  * mappings and remapping to a preallocated kernel trash page. This is
117  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
118  * preallocated by the kernel agent during attach using ddi_umem_alloc with
119  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
120  * due to unloading of the original mappings.
121  *
122  * Additionally every segment has a mapping generation number associated
123  * with it. This is an entry in the barrier generation page, created
124  * during attach time. This mapping generation number for the import
125  * segments is incremented on a force disconnect to notify the application
126  * of the force disconnect. On this notification, the application needs
127  * to reconnect the segment to establish a new legitimate mapping.
128  *
129  *
130  * Locks used in the kernel agent:
131  * -------------------------------
132  *
133  * The kernel agent uses a variety of mutexes and condition variables for
134  * mutual exclusion of the shared data structures and for synchronization
135  * between the various threads. Some of the locks are described as follows.
136  *
137  * Each resource structure, which represents either an export/import segment
138  * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
139  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
140  * rsmseglock_acquire and rsmseglock_release macros. An additional
141  * lock called the rsmsi_lock is used for the shared import data structure
142  * that is relevant for resources representing import segments. There is
143  * also a condition variable associated with the resource called s_cv. This
144  * is used to wait for events like the segment state change etc.
145  *
146  * The resource structures are allocated from a pool of resource structures,
147  * called rsm_resource. This pool is protected via a reader-writer lock,
148  * called rsmrc_lock.
149  *
150  * There are two separate hash tables, one for the export segments and
151  * one for the import segments. The export segments are inserted into the
152  * export segment hash table only after they have been published and the
153  * import segments are inserted in the import segments list only after they
154  * have successfully connected to an exported segment. These tables are
155  * protected via reader-writer locks.
156  *
157  * Debug Support in the kernel agent:
158  * ----------------------------------
159  *
160  * Debugging support in the kernel agent is provided by the following
161  * macros.
162  *
163  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
164  * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
165  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
166  * on the definition of the category and level. All messages that belong to
167  * the specified category(rsmdbg_category) and are of an equal or greater
168  * severity than the specified level(rsmdbg_level) are logged. The message
169  * is a string which uses the same formatting rules as the strings used in
170  * printf.
171  *
172  * The category defines which component of the kernel agent has logged this
173  * message. There are a number of categories that have been defined such as
174  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
175  * DBG_ADDCATEGORY is used to add in another category to the currently
176  * specified category value so that the component using this new category
177  * can also effectively log debug messages. Thus, the category of a specific
178  * message is some combination of the available categories and we can define
179  * sub-categories if we want a finer level of granularity.
180  *
181  * The level defines the severity of the message. Different level values are
182  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
183  * the least severe(debug level is 0).
184  *
185  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
186  * variable or a string respectively.
187  *
188  *
189  * NOTES:
190  *
191  * Special Fork and Exec Handling:
192  * -------------------------------
193  *
194  * The backing physical pages of an exported segment are always locked down.
195  * Thus, there are two cases in which a process having exported segments
196  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
197  * forks and invokes exit before the duped file descriptors for the export
198  * segments are closed in the child process. The hang is caused because the
199  * address space release algorithm in Solaris VM subsystem is based on a
200  * non-blocking loop which does not terminate while segments are locked
201  * down. In addition to this, Solaris VM subsystem lacks a callback
202  * mechanism to the rsm kernel agent to allow unlocking these export
203  * segment pages.
204  *
205  * In order to circumvent this problem, the kernel agent does the following.
206  * The Solaris VM subsystem keeps memory segments in increasing order of
207  * virtual addresses. Thus a special page(special_exit_offset) is allocated
208  * by the kernel agent and is mmapped into the heap area of the process address
209  * space(the mmap is done by the RSMAPI library). During the mmap processing
210  * of this special page by the devmap infrastructure, a callback(the same
211  * devmap context management callbacks discussed above) is registered for an
212  * unmap.
213  *
214  * As discussed above, this page is processed by the Solaris address space
215  * release code before any of the exported segments pages(which are allocated
216  * from high memory). It is during this processing that the unmap callback gets
217  * called and this callback is responsible for force destroying the exported
218  * segments and thus eliminating the problem of locked pages.
219  *
220  * Flow-control:
221  * ------------
222  *
223  * A credit based flow control algorithm is used for messages whose
224  * processing cannot be done in the interrupt context because it might
225  * involve invoking rsmpi calls, or might take a long time to complete
226  * or might need to allocate resources. The algorithm operates on a per
227  * path basis. To send a message the pathend needs to have a credit and
228  * it consumes one for every message that is flow controlled. On the
229  * receiving pathend the message is put on a msgbuf_queue and a task is
230  * dispatched on the worker thread - recv_taskq where it is processed.
231  * After processing the message, the receiving pathend dequeues the message,
232  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
233  * credits to the sender pathend.
234  *
235  * RSM_DRTEST:
236  * -----------
237  *
238  * This is used to enable the DR testing using a test driver on test
239  * platforms which do not support DR.
240  *
241  */
242 
243 #include <sys/types.h>
244 #include <sys/param.h>
245 #include <sys/user.h>
246 #include <sys/buf.h>
247 #include <sys/systm.h>
248 #include <sys/cred.h>
249 #include <sys/vm.h>
250 #include <sys/uio.h>
251 #include <vm/seg.h>
252 #include <vm/page.h>
253 #include <sys/stat.h>
254 
255 #include <sys/time.h>
256 #include <sys/errno.h>
257 
258 #include <sys/file.h>
259 #include <sys/uio.h>
260 #include <sys/proc.h>
261 #include <sys/mman.h>
262 #include <sys/open.h>
263 #include <sys/atomic.h>
264 #include <sys/mem_config.h>
265 
266 
267 #include <sys/ddi.h>
268 #include <sys/devops.h>
269 #include <sys/ddidevmap.h>
270 #include <sys/sunddi.h>
271 #include <sys/esunddi.h>
272 #include <sys/ddi_impldefs.h>
273 
274 #include <sys/kmem.h>
275 #include <sys/conf.h>
276 #include <sys/devops.h>
277 #include <sys/ddi_impldefs.h>
278 
279 #include <sys/modctl.h>
280 
281 #include <sys/policy.h>
282 #include <sys/types.h>
283 #include <sys/conf.h>
284 #include <sys/param.h>
285 
286 #include <sys/taskq.h>
287 
288 #include <sys/rsm/rsm_common.h>
289 #include <sys/rsm/rsmapi_common.h>
290 #include <sys/rsm/rsm.h>
291 #include <rsm_in.h>
292 #include <sys/rsm/rsmka_path_int.h>
293 #include <sys/rsm/rsmpi.h>
294 
295 #include <sys/modctl.h>
296 #include <sys/debug.h>
297 
298 #include <sys/tuneable.h>
299 
300 #ifdef	RSM_DRTEST
301 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
302 		void *arg);
303 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
304 		void *arg);
305 #endif
306 
307 extern void dbg_printf(int category, int level, char *fmt, ...);
308 extern void rsmka_pathmanager_init();
309 extern void rsmka_pathmanager_cleanup();
310 extern void rele_sendq_token();
311 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
312 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
313 extern int rsmka_topology_ioctl(caddr_t, int, int);
314 
315 extern pri_t maxclsyspri;
316 extern work_queue_t work_queue;
317 extern kmutex_t ipc_info_lock;
318 extern kmutex_t ipc_info_cvlock;
319 extern kcondvar_t ipc_info_cv;
320 extern kmutex_t path_hold_cvlock;
321 extern kcondvar_t path_hold_cv;
322 
323 extern kmutex_t rsmka_buf_lock;
324 
325 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
326 extern adapter_t *rsmka_lookup_adapter(char *, int);
327 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
328 extern boolean_t rsmka_do_path_active(path_t *, int);
329 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
330 extern void rsmka_release_adapter(adapter_t *);
331 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
332 extern void rsmka_dequeue_msgbuf(path_t *path);
333 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
334 /* lint -w2 */
335 
336 static int rsm_open(dev_t *, int, int, cred_t *);
337 static int rsm_close(dev_t, int, int, cred_t *);
338 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
339     cred_t *credp, int *rvalp);
340 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
341     uint_t);
342 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
343     uint_t, uint_t, cred_t *);
344 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
345     struct pollhead **phpp);
346 
347 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
348 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
349 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
350 
351 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
352 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
353 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
354 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
355 				rsm_permission_t);
356 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
357 static void rsmacl_free(rsmapi_access_entry_t *, int);
358 static void rsmpiacl_free(rsm_access_entry_t *, int);
359 
360 static int rsm_inc_pgcnt(pgcnt_t);
361 static void rsm_dec_pgcnt(pgcnt_t);
362 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
363 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
364 					size_t *);
365 static void exporter_quiesce();
366 static void rsmseg_suspend(rsmseg_t *, int *);
367 static void rsmsegshare_suspend(rsmseg_t *);
368 static int rsmseg_resume(rsmseg_t *, void **);
369 static int rsmsegshare_resume(rsmseg_t *);
370 
/*
 * Character-device entry points exported by the RSM kernel agent.
 *
 * Only open, close, ioctl, devmap, segmap and chpoll are implemented by
 * this driver.  The strategy, print, dump, read and write slots are filled
 * with nodev: the device is accessed through memory mapping, so the read
 * and write entry points are not used.  Property requests are passed to
 * ddi_prop_op.
 */
static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,	/* NOTE(review): presumably cb_rev - confirm in sys/devops.h */
	0,	/* NOTE(review): presumably cb_aread - confirm */
	0	/* NOTE(review): presumably cb_awrite - confirm */
};
391 
/*
 * Device operations vector for the RSM pseudo driver.  It wires the
 * autoconfiguration entry points (rsm_info, rsm_attach, rsm_detach) and
 * points at rsm_cb_ops for character-device access.  There are no bus
 * operations.
 */
static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0	/* NOTE(review): presumably devo_power (unused) - confirm */
};
405 
406 /*
407  * Module linkage information for the kernel.
408  */
409 
/*
 * Driver linkage record: identifies this module as a driver
 * (mod_driverops), gives its descriptive name and points at rsm_ops.
 * It is referenced from modlinkage.
 */
static struct modldrv modldrv = {
	&mod_driverops, /* Type of module.  This one is a pseudo driver */
	"Remote Shared Memory Driver %I%",	/* %I% is an SCCS keyword */
	&rsm_ops,	/* driver ops */
};
415 
/*
 * Top-level linkage structure handed to mod_install(), mod_info() and
 * mod_remove() by _init, _info and _fini.  It carries a single linkage
 * entry (modldrv); the entries after it are zero.
 */
static struct modlinkage modlinkage = {
	MODREV_1,		/* module linkage revision */
	(void *)&modldrv,	/* the only linkage element: the driver */
	0,
	0,
	0
};
423 
/*
 * Dynamic reconfiguration (DR) callbacks.  The three handlers below are
 * collected in rsm_dr_callback_vec, which rsm_attach registers through
 * kphysm_setup_func_register() (or rsm_kphysm_setup_func_register() when
 * RSM_DRTEST is defined) if rsm_enable_dr is nonzero.
 */
static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,	/* version of the vector layout */
	rsm_dr_callback_post_add,	/* post-add handler */
	rsm_dr_callback_pre_del,	/* pre-delete handler */
	rsm_dr_callback_post_del	/* post-delete handler */
};
434 
435 /* This flag can be changed to 0 to help with PIT testing */
436 int rsmka_modunloadok = 1;
437 int no_reply_cnt = 0;
438 
439 uint64_t rsm_ctrlmsg_errcnt = 0;
440 uint64_t rsm_ipcsend_errcnt = 0;
441 
442 #define	MAX_NODES 64
443 
444 static struct rsm_driver_data rsm_drv_data;
445 static struct rsmresource_table rsm_resource;
446 
447 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
448 static void rsmresource_destroy(void);
449 static int rsmresource_alloc(minor_t *);
450 static rsmresource_t *rsmresource_free(minor_t rnum);
451 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
452 static int rsm_unpublish(rsmseg_t *seg, int mode);
453 static int rsm_unbind(rsmseg_t *seg);
454 static uint_t rsmhash(rsm_memseg_id_t key);
455 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
456 static void rsmhash_free(rsmhash_table_t *rhash, int size);
457 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
458 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
459 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
460 					void *cookie);
461 int rsm_disconnect(rsmseg_t *seg);
462 void rsmseg_unload(rsmseg_t *);
463 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
464 
465 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
466     rsm_intr_q_op_t opcode, rsm_addr_t src,
467     void *data, size_t size, rsm_intr_hand_arg_t arg);
468 
469 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
470 
471 rsm_node_id_t my_nodeid;
472 
/*
 * cookie, va, offsets and length for the barrier
 *
 * All of these are set up in rsm_attach:
 *   bar_va/bar_cookie - address and cookie returned by ddi_umem_alloc()
 *                       for the barrier failure page(s)
 *   barrier_offset    - set to 0
 *   barrier_size      - (max_segs + 1) * sizeof (rsm_gnum_t) rounded up to
 *                       PAGESIZE; the extra slot is used by the barrier
 *                       page itself
 *   max_segs          - "max-segments" property from rsm.conf, or
 *                       RSM_MAX_NUM_SEG when the property is absent or 0
 */
static rsm_gnum_t		*bar_va;
static ddi_umem_cookie_t	bar_cookie;
static off_t			barrier_offset;
static size_t			barrier_size;
static int			max_segs;

/*
 * cookie for the trash memory, allocated in rsm_attach with
 * DDI_UMEM_TRASH; used when remapping segments during force disconnects
 */
static ddi_umem_cookie_t	remap_cookie;

/* initialized to RSM_USER_APP_ID_BASE in rsm_attach */
static rsm_memseg_id_t	rsm_nextavail_segmentid;
484 
485 extern taskq_t *work_taskq;
486 extern char *taskq_name;
487 
488 static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */
489 
490 static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
491 rsmhash_table_t rsm_import_segs;		/* list of imported segs */
492 static rsmhash_table_t rsm_event_queues;	/* list of event queues */
493 
494 static	rsm_ipc_t	rsm_ipc;		/* ipc info */
495 
496 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
497 static list_head_t	rsm_suspend_list;
498 
499 /* list of descriptors for remote importers */
500 static importers_table_t importer_list;
501 
502 kmutex_t rsm_suspend_cvlock;
503 kcondvar_t rsm_suspend_cv;
504 
505 static kmutex_t rsm_lock;
506 
507 adapter_t loopback_adapter;
508 rsm_controller_attr_t loopback_attr;
509 
510 int rsmipc_send_controlmsg(path_t *path, int msgtype);
511 
512 void rsmka_init_loopback();
513 
514 int rsmka_null_seg_create(
515     rsm_controller_handle_t,
516     rsm_memseg_export_handle_t *,
517     size_t,
518     uint_t,
519     rsm_memory_local_t *,
520     rsm_resource_callback_t,
521     rsm_resource_callback_arg_t);
522 
523 int rsmka_null_seg_destroy(
524     rsm_memseg_export_handle_t);
525 
526 int rsmka_null_bind(
527     rsm_memseg_export_handle_t,
528     off_t,
529     rsm_memory_local_t *,
530     rsm_resource_callback_t,
531     rsm_resource_callback_arg_t);
532 
533 int rsmka_null_unbind(
534     rsm_memseg_export_handle_t,
535     off_t,
536     size_t);
537 
538 int rsmka_null_rebind(
539     rsm_memseg_export_handle_t,
540     off_t,
541     rsm_memory_local_t *,
542     rsm_resource_callback_t,
543     rsm_resource_callback_arg_t);
544 
545 int rsmka_null_publish(
546     rsm_memseg_export_handle_t,
547     rsm_access_entry_t [],
548     uint_t,
549     rsm_memseg_id_t,
550     rsm_resource_callback_t,
551     rsm_resource_callback_arg_t);
552 
553 
554 int rsmka_null_republish(
555     rsm_memseg_export_handle_t,
556     rsm_access_entry_t [],
557     uint_t,
558     rsm_resource_callback_t,
559     rsm_resource_callback_arg_t);
560 
561 int rsmka_null_unpublish(
562     rsm_memseg_export_handle_t);
563 
564 rsm_ops_t null_rsmpi_ops;
565 
/*
 * data and locks to keep track of total amount of exported memory
 *
 * rsm_pgcnt is reset to 0 in rsm_attach.  rsm_pgcnt_max is computed there
 * as the "max-exported-memory" percentage of maxmem; a value of 0 means
 * there is no fixed upper limit.  rsm_pgcnt_lock is initialized in _init
 * (presumably it guards rsm_pgcnt - confirm against rsm_inc_pgcnt and
 * rsm_dec_pgcnt).
 */
static	pgcnt_t		rsm_pgcnt;
static	pgcnt_t		rsm_pgcnt_max;	/* max allowed */
static	kmutex_t	rsm_pgcnt_lock;

/*
 * Value of the "enable-dynamic-reconfiguration" property (default 1);
 * when nonzero rsm_attach registers the DR callbacks.
 */
static	int		rsm_enable_dr;

static	char		loopback_str[] = "loopback";

/*
 * Number of hash buckets.  Set to RSM_HASHSZ in _init and then taken from
 * the "segment-hashtable-size" property in rsm_attach, where it sizes the
 * export/import segment hash tables and the importer_list buckets.
 */
int		rsm_hash_size;
578 
579 /*
580  * The locking model is as follows:
581  *
582  * Local operations:
583  *		find resource - grab reader lock on resource list
584  *		insert rc     - grab writer lock
585  *		delete rc     - grab writer lock and resource mutex
586  *		read/write    - no lock
587  *
588  * Remote invocations:
589  *		find resource - grab read lock and resource mutex
590  *
591  * State:
592  *		resource state - grab resource mutex
593  */
594 
595 int
596 _init(void)
597 {
598 	int e;
599 
600 	e = mod_install(&modlinkage);
601 	if (e != 0) {
602 		return (e);
603 	}
604 
605 	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
606 
607 	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
608 
609 
610 	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
611 
612 	rsm_hash_size = RSM_HASHSZ;
613 
614 	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
615 
616 	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
617 
618 	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
619 
620 	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
621 	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
622 
623 	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
624 	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
625 
626 	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
627 	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
628 
629 	rsm_ipc.count = RSMIPC_SZ;
630 	rsm_ipc.wanted = 0;
631 	rsm_ipc.sequence = 0;
632 
633 	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
634 
635 	for (e = 0; e < RSMIPC_SZ; e++) {
636 		rsmipc_slot_t *slot = &rsm_ipc.slots[e];
637 
638 		RSMIPC_SET(slot, RSMIPC_FREE);
639 		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
640 		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
641 	}
642 
643 	/*
644 	 * Initialize the suspend message list
645 	 */
646 	rsm_suspend_list.list_head = NULL;
647 	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
648 
649 	/*
650 	 * It is assumed here that configuration data is available
651 	 * during system boot since _init may be called at that time.
652 	 */
653 
654 	rsmka_pathmanager_init();
655 
656 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
657 	    "rsm: _init done\n"));
658 
659 	return (DDI_SUCCESS);
660 
661 }
662 
663 int
664 _info(struct modinfo *modinfop)
665 {
666 
667 	return (mod_info(&modlinkage, modinfop));
668 }
669 
670 int
671 _fini(void)
672 {
673 	int e;
674 
675 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
676 	    "rsm: _fini enter\n"));
677 
678 	/*
679 	 * The rsmka_modunloadok flag is simply used to help with
680 	 * the PIT testing. Make this flag 0 to disallow modunload.
681 	 */
682 	if (rsmka_modunloadok == 0)
683 		return (EBUSY);
684 
685 	/* rsm_detach will be called as a result of mod_remove */
686 	e = mod_remove(&modlinkage);
687 	if (e) {
688 		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
689 		    "Unable to fini RSM %x\n", e));
690 		return (e);
691 	}
692 
693 	rsmka_pathmanager_cleanup();
694 
695 	rw_destroy(&rsm_resource.rsmrc_lock);
696 
697 	rw_destroy(&rsm_export_segs.rsmhash_rw);
698 	rw_destroy(&rsm_import_segs.rsmhash_rw);
699 	rw_destroy(&rsm_event_queues.rsmhash_rw);
700 
701 	mutex_destroy(&importer_list.lock);
702 
703 	mutex_destroy(&rsm_ipc.lock);
704 	cv_destroy(&rsm_ipc.cv);
705 
706 	(void) mutex_destroy(&rsm_suspend_list.list_lock);
707 
708 	(void) mutex_destroy(&rsm_pgcnt_lock);
709 
710 	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
711 
712 	return (DDI_SUCCESS);
713 
714 }
715 
716 /*ARGSUSED1*/
717 static int
718 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
719 {
720 	minor_t	rnum;
721 	int	percent;
722 	int	ret;
723 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
724 
725 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
726 
727 	switch (cmd) {
728 	case DDI_ATTACH:
729 		break;
730 	case DDI_RESUME:
731 	default:
732 		DBG_PRINTF((category, RSM_ERR,
733 		    "rsm:rsm_attach - cmd not supported\n"));
734 		return (DDI_FAILURE);
735 	}
736 
737 	if (rsm_dip != NULL) {
738 		DBG_PRINTF((category, RSM_ERR,
739 		    "rsm:rsm_attach - supports only "
740 		    "one instance\n"));
741 		return (DDI_FAILURE);
742 	}
743 
744 	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
745 			    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
746 			    "enable-dynamic-reconfiguration", 1);
747 
748 	mutex_enter(&rsm_drv_data.drv_lock);
749 	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
750 	mutex_exit(&rsm_drv_data.drv_lock);
751 
752 	if (rsm_enable_dr) {
753 #ifdef	RSM_DRTEST
754 		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
755 		    (void *)NULL);
756 #else
757 		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
758 		    (void *)NULL);
759 #endif
760 		if (ret != 0) {
761 			mutex_exit(&rsm_drv_data.drv_lock);
762 			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
763 			    "reconfiguration setup failed\n");
764 			return (DDI_FAILURE);
765 		}
766 	}
767 
768 	mutex_enter(&rsm_drv_data.drv_lock);
769 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
770 	rsm_drv_data.drv_state = RSM_DRV_OK;
771 	cv_broadcast(&rsm_drv_data.drv_cv);
772 	mutex_exit(&rsm_drv_data.drv_lock);
773 
774 	/*
775 	 * page_list_read_lock();
776 	 * xx_setup();
777 	 * page_list_read_unlock();
778 	 */
779 
780 	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
781 			    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
782 			    "segment-hashtable-size", RSM_HASHSZ);
783 	if (rsm_hash_size == 0) {
784 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
785 		    "rsm: segment-hashtable-size in rsm.conf "
786 		    "must be greater than 0, defaulting to 128\n"));
787 		rsm_hash_size = RSM_HASHSZ;
788 	}
789 
790 	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
791 	    rsm_hash_size));
792 
793 	rsm_pgcnt = 0;
794 
795 	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
796 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
797 	    "max-exported-memory", 0);
798 	if (percent < 0) {
799 		DBG_PRINTF((category, RSM_ERR,
800 		    "rsm:rsm_attach not enough memory available to "
801 		    "export, or max-exported-memory set incorrectly.\n"));
802 		return (DDI_FAILURE);
803 	}
804 	/* 0 indicates no fixed upper limit. maxmem is the max	*/
805 	/* available pageable physical mem			*/
806 	rsm_pgcnt_max = (percent*maxmem)/100;
807 
808 	if (rsm_pgcnt_max > 0) {
809 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
810 		    "rsm: Available physical memory = %lu pages, "
811 		    "Max exportable memory = %lu pages",
812 		    maxmem, rsm_pgcnt_max));
813 	}
814 
815 	/*
816 	 * Create minor number
817 	 */
818 	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
819 		DBG_PRINTF((category, RSM_ERR,
820 		    "rsm: rsm_attach - Unable to get "
821 		    "minor number\n"));
822 		return (DDI_FAILURE);
823 	}
824 
825 	ASSERT(rnum == RSM_DRIVER_MINOR);
826 
827 	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
828 	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
829 		DBG_PRINTF((category, RSM_ERR,
830 		    "rsm: rsm_attach - unable to allocate "
831 		    "minor #\n"));
832 		return (DDI_FAILURE);
833 	}
834 
835 	rsm_dip = devi;
836 	/*
837 	 * Allocate the hashtables
838 	 */
839 	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
840 	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
841 
842 	importer_list.bucket = (importing_token_t **)
843 		kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *),
844 		    KM_SLEEP);
845 
846 	/*
847 	 * Allocate a resource struct
848 	 */
849 	{
850 		rsmresource_t *p;
851 
852 		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
853 
854 		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
855 
856 		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
857 	}
858 
859 	/*
860 	 * Based on the rsm.conf property max-segments, determine the maximum
861 	 * number of segments that can be exported/imported. This is then used
862 	 * to determine the size for barrier failure pages.
863 	 */
864 
865 	/* First get the max number of segments from the rsm.conf file */
866 	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
867 			    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
868 			    "max-segments", 0);
869 	if (max_segs == 0) {
870 		/* Use default number of segments */
871 		max_segs = RSM_MAX_NUM_SEG;
872 	}
873 
874 	/*
875 	 * Based on the max number of segments allowed, determine the barrier
876 	 * page size. add 1 to max_segs since the barrier page itself uses
877 	 * a slot
878 	 */
879 	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
880 			    PAGESIZE);
881 
882 	/*
883 	 * allocation of the barrier failure page
884 	 */
885 	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
886 				    DDI_UMEM_SLEEP, &bar_cookie);
887 
888 	/*
889 	 * Set the barrier_offset
890 	 */
891 	barrier_offset = 0;
892 
893 	/*
894 	 * Allocate a trash memory and get a cookie for it. This will be used
895 	 * when remapping segments during force disconnects. Allocate the
896 	 * trash memory with a large size which is page aligned.
897 	 */
898 	(void) ddi_umem_alloc((size_t)TRASHSIZE,
899 		    DDI_UMEM_TRASH, &remap_cookie);
900 
901 	/* initialize user segment id allocation variable */
902 	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
903 
904 	/*
905 	 * initialize the null_rsmpi_ops vector and the loopback adapter
906 	 */
907 	rsmka_init_loopback();
908 
909 
910 	ddi_report_dev(devi);
911 
912 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
913 
914 	return (DDI_SUCCESS);
915 }
916 
917 /*
918  * The call to mod_remove in the _fine routine will cause the system
919  * to call rsm_detach
920  */
921 /*ARGSUSED*/
922 static int
923 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
924 {
925 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
926 
927 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
928 
929 	switch (cmd) {
930 	case DDI_DETACH:
931 		break;
932 	default:
933 		DBG_PRINTF((category, RSM_ERR,
934 		    "rsm:rsm_detach - cmd %x not supported\n",
935 		    cmd));
936 		return (DDI_FAILURE);
937 	}
938 
939 	mutex_enter(&rsm_drv_data.drv_lock);
940 	while (rsm_drv_data.drv_state != RSM_DRV_OK)
941 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
942 	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
943 	mutex_exit(&rsm_drv_data.drv_lock);
944 
945 	/*
946 	 * Unregister the DR callback functions
947 	 */
948 	if (rsm_enable_dr) {
949 #ifdef	RSM_DRTEST
950 		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
951 		    (void *)NULL);
952 #else
953 		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
954 		    (void *)NULL);
955 #endif
956 	}
957 
958 	mutex_enter(&rsm_drv_data.drv_lock);
959 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
960 	rsm_drv_data.drv_state = RSM_DRV_NEW;
961 	mutex_exit(&rsm_drv_data.drv_lock);
962 
963 	ASSERT(rsm_suspend_list.list_head == NULL);
964 
965 	/*
966 	 * Release all resources, seglist, controller, ...
967 	 */
968 
969 	/* remove intersend queues */
970 	/* remove registered services */
971 
972 
973 	ddi_remove_minor_node(dip, DRIVER_NAME);
974 	rsm_dip = NULL;
975 
976 	/*
977 	 * Free minor zero resource
978 	 */
979 	{
980 		rsmresource_t *p;
981 
982 		p = rsmresource_free(RSM_DRIVER_MINOR);
983 		if (p) {
984 			mutex_destroy(&p->rsmrc_lock);
985 			kmem_free((void *)p, sizeof (*p));
986 		}
987 	}
988 
989 	/*
990 	 * Free resource table
991 	 */
992 
993 	rsmresource_destroy();
994 
995 	/*
996 	 * Free the hash tables
997 	 */
998 	rsmhash_free(&rsm_export_segs, rsm_hash_size);
999 	rsmhash_free(&rsm_import_segs, rsm_hash_size);
1000 
1001 	kmem_free((void *)importer_list.bucket,
1002 	    rsm_hash_size * sizeof (importing_token_t *));
1003 	importer_list.bucket = NULL;
1004 
1005 
1006 	/* free barrier page */
1007 	if (bar_cookie != NULL) {
1008 		ddi_umem_free(bar_cookie);
1009 	}
1010 	bar_va = NULL;
1011 	bar_cookie = NULL;
1012 
1013 	/*
1014 	 * Free the memory allocated for the trash
1015 	 */
1016 	if (remap_cookie != NULL) {
1017 		ddi_umem_free(remap_cookie);
1018 	}
1019 	remap_cookie = NULL;
1020 
1021 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1022 
1023 	return (DDI_SUCCESS);
1024 }
1025 
1026 /*ARGSUSED*/
1027 static int
1028 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1029 {
1030 	register int error;
1031 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1032 
1033 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1034 
1035 	switch (infocmd) {
1036 	case DDI_INFO_DEVT2DEVINFO:
1037 		if (rsm_dip == NULL)
1038 			error = DDI_FAILURE;
1039 		else {
1040 			*result = (void *)rsm_dip;
1041 			error = DDI_SUCCESS;
1042 		}
1043 		break;
1044 	case DDI_INFO_DEVT2INSTANCE:
1045 		*result = (void *)0;
1046 		error = DDI_SUCCESS;
1047 		break;
1048 	default:
1049 		error = DDI_FAILURE;
1050 	}
1051 
1052 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1053 	return (error);
1054 }
1055 
1056 adapter_t *
1057 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1058 {
1059 	adapter_t *adapter;
1060 	char adapter_devname[MAXNAMELEN];
1061 	int instance;
1062 	DBG_DEFINE(category,
1063 	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1064 
1065 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1066 
1067 	instance = msg->cnum;
1068 
1069 	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1070 		return (NULL);
1071 	}
1072 
1073 	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1074 		return (NULL);
1075 
1076 	if (strcmp(adapter_devname, "loopback") == 0)
1077 		return (&loopback_adapter);
1078 
1079 	adapter = rsmka_lookup_adapter(adapter_devname, instance);
1080 
1081 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1082 
1083 	return (adapter);
1084 }
1085 
1086 
1087 /*
1088  * *********************** Resource Number Management ********************
1089  * All resources are stored in a simple hash table. The table is an array
1090  * of pointers to resource blks. Each blk contains:
1091  *	base	- base number of this blk
1092  *	used	- number of used slots in this blk.
1093  *	blks    - array of pointers to resource items.
1094  * An entry in a resource blk is empty if it's NULL.
1095  *
1096  * We start with no resource array. Each time we run out of slots, we
1097  * reallocate a new larger array and copy the pointer to the new array and
1098  * a new resource blk is allocated and added to the hash table.
1099  *
1100  * The resource control block contains:
1101  *      root    - array of pointer of resource blks
1102  *      sz      - current size of array.
1103  *      len     - last valid entry in array.
1104  *
1105  * A search operation based on a resource number is as follows:
1106  *      index = rnum / RESOURCE_BLKSZ;
1107  *      ASSERT(index < resource_block.len);
1108  *      ASSERT(index < resource_block.sz);
1109  *	offset = rnum % RESOURCE_BLKSZ;
1110  *      ASSERT(offset >= resource_block.root[index]->base);
1111  *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1112  *	return resource_block.root[index]->blks[offset];
1113  *
1114  * A resource blk is freed with its used count reachs zero.
1115  */
1116 static int
1117 rsmresource_alloc(minor_t *rnum)
1118 {
1119 
1120 	/* search for available resource slot */
1121 	int i, j, empty = -1;
1122 	rsmresource_blk_t *blk;
1123 
1124 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1125 	    "rsmresource_alloc enter\n"));
1126 
1127 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1128 
1129 	/* Try to find an empty slot */
1130 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1131 		blk = rsm_resource.rsmrc_root[i];
1132 		if (blk != NULL && blk->rsmrcblk_avail > 0) {
1133 			/* found an empty slot in this blk */
1134 			for (j = 0; j < RSMRC_BLKSZ; j++) {
1135 				if (blk->rsmrcblk_blks[j] == NULL) {
1136 					*rnum = (minor_t)
1137 					    (j + (i * RSMRC_BLKSZ));
1138 					/*
1139 					 * obey gen page limits
1140 					 */
1141 					if (*rnum >= max_segs + 1) {
1142 						if (empty < 0) {
1143 							rw_exit(&rsm_resource.
1144 							    rsmrc_lock);
1145 							DBG_PRINTF((
1146 							    RSM_KERNEL_ALL,
1147 							    RSM_ERR,
1148 							    "rsmresource"
1149 							    "_alloc failed:"
1150 							    "not enough res"
1151 							    "%d\n", *rnum));
1152 							return (
1153 RSMERR_INSUFFICIENT_RESOURCES);
1154 						} else {
1155 							/* use empty slot */
1156 							break;
1157 						}
1158 
1159 					}
1160 
1161 					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1162 					blk->rsmrcblk_avail--;
1163 					rw_exit(&rsm_resource.rsmrc_lock);
1164 					DBG_PRINTF((RSM_KERNEL_ALL,
1165 					    RSM_DEBUG_VERBOSE,
1166 					    "rsmresource_alloc done\n"));
1167 					return (RSM_SUCCESS);
1168 				}
1169 			}
1170 		} else if (blk == NULL && empty < 0) {
1171 			/* remember first empty slot */
1172 			empty = i;
1173 		}
1174 	}
1175 
1176 	/* Couldn't find anything, allocate a new blk */
1177 	/*
1178 	 * Do we need to reallocate the root array
1179 	 */
1180 	if (empty < 0) {
1181 		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1182 			/*
1183 			 * Allocate new array and copy current stuff into it
1184 			 */
1185 			rsmresource_blk_t	**p;
1186 			uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1187 								RSMRC_BLKSZ;
1188 			/*
1189 			 * Don't allocate more that max valid rnum
1190 			 */
1191 			if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1192 			    max_segs + 1) {
1193 				rw_exit(&rsm_resource.rsmrc_lock);
1194 				return (RSMERR_INSUFFICIENT_RESOURCES);
1195 			}
1196 
1197 			p = (rsmresource_blk_t **)kmem_zalloc(
1198 			    newsz * sizeof (*p),
1199 			    KM_SLEEP);
1200 
1201 			if (rsm_resource.rsmrc_root) {
1202 				uint_t oldsz;
1203 
1204 				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1205 				    (int)sizeof (*p));
1206 
1207 				/*
1208 				 * Copy old data into new space and
1209 				 * free old stuff
1210 				 */
1211 				bcopy(rsm_resource.rsmrc_root, p, oldsz);
1212 				kmem_free(rsm_resource.rsmrc_root, oldsz);
1213 			}
1214 
1215 			rsm_resource.rsmrc_root = p;
1216 			rsm_resource.rsmrc_sz = (int)newsz;
1217 		}
1218 
1219 		empty = rsm_resource.rsmrc_len;
1220 		rsm_resource.rsmrc_len++;
1221 	}
1222 
1223 	/*
1224 	 * Allocate a new blk
1225 	 */
1226 	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1227 	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1228 	rsm_resource.rsmrc_root[empty] = blk;
1229 	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1230 
1231 	/*
1232 	 * Allocate slot
1233 	 */
1234 
1235 	*rnum = (minor_t)(empty * RSMRC_BLKSZ);
1236 
1237 	/*
1238 	 * watch out not to exceed bounds of barrier page
1239 	 */
1240 	if (*rnum >= max_segs + 1) {
1241 		rw_exit(&rsm_resource.rsmrc_lock);
1242 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1243 		    "rsmresource_alloc failed %d\n", *rnum));
1244 
1245 		return (RSMERR_INSUFFICIENT_RESOURCES);
1246 	}
1247 	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1248 
1249 
1250 	rw_exit(&rsm_resource.rsmrc_lock);
1251 
1252 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1253 	    "rsmresource_alloc done\n"));
1254 
1255 	return (RSM_SUCCESS);
1256 }
1257 
1258 static rsmresource_t *
1259 rsmresource_free(minor_t rnum)
1260 {
1261 
1262 	/* search for available resource slot */
1263 	int i, j;
1264 	rsmresource_blk_t *blk;
1265 	rsmresource_t *p;
1266 
1267 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1268 	    "rsmresource_free enter\n"));
1269 
1270 	i = (int)(rnum / RSMRC_BLKSZ);
1271 	j = (int)(rnum % RSMRC_BLKSZ);
1272 
1273 	if (i >= rsm_resource.rsmrc_len) {
1274 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1275 		    "rsmresource_free done\n"));
1276 		return (NULL);
1277 	}
1278 
1279 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1280 
1281 	ASSERT(rsm_resource.rsmrc_root);
1282 	ASSERT(i < rsm_resource.rsmrc_len);
1283 	ASSERT(i < rsm_resource.rsmrc_sz);
1284 	blk = rsm_resource.rsmrc_root[i];
1285 	if (blk == NULL) {
1286 		rw_exit(&rsm_resource.rsmrc_lock);
1287 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1288 		    "rsmresource_free done\n"));
1289 		return (NULL);
1290 	}
1291 
1292 	ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1293 
1294 	p = blk->rsmrcblk_blks[j];
1295 	if (p == RSMRC_RESERVED) {
1296 		p = NULL;
1297 	}
1298 
1299 	blk->rsmrcblk_blks[j] = NULL;
1300 	blk->rsmrcblk_avail++;
1301 	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1302 		/* free this blk */
1303 		kmem_free(blk, sizeof (*blk));
1304 		rsm_resource.rsmrc_root[i] = NULL;
1305 	}
1306 
1307 	rw_exit(&rsm_resource.rsmrc_lock);
1308 
1309 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1310 	    "rsmresource_free done\n"));
1311 
1312 	return (p);
1313 }
1314 
1315 static rsmresource_t *
1316 rsmresource_lookup(minor_t rnum, int lock)
1317 {
1318 	int i, j;
1319 	rsmresource_blk_t *blk;
1320 	rsmresource_t *p;
1321 
1322 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1323 	    "rsmresource_lookup enter\n"));
1324 
1325 	/* Find resource and lock it in READER mode */
1326 	/* search for available resource slot */
1327 
1328 	i = (int)(rnum / RSMRC_BLKSZ);
1329 	j = (int)(rnum % RSMRC_BLKSZ);
1330 
1331 	if (i >= rsm_resource.rsmrc_len) {
1332 		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1333 		    "rsmresource_lookup done\n"));
1334 		return (NULL);
1335 	}
1336 
1337 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1338 
1339 	blk = rsm_resource.rsmrc_root[i];
1340 	if (blk != NULL) {
1341 		ASSERT(i < rsm_resource.rsmrc_len);
1342 		ASSERT(i < rsm_resource.rsmrc_sz);
1343 
1344 		p = blk->rsmrcblk_blks[j];
1345 		if (lock == RSM_LOCK) {
1346 			if (p != RSMRC_RESERVED) {
1347 				mutex_enter(&p->rsmrc_lock);
1348 			} else {
1349 				p = NULL;
1350 			}
1351 		}
1352 	} else {
1353 		p = NULL;
1354 	}
1355 	rw_exit(&rsm_resource.rsmrc_lock);
1356 
1357 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1358 	    "rsmresource_lookup done\n"));
1359 
1360 	return (p);
1361 }
1362 
1363 static void
1364 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1365 {
1366 	/* Find resource and lock it in READER mode */
1367 	/* Caller can upgrade if need be */
1368 	/* search for available resource slot */
1369 	int i, j;
1370 	rsmresource_blk_t *blk;
1371 
1372 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1373 	    "rsmresource_insert enter\n"));
1374 
1375 	i = (int)(rnum / RSMRC_BLKSZ);
1376 	j = (int)(rnum % RSMRC_BLKSZ);
1377 
1378 	p->rsmrc_type = type;
1379 	p->rsmrc_num = rnum;
1380 
1381 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1382 
1383 	ASSERT(rsm_resource.rsmrc_root);
1384 	ASSERT(i < rsm_resource.rsmrc_len);
1385 	ASSERT(i < rsm_resource.rsmrc_sz);
1386 
1387 	blk = rsm_resource.rsmrc_root[i];
1388 	ASSERT(blk);
1389 
1390 	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1391 
1392 	blk->rsmrcblk_blks[j] = p;
1393 
1394 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1395 	    "rsmresource_insert done\n"));
1396 
1397 	rw_exit(&rsm_resource.rsmrc_lock);
1398 }
1399 
1400 static void
1401 rsmresource_destroy()
1402 {
1403 	int i, j;
1404 
1405 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1406 	    "rsmresource_destroy enter\n"));
1407 
1408 	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1409 
1410 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1411 		rsmresource_blk_t	*blk;
1412 
1413 		blk = rsm_resource.rsmrc_root[i];
1414 		if (blk == NULL) {
1415 			continue;
1416 		}
1417 		for (j = 0; j < RSMRC_BLKSZ; j++) {
1418 			if (blk->rsmrcblk_blks[j] != NULL) {
1419 				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1420 				    "Not null slot %d, %lx\n", j,
1421 				    (size_t)blk->rsmrcblk_blks[j]));
1422 			}
1423 		}
1424 		kmem_free(blk, sizeof (*blk));
1425 		rsm_resource.rsmrc_root[i] = NULL;
1426 	}
1427 	if (rsm_resource.rsmrc_root) {
1428 		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1429 		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1430 		rsm_resource.rsmrc_root = NULL;
1431 		rsm_resource.rsmrc_len = 0;
1432 		rsm_resource.rsmrc_sz = 0;
1433 	}
1434 
1435 	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1436 	    "rsmresource_destroy done\n"));
1437 
1438 	rw_exit(&rsm_resource.rsmrc_lock);
1439 }
1440 
1441 
1442 /* ******************** Generic Key Hash Table Management ********* */
1443 static rsmresource_t *
1444 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1445     rsm_resource_state_t state)
1446 {
1447 	rsmresource_t	*p;
1448 	uint_t		hashval;
1449 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1450 
1451 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1452 
1453 	hashval = rsmhash(key);
1454 
1455 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1456 	    key, hashval));
1457 
1458 	rw_enter(&rhash->rsmhash_rw, RW_READER);
1459 
1460 	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1461 
1462 	for (; p; p = p->rsmrc_next) {
1463 		if (p->rsmrc_key == key) {
1464 			/* acquire resource lock */
1465 			RSMRC_LOCK(p);
1466 			break;
1467 		}
1468 	}
1469 
1470 	rw_exit(&rhash->rsmhash_rw);
1471 
1472 	if (p != NULL && p->rsmrc_state != state) {
1473 		/* state changed, release lock and return null */
1474 		RSMRC_UNLOCK(p);
1475 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1476 		    "rsmhash_lookup done: state changed\n"));
1477 		return (NULL);
1478 	}
1479 
1480 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1481 
1482 	return (p);
1483 }
1484 
1485 static void
1486 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1487 {
1488 	rsmresource_t		*p, **back;
1489 	uint_t			hashval;
1490 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1491 
1492 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1493 
1494 	hashval = rsmhash(rcelm->rsmrc_key);
1495 
1496 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1497 	    rcelm->rsmrc_key, hashval));
1498 
1499 	/*
1500 	 * It's ok not to find the segment.
1501 	 */
1502 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1503 
1504 	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1505 
1506 	for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1507 		if (p == rcelm) {
1508 			*back = rcelm->rsmrc_next;
1509 			break;
1510 		}
1511 	}
1512 
1513 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1514 
1515 	rw_exit(&rhash->rsmhash_rw);
1516 }
1517 
1518 static int
1519 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1520     int dup_check, rsm_resource_state_t state)
1521 {
1522 	rsmresource_t	*p = NULL, **bktp;
1523 	uint_t		hashval;
1524 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1525 
1526 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1527 
1528 	/* lock table */
1529 	rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1530 
1531 	/*
1532 	 * If the current resource state is other than the state passed in
1533 	 * then the resource is (probably) already on the list. eg. for an
1534 	 * import segment if the state is not RSM_STATE_NEW then it's on the
1535 	 * list already.
1536 	 */
1537 	RSMRC_LOCK(new);
1538 	if (new->rsmrc_state != state) {
1539 		RSMRC_UNLOCK(new);
1540 		rw_exit(&rhash->rsmhash_rw);
1541 		return (RSMERR_BAD_SEG_HNDL);
1542 	}
1543 
1544 	hashval = rsmhash(key);
1545 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1546 
1547 	if (dup_check) {
1548 		/*
1549 		 * Used for checking export segments; don't want to have
1550 		 * the same key used for multiple segments.
1551 		 */
1552 
1553 		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1554 
1555 		for (; p; p = p->rsmrc_next) {
1556 			if (p->rsmrc_key == key) {
1557 				RSMRC_UNLOCK(new);
1558 				break;
1559 			}
1560 		}
1561 	}
1562 
1563 	if (p == NULL) {
1564 		/* Key doesn't exist, add it */
1565 
1566 		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1567 
1568 		new->rsmrc_key = key;
1569 		new->rsmrc_next = *bktp;
1570 		*bktp = new;
1571 	}
1572 
1573 	rw_exit(&rhash->rsmhash_rw);
1574 
1575 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1576 
1577 	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1578 }
1579 
1580 /*
1581  * XOR each byte of the key.
1582  */
1583 static uint_t
1584 rsmhash(rsm_memseg_id_t key)
1585 {
1586 	uint_t	hash = key;
1587 
1588 	hash ^=  (key >> 8);
1589 	hash ^=  (key >> 16);
1590 	hash ^=  (key >> 24);
1591 
1592 	return (hash % rsm_hash_size);
1593 
1594 }
1595 
1596 /*
1597  * generic function to get a specific bucket
1598  */
1599 static void *
1600 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1601 {
1602 
1603 	if (rhash->bucket == NULL)
1604 		return (NULL);
1605 	else
1606 		return ((void *)rhash->bucket[hashval]);
1607 }
1608 
1609 /*
1610  * generic function to get a specific bucket's address
1611  */
1612 static void **
1613 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1614 {
1615 	if (rhash->bucket == NULL)
1616 		return (NULL);
1617 	else
1618 		return ((void **)&(rhash->bucket[hashval]));
1619 }
1620 
1621 /*
1622  * generic function to alloc a hash table
1623  */
1624 static void
1625 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1626 {
1627 	rhash->bucket = (rsmresource_t **)
1628 	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1629 }
1630 
1631 /*
1632  * generic function to free a hash table
1633  */
1634 static void
1635 rsmhash_free(rsmhash_table_t *rhash, int size)
1636 {
1637 
1638 	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1639 	rhash->bucket = NULL;
1640 
1641 }
1642 /* *********************** Exported Segment Key Management ************ */
1643 
/*
 * Export segments live in rsm_export_segs. Adding checks for a duplicate
 * key and expects the segment to be in RSM_STATE_BIND; lookup only
 * returns segments that are in RSM_STATE_EXPORT.
 *
 * Macro arguments are parenthesized, and the lookup result is wrapped as
 * a whole so that the cast applies to the call and the macro can be
 * followed directly by "->".
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading. No duplicate-key check is done for import segments,
 * and the segment is expected to be in RSM_STATE_NEW.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1666 
1667 /*
1668  *	#define	rsmimport_lookup(key)	\
1669  *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1670  */
1671 
1672 /*
1673  * increase the ref count and make the import segment point to the
1674  * shared data structure. Return a pointer to the share data struct
1675  * and the shared data struct is locked upon return
1676  */
1677 static rsm_import_share_t *
1678 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1679     rsmseg_t *segp)
1680 {
1681 	uint_t		hash;
1682 	rsmresource_t		*p;
1683 	rsm_import_share_t	*shdatap;
1684 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1685 
1686 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1687 
1688 	hash = rsmhash(key);
1689 	/* lock table */
1690 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1691 	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1692 	    key, hash));
1693 
1694 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1695 
1696 	for (; p; p = p->rsmrc_next) {
1697 		/*
1698 		 * Look for an entry that is importing the same exporter
1699 		 * with the share data structure allocated.
1700 		 */
1701 		if ((p->rsmrc_key == key) &&
1702 		    (p->rsmrc_node == node) &&
1703 		    (p->rsmrc_adapter == adapter) &&
1704 		    (((rsmseg_t *)p)->s_share != NULL)) {
1705 			shdatap = ((rsmseg_t *)p)->s_share;
1706 			break;
1707 		}
1708 	}
1709 
1710 	if (p == NULL) {
1711 		/* we are the first importer, create the shared data struct */
1712 		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1713 		shdatap->rsmsi_state = RSMSI_STATE_NEW;
1714 		shdatap->rsmsi_segid = key;
1715 		shdatap->rsmsi_node = node;
1716 		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1717 		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1718 	}
1719 
1720 	rsmseglock_acquire(segp);
1721 
1722 	/* we grab the shared lock before returning from this function */
1723 	mutex_enter(&shdatap->rsmsi_lock);
1724 
1725 	shdatap->rsmsi_refcnt++;
1726 	segp->s_share = shdatap;
1727 
1728 	rsmseglock_release(segp);
1729 
1730 	rw_exit(&rsm_import_segs.rsmhash_rw);
1731 
1732 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1733 
1734 	return (shdatap);
1735 }
1736 
1737 /*
1738  * the shared data structure should be locked before calling
1739  * rsmsharecv_signal().
1740  * Change the state and signal any waiting segments.
1741  */
1742 void
1743 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1744 {
1745 	ASSERT(rsmsharelock_held(seg));
1746 
1747 	if (seg->s_share->rsmsi_state == oldstate) {
1748 		seg->s_share->rsmsi_state = newstate;
1749 		cv_broadcast(&seg->s_share->rsmsi_cv);
1750 	}
1751 }
1752 
1753 /*
1754  * Add to the hash table
1755  */
1756 static void
1757 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1758     void *cookie)
1759 {
1760 
1761 	importing_token_t	*head;
1762 	importing_token_t	*new_token;
1763 	int			index;
1764 
1765 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1766 
1767 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1768 
1769 	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1770 	new_token->importing_node = node;
1771 	new_token->key = key;
1772 	new_token->import_segment_cookie = cookie;
1773 	new_token->importing_adapter_hwaddr = hwaddr;
1774 
1775 	index = rsmhash(key);
1776 
1777 	mutex_enter(&importer_list.lock);
1778 
1779 	head = importer_list.bucket[index];
1780 	importer_list.bucket[index] = new_token;
1781 	new_token->next = head;
1782 	mutex_exit(&importer_list.lock);
1783 
1784 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1785 }
1786 
1787 static void
1788 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1789 {
1790 
1791 	importing_token_t	*prev, *token = NULL;
1792 	int			index;
1793 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1794 
1795 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1796 
1797 	index = rsmhash(key);
1798 
1799 	mutex_enter(&importer_list.lock);
1800 
1801 	token = importer_list.bucket[index];
1802 
1803 	prev = token;
1804 	while (token != NULL) {
1805 		if (token->importing_node == node &&
1806 		    token->import_segment_cookie == cookie) {
1807 			if (prev == token)
1808 				importer_list.bucket[index] = token->next;
1809 			else
1810 				prev->next = token->next;
1811 			kmem_free((void *)token, sizeof (*token));
1812 			break;
1813 		} else {
1814 			prev = token;
1815 			token = token->next;
1816 		}
1817 	}
1818 
1819 	mutex_exit(&importer_list.lock);
1820 
1821 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1822 
1823 
1824 }
1825 
1826 /* **************************Segment Structure Management ************* */
1827 
1828 /*
1829  * Free segment structure
1830  */
1831 static void
1832 rsmseg_free(rsmseg_t *seg)
1833 {
1834 
1835 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1836 
1837 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1838 
1839 	/* need to take seglock here to avoid race with rsmmap_unmap() */
1840 	rsmseglock_acquire(seg);
1841 	if (seg->s_ckl != NULL) {
1842 		/* Segment is still busy */
1843 		seg->s_state = RSM_STATE_END;
1844 		rsmseglock_release(seg);
1845 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1846 		    "rsmseg_free done\n"));
1847 		return;
1848 	}
1849 
1850 	rsmseglock_release(seg);
1851 
1852 	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1853 
1854 	/*
1855 	 * If it's an importer decrement the refcount
1856 	 * and if its down to zero free the shared data structure.
1857 	 * This is where failures during rsm_connect() are unrefcounted
1858 	 */
1859 	if (seg->s_share != NULL) {
1860 
1861 		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1862 
1863 		rsmsharelock_acquire(seg);
1864 
1865 		ASSERT(seg->s_share->rsmsi_refcnt > 0);
1866 
1867 		seg->s_share->rsmsi_refcnt--;
1868 
1869 		if (seg->s_share->rsmsi_refcnt == 0) {
1870 			rsmsharelock_release(seg);
1871 			mutex_destroy(&seg->s_share->rsmsi_lock);
1872 			cv_destroy(&seg->s_share->rsmsi_cv);
1873 			kmem_free((void *)(seg->s_share),
1874 			    sizeof (rsm_import_share_t));
1875 		} else {
1876 			rsmsharelock_release(seg);
1877 		}
1878 		/*
1879 		 * The following needs to be done after any
1880 		 * rsmsharelock calls which use seg->s_share.
1881 		 */
1882 		seg->s_share = NULL;
1883 	}
1884 
1885 	cv_destroy(&seg->s_cv);
1886 	mutex_destroy(&seg->s_lock);
1887 	rsmacl_free(seg->s_acl, seg->s_acl_len);
1888 	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1889 	if (seg->s_adapter)
1890 		rsmka_release_adapter(seg->s_adapter);
1891 
1892 	kmem_free((void *)seg, sizeof (*seg));
1893 
1894 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1895 
1896 }
1897 
1898 
1899 static rsmseg_t *
1900 rsmseg_alloc(minor_t num, struct cred *cred)
1901 {
1902 	rsmseg_t	*new;
1903 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1904 
1905 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1906 	/*
1907 	 * allocate memory for new segment. This should be a segkmem cache.
1908 	 */
1909 	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1910 
1911 	new->s_state = RSM_STATE_NEW;
1912 	new->s_minor	= num;
1913 	new->s_acl_len	= 0;
1914 	new->s_cookie = NULL;
1915 	new->s_adapter = NULL;
1916 
1917 	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1918 	/* we don't have a key yet, will set at export/connect */
1919 	new->s_uid  = crgetuid(cred);
1920 	new->s_gid  = crgetgid(cred);
1921 
1922 	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1923 	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1924 
1925 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1926 
1927 	return (new);
1928 }
1929 
1930 /* ******************************** Driver Open/Close/Poll *************** */
1931 
1932 /*ARGSUSED1*/
1933 static int
1934 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1935 {
1936 	minor_t rnum;
1937 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1938 
1939 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1940 	/*
1941 	 * Char only
1942 	 */
1943 	if (otyp != OTYP_CHR) {
1944 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1945 		return (EINVAL);
1946 	}
1947 
1948 	/*
1949 	 * Only zero can be opened, clones are used for resources.
1950 	 */
1951 	if (getminor(*devp) != RSM_DRIVER_MINOR) {
1952 		DBG_PRINTF((category, RSM_ERR,
1953 		    "rsm_open: bad minor %d\n", getminor(*devp)));
1954 		return (ENODEV);
1955 	}
1956 
1957 	if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1958 		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1959 		return (EPERM);
1960 	}
1961 
1962 	if (!(flag & FWRITE)) {
1963 		/*
1964 		 * The library function _rsm_librsm_init calls open for
1965 		 * /dev/rsm with flag set to O_RDONLY.  We want a valid
1966 		 * file descriptor to be returned for minor device zero.
1967 		 */
1968 
1969 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1970 		    "rsm_open RDONLY done\n"));
1971 		return (DDI_SUCCESS);
1972 	}
1973 
1974 	/*
1975 	 * - allocate new minor number and segment.
1976 	 * - add segment to list of all segments.
1977 	 * - set minordev data to segment
1978 	 * - update devp argument to new device
1979 	 * - update s_cred to cred; make sure you do crhold(cred);
1980 	 */
1981 
1982 	/* allocate a new resource number */
1983 	if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1984 		/*
1985 		 * We will bind this minor to a specific resource in first
1986 		 * ioctl
1987 		 */
1988 		*devp = makedevice(getmajor(*devp), rnum);
1989 	} else {
1990 		return (EAGAIN);
1991 	}
1992 
1993 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1994 	return (DDI_SUCCESS);
1995 }
1996 
/*
 * rsmseg_close
 *
 * Description:	tear a segment down to RSM_STATE_NEW and then either free it
 *		or park it in RSM_STATE_ZOMBIE.
 *
 * Parameters:	rsmseg_t *seg;	 segment being closed
 *		int force_flag;	 0 when called from rsm_close; non-zero
 *				 when called for force-destroy processing,
 *				 in which case the segment is left in
 *				 RSM_STATE_ZOMBIE instead of being freed.
 *
 * Note:	with force_flag == 1 the function returns early, without
 *		changing the state, if rsm_unbind fails.
 */
static void
rsmseg_close(rsmseg_t *seg, int force_flag)
{
	int e = RSM_SUCCESS;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));

	rsmseglock_acquire(seg);
	if (!force_flag && (seg->s_hdr.rsmrc_type ==
	    RSM_RESOURCE_EXPORT_SEGMENT)) {
		/*
		 * If we are processing rsm_close, wait for force_destroy
		 * processing to complete, since force_destroy processing
		 * needs to finish first before we can free the segment.
		 * force_destroy is only for export segments.
		 */
		while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
			cv_wait(&seg->s_cv, &seg->s_lock);
		}
	}
	rsmseglock_release(seg);

	/* It's ok to read the state without a lock */
	switch (seg->s_state) {
	case RSM_STATE_EXPORT:
	case RSM_STATE_EXPORT_QUIESCING:
	case RSM_STATE_EXPORT_QUIESCED:
		/* the result is overwritten by rsm_unbind's result below */
		e = rsm_unpublish(seg, 1);
		/* FALLTHRU */
	case RSM_STATE_BIND_QUIESCED:
		/* FALLTHRU */
	case RSM_STATE_BIND:
		e = rsm_unbind(seg);
		/* a failed unbind during force-destroy leaves seg untouched */
		if (e != RSM_SUCCESS && force_flag == 1)
			return;
		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
		/* FALLTHRU */
	case RSM_STATE_NEW_QUIESCED:
		/* force the state to NEW and wake anyone waiting on s_cv */
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_NEW;
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		break;
	case RSM_STATE_NEW:
		break;
	case RSM_STATE_ZOMBIE:
		/*
		 * Segments in this state have been removed from the
		 * exported segments list and have been unpublished
		 * and unbound. These segments were removed during
		 * a callback to rsm_export_force_destroy, which
		 * is called for the purpose of unlocking these
		 * exported memory segments when a process exits but
		 * leaves the segments locked down because rsm_close
		 * is not called for the segments. This can happen
		 * when a process calls fork or exec and then exits.
		 * Once the segments are in the ZOMBIE state, all that
		 * remains is to destroy them when rsm_close is called.
		 * This is done here. Thus, for such segments the
		 * state is changed to NEW so that later in this
		 * function rsmseg_free is called.
		 */
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_NEW;
		rsmseglock_release(seg);
		break;
	case RSM_STATE_MAP_QUIESCE:
	case RSM_STATE_ACTIVE:
		/* Disconnect will handle the unmap */
	case RSM_STATE_CONN_QUIESCE:
	case RSM_STATE_CONNECT:
	case RSM_STATE_DISCONNECT:
		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
		(void) rsm_disconnect(seg);
		break;
	case RSM_STATE_MAPPING:
		/*FALLTHRU*/
	case RSM_STATE_END:
		DBG_PRINTF((category, RSM_ERR,
		    "Invalid segment state %d in rsm_close\n", seg->s_state));
		break;
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "Invalid segment state %d in rsm_close\n", seg->s_state));
		break;
	}

	/*
	 * check state.
	 * - make sure you do crfree(s_cred);
	 * release segment and minor number
	 */
	ASSERT(seg->s_state == RSM_STATE_NEW);

	/*
	 * The export_force_destroy callback exists to unlock the exported
	 * segments of a process that did a fork or exec and then exited.
	 * It calls this function with force_flag set to 1, which means the
	 * segment state must be converted to ZOMBIE. In that state the
	 * segment still exists but has been unlocked, and the only
	 * operation allowed on it is to destroy it on an rsm_close.
	 */
	if (force_flag) {
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_ZOMBIE;
		rsmseglock_release(seg);
	} else {
		rsmseg_free(seg);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
}
2112 
2113 static int
2114 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2115 {
2116 	minor_t	rnum = getminor(dev);
2117 	rsmresource_t *res;
2118 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2119 
2120 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2121 
2122 	flag = flag; cred = cred;
2123 
2124 	if (otyp != OTYP_CHR)
2125 		return (EINVAL);
2126 
2127 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2128 
2129 	/*
2130 	 * At this point we are the last reference to the resource.
2131 	 * Free resource number from resource table.
2132 	 * It's ok to remove number before we free the segment.
2133 	 * We need to lock the resource to protect against remote calls.
2134 	 */
2135 	if (rnum == RSM_DRIVER_MINOR ||
2136 	    (res = rsmresource_free(rnum)) == NULL) {
2137 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2138 		return (DDI_SUCCESS);
2139 	}
2140 
2141 	switch (res->rsmrc_type) {
2142 	case RSM_RESOURCE_EXPORT_SEGMENT:
2143 	case RSM_RESOURCE_IMPORT_SEGMENT:
2144 		rsmseg_close((rsmseg_t *)res, 0);
2145 		break;
2146 	case RSM_RESOURCE_BAR:
2147 		DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2148 		break;
2149 	default:
2150 		break;
2151 	}
2152 
2153 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2154 
2155 	return (DDI_SUCCESS);
2156 }
2157 
2158 /*
2159  * rsm_inc_pgcnt
2160  *
2161  * Description: increment rsm page counter.
2162  *
2163  * Parameters:	pgcnt_t	pnum;	number of pages to be used
2164  *
2165  * Returns:	RSM_SUCCESS	if memory limit not exceeded
2166  *		ENOSPC		if memory limit exceeded. In this case, the
2167  *				page counter remains unchanged.
2168  *
2169  */
2170 static int
2171 rsm_inc_pgcnt(pgcnt_t pnum)
2172 {
2173 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2174 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2175 		return (RSM_SUCCESS);
2176 	}
2177 
2178 	mutex_enter(&rsm_pgcnt_lock);
2179 
2180 	if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2181 		/* ensure that limits have not been exceeded */
2182 		mutex_exit(&rsm_pgcnt_lock);
2183 		return (RSMERR_INSUFFICIENT_MEM);
2184 	}
2185 
2186 	rsm_pgcnt += pnum;
2187 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2188 	    rsm_pgcnt));
2189 	mutex_exit(&rsm_pgcnt_lock);
2190 
2191 	return (RSM_SUCCESS);
2192 }
2193 
2194 /*
2195  * rsm_dec_pgcnt
2196  *
2197  * Description:	decrement rsm page counter.
2198  *
2199  * Parameters:	pgcnt_t	pnum;	number of pages freed
2200  *
2201  */
2202 static void
2203 rsm_dec_pgcnt(pgcnt_t pnum)
2204 {
2205 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2206 
2207 	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2208 		return;
2209 	}
2210 
2211 	mutex_enter(&rsm_pgcnt_lock);
2212 	ASSERT(rsm_pgcnt >= pnum);
2213 	rsm_pgcnt -= pnum;
2214 	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2215 	    rsm_pgcnt));
2216 	mutex_exit(&rsm_pgcnt_lock);
2217 }
2218 
/*
 * Callback operations vector passed to umem_lockmemory() by
 * rsm_bind_pages().  It registers rsm_export_force_destroy as the
 * callback for the locked-down export memory.
 */
static struct umem_callback_ops rsm_as_ops = {
	UMEM_CALLBACK_VERSION, /* version number */
	rsm_export_force_destroy,
};
2223 
2224 static int
2225 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2226     proc_t *procp)
2227 {
2228 	int error = RSM_SUCCESS;
2229 	ulong_t pnum;
2230 	struct umem_callback_ops *callbackops = &rsm_as_ops;
2231 
2232 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2233 
2234 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2235 
2236 	/*
2237 	 * Make sure vaddr and len are aligned on a page boundary
2238 	 */
2239 	if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2240 		return (RSMERR_BAD_ADDR);
2241 	}
2242 
2243 	if (len & (PAGESIZE - 1)) {
2244 		return (RSMERR_BAD_LENGTH);
2245 	}
2246 
2247 	/*
2248 	 * Find number of pages
2249 	 */
2250 	pnum = btopr(len);
2251 	error = rsm_inc_pgcnt(pnum);
2252 	if (error != RSM_SUCCESS) {
2253 		DBG_PRINTF((category, RSM_ERR,
2254 		    "rsm_bind_pages:mem limit exceeded\n"));
2255 		return (RSMERR_INSUFFICIENT_MEM);
2256 	}
2257 
2258 	error = umem_lockmemory(vaddr, len,
2259 	    DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2260 	    cookie,
2261 	    callbackops, procp);
2262 
2263 	if (error) {
2264 		rsm_dec_pgcnt(pnum);
2265 		DBG_PRINTF((category, RSM_ERR,
2266 		    "rsm_bind_pages:ddi_umem_lock failed\n"));
2267 		/*
2268 		 * ddi_umem_lock, in the case of failure, returns one of
2269 		 * the following three errors. These are translated into
2270 		 * the RSMERR namespace and returned.
2271 		 */
2272 		if (error == EFAULT)
2273 			return (RSMERR_BAD_ADDR);
2274 		else if (error == EACCES)
2275 			return (RSMERR_PERM_DENIED);
2276 		else
2277 			return (RSMERR_INSUFFICIENT_MEM);
2278 	}
2279 
2280 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2281 
2282 	return (error);
2283 
2284 }
2285 
2286 static int
2287 rsm_unbind_pages(rsmseg_t *seg)
2288 {
2289 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2290 
2291 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2292 
2293 	ASSERT(rsmseglock_held(seg));
2294 
2295 	if (seg->s_cookie != NULL) {
2296 		/* unlock address range */
2297 		ddi_umem_unlock(seg->s_cookie);
2298 		rsm_dec_pgcnt(btopr(seg->s_len));
2299 		seg->s_cookie = NULL;
2300 	}
2301 
2302 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2303 
2304 	return (RSM_SUCCESS);
2305 }
2306 
2307 
2308 static int
2309 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2310 {
2311 	int e;
2312 	adapter_t *adapter;
2313 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2314 
2315 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2316 
2317 	adapter = rsm_getadapter(msg, mode);
2318 	if (adapter == NULL) {
2319 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2320 		    "rsm_bind done:no adapter\n"));
2321 		return (RSMERR_CTLR_NOT_PRESENT);
2322 	}
2323 
2324 	/* lock address range */
2325 	if (msg->vaddr == NULL) {
2326 		rsmka_release_adapter(adapter);
2327 		DBG_PRINTF((category, RSM_ERR,
2328 		    "rsm: rsm_bind done: invalid vaddr\n"));
2329 		return (RSMERR_BAD_ADDR);
2330 	}
2331 	if (msg->len <= 0) {
2332 		rsmka_release_adapter(adapter);
2333 		DBG_PRINTF((category, RSM_ERR,
2334 		    "rsm_bind: invalid length\n"));
2335 		return (RSMERR_BAD_LENGTH);
2336 	}
2337 
2338 	/* Lock segment */
2339 	rsmseglock_acquire(seg);
2340 
2341 	while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2342 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2343 			DBG_PRINTF((category, RSM_DEBUG,
2344 			    "rsm_bind done: cv_wait INTERRUPTED"));
2345 			rsmka_release_adapter(adapter);
2346 			rsmseglock_release(seg);
2347 			return (RSMERR_INTERRUPTED);
2348 		}
2349 	}
2350 
2351 	ASSERT(seg->s_state == RSM_STATE_NEW);
2352 
2353 	ASSERT(seg->s_cookie == NULL);
2354 
2355 	e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2356 	if (e == RSM_SUCCESS) {
2357 		seg->s_flags |= RSM_USER_MEMORY;
2358 		if (msg->perm & RSM_ALLOW_REBIND) {
2359 			seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2360 		}
2361 		if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2362 			seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2363 		}
2364 		seg->s_region.r_vaddr = msg->vaddr;
2365 		/*
2366 		 * Set the s_pid value in the segment structure. This is used
2367 		 * to identify exported segments belonging to a particular
2368 		 * process so that when the process exits, these segments can
2369 		 * be unlocked forcefully even if rsm_close is not called on
2370 		 * process exit since there maybe other processes referencing
2371 		 * them (for example on a fork or exec).
2372 		 * The s_pid value is also used to authenticate the process
2373 		 * doing a publish or unpublish on the export segment. Only
2374 		 * the creator of the export segment has a right to do a
2375 		 * publish or unpublish and unbind on the segment.
2376 		 */
2377 		seg->s_pid = ddi_get_pid();
2378 		seg->s_len = msg->len;
2379 		seg->s_state = RSM_STATE_BIND;
2380 		seg->s_adapter = adapter;
2381 		seg->s_proc = curproc;
2382 	} else {
2383 		rsmka_release_adapter(adapter);
2384 		DBG_PRINTF((category, RSM_WARNING,
2385 		    "unable to lock down pages\n"));
2386 	}
2387 
2388 	msg->rnum = seg->s_minor;
2389 	/* Unlock segment */
2390 	rsmseglock_release(seg);
2391 
2392 	if (e == RSM_SUCCESS) {
2393 		/* copyout the resource number */
2394 #ifdef _MULTI_DATAMODEL
2395 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2396 			rsm_ioctlmsg32_t msg32;
2397 
2398 			msg32.rnum = msg->rnum;
2399 			if (ddi_copyout((caddr_t)&msg32.rnum,
2400 			    (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2401 			    sizeof (minor_t), mode)) {
2402 				rsmka_release_adapter(adapter);
2403 				e = RSMERR_BAD_ADDR;
2404 			}
2405 		}
2406 #endif
2407 		if (ddi_copyout((caddr_t)&msg->rnum,
2408 		    (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2409 		    sizeof (minor_t), mode)) {
2410 			rsmka_release_adapter(adapter);
2411 			e = RSMERR_BAD_ADDR;
2412 		}
2413 	}
2414 
2415 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2416 
2417 	return (e);
2418 }
2419 
2420 static void
2421 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2422     rsm_memseg_id_t ex_segid,
2423     ddi_umem_cookie_t cookie)
2424 
2425 {
2426 	rsmresource_t	*p = NULL;
2427 	rsmhash_table_t *rhash = &rsm_import_segs;
2428 	uint_t		index;
2429 
2430 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2431 	    "rsm_remap_local_importers enter\n"));
2432 
2433 	index = rsmhash(ex_segid);
2434 
2435 	rw_enter(&rhash->rsmhash_rw, RW_READER);
2436 
2437 	p = rsmhash_getbkt(rhash, index);
2438 
2439 	for (; p; p = p->rsmrc_next) {
2440 		rsmseg_t *seg = (rsmseg_t *)p;
2441 		rsmseglock_acquire(seg);
2442 		/*
2443 		 * Change the s_cookie value of only the local importers
2444 		 * which have been mapped (in state RSM_STATE_ACTIVE).
2445 		 * Note that there is no need to change the s_cookie value
2446 		 * if the imported segment is in RSM_STATE_MAPPING since
2447 		 * eventually the s_cookie will be updated via the mapping
2448 		 * functionality.
2449 		 */
2450 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2451 		    (seg->s_state == RSM_STATE_ACTIVE)) {
2452 			seg->s_cookie = cookie;
2453 		}
2454 		rsmseglock_release(seg);
2455 	}
2456 	rw_exit(&rhash->rsmhash_rw);
2457 
2458 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2459 	    "rsm_remap_local_importers done\n"));
2460 }
2461 
2462 static int
2463 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2464 {
2465 	int e;
2466 	adapter_t *adapter;
2467 	ddi_umem_cookie_t cookie;
2468 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2469 
2470 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2471 
2472 	/* Check for permissions to rebind */
2473 	if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2474 		return (RSMERR_REBIND_NOT_ALLOWED);
2475 	}
2476 
2477 	if (seg->s_pid != ddi_get_pid() &&
2478 	    ddi_get_pid() != 0) {
2479 		DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2480 		return (RSMERR_NOT_CREATOR);
2481 	}
2482 
2483 	/*
2484 	 * We will not be allowing partial rebind and hence length passed
2485 	 * in must be same as segment length
2486 	 */
2487 	if (msg->vaddr == NULL) {
2488 		DBG_PRINTF((category, RSM_ERR,
2489 		    "rsm_rebind done: null msg->vaddr\n"));
2490 		return (RSMERR_BAD_ADDR);
2491 	}
2492 	if (msg->len != seg->s_len) {
2493 		DBG_PRINTF((category, RSM_ERR,
2494 		    "rsm_rebind: invalid length\n"));
2495 		return (RSMERR_BAD_LENGTH);
2496 	}
2497 
2498 	/* Lock segment */
2499 	rsmseglock_acquire(seg);
2500 
2501 	while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2502 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2503 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2504 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2505 			rsmseglock_release(seg);
2506 			DBG_PRINTF((category, RSM_DEBUG,
2507 			    "rsm_rebind done: cv_wait INTERRUPTED"));
2508 			return (RSMERR_INTERRUPTED);
2509 		}
2510 	}
2511 
2512 	/* verify segment state */
2513 	if ((seg->s_state != RSM_STATE_BIND) &&
2514 	    (seg->s_state != RSM_STATE_EXPORT)) {
2515 		/* Unlock segment */
2516 		rsmseglock_release(seg);
2517 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2518 		    "rsm_rebind done: invalid state\n"));
2519 		return (RSMERR_BAD_SEG_HNDL);
2520 	}
2521 
2522 	ASSERT(seg->s_cookie != NULL);
2523 
2524 	if (msg->vaddr == seg->s_region.r_vaddr) {
2525 		rsmseglock_release(seg);
2526 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2527 		return (RSM_SUCCESS);
2528 	}
2529 
2530 	e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2531 	if (e == RSM_SUCCESS) {
2532 		struct buf *xbuf;
2533 		dev_t sdev = 0;
2534 		rsm_memory_local_t mem;
2535 
2536 		xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2537 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
2538 		ASSERT(xbuf != NULL);
2539 
2540 		mem.ms_type = RSM_MEM_BUF;
2541 		mem.ms_bp = xbuf;
2542 
2543 		adapter = seg->s_adapter;
2544 		e = adapter->rsmpi_ops->rsm_rebind(
2545 		    seg->s_handle.out, 0, &mem,
2546 		    RSM_RESOURCE_DONTWAIT, NULL);
2547 
2548 		if (e == RSM_SUCCESS) {
2549 			/*
2550 			 * unbind the older pages, and unload local importers;
2551 			 * but don't disconnect importers
2552 			 */
2553 			(void) rsm_unbind_pages(seg);
2554 			seg->s_cookie = cookie;
2555 			seg->s_region.r_vaddr = msg->vaddr;
2556 			rsm_remap_local_importers(my_nodeid, seg->s_segid,
2557 			    cookie);
2558 		} else {
2559 			/*
2560 			 * Unbind the pages associated with "cookie" by the
2561 			 * rsm_bind_pages calls prior to this. This is
2562 			 * similar to what is done in the rsm_unbind_pages
2563 			 * routine for the seg->s_cookie.
2564 			 */
2565 			ddi_umem_unlock(cookie);
2566 			rsm_dec_pgcnt(btopr(msg->len));
2567 			DBG_PRINTF((category, RSM_ERR,
2568 			    "rsm_rebind failed with %d\n", e));
2569 		}
2570 		/*
2571 		 * At present there is no dependency on the existence of xbuf.
2572 		 * So we can free it here. If in the future this changes, it can
2573 		 * be freed sometime during the segment destroy.
2574 		 */
2575 		freerbuf(xbuf);
2576 	}
2577 
2578 	/* Unlock segment */
2579 	rsmseglock_release(seg);
2580 
2581 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2582 
2583 	return (e);
2584 }
2585 
2586 static int
2587 rsm_unbind(rsmseg_t *seg)
2588 {
2589 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2590 
2591 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2592 
2593 	rsmseglock_acquire(seg);
2594 
2595 	/* verify segment state */
2596 	if ((seg->s_state != RSM_STATE_BIND) &&
2597 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2598 		rsmseglock_release(seg);
2599 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2600 		    "rsm_unbind: invalid state\n"));
2601 		return (RSMERR_BAD_SEG_HNDL);
2602 	}
2603 
2604 	/* unlock current range */
2605 	(void) rsm_unbind_pages(seg);
2606 
2607 	if (seg->s_state == RSM_STATE_BIND) {
2608 		seg->s_state = RSM_STATE_NEW;
2609 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2610 		seg->s_state = RSM_STATE_NEW_QUIESCED;
2611 	}
2612 
2613 	rsmseglock_release(seg);
2614 
2615 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2616 
2617 	return (RSM_SUCCESS);
2618 }
2619 
2620 /* **************************** Exporter Access List Management ******* */
2621 static void
2622 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2623 {
2624 	int	acl_sz;
2625 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2626 
2627 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2628 
2629 	/* acl could be NULL */
2630 
2631 	if (acl != NULL && acl_len > 0) {
2632 		acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2633 		kmem_free((void *)acl, acl_sz);
2634 	}
2635 
2636 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2637 }
2638 
2639 static void
2640 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2641 {
2642 	int	acl_sz;
2643 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2644 
2645 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2646 
2647 	if (acl != NULL && acl_len > 0) {
2648 		acl_sz = acl_len * sizeof (rsm_access_entry_t);
2649 		kmem_free((void *)acl, acl_sz);
2650 	}
2651 
2652 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2653 
2654 }
2655 
2656 static int
2657 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2658     rsmapi_access_entry_t **list, int *len, int loopback)
2659 {
2660 	rsmapi_access_entry_t *acl;
2661 	int	acl_len;
2662 	int i;
2663 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2664 
2665 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2666 
2667 	*len = 0;
2668 	*list = NULL;
2669 
2670 	acl_len = msg->acl_len;
2671 	if ((loopback && acl_len > 1) || (acl_len < 0) ||
2672 	    (acl_len > MAX_NODES)) {
2673 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2674 		    "rsmacl_build done: acl invalid\n"));
2675 		return (RSMERR_BAD_ACL);
2676 	}
2677 
2678 	if (acl_len > 0 && acl_len <= MAX_NODES) {
2679 		size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2680 
2681 		acl = kmem_alloc(acl_size, KM_SLEEP);
2682 
2683 		if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2684 		    acl_size, mode)) {
2685 			kmem_free((void *) acl, acl_size);
2686 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2687 			    "rsmacl_build done: BAD_ADDR\n"));
2688 			return (RSMERR_BAD_ADDR);
2689 		}
2690 
2691 		/*
2692 		 * Verify access list
2693 		 */
2694 		for (i = 0; i < acl_len; i++) {
2695 			if (acl[i].ae_node > MAX_NODES ||
2696 			    (loopback && (acl[i].ae_node != my_nodeid)) ||
2697 			    acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2698 				/* invalid entry */
2699 				kmem_free((void *) acl, acl_size);
2700 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2701 				    "rsmacl_build done: EINVAL\n"));
2702 				return (RSMERR_BAD_ACL);
2703 			}
2704 		}
2705 
2706 		*len = acl_len;
2707 		*list = acl;
2708 	}
2709 
2710 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2711 
2712 	return (DDI_SUCCESS);
2713 }
2714 
2715 static int
2716 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2717     int acl_len, adapter_t *adapter)
2718 {
2719 	rsm_access_entry_t *acl;
2720 	rsm_addr_t hwaddr;
2721 	int i;
2722 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2723 
2724 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2725 
2726 	if (src != NULL) {
2727 		size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2728 		acl = kmem_alloc(acl_size, KM_SLEEP);
2729 
2730 		/*
2731 		 * translate access list
2732 		 */
2733 		for (i = 0; i < acl_len; i++) {
2734 			if (src[i].ae_node == my_nodeid) {
2735 				acl[i].ae_addr = adapter->hwaddr;
2736 			} else {
2737 				hwaddr = get_remote_hwaddr(adapter,
2738 				    src[i].ae_node);
2739 				if ((int64_t)hwaddr < 0) {
2740 					/* invalid hwaddr */
2741 					kmem_free((void *) acl, acl_size);
2742 					DBG_PRINTF((category,
2743 					    RSM_DEBUG_VERBOSE,
2744 					    "rsmpiacl_create done:"
2745 					    "EINVAL hwaddr\n"));
2746 					return (RSMERR_INTERNAL_ERROR);
2747 				}
2748 				acl[i].ae_addr = hwaddr;
2749 			}
2750 			/* rsmpi understands only RSM_PERM_XXXX */
2751 			acl[i].ae_permission =
2752 			    src[i].ae_permission & RSM_PERM_RDWR;
2753 		}
2754 		*dest = acl;
2755 	} else {
2756 		*dest = NULL;
2757 	}
2758 
2759 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2760 
2761 	return (RSM_SUCCESS);
2762 }
2763 
2764 static int
2765 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2766     rsmipc_reply_t *reply)
2767 {
2768 
2769 	int		i;
2770 	rsmseg_t	*seg;
2771 	rsm_memseg_id_t key = req->rsmipc_key;
2772 	rsm_permission_t perm = req->rsmipc_perm;
2773 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2774 
2775 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2776 	    "rsmsegacl_validate enter\n"));
2777 
2778 	/*
2779 	 * Find segment and grab its lock. The reason why we grab the segment
2780 	 * lock in side the search is to avoid the race when the segment is
2781 	 * being deleted and we already have a pointer to it.
2782 	 */
2783 	seg = rsmexport_lookup(key);
2784 	if (!seg) {
2785 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2786 		    "rsmsegacl_validate done: %u ENXIO\n", key));
2787 		return (RSMERR_SEG_NOT_PUBLISHED);
2788 	}
2789 
2790 	ASSERT(rsmseglock_held(seg));
2791 	ASSERT(seg->s_state == RSM_STATE_EXPORT);
2792 
2793 	/*
2794 	 * We implement a 2-level protection scheme.
2795 	 * First, we check if local/remote host has access rights.
2796 	 * Second, we check if the user has access rights.
2797 	 *
2798 	 * This routine only validates the rnode access_list
2799 	 */
2800 	if (seg->s_acl_len > 0) {
2801 		/*
2802 		 * Check host access list
2803 		 */
2804 		ASSERT(seg->s_acl != NULL);
2805 		for (i = 0; i < seg->s_acl_len; i++) {
2806 			if (seg->s_acl[i].ae_node == rnode) {
2807 			    perm &= seg->s_acl[i].ae_permission;
2808 			    goto found;
2809 			}
2810 		}
2811 		/* rnode is not found in the list */
2812 		rsmseglock_release(seg);
2813 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2814 		    "rsmsegacl_validate done: EPERM\n"));
2815 		return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2816 	} else {
2817 		/* use default owner creation umask */
2818 		perm &= seg->s_mode;
2819 	}
2820 
2821 found:
2822 	/* update perm for this node */
2823 	reply->rsmipc_mode = perm;
2824 	reply->rsmipc_uid = seg->s_uid;
2825 	reply->rsmipc_gid = seg->s_gid;
2826 	reply->rsmipc_segid = seg->s_segid;
2827 	reply->rsmipc_seglen = seg->s_len;
2828 
2829 	/*
2830 	 * Perm of requesting node is valid; source will validate user
2831 	 */
2832 	rsmseglock_release(seg);
2833 
2834 	/*
2835 	 * Add the importer to the list right away, if connect fails
2836 	 * the importer will ask the exporter to remove it.
2837 	 */
2838 	importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2839 	    req->rsmipc_segment_cookie);
2840 
2841 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2842 
2843 	return (RSM_SUCCESS);
2844 }
2845 
2846 
2847 /* ************************** Exporter Calls ************************* */
2848 
2849 static int
2850 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2851 {
2852 	int			e;
2853 	int			acl_len;
2854 	rsmapi_access_entry_t	*acl;
2855 	rsm_access_entry_t	*rsmpi_acl;
2856 	rsm_memory_local_t	mem;
2857 	struct buf		*xbuf;
2858 	dev_t 			sdev = 0;
2859 	adapter_t		*adapter;
2860 	rsm_memseg_id_t		segment_id = 0;
2861 	int			loopback_flag = 0;
2862 	int			create_flags = 0;
2863 	rsm_resource_callback_t	callback_flag;
2864 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2865 
2866 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2867 
2868 	if (seg->s_adapter == &loopback_adapter)
2869 		loopback_flag = 1;
2870 
2871 	if (seg->s_pid != ddi_get_pid() &&
2872 	    ddi_get_pid() != 0) {
2873 		DBG_PRINTF((category, RSM_ERR,
2874 		    "rsm_publish: Not creator\n"));
2875 		return (RSMERR_NOT_CREATOR);
2876 	}
2877 
2878 	/*
2879 	 * Get per node access list
2880 	 */
2881 	e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2882 	if (e != DDI_SUCCESS) {
2883 		DBG_PRINTF((category, RSM_ERR,
2884 		    "rsm_publish done: rsmacl_build failed\n"));
2885 		return (e);
2886 	}
2887 
2888 	/*
2889 	 * The application provided msg->key is used for resolving a
2890 	 * segment id according to the following:
2891 	 *    key = 0   		Kernel Agent selects the segment id
2892 	 *    key <= RSM_DLPI_ID_END	Reserved for system usage except
2893 	 *				RSMLIB range
2894 	 *    key < RSM_USER_APP_ID_BASE segment id = key
2895 	 *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2896 	 *
2897 	 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2898 	 * overflows to zero after 0x80000000 allocations.
2899 	 * An algorithm is needed which allows reinitialization and provides
2900 	 * for reallocation after overflow.  For now, ENOMEM is returned
2901 	 * once the overflow condition has occurred.
2902 	 */
2903 	if (msg->key == 0) {
2904 		mutex_enter(&rsm_lock);
2905 		segment_id = rsm_nextavail_segmentid;
2906 		if (segment_id != 0) {
2907 			rsm_nextavail_segmentid++;
2908 			mutex_exit(&rsm_lock);
2909 		} else {
2910 			mutex_exit(&rsm_lock);
2911 			DBG_PRINTF((category, RSM_ERR,
2912 			    "rsm_publish done: no more keys avlbl\n"));
2913 			return (RSMERR_INSUFFICIENT_RESOURCES);
2914 		}
2915 	} else	if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2916 		/* range reserved for internal use by base/ndi libraries */
2917 		segment_id = msg->key;
2918 	else	if (msg->key <= RSM_DLPI_ID_END)
2919 		return (RSMERR_RESERVED_SEGID);
2920 	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2921 		segment_id = msg->key;
2922 	else {
2923 		DBG_PRINTF((category, RSM_ERR,
2924 		    "rsm_publish done: invalid key %u\n", msg->key));
2925 		return (RSMERR_RESERVED_SEGID);
2926 	}
2927 
2928 	/* Add key to exportlist; The segment lock is held on success */
2929 	e = rsmexport_add(seg, segment_id);
2930 	if (e) {
2931 		rsmacl_free(acl, acl_len);
2932 		DBG_PRINTF((category, RSM_ERR,
2933 		    "rsm_publish done: export_add failed: %d\n", e));
2934 		return (e);
2935 	}
2936 
2937 	seg->s_segid = segment_id;
2938 
2939 	if ((seg->s_state != RSM_STATE_BIND) &&
2940 	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2941 		/* state changed since then, free acl and return */
2942 		rsmseglock_release(seg);
2943 		rsmexport_rm(seg);
2944 		rsmacl_free(acl, acl_len);
2945 		DBG_PRINTF((category, RSM_ERR,
2946 		    "rsm_publish done: segment in wrong state: %d\n",
2947 		    seg->s_state));
2948 		return (RSMERR_BAD_SEG_HNDL);
2949 	}
2950 
2951 	/*
2952 	 * If this is for a local memory handle and permissions are zero,
2953 	 * then the surrogate segment is very large and we want to skip
2954 	 * allocation of DVMA space.
2955 	 *
2956 	 * Careful!  If the user didn't use an ACL list, acl will be a NULL
2957 	 * pointer.  Check that before dereferencing it.
2958 	 */
2959 	if (acl != (rsmapi_access_entry_t *)NULL) {
2960 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2961 			goto skipdriver;
2962 	}
2963 
2964 	/* create segment  */
2965 	xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2966 	    sdev, 0, NULL, DDI_UMEM_SLEEP);
2967 	ASSERT(xbuf != NULL);
2968 
2969 	mem.ms_type = RSM_MEM_BUF;
2970 	mem.ms_bp = xbuf;
2971 
2972 	/* This call includes a bind operations */
2973 
2974 	adapter = seg->s_adapter;
2975 	/*
2976 	 * create a acl list with hwaddr for RSMPI publish
2977 	 */
2978 	e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2979 
2980 	if (e != RSM_SUCCESS) {
2981 		rsmseglock_release(seg);
2982 		rsmexport_rm(seg);
2983 		rsmacl_free(acl, acl_len);
2984 		freerbuf(xbuf);
2985 		DBG_PRINTF((category, RSM_ERR,
2986 		    "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2987 		return (e);
2988 	}
2989 
2990 	if (seg->s_state == RSM_STATE_BIND) {
2991 		/* create segment  */
2992 
2993 		/* This call includes a bind operations */
2994 
2995 		if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2996 			create_flags = RSM_ALLOW_UNBIND_REBIND;
2997 		}
2998 
2999 		if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
3000 			callback_flag  = RSM_RESOURCE_DONTWAIT;
3001 		} else {
3002 			callback_flag  = RSM_RESOURCE_SLEEP;
3003 		}
3004 
3005 		e = adapter->rsmpi_ops->rsm_seg_create(
3006 		    adapter->rsmpi_handle,
3007 		    &seg->s_handle.out, seg->s_len,
3008 		    create_flags, &mem,
3009 		    callback_flag, NULL);
3010 		/*
3011 		 * At present there is no dependency on the existence of xbuf.
3012 		 * So we can free it here. If in the future this changes, it can
3013 		 * be freed sometime during the segment destroy.
3014 		 */
3015 		freerbuf(xbuf);
3016 
3017 		if (e != RSM_SUCCESS) {
3018 			rsmseglock_release(seg);
3019 			rsmexport_rm(seg);
3020 			rsmacl_free(acl, acl_len);
3021 			rsmpiacl_free(rsmpi_acl, acl_len);
3022 			DBG_PRINTF((category, RSM_ERR,
3023 			    "rsm_publish done: export_create failed: %d\n", e));
3024 			/*
3025 			 * The following assertion ensures that the two errors
3026 			 * related to the length and its alignment do not occur
3027 			 * since they have been checked during export_create
3028 			 */
3029 			ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3030 			    e != RSMERR_BAD_LENGTH);
3031 			if (e == RSMERR_NOT_MEM)
3032 				e = RSMERR_INSUFFICIENT_MEM;
3033 
3034 			return (e);
3035 		}
3036 		/* export segment, this should create an IMMU mapping */
3037 		e = adapter->rsmpi_ops->rsm_publish(
3038 		    seg->s_handle.out,
3039 		    rsmpi_acl, acl_len,
3040 		    seg->s_segid,
3041 		    RSM_RESOURCE_DONTWAIT, NULL);
3042 
3043 		if (e != RSM_SUCCESS) {
3044 			adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3045 			rsmseglock_release(seg);
3046 			rsmexport_rm(seg);
3047 			rsmacl_free(acl, acl_len);
3048 			rsmpiacl_free(rsmpi_acl, acl_len);
3049 			DBG_PRINTF((category, RSM_ERR,
3050 			    "rsm_publish done: export_publish failed: %d\n",
3051 			    e));
3052 			return (e);
3053 		}
3054 	}
3055 
3056 	seg->s_acl_in = rsmpi_acl;
3057 
3058 skipdriver:
3059 	/* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3060 	seg->s_acl_len	= acl_len;
3061 	seg->s_acl	= acl;
3062 
3063 	if (seg->s_state == RSM_STATE_BIND) {
3064 		seg->s_state = RSM_STATE_EXPORT;
3065 	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3066 		seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3067 		cv_broadcast(&seg->s_cv);
3068 	}
3069 
3070 	rsmseglock_release(seg);
3071 
3072 	/*
3073 	 * If the segment id was solicited, then return it in
3074 	 * the original incoming message.
3075 	 */
3076 	if (msg->key == 0) {
3077 		msg->key = segment_id;
3078 #ifdef _MULTI_DATAMODEL
3079 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3080 			rsm_ioctlmsg32_t msg32;
3081 
3082 			msg32.key = msg->key;
3083 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3084 			    "rsm_publish done\n"));
3085 			return (ddi_copyout((caddr_t)&msg32,
3086 			    (caddr_t)dataptr, sizeof (msg32), mode));
3087 		}
3088 #endif
3089 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3090 		    "rsm_publish done\n"));
3091 		return (ddi_copyout((caddr_t)msg,
3092 		    (caddr_t)dataptr, sizeof (*msg), mode));
3093 	}
3094 
3095 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3096 	return (DDI_SUCCESS);
3097 }
3098 
3099 /*
3100  * This function modifies the access control list of an already published
3101  * segment.  There is no effect on import segments which are already
3102  * connected.
 *
 * seg  - export segment whose ACL is to be replaced
 * msg  - ioctl message handed to rsmacl_build to construct the new ACL
 * mode - ioctl mode flags, passed through to rsmacl_build
 *
 * Returns DDI_SUCCESS on success.  Fails with RSMERR_SEG_NOT_PUBLISHED if
 * the segment is not in an exported state, RSMERR_NOT_CREATOR if the caller
 * does not own the segment, RSMERR_INTERRUPTED if a wait on the segment's
 * condition variable is interrupted, or whatever error rsmacl_build,
 * rsmpiacl_create or the adapter's rsm_republish op returns.
 *
 * On every failure after the new ACL has been built, the new ACL is freed
 * and the segment keeps its previous ACL.
3103  */
3104 static int
3105 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3106 {
3107 	rsmapi_access_entry_t	*new_acl, *old_acl, *tmp_acl;
3108 	rsm_access_entry_t	*rsmpi_new_acl, *rsmpi_old_acl;
3109 	int			new_acl_len, old_acl_len, tmp_acl_len;
3110 	int			e, i;
3111 	adapter_t		*adapter;
3112 	int			loopback_flag = 0;
3113 	rsm_memseg_id_t		key;
3114 	rsm_permission_t	permission;
3115 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3116 
3117 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3118 
	/*
	 * Early check made without the segment lock; the state is
	 * checked again below once the lock is held.
	 */
3119 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3120 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3121 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3122 		return (RSMERR_SEG_NOT_PUBLISHED);
3123 
	/*
	 * Only the process recorded in s_pid may republish; a caller
	 * whose pid is 0 is exempt from this check.
	 */
3124 	if (seg->s_pid != ddi_get_pid() &&
3125 	    ddi_get_pid() != 0) {
3126 		DBG_PRINTF((category, RSM_ERR,
3127 		    "rsm_republish: Not owner\n"));
3128 		return (RSMERR_NOT_CREATOR);
3129 	}
3130 
3131 	if (seg->s_adapter == &loopback_adapter)
3132 		loopback_flag = 1;
3133 
3134 	/*
3135 	 * Build new list first
3136 	 */
3137 	e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3138 	if (e) {
3139 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3140 		    "rsm_republish done: rsmacl_build failed %d", e));
3141 		return (e);
3142 	}
3143 
3144 	/* Lock segment */
3145 	rsmseglock_acquire(seg);
3146 	/*
3147 	 * a republish is in progress - REPUBLISH message is being
3148 	 * sent to the importers so wait for it to complete OR
3149 	 * wait till DR completes
3150 	 */
3151 	while (((seg->s_state == RSM_STATE_EXPORT) &&
3152 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3153 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3154 	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3155 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3156 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3157 			    "rsm_republish done: cv_wait  INTERRUPTED"));
3158 			rsmseglock_release(seg);
3159 			rsmacl_free(new_acl, new_acl_len);
3160 			return (RSMERR_INTERRUPTED);
3161 		}
3162 	}
3163 
3164 	/* recheck if state is valid */
3165 	if (seg->s_state != RSM_STATE_EXPORT) {
3166 		rsmseglock_release(seg);
3167 		rsmacl_free(new_acl, new_acl_len);
3168 		return (RSMERR_SEG_NOT_PUBLISHED);
3169 	}
3170 
	/*
	 * Remember the current ACL so it can be restored on failure and
	 * freed on success, then install the new one on the segment.
	 */
3171 	key = seg->s_key;
3172 	old_acl = seg->s_acl;
3173 	old_acl_len = seg->s_acl_len;
3174 
3175 	seg->s_acl = new_acl;
3176 	seg->s_acl_len = new_acl_len;
3177 
3178 	/*
3179 	 * This call will only be meaningful if and when the interconnect
3180 	 * layer makes use of the access list
3181 	 */
3182 	adapter = seg->s_adapter;
3183 	/*
3184 	 * create a acl list with hwaddr for RSMPI publish
3185 	 */
3186 	e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3187 
3188 	if (e != RSM_SUCCESS) {
		/* roll back to the previous ACL and discard the new one */
3189 		seg->s_acl = old_acl;
3190 		seg->s_acl_len = old_acl_len;
3191 		rsmseglock_release(seg);
3192 		rsmacl_free(new_acl, new_acl_len);
3193 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3194 		    "rsm_republish done: rsmpiacl_create failed %d", e));
3195 		return (e);
3196 	}
3197 	rsmpi_old_acl = seg->s_acl_in;
3198 	seg->s_acl_in = rsmpi_new_acl;
3199 
3200 	e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3201 	    seg->s_acl_in, seg->s_acl_len,
3202 	    RSM_RESOURCE_DONTWAIT, NULL);
3203 
3204 	if (e != RSM_SUCCESS) {
		/* roll back both the RSMAPI and the RSMPI ACL */
3205 		seg->s_acl = old_acl;
3206 		seg->s_acl_in = rsmpi_old_acl;
3207 		seg->s_acl_len = old_acl_len;
3208 		rsmseglock_release(seg);
3209 		rsmacl_free(new_acl, new_acl_len);
3210 		rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3211 
3212 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3213 		    "rsm_republish done: rsmpi republish failed %d\n", e));
3214 		return (e);
3215 	}
3216 
	/*
	 * The copy made below is what gets handed to rsm_send_republish,
	 * which runs after the segment lock has been dropped.
	 */
3217 	/* create a tmp copy of the new acl */
3218 	tmp_acl_len = new_acl_len;
3219 	if (tmp_acl_len > 0) {
3220 		tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3221 		for (i = 0; i < tmp_acl_len; i++) {
3222 			tmp_acl[i].ae_node = new_acl[i].ae_node;
3223 			tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3224 		}
3225 		/*
3226 		 * The default permission of a node which was in the old
3227 		 * ACL but not in the new ACL is 0 ie no access.
3228 		 */
3229 		permission = 0;
3230 	} else {
3231 		/*
3232 		 * NULL acl means all importers can connect and
3233 		 * default permission will be owner creation umask
3234 		 */
3235 		tmp_acl = NULL;
3236 		permission = seg->s_mode;
3237 	}
3238 
3239 	/* make other republishers to wait for republish to complete */
3240 	seg->s_flags |= RSM_REPUBLISH_WAIT;
3241 
3242 	rsmseglock_release(seg);
3243 
3244 	/* send the new perms to the importing nodes */
3245 	rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3246 
3247 	rsmseglock_acquire(seg);
3248 	seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3249 	/* wake up any one waiting for republish to complete */
3250 	cv_broadcast(&seg->s_cv);
3251 	rsmseglock_release(seg);
3252 
	/* the temporary copy and the replaced ACLs are no longer needed */
3253 	rsmacl_free(tmp_acl, tmp_acl_len);
3254 	rsmacl_free(old_acl, old_acl_len);
3255 	rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3256 
3257 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3258 	return (DDI_SUCCESS);
3259 }
3260 
/*
 * Unpublish an exported segment: remove it from the export table, ask all
 * importers to disconnect, unpublish and destroy the segment on the adapter
 * (unless that step is bypassed, see below) and release its ACLs.  On
 * success the segment moves from RSM_STATE_EXPORT to RSM_STATE_BIND, or
 * from RSM_STATE_EXPORT_QUIESCED to RSM_STATE_BIND_QUIESCED.
 *
 * seg  - export segment to unpublish
 * mode - when 1, an RSMERR_SEG_IN_USE result from the adapter's
 *	  rsm_unpublish op is retried after a short timed wait, and any
 *	  other adapter error moves the segment to RSM_STATE_BIND before
 *	  the error is returned.  For any other value the adapter error is
 *	  returned immediately and the state is left unchanged.
 *
 * Returns DDI_SUCCESS on success, RSMERR_NOT_CREATOR if the caller does
 * not own the segment, RSMERR_INTERRUPTED if a wait is interrupted,
 * RSMERR_SEG_NOT_PUBLISHED if the segment is not in an exported state, or
 * the error returned by the adapter's rsm_unpublish op.
 */
3261 static int
3262 rsm_unpublish(rsmseg_t *seg, int mode)
3263 {
3264 	rsmapi_access_entry_t	*acl;
3265 	rsm_access_entry_t	*rsmpi_acl;
3266 	int			acl_len;
3267 	int			e;
3268 	clock_t			ticks;
3269 	adapter_t *adapter;
3270 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3271 
3272 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3273 
	/*
	 * Only the process recorded in s_pid may unpublish; a caller
	 * whose pid is 0 is exempt from this check.
	 */
3274 	if (seg->s_pid != ddi_get_pid() &&
3275 	    ddi_get_pid() != 0) {
3276 		DBG_PRINTF((category, RSM_ERR,
3277 		    "rsm_unpublish: Not creator\n"));
3278 		return (RSMERR_NOT_CREATOR);
3279 	}
3280 
3281 	rsmseglock_acquire(seg);
3282 	/*
3283 	 * wait for QUIESCING to complete here before rsmexport_rm
3284 	 * is called because the SUSPEND_COMPLETE mesg which changes
3285 	 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3286 	 * signals the cv_wait needs to find it in the hashtable.
3287 	 */
	/* the loop also waits while s_rdmacnt is non-zero in EXPORT state */
3288 	while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3289 	    ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3290 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3291 			rsmseglock_release(seg);
3292 			DBG_PRINTF((category, RSM_ERR,
3293 			    "rsm_unpublish done: cv_wait INTR qscing"
3294 			    "getv/putv in progress"));
3295 			return (RSMERR_INTERRUPTED);
3296 		}
3297 	}
3298 
3299 	/* verify segment state */
3300 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3301 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3302 		rsmseglock_release(seg);
		/* NOTE(review): s_state is read here after the lock is dropped */
3303 		DBG_PRINTF((category, RSM_ERR,
3304 		    "rsm_unpublish done: bad state %x\n", seg->s_state));
3305 		return (RSMERR_SEG_NOT_PUBLISHED);
3306 	}
3307 
	/*
	 * The segment lock is dropped around the removal from the export
	 * table and the disconnect requests, then taken again below.
	 */
3308 	rsmseglock_release(seg);
3309 
3310 	rsmexport_rm(seg);
3311 
3312 	rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3313 
3314 	rsmseglock_acquire(seg);
3315 	/*
3316 	 * wait for republish to complete
3317 	 */
3318 	while ((seg->s_state == RSM_STATE_EXPORT) &&
3319 	    (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3320 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			/*
			 * NOTE(review): rsmexport_rm has already run at
			 * this point, so this return (and the state-check
			 * return below) leaves the segment out of the
			 * export table - confirm callers expect that.
			 */
3321 			DBG_PRINTF((category, RSM_ERR,
3322 			    "rsm_unpublish done: cv_wait INTR repubing"));
3323 			rsmseglock_release(seg);
3324 			return (RSMERR_INTERRUPTED);
3325 		}
3326 	}
3327 
	/* the state may have changed while the lock was dropped */
3328 	if ((seg->s_state != RSM_STATE_EXPORT) &&
3329 	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3330 		DBG_PRINTF((category, RSM_ERR,
3331 		    "rsm_unpublish done: invalid state"));
3332 		rsmseglock_release(seg);
3333 		return (RSMERR_SEG_NOT_PUBLISHED);
3334 	}
3335 
3336 	/*
3337 	 * check for putv/get surrogate segment which was not published
3338 	 * to the driver.
3339 	 *
3340 	 * Be certain to see if there is an ACL first!  If this segment was
3341 	 * not published with an ACL, acl will be a null pointer.  Check
3342 	 * that before dereferencing it.
3343 	 */
3344 	acl = seg->s_acl;
3345 	if (acl != (rsmapi_access_entry_t *)NULL) {
3346 		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3347 			goto bypass;
3348 	}
3349 
3350 	/* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3351 	if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3352 		goto bypass;
3353 
3354 	adapter = seg->s_adapter;
	/*
	 * Retry loop: left on success (break) or on an error return.  The
	 * state is rechecked on each pass because cv_timedwait below
	 * releases the segment lock while it waits.
	 */
3355 	for (;;) {
3356 		if (seg->s_state != RSM_STATE_EXPORT) {
3357 			rsmseglock_release(seg);
			/* NOTE(review): s_state read after the lock is dropped */
3358 			DBG_PRINTF((category, RSM_ERR,
3359 			    "rsm_unpublish done: bad state %x\n",
3360 			    seg->s_state));
3361 			return (RSMERR_SEG_NOT_PUBLISHED);
3362 		}
3363 
3364 		/* unpublish from adapter */
3365 		e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3366 
3367 		if (e == RSM_SUCCESS) {
3368 			break;
3369 		}
3370 
3371 		if (e == RSMERR_SEG_IN_USE && mode == 1) {
3372 			/*
3373 			 * wait for unpublish to succeed, it's busy.
3374 			 */
3375 			seg->s_flags |= RSM_EXPORT_WAIT;
3376 
3377 			/* wait for a max of 1 ms - this is an empirical */
3378 			/* value that was found by some minimal testing  */
3379 			/* can be fine tuned when we have better numbers */
3380 			/* A long term fix would be to send cv_signal	 */
3381 			/* from the intr callback routine		 */
3382 			(void) drv_getparm(LBOLT, &ticks);
3383 			ticks += drv_usectohz(1000);
3384 			/* currently nobody signals this wait		*/
3385 			(void) cv_timedwait(&seg->s_cv, &seg->s_lock, ticks);
3386 
3387 			DBG_PRINTF((category, RSM_ERR,
3388 			    "rsm_unpublish: SEG_IN_USE\n"));
3389 
3390 			seg->s_flags &= ~RSM_EXPORT_WAIT;
3391 		} else {
			/*
			 * NOTE(review): with mode == 1 the state goes back
			 * to BIND here without rsm_seg_destroy being called
			 * and with s_acl/s_acl_in still set on the segment;
			 * presumably they are released when the segment is
			 * freed - confirm.
			 */
3392 			if (mode == 1) {
3393 				DBG_PRINTF((category, RSM_ERR,
3394 				    "rsm:rsmpi unpublish err %x\n", e));
3395 				seg->s_state = RSM_STATE_BIND;
3396 			}
3397 			rsmseglock_release(seg);
3398 			return (e);
3399 		}
3400 	}
3401 
3402 	/* Free segment */
3403 	e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3404 
	/* a destroy failure is only logged; the unpublish still completes */
3405 	if (e != RSM_SUCCESS) {
3406 		DBG_PRINTF((category, RSM_ERR,
3407 		    "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3408 		    seg->s_key, e));
3409 	}
3410 
3411 bypass:
	/*
	 * Detach the ACLs from the segment while the lock is held; they
	 * are freed after the lock has been released.
	 */
3412 	acl = seg->s_acl;
3413 	rsmpi_acl = seg->s_acl_in;
3414 	acl_len = seg->s_acl_len;
3415 
3416 	seg->s_acl = NULL;
3417 	seg->s_acl_in = NULL;
3418 	seg->s_acl_len = 0;
3419 
3420 	if (seg->s_state == RSM_STATE_EXPORT) {
3421 		seg->s_state = RSM_STATE_BIND;
3422 	} else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3423 		seg->s_state = RSM_STATE_BIND_QUIESCED;
3424 		cv_broadcast(&seg->s_cv);
3425 	}
3426 
3427 	rsmseglock_release(seg);
3428 
3429 	rsmacl_free(acl, acl_len);
3430 	rsmpiacl_free(rsmpi_acl, acl_len);
3431 
3432 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3433 
3434 	return (DDI_SUCCESS);
3435 }
3436 
3437 /*
3438  * Called from rsm_unpublish to force an unload and disconnection of all
3439  * importers of the unpublished segment.
3440  *
3441  * First build the list of segments requiring a force disconnect, then
3442  * send a request for each.
3443  */
3444 static void
3445 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3446     rsm_node_id_t ex_nodeid)
3447 {
3448 	rsmipc_request_t 	request;
3449 	importing_token_t	*prev_token, *token, *tmp_token, *tokp;
3450 	importing_token_t	*force_disconnect_list = NULL;
3451 	int			index;
3452 
3453 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3454 	    "rsm_send_importer_disconnects enter\n"));
3455 
3456 	index = rsmhash(ex_segid);
3457 
3458 	mutex_enter(&importer_list.lock);
3459 
3460 	prev_token = NULL;
3461 	token = importer_list.bucket[index];
3462 
3463 	while (token != NULL) {
3464 		if (token->key == ex_segid) {
3465 			/*
3466 			 * take it off the importer list and add it
3467 			 * to the force disconnect list.
3468 			 */
3469 			if (prev_token == NULL)
3470 				importer_list.bucket[index] = token->next;
3471 			else
3472 				prev_token->next = token->next;
3473 			tmp_token = token;
3474 			token = token->next;
3475 			if (force_disconnect_list == NULL) {
3476 				force_disconnect_list = tmp_token;
3477 				tmp_token->next = NULL;
3478 			} else {
3479 				tokp = force_disconnect_list;
3480 				/*
3481 				 * make sure that the tmp_token's node
3482 				 * is not already on the force disconnect
3483 				 * list.
3484 				 */
3485 				while (tokp != NULL) {
3486 					if (tokp->importing_node ==
3487 					    tmp_token->importing_node) {
3488 						break;
3489 					}
3490 					tokp = tokp->next;
3491 				}
3492 				if (tokp == NULL) {
3493 					tmp_token->next =
3494 					    force_disconnect_list;
3495 					force_disconnect_list = tmp_token;
3496 				} else {
3497 					kmem_free((void *)tmp_token,
3498 					    sizeof (*token));
3499 				}
3500 			}
3501 
3502 		} else {
3503 			prev_token = token;
3504 			token = token->next;
3505 		}
3506 	}
3507 	mutex_exit(&importer_list.lock);
3508 
3509 	token = force_disconnect_list;
3510 	while (token != NULL) {
3511 		if (token->importing_node == my_nodeid) {
3512 			rsm_force_unload(ex_nodeid, ex_segid,
3513 			    DISCONNECT);
3514 		} else {
3515 			request.rsmipc_hdr.rsmipc_type =
3516 			    RSMIPC_MSG_DISCONNECT;
3517 			request.rsmipc_key = token->key;
3518 			for (;;) {
3519 				if (rsmipc_send(token->importing_node,
3520 				    &request,
3521 				    RSM_NO_REPLY) == RSM_SUCCESS) {
3522 					break;
3523 				} else {
3524 					delay(drv_usectohz(10000));
3525 				}
3526 			}
3527 		}
3528 		tmp_token = token;
3529 		token = token->next;
3530 		kmem_free((void *)tmp_token, sizeof (*token));
3531 	}
3532 
3533 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3534 			"rsm_send_importer_disconnects done\n"));
3535 }
3536 
3537 /*
3538  * This function is used as a callback for unlocking the pages locked
3539  * down by a process which then does a fork or an exec.
3540  * It marks the export segments corresponding to umem cookie given by
3541  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3542  * destroyed later when an rsm_close occurs).
3543  */
3544 static void
3545 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3546 {
3547 	rsmresource_blk_t *blk;
3548 	rsmresource_t *p;
3549 	rsmseg_t *eseg = NULL;
3550 	int i, j;
3551 	int found = 0;
3552 
3553 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3554 	    "rsm_export_force_destroy enter\n"));
3555 
3556 	/*
3557 	 * Walk the resource list and locate the export segment (either
3558 	 * in the BIND or the EXPORT state) which corresponds to the
3559 	 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3560 	 * Change the state to ZOMBIE by calling rsmseg_close with the
3561 	 * force_flag argument (the second argument) set to 1. Also,
3562 	 * unpublish and unbind the segment, but don't free it. Free it
3563 	 * only on a rsm_close call for the segment.
3564 	 */
3565 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3566 
3567 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3568 		blk = rsm_resource.rsmrc_root[i];
3569 		if (blk == NULL) {
3570 			continue;
3571 		}
3572 
3573 		for (j = 0; j < RSMRC_BLKSZ; j++) {
3574 			p = blk->rsmrcblk_blks[j];
3575 			if ((p != NULL) && (p != RSMRC_RESERVED) &&
3576 			    (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3577 				eseg = (rsmseg_t *)p;
3578 				if (eseg->s_cookie != ck)
3579 					continue; /* continue searching */
3580 				/*
3581 				 * Found the segment, set flag to indicate
3582 				 * force destroy processing is in progress
3583 				 */
3584 				rsmseglock_acquire(eseg);
3585 				eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3586 				rsmseglock_release(eseg);
3587 				found = 1;
3588 				break;
3589 			}
3590 		}
3591 
3592 		if (found)
3593 			break;
3594 	}
3595 
3596 	rw_exit(&rsm_resource.rsmrc_lock);
3597 
3598 	if (found) {
3599 		ASSERT(eseg != NULL);
3600 		/* call rsmseg_close with force flag set to 1 */
3601 		rsmseg_close(eseg, 1);
3602 		/*
3603 		 * force destroy processing done, clear flag and signal any
3604 		 * thread waiting in rsmseg_close.
3605 		 */
3606 		rsmseglock_acquire(eseg);
3607 		eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3608 		cv_broadcast(&eseg->s_cv);
3609 		rsmseglock_release(eseg);
3610 	}
3611 
3612 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3613 	    "rsm_export_force_destroy done\n"));
3614 }
3615 
3616 /* ******************************* Remote Calls *********************** */
3617 static void
3618 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3619 {
3620 	rsmipc_reply_t reply;
3621 	DBG_DEFINE(category,
3622 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3623 
3624 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3625 	    "rsm_intr_segconnect enter\n"));
3626 
3627 	reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3628 
3629 	reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3630 	reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3631 
3632 	(void) rsmipc_send(src, NULL, &reply);
3633 
3634 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3635 	    "rsm_intr_segconnect done\n"));
3636 }
3637 
3638 
3639 /*
3640  * When an exported segment is unpublished the exporter sends an ipc
3641  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3642  * calls this function.  The import list is scanned; segments which match the
3643  * exported segment id are unloaded and disconnected.
3644  *
3645  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3646  *
3647  */
3648 static void
3649 rsm_force_unload(rsm_node_id_t src_nodeid,
3650     rsm_memseg_id_t ex_segid,
3651     boolean_t disconnect_flag)
3652 
3653 {
3654 	rsmresource_t	*p = NULL;
3655 	rsmhash_table_t *rhash = &rsm_import_segs;
3656 	uint_t		index;
3657 	DBG_DEFINE(category,
3658 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3659 
3660 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3661 
3662 	index = rsmhash(ex_segid);
3663 
3664 	rw_enter(&rhash->rsmhash_rw, RW_READER);
3665 
3666 	p = rsmhash_getbkt(rhash, index);
3667 
3668 	for (; p; p = p->rsmrc_next) {
3669 		rsmseg_t *seg = (rsmseg_t *)p;
3670 		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3671 			/*
3672 			 * In order to make rsmseg_unload and rsm_force_unload
3673 			 * thread safe, acquire the segment lock here.
3674 			 * rsmseg_unload is responsible for releasing the lock.
3675 			 * rsmseg_unload releases the lock just before a call
3676 			 * to rsmipc_send or in case of an early exit which
3677 			 * occurs if the segment was in the state
3678 			 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3679 			 */
3680 			rsmseglock_acquire(seg);
3681 			if (disconnect_flag)
3682 				seg->s_flags |= RSM_FORCE_DISCONNECT;
3683 			rsmseg_unload(seg);
3684 		}
3685 	}
3686 	rw_exit(&rhash->rsmhash_rw);
3687 
3688 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3689 }
3690 
3691 static void
3692 rsm_intr_reply(rsmipc_msghdr_t *msg)
3693 {
3694 	/*
3695 	 * Find slot for cookie in reply.
3696 	 * Match sequence with sequence in cookie
3697 	 * If no match; return
3698 	 * Try to grap lock of slot, if locked return
3699 	 * copy data into reply slot area
3700 	 * signal waiter
3701 	 */
3702 	rsmipc_slot_t 	*slot;
3703 	rsmipc_cookie_t	*cookie;
3704 	void *data = (void *) msg;
3705 	size_t size = sizeof (rsmipc_reply_t);
3706 	DBG_DEFINE(category,
3707 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3708 
3709 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3710 
3711 	cookie = &msg->rsmipc_cookie;
3712 	if (cookie->ic.index >= RSMIPC_SZ) {
3713 		DBG_PRINTF((category, RSM_ERR,
3714 		    "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3715 		return;
3716 	}
3717 
3718 	ASSERT(cookie->ic.index < RSMIPC_SZ);
3719 	slot = &rsm_ipc.slots[cookie->ic.index];
3720 	mutex_enter(&slot->rsmipc_lock);
3721 	if (slot->rsmipc_cookie.value == cookie->value) {
3722 		/* found a match */
3723 		if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3724 			bcopy(data, slot->rsmipc_data, size);
3725 			RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3726 			cv_signal(&slot->rsmipc_cv);
3727 		}
3728 	} else {
3729 		DBG_PRINTF((category, RSM_DEBUG,
3730 		    "rsm: rsm_intr_reply mismatched reply %d\n",
3731 		    cookie->ic.index));
3732 	}
3733 	mutex_exit(&slot->rsmipc_lock);
3734 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3735 }
3736 
3737 /*
3738  * This function gets dispatched on the worker thread when we receive
3739  * the SQREADY message. This function sends the SQREADY_ACK message.
3740  */
3741 static void
3742 rsm_sqready_ack_deferred(void *arg)
3743 {
3744 	path_t	*path = (path_t *)arg;
3745 	DBG_DEFINE(category,
3746 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3747 
3748 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3749 	    "rsm_sqready_ack_deferred enter\n"));
3750 
3751 	mutex_enter(&path->mutex);
3752 
3753 	/*
3754 	 * If path is not active no point in sending the ACK
3755 	 * because the whole SQREADY protocol will again start
3756 	 * when the path becomes active.
3757 	 */
3758 	if (path->state != RSMKA_PATH_ACTIVE) {
3759 		/*
3760 		 * decrement the path refcnt incremented in rsm_proc_sqready
3761 		 */
3762 		PATH_RELE_NOLOCK(path);
3763 		mutex_exit(&path->mutex);
3764 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3765 		    "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3766 		return;
3767 	}
3768 
3769 	/* send an SQREADY_ACK message */
3770 	(void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3771 
3772 	/* initialize credits to the max level */
3773 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3774 
3775 	/* wake up any send that is waiting for credits */
3776 	cv_broadcast(&path->sendq_token.sendq_cv);
3777 
3778 	/*
3779 	 * decrement the path refcnt since we incremented it in
3780 	 * rsm_proc_sqready
3781 	 */
3782 	PATH_RELE_NOLOCK(path);
3783 
3784 	mutex_exit(&path->mutex);
3785 
3786 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3787 	    "rsm_sqready_ack_deferred done\n"));
3788 }
3789 
3790 /*
3791  * Process the SQREADY message
3792  */
3793 static void
3794 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3795     rsm_intr_hand_arg_t arg)
3796 {
3797 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3798 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3799 	path_t			*path;
3800 	DBG_DEFINE(category,
3801 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3802 
3803 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3804 
3805 	/* look up the path - incr the path refcnt */
3806 	path = rsm_find_path(hdlr_argp->adapter_name,
3807 	    hdlr_argp->adapter_instance, src_hwaddr);
3808 
3809 	/*
3810 	 * No path exists or path is not active - drop the message
3811 	 */
3812 	if (path == NULL) {
3813 		DBG_PRINTF((category, RSM_DEBUG,
3814 		    "rsm_proc_sqready done: msg dropped no path\n"));
3815 		return;
3816 	}
3817 
3818 	mutex_exit(&path->mutex);
3819 
3820 	/* drain any tasks from the previous incarnation */
3821 	taskq_wait(path->recv_taskq);
3822 
3823 	mutex_enter(&path->mutex);
3824 	/*
3825 	 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3826 	 * in the meanwhile we received an SQREADY message, blindly reset
3827 	 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3828 	 * and forget about the SQREADY that we sent.
3829 	 */
3830 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3831 
3832 	if (path->state != RSMKA_PATH_ACTIVE) {
3833 		/* decr refcnt and drop the mutex */
3834 		PATH_RELE_NOLOCK(path);
3835 		mutex_exit(&path->mutex);
3836 		DBG_PRINTF((category, RSM_DEBUG,
3837 		    "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3838 		return;
3839 	}
3840 
3841 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3842 	    " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3843 
3844 	/*
3845 	 * The sender's local incarnation number is our remote incarnation
3846 	 * number save it in the path data structure
3847 	 */
3848 	path->remote_incn = msg->rsmipc_local_incn;
3849 	path->sendq_token.msgbuf_avail = 0;
3850 	path->procmsg_cnt = 0;
3851 
3852 	/*
3853 	 * path is active - dispatch task to send SQREADY_ACK - remember
3854 	 * RSMPI calls can't be done in interrupt context
3855 	 *
3856 	 * We can use the recv_taskq to send because the remote endpoint
3857 	 * cannot start sending messages till it receives SQREADY_ACK hence
3858 	 * at this point there are no tasks on recv_taskq.
3859 	 *
3860 	 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3861 	 */
3862 	(void) taskq_dispatch(path->recv_taskq,
3863 	    rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3864 
3865 	mutex_exit(&path->mutex);
3866 
3867 
3868 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3869 }
3870 
3871 /*
3872  * Process the SQREADY_ACK message
3873  */
3874 static void
3875 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3876     rsm_intr_hand_arg_t arg)
3877 {
3878 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3879 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3880 	path_t			*path;
3881 	DBG_DEFINE(category,
3882 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3883 
3884 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3885 	    "rsm_proc_sqready_ack enter\n"));
3886 
3887 	/* look up the path - incr the path refcnt */
3888 	path = rsm_find_path(hdlr_argp->adapter_name,
3889 	    hdlr_argp->adapter_instance, src_hwaddr);
3890 
3891 	/*
3892 	 * drop the message if - no path exists or path is not active
3893 	 * or if its not waiting for SQREADY_ACK message
3894 	 */
3895 	if (path == NULL) {
3896 		DBG_PRINTF((category, RSM_DEBUG,
3897 		    "rsm_proc_sqready_ack done: msg dropped no path\n"));
3898 		return;
3899 	}
3900 
3901 	if ((path->state != RSMKA_PATH_ACTIVE) ||
3902 	    !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3903 		/* decrement the refcnt */
3904 		PATH_RELE_NOLOCK(path);
3905 		mutex_exit(&path->mutex);
3906 		DBG_PRINTF((category, RSM_DEBUG,
3907 		    "rsm_proc_sqready_ack done: msg dropped\n"));
3908 		return;
3909 	}
3910 
3911 	/*
3912 	 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3913 	 * sent, if not drop it.
3914 	 */
3915 	if (path->local_incn != msghdr->rsmipc_incn) {
3916 		/* decrement the refcnt */
3917 		PATH_RELE_NOLOCK(path);
3918 		mutex_exit(&path->mutex);
3919 		DBG_PRINTF((category, RSM_DEBUG,
3920 		    "rsm_proc_sqready_ack done: msg old incn %lld\n",
3921 		    msghdr->rsmipc_incn));
3922 		return;
3923 	}
3924 
3925 	DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3926 		" src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3927 
3928 	/*
3929 	 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3930 	 */
3931 	path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3932 
3933 	/* save the remote sendq incn number */
3934 	path->remote_incn = msg->rsmipc_local_incn;
3935 
3936 	/* initialize credits to the max level */
3937 	path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3938 
3939 	/* wake up any send that is waiting for credits */
3940 	cv_broadcast(&path->sendq_token.sendq_cv);
3941 
3942 	/* decrement the refcnt */
3943 	PATH_RELE_NOLOCK(path);
3944 
3945 	mutex_exit(&path->mutex);
3946 
3947 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3948 	    "rsm_proc_sqready_ack done\n"));
3949 }
3950 
3951 /*
3952  * process the RSMIPC_MSG_CREDIT message
3953  */
3954 static void
3955 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3956     rsm_intr_hand_arg_t arg)
3957 {
3958 	rsmipc_msghdr_t		*msghdr = (rsmipc_msghdr_t *)msg;
3959 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
3960 	path_t			*path;
3961 	DBG_DEFINE(category,
3962 	RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3963 
3964 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3965 
3966 	/* look up the path - incr the path refcnt */
3967 	path = rsm_find_path(hdlr_argp->adapter_name,
3968 	    hdlr_argp->adapter_instance, src_hwaddr);
3969 
3970 	if (path == NULL) {
3971 		DBG_PRINTF((category, RSM_DEBUG,
3972 		    "rsm_add_credits enter: path not found\n"));
3973 		return;
3974 	}
3975 
3976 	/* the path is not active - discard credits */
3977 	if (path->state != RSMKA_PATH_ACTIVE) {
3978 		PATH_RELE_NOLOCK(path);
3979 		mutex_exit(&path->mutex);
3980 		DBG_PRINTF((category, RSM_DEBUG,
3981 		    "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3982 		return;
3983 	}
3984 
3985 	/*
3986 	 * Check if these credits are for current incarnation of the path.
3987 	 */
3988 	if (path->local_incn != msghdr->rsmipc_incn) {
3989 		/* decrement the refcnt */
3990 		PATH_RELE_NOLOCK(path);
3991 		mutex_exit(&path->mutex);
3992 		DBG_PRINTF((category, RSM_DEBUG,
3993 		    "rsm_add_credits enter: old incn %lld\n",
3994 		    msghdr->rsmipc_incn));
3995 		return;
3996 	}
3997 
3998 	DBG_PRINTF((category, RSM_DEBUG,
3999 	    "rsm_add_credits:path=%lx new-creds=%d "
4000 	    "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
4001 	    path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4002 	    src_hwaddr));
4003 
4004 
4005 	/* add credits to the path's sendq */
4006 	path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4007 
4008 	ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4009 
4010 	/* wake up any send that is waiting for credits */
4011 	cv_broadcast(&path->sendq_token.sendq_cv);
4012 
4013 	/* decrement the refcnt */
4014 	PATH_RELE_NOLOCK(path);
4015 
4016 	mutex_exit(&path->mutex);
4017 
4018 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4019 }
4020 
4021 static void
4022 rsm_intr_event(rsmipc_request_t *msg)
4023 {
4024 	rsmseg_t	*seg;
4025 	rsmresource_t	*p;
4026 	rsm_node_id_t	src_node;
4027 	DBG_DEFINE(category,
4028 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4029 
4030 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4031 
4032 	src_node = msg->rsmipc_hdr.rsmipc_src;
4033 
4034 	if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4035 		/* This is for an import segment */
4036 		uint_t hashval = rsmhash(msg->rsmipc_key);
4037 
4038 		rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4039 
4040 		p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4041 
4042 		for (; p; p = p->rsmrc_next) {
4043 			if ((p->rsmrc_key == msg->rsmipc_key) &&
4044 			    (p->rsmrc_node == src_node)) {
4045 				seg = (rsmseg_t *)p;
4046 				rsmseglock_acquire(seg);
4047 
4048 				atomic_add_32(&seg->s_pollevent, 1);
4049 
4050 				if (seg->s_pollflag & RSM_SEGMENT_POLL)
4051 					pollwakeup(&seg->s_poll, POLLRDNORM);
4052 
4053 				rsmseglock_release(seg);
4054 			}
4055 		}
4056 
4057 		rw_exit(&rsm_import_segs.rsmhash_rw);
4058 	} else {
4059 		/* This is for an export segment */
4060 		seg = rsmexport_lookup(msg->rsmipc_key);
4061 		if (!seg) {
4062 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4063 			    "rsm_intr_event done: exp seg not found\n"));
4064 			return;
4065 		}
4066 
4067 		ASSERT(rsmseglock_held(seg));
4068 
4069 		atomic_add_32(&seg->s_pollevent, 1);
4070 
4071 		/*
4072 		 * We must hold the segment lock here, or else the segment
4073 		 * can be freed while pollwakeup is using it. This implies
4074 		 * that we MUST NOT grab the segment lock during rsm_chpoll,
4075 		 * as outlined in the chpoll(2) man page.
4076 		 */
4077 		if (seg->s_pollflag & RSM_SEGMENT_POLL)
4078 			pollwakeup(&seg->s_poll, POLLRDNORM);
4079 
4080 		rsmseglock_release(seg);
4081 	}
4082 
4083 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4084 }
4085 
4086 /*
4087  * The exporter did a republish and changed the ACL - this change is only
4088  * visible to new importers.
4089  */
4090 static void
4091 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4092     rsm_permission_t perm)
4093 {
4094 
4095 	rsmresource_t	*p;
4096 	rsmseg_t	*seg;
4097 	uint_t		hashval = rsmhash(key);
4098 	DBG_DEFINE(category,
4099 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4100 
4101 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4102 
4103 	rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4104 
4105 	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4106 
4107 	for (; p; p = p->rsmrc_next) {
4108 		/*
4109 		 * find the importer and update the permission in the shared
4110 		 * data structure. Any new importers will use the new perms
4111 		 */
4112 		if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4113 			seg = (rsmseg_t *)p;
4114 
4115 			rsmseglock_acquire(seg);
4116 			rsmsharelock_acquire(seg);
4117 			seg->s_share->rsmsi_mode = perm;
4118 			rsmsharelock_release(seg);
4119 			rsmseglock_release(seg);
4120 
4121 			break;
4122 		}
4123 	}
4124 
4125 	rw_exit(&rsm_import_segs.rsmhash_rw);
4126 
4127 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4128 }
4129 
4130 void
4131 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4132 {
4133 	int		done = 1; /* indicate all SUSPENDS have been acked */
4134 	list_element_t	*elem;
4135 	DBG_DEFINE(category,
4136 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4137 
4138 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4139 	    "rsm_suspend_complete enter\n"));
4140 
4141 	mutex_enter(&rsm_suspend_list.list_lock);
4142 
4143 	if (rsm_suspend_list.list_head == NULL) {
4144 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4145 		    "rsm_suspend_complete done: suspend_list is empty\n"));
4146 		mutex_exit(&rsm_suspend_list.list_lock);
4147 		return;
4148 	}
4149 
4150 	elem = rsm_suspend_list.list_head;
4151 	while (elem != NULL) {
4152 		if (elem->nodeid == src_node) {
4153 			/* clear the pending flag for the node */
4154 			elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4155 			elem->flags |= flag;
4156 		}
4157 
4158 		if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4159 			done = 0; /* still some nodes have not yet ACKED */
4160 
4161 		elem = elem->next;
4162 	}
4163 
4164 	mutex_exit(&rsm_suspend_list.list_lock);
4165 
4166 	if (!done) {
4167 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4168 		    "rsm_suspend_complete done: acks pending\n"));
4169 		return;
4170 	}
4171 	/*
4172 	 * Now that we are done with suspending all the remote importers
4173 	 * time to quiesce the local exporters
4174 	 */
4175 	exporter_quiesce();
4176 
4177 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4178 	    "rsm_suspend_complete done\n"));
4179 }
4180 
4181 static void
4182 exporter_quiesce()
4183 {
4184 	int		i, e;
4185 	rsmresource_t	*current;
4186 	rsmseg_t	*seg;
4187 	adapter_t	*adapter;
4188 	DBG_DEFINE(category,
4189 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4190 
4191 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4192 	/*
4193 	 * The importers send a SUSPEND_COMPLETE to the exporter node
4194 	 *	Unpublish, unbind the export segment and
4195 	 *	move the segments to the EXPORT_QUIESCED state
4196 	 */
4197 
4198 	rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4199 
4200 	for (i = 0; i < rsm_hash_size; i++) {
4201 		current = rsm_export_segs.bucket[i];
4202 		while (current != NULL) {
4203 			seg = (rsmseg_t *)current;
4204 			rsmseglock_acquire(seg);
4205 			if (current->rsmrc_state ==
4206 			    RSM_STATE_EXPORT_QUIESCING) {
4207 				adapter = seg->s_adapter;
4208 				/*
4209 				 * some local memory handles are not published
4210 				 * check if it was published
4211 				 */
4212 				if ((seg->s_acl == NULL) ||
4213 				    (seg->s_acl[0].ae_node != my_nodeid) ||
4214 				    (seg->s_acl[0].ae_permission != 0)) {
4215 
4216 					e = adapter->rsmpi_ops->rsm_unpublish(
4217 					    seg->s_handle.out);
4218 					DBG_PRINTF((category, RSM_DEBUG,
4219 					    "exporter_quiesce:unpub %d\n", e));
4220 
4221 					e = adapter->rsmpi_ops->rsm_seg_destroy(
4222 					    seg->s_handle.out);
4223 
4224 					DBG_PRINTF((category, RSM_DEBUG,
4225 					    "exporter_quiesce:destroy %d\n",
4226 					    e));
4227 				}
4228 
4229 				(void) rsm_unbind_pages(seg);
4230 				seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4231 				cv_broadcast(&seg->s_cv);
4232 			}
4233 			rsmseglock_release(seg);
4234 			current = current->rsmrc_next;
4235 		}
4236 	}
4237 	rw_exit(&rsm_export_segs.rsmhash_rw);
4238 
4239 	/*
4240 	 * All the local segments we are done with the pre-del processing
4241 	 * - time to move to PREDEL_COMPLETED.
4242 	 */
4243 
4244 	mutex_enter(&rsm_drv_data.drv_lock);
4245 
4246 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4247 
4248 	rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4249 
4250 	cv_broadcast(&rsm_drv_data.drv_cv);
4251 
4252 	mutex_exit(&rsm_drv_data.drv_lock);
4253 
4254 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4255 }
4256 
4257 static void
4258 importer_suspend(rsm_node_id_t src_node)
4259 {
4260 	int		i;
4261 	int		susp_flg; /* true means already suspended */
4262 	int		num_importers;
4263 	rsmresource_t	*p = NULL, *curp;
4264 	rsmhash_table_t *rhash = &rsm_import_segs;
4265 	rsmseg_t	*seg;
4266 	rsmipc_request_t request;
4267 	DBG_DEFINE(category,
4268 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4269 
4270 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4271 
4272 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4273 	for (i = 0; i < rsm_hash_size; i++) {
4274 		p = rhash->bucket[i];
4275 
4276 		/*
4277 		 * Suspend all importers with same <node, key> pair.
4278 		 * After the last one of the shared importers has been
4279 		 * suspended - suspend the shared mappings/connection.
4280 		 */
4281 		for (; p; p = p->rsmrc_next) {
4282 			rsmseg_t *first = (rsmseg_t *)p;
4283 			if ((first->s_node != src_node) ||
4284 			    (first->s_state == RSM_STATE_DISCONNECT))
4285 				continue; /* go to next entry */
4286 			/*
4287 			 * search the rest of the bucket for
4288 			 * other siblings (imprtrs with the same key)
4289 			 * of "first" and suspend them.
4290 			 * All importers with same key fall in
4291 			 * the same bucket.
4292 			 */
4293 			num_importers = 0;
4294 			for (curp = p; curp; curp = curp->rsmrc_next) {
4295 				seg = (rsmseg_t *)curp;
4296 
4297 				rsmseglock_acquire(seg);
4298 
4299 				if ((seg->s_node != first->s_node) ||
4300 				    (seg->s_key != first->s_key) ||
4301 				    (seg->s_state == RSM_STATE_DISCONNECT)) {
4302 					/*
4303 					 * either not a peer segment or its a
4304 					 * disconnected segment - skip it
4305 					 */
4306 					rsmseglock_release(seg);
4307 					continue;
4308 				}
4309 
4310 				rsmseg_suspend(seg, &susp_flg);
4311 
4312 				if (susp_flg) { /* seg already suspended */
4313 					rsmseglock_release(seg);
4314 					break; /* the inner for loop */
4315 				}
4316 
4317 				num_importers++;
4318 				rsmsharelock_acquire(seg);
4319 				/*
4320 				 * we've processed all importers that are
4321 				 * siblings of "first"
4322 				 */
4323 				if (num_importers ==
4324 				    seg->s_share->rsmsi_refcnt) {
4325 					rsmsharelock_release(seg);
4326 					rsmseglock_release(seg);
4327 					break;
4328 				}
4329 				rsmsharelock_release(seg);
4330 				rsmseglock_release(seg);
4331 			}
4332 
4333 			/*
4334 			 * All the importers with the same key and
4335 			 * nodeid as "first" have been suspended.
4336 			 * Now suspend the shared connect/mapping.
4337 			 * This is done only once.
4338 			 */
4339 			if (!susp_flg) {
4340 				rsmsegshare_suspend(seg);
4341 			}
4342 		}
4343 	}
4344 
4345 	rw_exit(&rhash->rsmhash_rw);
4346 
4347 	/* send an ACK for SUSPEND message */
4348 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4349 	(void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4350 
4351 
4352 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4353 
4354 }
4355 
4356 static void
4357 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4358 {
4359 	int		recheck_state;
4360 	rsmcookie_t	*hdl;
4361 	DBG_DEFINE(category,
4362 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4363 
4364 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4365 	    "rsmseg_suspend enter: key=%u\n", seg->s_key));
4366 
4367 	*susp_flg = 0;
4368 
4369 	ASSERT(rsmseglock_held(seg));
4370 	/* wait if putv/getv is in progress */
4371 	while (seg->s_rdmacnt > 0)
4372 		cv_wait(&seg->s_cv, &seg->s_lock);
4373 
4374 	do {
4375 		recheck_state = 0;
4376 
4377 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4378 		    "rsmseg_suspend:segment %x state=%d\n",
4379 		    seg->s_key, seg->s_state));
4380 
4381 		switch (seg->s_state) {
4382 		case RSM_STATE_NEW:
4383 			/* not a valid state */
4384 			break;
4385 		case RSM_STATE_CONNECTING:
4386 			seg->s_state = RSM_STATE_ABORT_CONNECT;
4387 			break;
4388 		case RSM_STATE_ABORT_CONNECT:
4389 			break;
4390 		case RSM_STATE_CONNECT:
4391 			seg->s_handle.in = NULL;
4392 			seg->s_state = RSM_STATE_CONN_QUIESCE;
4393 			break;
4394 		case RSM_STATE_MAPPING:
4395 			/* wait until segment leaves the mapping state */
4396 			while (seg->s_state == RSM_STATE_MAPPING)
4397 				cv_wait(&seg->s_cv, &seg->s_lock);
4398 			recheck_state = 1;
4399 			break;
4400 		case RSM_STATE_ACTIVE:
4401 			/* unload the mappings */
4402 			if (seg->s_ckl != NULL) {
4403 				hdl = seg->s_ckl;
4404 				for (; hdl != NULL; hdl = hdl->c_next) {
4405 					(void) devmap_unload(hdl->c_dhp,
4406 						    hdl->c_off, hdl->c_len);
4407 				}
4408 			}
4409 			seg->s_mapinfo = NULL;
4410 			seg->s_state = RSM_STATE_MAP_QUIESCE;
4411 			break;
4412 		case RSM_STATE_CONN_QUIESCE:
4413 			/* FALLTHRU */
4414 		case RSM_STATE_MAP_QUIESCE:
4415 			/* rsmseg_suspend already done for seg */
4416 			*susp_flg = 1;
4417 			break;
4418 		case RSM_STATE_DISCONNECT:
4419 			break;
4420 		default:
4421 			ASSERT(0); /* invalid state */
4422 		}
4423 	} while (recheck_state);
4424 
4425 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4426 }
4427 
4428 static void
4429 rsmsegshare_suspend(rsmseg_t *seg)
4430 {
4431 	int			e;
4432 	adapter_t		*adapter;
4433 	rsm_import_share_t	*sharedp;
4434 	DBG_DEFINE(category,
4435 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4436 
4437 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4438 	    "rsmsegshare_suspend enter\n"));
4439 
4440 	rsmseglock_acquire(seg);
4441 	rsmsharelock_acquire(seg);
4442 
4443 	sharedp = seg->s_share;
4444 	adapter = seg->s_adapter;
4445 	switch (sharedp->rsmsi_state) {
4446 	case RSMSI_STATE_NEW:
4447 		break;
4448 	case RSMSI_STATE_CONNECTING:
4449 		sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4450 		break;
4451 	case RSMSI_STATE_ABORT_CONNECT:
4452 		break;
4453 	case RSMSI_STATE_CONNECTED:
4454 		/* do the rsmpi disconnect */
4455 		if (sharedp->rsmsi_node != my_nodeid) {
4456 			e = adapter->rsmpi_ops->
4457 			    rsm_disconnect(sharedp->rsmsi_handle);
4458 
4459 			DBG_PRINTF((category, RSM_DEBUG,
4460 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4461 			    sharedp->rsmsi_segid, e));
4462 		}
4463 
4464 		sharedp->rsmsi_handle = NULL;
4465 
4466 		sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4467 		break;
4468 	case RSMSI_STATE_CONN_QUIESCE:
4469 		break;
4470 	case RSMSI_STATE_MAPPED:
4471 		/* do the rsmpi unmap and disconnect */
4472 		if (sharedp->rsmsi_node != my_nodeid) {
4473 			e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4474 
4475 			DBG_PRINTF((category, RSM_DEBUG,
4476 			    "rsmshare_suspend: rsmpi unmap %d\n", e));
4477 
4478 			e = adapter->rsmpi_ops->
4479 			    rsm_disconnect(sharedp->rsmsi_handle);
4480 			DBG_PRINTF((category, RSM_DEBUG,
4481 			    "rsm:rsmpi disconnect seg=%x:err=%d\n",
4482 			    sharedp->rsmsi_segid, e));
4483 		}
4484 
4485 		sharedp->rsmsi_handle = NULL;
4486 
4487 		sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4488 		break;
4489 	case RSMSI_STATE_MAP_QUIESCE:
4490 		break;
4491 	case RSMSI_STATE_DISCONNECTED:
4492 		break;
4493 	default:
4494 		ASSERT(0); /* invalid state */
4495 	}
4496 
4497 	rsmsharelock_release(seg);
4498 	rsmseglock_release(seg);
4499 
4500 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4501 	    "rsmsegshare_suspend done\n"));
4502 }
4503 
4504 /*
4505  * This should get called on receiving a RESUME message or from
4506  * the pathmanger if the node undergoing DR dies.
4507  */
4508 static void
4509 importer_resume(rsm_node_id_t src_node)
4510 {
4511 	int		i;
4512 	rsmresource_t	*p = NULL;
4513 	rsmhash_table_t *rhash = &rsm_import_segs;
4514 	void		*cookie;
4515 	DBG_DEFINE(category,
4516 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4517 
4518 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4519 
4520 	rw_enter(&rhash->rsmhash_rw, RW_READER);
4521 
4522 	for (i = 0; i < rsm_hash_size; i++) {
4523 		p = rhash->bucket[i];
4524 
4525 		for (; p; p = p->rsmrc_next) {
4526 			rsmseg_t *seg = (rsmseg_t *)p;
4527 
4528 			rsmseglock_acquire(seg);
4529 
4530 			/* process only importers of node undergoing DR */
4531 			if (seg->s_node != src_node) {
4532 				rsmseglock_release(seg);
4533 				continue;
4534 			}
4535 
4536 			if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4537 				rsmipc_request_t	request;
4538 				/*
4539 				 * rsmpi map/connect failed
4540 				 * inform the exporter so that it can
4541 				 * remove the importer.
4542 				 */
4543 				request.rsmipc_hdr.rsmipc_type =
4544 				    RSMIPC_MSG_NOTIMPORTING;
4545 				request.rsmipc_key = seg->s_segid;
4546 				request.rsmipc_segment_cookie = cookie;
4547 				rsmseglock_release(seg);
4548 				(void) rsmipc_send(seg->s_node, &request,
4549 					    RSM_NO_REPLY);
4550 			} else {
4551 				rsmseglock_release(seg);
4552 			}
4553 		}
4554 	}
4555 
4556 	rw_exit(&rhash->rsmhash_rw);
4557 
4558 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4559 }
4560 
4561 static int
4562 rsmseg_resume(rsmseg_t *seg, void **cookie)
4563 {
4564 	int			e;
4565 	int			retc;
4566 	off_t			dev_offset;
4567 	size_t			maplen;
4568 	uint_t			maxprot;
4569 	rsm_mapinfo_t		*p;
4570 	rsmcookie_t		*hdl;
4571 	rsm_import_share_t	*sharedp;
4572 	DBG_DEFINE(category,
4573 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4574 
4575 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4576 	    "rsmseg_resume enter: key=%u\n", seg->s_key));
4577 
4578 	*cookie = NULL;
4579 
4580 	ASSERT(rsmseglock_held(seg));
4581 
4582 	if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4583 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4584 		return (RSM_SUCCESS);
4585 	}
4586 
4587 	sharedp = seg->s_share;
4588 
4589 	rsmsharelock_acquire(seg);
4590 
4591 	/* resume the shared connection and/or mapping */
4592 	retc = rsmsegshare_resume(seg);
4593 
4594 	if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4595 		/* shared state can either be connected or mapped */
4596 		if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4597 		    (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4598 			ASSERT(retc == RSM_SUCCESS);
4599 			seg->s_handle.in = sharedp->rsmsi_handle;
4600 			rsmsharelock_release(seg);
4601 			seg->s_state = RSM_STATE_CONNECT;
4602 
4603 		} else { /* error in rsmpi connect during resume */
4604 			seg->s_handle.in = NULL;
4605 			seg->s_state = RSM_STATE_DISCONNECT;
4606 
4607 			sharedp->rsmsi_refcnt--;
4608 			cookie = (void *)sharedp->rsmsi_cookie;
4609 
4610 			if (sharedp->rsmsi_refcnt == 0) {
4611 				ASSERT(sharedp->rsmsi_mapcnt == 0);
4612 				rsmsharelock_release(seg);
4613 
4614 				/* clean up the shared data structure */
4615 				mutex_destroy(&sharedp->rsmsi_lock);
4616 				cv_destroy(&sharedp->rsmsi_cv);
4617 				kmem_free((void *)(sharedp),
4618 				    sizeof (rsm_import_share_t));
4619 
4620 			} else {
4621 				rsmsharelock_release(seg);
4622 			}
4623 			/*
4624 			 * The following needs to be done after any
4625 			 * rsmsharelock calls which use seg->s_share.
4626 			 */
4627 			seg->s_share = NULL;
4628 		}
4629 
4630 		/* signal any waiting segment */
4631 		cv_broadcast(&seg->s_cv);
4632 
4633 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4634 		    "rsmseg_resume done:state=%d\n", seg->s_state));
4635 		return (retc);
4636 	}
4637 
4638 	ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4639 
4640 	/* Setup protections for remap */
4641 	maxprot = PROT_USER;
4642 	if (seg->s_mode & RSM_PERM_READ) {
4643 		maxprot |= PROT_READ;
4644 	}
4645 	if (seg->s_mode & RSM_PERM_WRITE) {
4646 		maxprot |= PROT_WRITE;
4647 	}
4648 
4649 	if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4650 		/* error in rsmpi connect or map during resume */
4651 
4652 		/* remap to trash page */
4653 		ASSERT(seg->s_ckl != NULL);
4654 
4655 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4656 			e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4657 			    remap_cookie, hdl->c_off, hdl->c_len,
4658 			    maxprot, 0, NULL);
4659 
4660 			DBG_PRINTF((category, RSM_ERR,
4661 			    "rsmseg_resume:remap=%d\n", e));
4662 		}
4663 
4664 		seg->s_handle.in = NULL;
4665 		seg->s_state = RSM_STATE_DISCONNECT;
4666 
4667 		sharedp->rsmsi_refcnt--;
4668 
4669 		sharedp->rsmsi_mapcnt--;
4670 		seg->s_mapinfo = NULL;
4671 
4672 		if (sharedp->rsmsi_refcnt == 0) {
4673 			ASSERT(sharedp->rsmsi_mapcnt == 0);
4674 			rsmsharelock_release(seg);
4675 
4676 			/* clean up the shared data structure */
4677 			mutex_destroy(&sharedp->rsmsi_lock);
4678 			cv_destroy(&sharedp->rsmsi_cv);
4679 			kmem_free((void *)(sharedp),
4680 			    sizeof (rsm_import_share_t));
4681 
4682 		} else {
4683 			rsmsharelock_release(seg);
4684 		}
4685 		/*
4686 		 * The following needs to be done after any
4687 		 * rsmsharelock calls which use seg->s_share.
4688 		 */
4689 		seg->s_share = NULL;
4690 
4691 		/* signal any waiting segment */
4692 		cv_broadcast(&seg->s_cv);
4693 
4694 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4695 		    "rsmseg_resume done:seg=%x,err=%d\n",
4696 		    seg->s_key, retc));
4697 		return (retc);
4698 
4699 	}
4700 
4701 	seg->s_handle.in = sharedp->rsmsi_handle;
4702 
4703 	if (seg->s_node == my_nodeid) { /* loopback */
4704 		ASSERT(seg->s_mapinfo == NULL);
4705 
4706 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4707 			e = devmap_umem_remap(hdl->c_dhp,
4708 			    rsm_dip, seg->s_cookie,
4709 			    hdl->c_off, hdl->c_len,
4710 			    maxprot, 0, NULL);
4711 
4712 			DBG_PRINTF((category, RSM_ERR,
4713 			    "rsmseg_resume:remap=%d\n", e));
4714 		}
4715 	} else { /* remote exporter */
4716 		/* remap to the new rsmpi maps */
4717 		seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4718 
4719 		for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4720 			p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4721 			    &dev_offset, &maplen);
4722 			e = devmap_devmem_remap(hdl->c_dhp,
4723 			    p->dip, p->dev_register, dev_offset,
4724 			    maplen, maxprot, 0, NULL);
4725 
4726 			DBG_PRINTF((category, RSM_ERR,
4727 			    "rsmseg_resume:remap=%d\n", e));
4728 		}
4729 	}
4730 
4731 	rsmsharelock_release(seg);
4732 
4733 	seg->s_state = RSM_STATE_ACTIVE;
4734 	cv_broadcast(&seg->s_cv);
4735 
4736 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4737 
4738 	return (retc);
4739 }
4740 
4741 static int
4742 rsmsegshare_resume(rsmseg_t *seg)
4743 {
4744 	int			e = RSM_SUCCESS;
4745 	adapter_t		*adapter;
4746 	rsm_import_share_t	*sharedp;
4747 	DBG_DEFINE(category,
4748 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4749 
4750 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4751 
4752 	ASSERT(rsmseglock_held(seg));
4753 	ASSERT(rsmsharelock_held(seg));
4754 
4755 	sharedp = seg->s_share;
4756 
4757 	/*
4758 	 * If we are not in a xxxx_QUIESCE state that means shared
4759 	 * connect/mapping processing has been already been done
4760 	 * so return success.
4761 	 */
4762 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4763 	    (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4764 		return (RSM_SUCCESS);
4765 	}
4766 
4767 	adapter = seg->s_adapter;
4768 
4769 	if (sharedp->rsmsi_node != my_nodeid) {
4770 		rsm_addr_t	hwaddr;
4771 		hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4772 
4773 		e = adapter->rsmpi_ops->rsm_connect(
4774 		    adapter->rsmpi_handle, hwaddr,
4775 		    sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4776 
4777 		DBG_PRINTF((category, RSM_DEBUG,
4778 		    "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4779 		    sharedp->rsmsi_segid, e));
4780 
4781 		if (e != RSM_SUCCESS) {
4782 			/* when do we send the NOT_IMPORTING message */
4783 			sharedp->rsmsi_handle = NULL;
4784 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4785 			/* signal any waiting segment */
4786 			cv_broadcast(&sharedp->rsmsi_cv);
4787 			return (e);
4788 		}
4789 	}
4790 
4791 	if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4792 		sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4793 		/* signal any waiting segment */
4794 		cv_broadcast(&sharedp->rsmsi_cv);
4795 		return (e);
4796 	}
4797 
4798 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4799 
4800 	/* do the rsmpi map of the whole segment here */
4801 	if (sharedp->rsmsi_node != my_nodeid) {
4802 		size_t mapped_len;
4803 		rsm_mapinfo_t *p;
4804 
4805 		/*
4806 		 * We need to do rsmpi maps with <off, lens> identical to
4807 		 * the old mapinfo list because the segment mapping handles
4808 		 * dhp and such need the fragmentation of rsmpi maps to be
4809 		 * identical to what it was during the mmap of the segment
4810 		 */
4811 		p = sharedp->rsmsi_mapinfo;
4812 
4813 		while (p != NULL) {
4814 			mapped_len = 0;
4815 
4816 			e = adapter->rsmpi_ops->rsm_map(
4817 			    sharedp->rsmsi_handle, p->start_offset,
4818 			    p->individual_len, &mapped_len,
4819 			    &p->dip, &p->dev_register, &p->dev_offset,
4820 			    NULL, NULL);
4821 
4822 			if (e != 0) {
4823 				DBG_PRINTF((category, RSM_ERR,
4824 				    "rsmsegshare_resume: rsmpi map err=%d\n",
4825 				    e));
4826 				break;
4827 			}
4828 
4829 			if (mapped_len != p->individual_len) {
4830 				DBG_PRINTF((category, RSM_ERR,
4831 				    "rsmsegshare_resume: rsmpi maplen"
4832 				    "< reqlen=%lx\n", mapped_len));
4833 				e = RSMERR_BAD_LENGTH;
4834 				break;
4835 			}
4836 
4837 			p = p->next;
4838 
4839 		}
4840 
4841 
4842 		if (e != RSM_SUCCESS) { /* rsmpi map failed */
4843 			int	err;
4844 			/* Check if this is the first rsm_map */
4845 			if (p != sharedp->rsmsi_mapinfo) {
4846 				/*
4847 				 * A single rsm_unmap undoes multiple rsm_maps.
4848 				 */
4849 				(void) seg->s_adapter->rsmpi_ops->
4850 				    rsm_unmap(sharedp->rsmsi_handle);
4851 			}
4852 
4853 			rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4854 			sharedp->rsmsi_mapinfo = NULL;
4855 
4856 			err = adapter->rsmpi_ops->
4857 				    rsm_disconnect(sharedp->rsmsi_handle);
4858 
4859 			DBG_PRINTF((category, RSM_DEBUG,
4860 			    "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4861 			    sharedp->rsmsi_segid, err));
4862 
4863 			sharedp->rsmsi_handle = NULL;
4864 			sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4865 
4866 			/* signal the waiting segments */
4867 			cv_broadcast(&sharedp->rsmsi_cv);
4868 			DBG_PRINTF((category, RSM_DEBUG,
4869 			    "rsmsegshare_resume done: rsmpi map err\n"));
4870 			return (e);
4871 		}
4872 	}
4873 
4874 	sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4875 
4876 	/* signal any waiting segment */
4877 	cv_broadcast(&sharedp->rsmsi_cv);
4878 
4879 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4880 
4881 	return (e);
4882 }
4883 
4884 /*
4885  * this is the routine that gets called by recv_taskq which is the
4886  * thread that processes messages that are flow-controlled.
4887  */
4888 static void
4889 rsm_intr_proc_deferred(void *arg)
4890 {
4891 	path_t			*path = (path_t *)arg;
4892 	rsmipc_request_t	*msg;
4893 	rsmipc_msghdr_t		*msghdr;
4894 	rsm_node_id_t		src_node;
4895 	msgbuf_elem_t		*head;
4896 	int			e;
4897 	DBG_DEFINE(category,
4898 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4899 
4900 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4901 	    "rsm_intr_proc_deferred enter\n"));
4902 
4903 	mutex_enter(&path->mutex);
4904 
4905 	/* use the head of the msgbuf_queue */
4906 	head = rsmka_gethead_msgbuf(path);
4907 
4908 	mutex_exit(&path->mutex);
4909 
4910 	msg = (rsmipc_request_t *)&(head->msg);
4911 	msghdr = (rsmipc_msghdr_t *)msg;
4912 
4913 	src_node = msghdr->rsmipc_src;
4914 
4915 	/*
4916 	 * messages that need to send a reply should check the message version
4917 	 * before processing the message. And all messages that need to
4918 	 * send a reply should be processed here by the worker thread.
4919 	 */
4920 	switch (msghdr->rsmipc_type) {
4921 	case RSMIPC_MSG_SEGCONNECT:
4922 		if (msghdr->rsmipc_version != RSM_VERSION) {
4923 			rsmipc_reply_t reply;
4924 			reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4925 			reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4926 			reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4927 			(void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4928 		} else {
4929 			rsm_intr_segconnect(src_node, msg);
4930 		}
4931 		break;
4932 	case RSMIPC_MSG_DISCONNECT:
4933 		rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4934 		break;
4935 	case RSMIPC_MSG_SUSPEND:
4936 		importer_suspend(src_node);
4937 		break;
4938 	case RSMIPC_MSG_SUSPEND_DONE:
4939 		rsm_suspend_complete(src_node, 0);
4940 		break;
4941 	case RSMIPC_MSG_RESUME:
4942 		importer_resume(src_node);
4943 		break;
4944 	default:
4945 		ASSERT(0);
4946 	}
4947 
4948 	mutex_enter(&path->mutex);
4949 
4950 	rsmka_dequeue_msgbuf(path);
4951 
4952 	/* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4953 	if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4954 		path->procmsg_cnt++;
4955 
4956 	ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4957 
4958 	/* No need to send credits if path is going down */
4959 	if ((path->state == RSMKA_PATH_ACTIVE) &&
4960 	    (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4961 		/*
4962 		 * send credits and reset procmsg_cnt if success otherwise
4963 		 * credits will be sent after processing the next message
4964 		 */
4965 		e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4966 		if (e == 0)
4967 			path->procmsg_cnt = 0;
4968 		else
4969 			DBG_PRINTF((category, RSM_ERR,
4970 			    "rsm_intr_proc_deferred:send credits err=%d\n", e));
4971 	}
4972 
4973 	/*
4974 	 * decrement the path refcnt since we incremented it in
4975 	 * rsm_intr_callback_dispatch
4976 	 */
4977 	PATH_RELE_NOLOCK(path);
4978 
4979 	mutex_exit(&path->mutex);
4980 
4981 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4982 	    "rsm_intr_proc_deferred done\n"));
4983 }
4984 
4985 /*
4986  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4987  */
4988 static void
4989 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4990     rsm_intr_hand_arg_t arg)
4991 {
4992 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
4993 	path_t			*path;
4994 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4995 	DBG_DEFINE(category,
4996 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4997 
4998 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4999 	    "rsm_intr_callback_dispatch enter\n"));
5000 	ASSERT(data && hdlr_argp);
5001 
5002 	/* look up the path - incr the path refcnt */
5003 	path = rsm_find_path(hdlr_argp->adapter_name,
5004 	    hdlr_argp->adapter_instance, src_hwaddr);
5005 
5006 	/* the path has been removed - drop this message */
5007 	if (path == NULL) {
5008 		DBG_PRINTF((category, RSM_DEBUG,
5009 		    "rsm_intr_callback_dispatch done: msg dropped\n"));
5010 		return;
5011 	}
5012 	/* the path is not active - don't accept new messages */
5013 	if (path->state != RSMKA_PATH_ACTIVE) {
5014 		PATH_RELE_NOLOCK(path);
5015 		mutex_exit(&path->mutex);
5016 		DBG_PRINTF((category, RSM_DEBUG,
5017 		    "rsm_intr_callback_dispatch done: msg dropped"
5018 		    " path=%lx !ACTIVE\n", path));
5019 		return;
5020 	}
5021 
5022 	/*
5023 	 * Check if this message was sent to an older incarnation
5024 	 * of the path/sendq.
5025 	 */
5026 	if (path->local_incn != msghdr->rsmipc_incn) {
5027 		/* decrement the refcnt */
5028 		PATH_RELE_NOLOCK(path);
5029 		mutex_exit(&path->mutex);
5030 		DBG_PRINTF((category, RSM_DEBUG,
5031 		    "rsm_intr_callback_dispatch done: old incn %lld\n",
5032 		    msghdr->rsmipc_incn));
5033 		return;
5034 	}
5035 
5036 	/* copy and enqueue msg on the path's msgbuf queue */
5037 	rsmka_enqueue_msgbuf(path, data);
5038 
5039 	/*
5040 	 * schedule task to process messages - ignore retval from
5041 	 * task_dispatch because we sender cannot send more than
5042 	 * what receiver can handle.
5043 	 */
5044 	(void) taskq_dispatch(path->recv_taskq,
5045 	    rsm_intr_proc_deferred, path, KM_NOSLEEP);
5046 
5047 	mutex_exit(&path->mutex);
5048 
5049 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5050 	    "rsm_intr_callback_dispatch done\n"));
5051 }
5052 
5053 /*
5054  * This procedure is called from rsm_srv_func when a remote node creates a
5055  * a send queue.  This event is used as a hint that an  earlier failed
5056  * attempt to create a send queue to that remote node may now succeed and
5057  * should be retried.  Indication of an earlier failed attempt is provided
5058  * by the RSMKA_SQCREATE_PENDING flag.
5059  */
5060 static void
5061 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5062 {
5063 	srv_handler_arg_t	*hdlr_argp = (srv_handler_arg_t *)arg;
5064 	path_t			*path;
5065 	DBG_DEFINE(category,
5066 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5067 
5068 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5069 	    "rsm_sqcreateop_callback enter\n"));
5070 
5071 	/* look up the path - incr the path refcnt */
5072 	path = rsm_find_path(hdlr_argp->adapter_name,
5073 	    hdlr_argp->adapter_instance, src_hwaddr);
5074 
5075 	if (path == NULL) {
5076 		DBG_PRINTF((category, RSM_DEBUG,
5077 		    "rsm_sqcreateop_callback done: no path\n"));
5078 		return;
5079 	}
5080 
5081 	if ((path->state == RSMKA_PATH_UP) &&
5082 	    (path->flags & RSMKA_SQCREATE_PENDING)) {
5083 		/*
5084 		 * previous attempt to create sendq had failed, retry
5085 		 * it and move to RSMKA_PATH_ACTIVE state if successful.
5086 		 * the refcnt will be decremented in the do_deferred_work
5087 		 */
5088 		(void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5089 	} else {
5090 		/* decrement the refcnt */
5091 		PATH_RELE_NOLOCK(path);
5092 	}
5093 	mutex_exit(&path->mutex);
5094 
5095 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5096 	    "rsm_sqcreateop_callback done\n"));
5097 }
5098 
5099 static void
5100 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5101 {
5102 	rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5103 	rsmipc_request_t *msg = (rsmipc_request_t *)data;
5104 	rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5105 	rsm_node_id_t src_node;
5106 	DBG_DEFINE(category,
5107 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5108 
5109 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5110 	    "src=%d, type=%d\n", msghdr->rsmipc_src,
5111 	    msghdr->rsmipc_type));
5112 
5113 	/*
5114 	 * Check for the version number in the msg header. If it is not
5115 	 * RSM_VERSION, drop the message. In the future, we need to manage
5116 	 * incompatible version numbers in some way
5117 	 */
5118 	if (msghdr->rsmipc_version != RSM_VERSION) {
5119 		DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5120 		/*
5121 		 * Drop requests that don't have a reply right here
5122 		 * Request with reply will send a BAD_VERSION reply
5123 		 * when they get processed by the worker thread.
5124 		 */
5125 		if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5126 			return;
5127 		}
5128 
5129 	}
5130 
5131 	src_node = msghdr->rsmipc_src;
5132 
5133 	switch (msghdr->rsmipc_type) {
5134 	case RSMIPC_MSG_SEGCONNECT:
5135 	case RSMIPC_MSG_DISCONNECT:
5136 	case RSMIPC_MSG_SUSPEND:
5137 	case RSMIPC_MSG_SUSPEND_DONE:
5138 	case RSMIPC_MSG_RESUME:
5139 		/*
5140 		 * These message types are handled by a worker thread using
5141 		 * the flow-control algorithm.
5142 		 * Any message processing that does one or more of the
5143 		 * following should be handled in a worker thread.
5144 		 *	- allocates resources and might sleep
5145 		 *	- makes RSMPI calls down to the interconnect driver
5146 		 *	this by defn include requests with reply.
5147 		 *	- takes a long duration of time
5148 		 */
5149 		rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5150 		break;
5151 	case RSMIPC_MSG_NOTIMPORTING:
5152 		importer_list_rm(src_node, msg->rsmipc_key,
5153 		    msg->rsmipc_segment_cookie);
5154 		break;
5155 	case RSMIPC_MSG_SQREADY:
5156 		rsm_proc_sqready(data, src_hwaddr, arg);
5157 		break;
5158 	case RSMIPC_MSG_SQREADY_ACK:
5159 		rsm_proc_sqready_ack(data, src_hwaddr, arg);
5160 		break;
5161 	case RSMIPC_MSG_CREDIT:
5162 		rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5163 		break;
5164 	case RSMIPC_MSG_REPLY:
5165 		rsm_intr_reply(msghdr);
5166 		break;
5167 	case RSMIPC_MSG_BELL:
5168 		rsm_intr_event(msg);
5169 		break;
5170 	case RSMIPC_MSG_IMPORTING:
5171 		importer_list_add(src_node, msg->rsmipc_key,
5172 		    msg->rsmipc_adapter_hwaddr,
5173 		    msg->rsmipc_segment_cookie);
5174 		break;
5175 	case RSMIPC_MSG_REPUBLISH:
5176 		importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5177 		break;
5178 	default:
5179 		DBG_PRINTF((category, RSM_DEBUG,
5180 		    "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5181 		    (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5182 	}
5183 
5184 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5185 
5186 }
5187 
5188 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5189     rsm_intr_q_op_t opcode, rsm_addr_t src,
5190     void *data, size_t size, rsm_intr_hand_arg_t arg)
5191 {
5192 	DBG_DEFINE(category,
5193 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5194 
5195 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5196 
5197 	switch (opcode) {
5198 	case RSM_INTR_Q_OP_CREATE:
5199 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5200 		rsm_sqcreateop_callback(src, arg);
5201 		break;
5202 	case RSM_INTR_Q_OP_DESTROY:
5203 		DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5204 		break;
5205 	case RSM_INTR_Q_OP_RECEIVE:
5206 		rsm_intr_callback(data, src, arg);
5207 		break;
5208 	default:
5209 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5210 		    "rsm_srv_func: unknown opcode = %x\n", opcode));
5211 	}
5212 
5213 	chd = chd;
5214 	size = size;
5215 
5216 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5217 
5218 	return (RSM_INTR_HAND_CLAIMED);
5219 }
5220 
5221 /* *************************** IPC slots ************************* */
5222 static rsmipc_slot_t *
5223 rsmipc_alloc()
5224 {
5225 	int i;
5226 	rsmipc_slot_t *slot;
5227 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5228 
5229 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5230 
5231 	/* try to find a free slot, if not wait */
5232 	mutex_enter(&rsm_ipc.lock);
5233 
5234 	while (rsm_ipc.count == 0) {
5235 		rsm_ipc.wanted = 1;
5236 		cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5237 	}
5238 
5239 	/* An empty slot is available, find it */
5240 	slot = &rsm_ipc.slots[0];
5241 	for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5242 		if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5243 			RSMIPC_CLEAR(slot, RSMIPC_FREE);
5244 			break;
5245 		}
5246 	}
5247 
5248 	ASSERT(i < RSMIPC_SZ);
5249 	rsm_ipc.count--;	/* one less is available */
5250 	rsm_ipc.sequence++; /* new sequence */
5251 
5252 	slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5253 	slot->rsmipc_cookie.ic.index = (uint_t)i;
5254 
5255 	mutex_exit(&rsm_ipc.lock);
5256 
5257 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5258 
5259 	return (slot);
5260 }
5261 
/*
 * rsmipc_free - give an IPC slot back to the free pool.
 *
 * The caller must hold slot->rsmipc_lock (asserted below).  That lock is
 * released by this routine, so the caller must not release it again.
 * The slot is flagged RSMIPC_FREE and its cookie sequence is reset to 0
 * under rsm_ipc.lock; if a thread flagged itself as waiting for a slot
 * (rsm_ipc.wanted) all waiters on rsm_ipc.cv are woken.
 */
static void
rsmipc_free(rsmipc_slot_t *slot)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));

	ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
	/* the cookie index must still identify this very slot */
	ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);

	mutex_enter(&rsm_ipc.lock);

	RSMIPC_SET(slot, RSMIPC_FREE);

	slot->rsmipc_cookie.ic.sequence = 0;

	/* slot lock is dropped only after the slot has been marked free */
	mutex_exit(&slot->rsmipc_lock);
	rsm_ipc.count++;
	ASSERT(rsm_ipc.count <= RSMIPC_SZ);
	if (rsm_ipc.wanted) {
		/* wake every thread sleeping in rsmipc_alloc */
		rsm_ipc.wanted = 0;
		cv_broadcast(&rsm_ipc.cv);
	}

	mutex_exit(&rsm_ipc.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
}
5290 
/*
 * rsmipc_send - deliver a kernel agent IPC message to node `dest'.
 *
 * The (req, reply) pair selects the mode of operation:
 *	reply == NULL	send the request and do not wait for a reply
 *			(callers in this file pass RSM_NO_REPLY)
 *	req == NULL	send `reply' as a reply message; no flow control
 *	both non-NULL	send the request, then wait on an IPC slot until
 *			the slot is no longer RSMIPC_PENDING
 *
 * When dest is the local node the message is not sent at all; the
 * matching handler is called directly and 0 is returned.
 * NOTE(review): the local case dereferences req unconditionally, so it
 * relies on never being used for a reply-only (req == NULL) call.
 *
 * For selected request types a message buffer credit is reserved on the
 * sendq token before sending (see credit_check below); the thread sleeps
 * interruptibly until a credit is available or the path leaves
 * RSMKA_PATH_ACTIVE.
 *
 * A failed send is retried through the `again' label with whatever sendq
 * token rsmka_get_sendq_token() returns next; after every min_retry_cnt
 * attempts the thread backs off for 10ms.
 *
 * Returns:
 *	0 / RSM_SUCCESS			local delivery, or the send succeeded
 *	RSMERR_REMOTE_NODE_UNREACHABLE	bad node number or no sendq token
 *	RSMERR_CONN_ABORTED		the same token was returned again
 *					after an abort/timeout/maybe-delivered
 *					error
 *	RSMERR_INTERRUPTED		a wait was interrupted by a signal
 *	RSMERR_TIMEOUT			possible from the reply-wait loop if it
 *					exits right after a timed-out wait
 */
static int
rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
{
	int		e = 0;
	int		credit_check = 0;
	int		retry_cnt = 0;
	int		min_retry_cnt = 10;
	clock_t		ticks;
	rsm_send_t	is;
	rsmipc_slot_t	*rslot;
	adapter_t	*adapter;
	path_t		*path;
	sendq_token_t	*sendq_token;
	sendq_token_t	*used_sendq_token = NULL;
	rsm_send_q_handle_t	ipc_handle;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
	    dest));

	/*
	 * Check if this is a local case
	 */
	if (dest == my_nodeid) {
		/* short-circuit: call the receive-side handler directly */
		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
			reply->rsmipc_status = (short)rsmsegacl_validate(
							    req, dest, reply);
			break;
		case RSMIPC_MSG_BELL:
			req->rsmipc_hdr.rsmipc_src = dest;
			rsm_intr_event(req);
			break;
		case RSMIPC_MSG_IMPORTING:
			importer_list_add(dest, req->rsmipc_key,
			    req->rsmipc_adapter_hwaddr,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_NOTIMPORTING:
			importer_list_rm(dest, req->rsmipc_key,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_REPUBLISH:
			importer_update(dest, req->rsmipc_key,
			    req->rsmipc_perm);
			break;
		case RSMIPC_MSG_SUSPEND:
			importer_suspend(dest);
			break;
		case RSMIPC_MSG_SUSPEND_DONE:
			rsm_suspend_complete(dest, 0);
			break;
		case RSMIPC_MSG_RESUME:
			importer_resume(dest);
			break;
		default:
			ASSERT(0);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send done\n"));
		return (0);
	}

	if (dest >= MAX_NODES) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send bad node number %x\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Oh boy! we are going remote.
	 */

	/*
	 * identify if we need to have credits to send this message
	 * - only selected requests are flow controlled
	 */
	if (req != NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send:request type=%d\n",
		    req->rsmipc_hdr.rsmipc_type));

		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
		case RSMIPC_MSG_DISCONNECT:
		case RSMIPC_MSG_IMPORTING:
		case RSMIPC_MSG_SUSPEND:
		case RSMIPC_MSG_SUSPEND_DONE:
		case RSMIPC_MSG_RESUME:
			credit_check = 1;
			break;
		default:
			credit_check = 0;
		}
	}

	/*
	 * Retry entry point.  Every path that jumps back here has already
	 * released its sendq token (and its IPC slot, if it had one).
	 */
again:
	if (retry_cnt++ == min_retry_cnt) {
		/* backoff before further retries for 10ms */
		delay(drv_usectohz(10000));
		retry_cnt = 0; /* reset retry_cnt */
	}
	/* used_sendq_token is NULL on the first pass, the last token after */
	sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
	if (sendq_token == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send no device to reach node %d\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Getting the token we just failed on back again, after one of
	 * these errors, ends the retries.
	 */
	if ((sendq_token == used_sendq_token) &&
	    ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
		(e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
		rele_sendq_token(sendq_token);
		DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
		return (RSMERR_CONN_ABORTED);
	} else
		used_sendq_token = sendq_token;

/* lint -save -e413 */
	path = SQ_TOKEN_TO_PATH(sendq_token);
	adapter = path->local_adapter;
/* lint -restore */
	ipc_handle = sendq_token->rsmpi_sendq_handle;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));

	if (reply == NULL) {
		/* Send request without ack */
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		/* the whole request structure goes out on the wire */
		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;

		if (credit_check) {
			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				/* 0 from cv_wait_sig: treated as a signal */
				if (e == 0) {
					mutex_exit(&path->mutex);
					no_reply_cnt++;
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* the path lock is not held across rsm_send */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		/* statistics counter, bumped for every no-reply attempt */
		no_reply_cnt++;
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send no reply send"
			    " err = %d no reply count = %d\n",
			    e, no_reply_cnt));
			ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
			    e != RSMERR_BAD_BARRIER_HNDL);
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}

	}

	if (req == NULL) {
		/* Send reply - No flow control is done for reply */
		/*
		 * Set the version in the msg header for KA communication
		 * versioning
		 */
		reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		reply->rsmipc_hdr.rsmipc_src = my_nodeid;
		/* incn number is not used for reply msgs currently */
		reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)reply;
		is.is_size = sizeof (*reply);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send reply send"
			    " err = %d\n", e));
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}
	}

	/* Reply needed */
	rslot = rsmipc_alloc(); /* allocate a new ipc slot */

	/*
	 * The slot lock is held from here on; every exit below goes
	 * through rsmipc_free(), which releases it.
	 */
	mutex_enter(&rslot->rsmipc_lock);

	rslot->rsmipc_data = (void *)reply;
	RSMIPC_SET(rslot, RSMIPC_PENDING);

	/*
	 * Send (and after a timed-out wait, resend) the request for as long
	 * as the slot stays RSMIPC_PENDING.  The flag is not cleared inside
	 * this loop; presumably the reply handler clears it - TODO confirm.
	 */
	while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		if (credit_check) {

			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then.
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				/* 0 from cv_wait_sig: treated as a signal */
				if (e == 0) {
					mutex_exit(&path->mutex);
					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
					rsmipc_free(rslot);
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				/* a fresh slot is allocated after the retry */
				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
				rsmipc_free(rslot);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* the path lock is not held across rsm_send */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
			/* give up slot and token, then retry from scratch */
			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
			rsmipc_free(rslot);
			rele_sendq_token(sendq_token);
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		}

		/* wait for a reply signal, a SIGINT, or 5 sec. timeout */
		(void) drv_getparm(LBOLT, &ticks);
		ticks += drv_usectohz(5000000);
		e = cv_timedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
			ticks);
		if (e < 0) {
			/* timed out - retry */
			e = RSMERR_TIMEOUT;
		} else if (e == 0) {
			/* signalled - return error */
			e = RSMERR_INTERRUPTED;
			break;
		} else {
			e = RSM_SUCCESS;
		}
	}

	/* common exit: release the slot (and its lock) and the token */
	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
	rsmipc_free(rslot);
	rele_sendq_token(sendq_token);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
	return (e);
}
5673 
5674 static int
5675 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5676 {
5677 	rsmipc_request_t request;
5678 
5679 	/*
5680 	 *  inform the exporter to delete this importer
5681 	 */
5682 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5683 	request.rsmipc_key = segid;
5684 	request.rsmipc_segment_cookie = cookie;
5685 	return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5686 }
5687 
5688 static void
5689 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t	*acl,
5690     int acl_len, rsm_permission_t default_permission)
5691 {
5692 	int			i;
5693 	importing_token_t	*token;
5694 	rsmipc_request_t	request;
5695 	republish_token_t	*republish_list = NULL;
5696 	republish_token_t	*rp;
5697 	rsm_permission_t	permission;
5698 	int			index;
5699 
5700 	/*
5701 	 * send the new access mode to all the nodes that have imported
5702 	 * this segment.
5703 	 * If the new acl does not have a node that was present in
5704 	 * the old acl a access permission of 0 is sent.
5705 	 */
5706 
5707 	index = rsmhash(segid);
5708 
5709 	/*
5710 	 * create a list of node/permissions to send the republish message
5711 	 */
5712 	mutex_enter(&importer_list.lock);
5713 
5714 	token = importer_list.bucket[index];
5715 	while (token != NULL) {
5716 		if (segid == token->key) {
5717 			permission = default_permission;
5718 
5719 			for (i = 0; i < acl_len; i++) {
5720 				if (token->importing_node == acl[i].ae_node) {
5721 					permission = acl[i].ae_permission;
5722 					break;
5723 				}
5724 			}
5725 			rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5726 
5727 			rp->key = segid;
5728 			rp->importing_node = token->importing_node;
5729 			rp->permission = permission;
5730 			rp->next = republish_list;
5731 			republish_list = rp;
5732 		}
5733 		token = token->next;
5734 	}
5735 
5736 	mutex_exit(&importer_list.lock);
5737 
5738 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5739 	request.rsmipc_key = segid;
5740 
5741 	while (republish_list != NULL) {
5742 		request.rsmipc_perm = republish_list->permission;
5743 		(void) rsmipc_send(republish_list->importing_node,
5744 		    &request, RSM_NO_REPLY);
5745 		rp = republish_list;
5746 		republish_list = republish_list->next;
5747 		kmem_free(rp, sizeof (republish_token_t));
5748 	}
5749 }
5750 
5751 static void
5752 rsm_send_suspend()
5753 {
5754 	int			i, e;
5755 	rsmipc_request_t 	request;
5756 	list_element_t		*tokp;
5757 	list_element_t		*head = NULL;
5758 	importing_token_t	*token;
5759 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5760 	    "rsm_send_suspend enter\n"));
5761 
5762 	/*
5763 	 * create a list of node to send the suspend message
5764 	 *
5765 	 * Currently the whole importer list is scanned and we obtain
5766 	 * all the nodes - this basically gets all nodes that at least
5767 	 * import one segment from the local node.
5768 	 *
5769 	 * no need to grab the rsm_suspend_list lock here since we are
5770 	 * single threaded when suspend is called.
5771 	 */
5772 
5773 	mutex_enter(&importer_list.lock);
5774 	for (i = 0; i < rsm_hash_size; i++) {
5775 
5776 		token = importer_list.bucket[i];
5777 
5778 		while (token != NULL) {
5779 
5780 			tokp = head;
5781 
5782 			/*
5783 			 * make sure that the token's node
5784 			 * is not already on the suspend list
5785 			 */
5786 			while (tokp != NULL) {
5787 				if (tokp->nodeid == token->importing_node) {
5788 					break;
5789 				}
5790 				tokp = tokp->next;
5791 			}
5792 
5793 			if (tokp == NULL) { /* not in suspend list */
5794 				tokp = kmem_zalloc(sizeof (list_element_t),
5795 						KM_SLEEP);
5796 				tokp->nodeid = token->importing_node;
5797 				tokp->next = head;
5798 				head = tokp;
5799 			}
5800 
5801 			token = token->next;
5802 		}
5803 	}
5804 	mutex_exit(&importer_list.lock);
5805 
5806 	if (head == NULL) { /* no importers so go ahead and quiesce segments */
5807 		exporter_quiesce();
5808 		return;
5809 	}
5810 
5811 	mutex_enter(&rsm_suspend_list.list_lock);
5812 	ASSERT(rsm_suspend_list.list_head == NULL);
5813 	/*
5814 	 * update the suspend list righaway so that if a node dies the
5815 	 * pathmanager can set the NODE dead flag
5816 	 */
5817 	rsm_suspend_list.list_head = head;
5818 	mutex_exit(&rsm_suspend_list.list_lock);
5819 
5820 	tokp = head;
5821 
5822 	while (tokp != NULL) {
5823 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5824 		e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5825 		/*
5826 		 * Error in rsmipc_send currently happens due to inaccessibility
5827 		 * of the remote node.
5828 		 */
5829 		if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5830 			tokp->flags |= RSM_SUSPEND_ACKPENDING;
5831 		}
5832 
5833 		tokp = tokp->next;
5834 	}
5835 
5836 	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5837 	    "rsm_send_suspend done\n"));
5838 
5839 }
5840 
5841 static void
5842 rsm_send_resume()
5843 {
5844 	rsmipc_request_t 	request;
5845 	list_element_t		*elem, *head;
5846 
5847 	/*
5848 	 * save the suspend list so that we know where to send
5849 	 * the resume messages and make the suspend list head
5850 	 * NULL.
5851 	 */
5852 	mutex_enter(&rsm_suspend_list.list_lock);
5853 	head = rsm_suspend_list.list_head;
5854 	rsm_suspend_list.list_head = NULL;
5855 	mutex_exit(&rsm_suspend_list.list_lock);
5856 
5857 	while (head != NULL) {
5858 		elem = head;
5859 		head = head->next;
5860 
5861 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5862 
5863 		(void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5864 
5865 		kmem_free((void *)elem, sizeof (list_element_t));
5866 
5867 	}
5868 
5869 }
5870 
5871 /*
5872  * This function takes path and sends a message using the sendq
5873  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5874  * and RSMIPC_MSG_CREDIT are sent using this function.
5875  */
5876 int
5877 rsmipc_send_controlmsg(path_t *path, int msgtype)
5878 {
5879 	int			e;
5880 	int			retry_cnt = 0;
5881 	int			min_retry_cnt = 10;
5882 	clock_t			timeout;
5883 	adapter_t		*adapter;
5884 	rsm_send_t		is;
5885 	rsm_send_q_handle_t	ipc_handle;
5886 	rsmipc_controlmsg_t	msg;
5887 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5888 
5889 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5890 	    "rsmipc_send_controlmsg enter\n"));
5891 
5892 	ASSERT(MUTEX_HELD(&path->mutex));
5893 
5894 	adapter = path->local_adapter;
5895 
5896 	DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5897 	    "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5898 	    my_nodeid, adapter->hwaddr, path->remote_node,
5899 	    path->remote_hwaddr, path->procmsg_cnt));
5900 
5901 	if (path->state != RSMKA_PATH_ACTIVE) {
5902 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5903 		    "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5904 		return (1);
5905 	}
5906 
5907 	ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5908 
5909 	msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5910 	msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5911 	msg.rsmipc_hdr.rsmipc_type = msgtype;
5912 	msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5913 
5914 	if (msgtype == RSMIPC_MSG_CREDIT)
5915 		msg.rsmipc_credits = path->procmsg_cnt;
5916 
5917 	msg.rsmipc_local_incn = path->local_incn;
5918 
5919 	msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5920 	/* incr the sendq, path refcnt */
5921 	PATH_HOLD_NOLOCK(path);
5922 	SENDQ_TOKEN_HOLD(path);
5923 
5924 	do {
5925 		/* drop the path lock before doing the rsm_send */
5926 		mutex_exit(&path->mutex);
5927 
5928 		is.is_data = (void *)&msg;
5929 		is.is_size = sizeof (msg);
5930 		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5931 		is.is_wait = 0;
5932 
5933 		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5934 
5935 		ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5936 		    e != RSMERR_BAD_BARRIER_HNDL);
5937 
5938 		mutex_enter(&path->mutex);
5939 
5940 		if (e == RSM_SUCCESS) {
5941 			break;
5942 		}
5943 		/* error counter for statistics */
5944 		atomic_add_64(&rsm_ctrlmsg_errcnt, 1);
5945 
5946 		DBG_PRINTF((category, RSM_ERR,
5947 		    "rsmipc_send_controlmsg:rsm_send error=%d", e));
5948 
5949 		if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5950 			timeout  = ddi_get_lbolt() + drv_usectohz(10000);
5951 			(void) cv_timedwait(&path->sendq_token.sendq_cv,
5952 			    &path->mutex, timeout);
5953 			retry_cnt = 0;
5954 		}
5955 	} while (path->state == RSMKA_PATH_ACTIVE);
5956 
5957 	/* decrement the sendq,path refcnt that we incr before rsm_send */
5958 	SENDQ_TOKEN_RELE(path);
5959 	PATH_RELE_NOLOCK(path);
5960 
5961 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5962 	    "rsmipc_send_controlmsg done=%d", e));
5963 	return (e);
5964 }
5965 
5966 /*
5967  * Called from rsm_force_unload and path_importer_disconnect. The memory
5968  * mapping for the imported segment is removed and the segment is
5969  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5970  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5971  * and FALSE from rsm_rebind.
5972  *
5973  * When subsequent accesses cause page faulting, the dummy page is mapped
5974  * to resolve the fault, and the mapping generation number is incremented
5975  * so that the application can be notified on a close barrier operation.
5976  *
5977  * It is important to note that the caller of rsmseg_unload is responsible for
5978  * acquiring the segment lock before making a call to rsmseg_unload. This is
5979  * required to make the caller and rsmseg_unload thread safe. The segment lock
5980  * will be released by the rsmseg_unload function.
5981  */
5982 void
5983 rsmseg_unload(rsmseg_t *im_seg)
5984 {
5985 	rsmcookie_t		*hdl;
5986 	void			*shared_cookie;
5987 	rsmipc_request_t	request;
5988 	uint_t			maxprot;
5989 
5990 	DBG_DEFINE(category,
5991 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5992 
5993 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5994 
5995 	ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5996 
5997 	/* wait until segment leaves the mapping state */
5998 	while (im_seg->s_state == RSM_STATE_MAPPING)
5999 		cv_wait(&im_seg->s_cv, &im_seg->s_lock);
6000 	/*
6001 	 * An unload is only necessary if the segment is connected. However,
6002 	 * if the segment was on the import list in state RSM_STATE_CONNECTING
6003 	 * then a connection was in progress. Change to RSM_STATE_NEW
6004 	 * here to cause an early exit from the connection process.
6005 	 */
6006 	if (im_seg->s_state == RSM_STATE_NEW) {
6007 		rsmseglock_release(im_seg);
6008 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6009 		    "rsmseg_unload done: RSM_STATE_NEW\n"));
6010 		return;
6011 	} else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6012 		im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6013 		rsmsharelock_acquire(im_seg);
6014 		im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6015 		rsmsharelock_release(im_seg);
6016 		rsmseglock_release(im_seg);
6017 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6018 		    "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6019 		return;
6020 	}
6021 
6022 	if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6023 		if (im_seg->s_ckl != NULL) {
6024 			int e;
6025 			/* Setup protections for remap */
6026 			maxprot = PROT_USER;
6027 			if (im_seg->s_mode & RSM_PERM_READ) {
6028 				maxprot |= PROT_READ;
6029 			}
6030 			if (im_seg->s_mode & RSM_PERM_WRITE) {
6031 				maxprot |= PROT_WRITE;
6032 			}
6033 			hdl = im_seg->s_ckl;
6034 			for (; hdl != NULL; hdl = hdl->c_next) {
6035 				e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6036 				    remap_cookie,
6037 				    hdl->c_off, hdl->c_len,
6038 				    maxprot, 0, NULL);
6039 
6040 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6041 				    "remap returns %d\n", e));
6042 			}
6043 		}
6044 
6045 		(void) rsm_closeconnection(im_seg, &shared_cookie);
6046 
6047 		if (shared_cookie != NULL) {
6048 			/*
6049 			 * inform the exporting node so this import
6050 			 * can be deleted from the list of importers.
6051 			 */
6052 			request.rsmipc_hdr.rsmipc_type =
6053 			    RSMIPC_MSG_NOTIMPORTING;
6054 			request.rsmipc_key = im_seg->s_segid;
6055 			request.rsmipc_segment_cookie = shared_cookie;
6056 			rsmseglock_release(im_seg);
6057 			(void) rsmipc_send(im_seg->s_node, &request,
6058 			    RSM_NO_REPLY);
6059 		} else {
6060 			rsmseglock_release(im_seg);
6061 		}
6062 	}
6063 	else
6064 		rsmseglock_release(im_seg);
6065 
6066 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6067 
6068 }
6069 
6070 /* ****************************** Importer Calls ************************ */
6071 
/*
 * Decide whether the credential `cr' may be granted the access bits
 * requested in `mode'.
 *
 * `perm' holds the permission bits being offered.  It is shifted left by
 * 0 bits when cr's uid equals `owner', by 3 bits when cr is a member of
 * `group', and by 6 bits otherwise, and every bit of `mode' covered by the
 * shifted value is considered satisfied.
 *
 * Returns 0 when no requested bit remains; otherwise returns whatever
 * secpolicy_rsm_access() decides for the remaining bits.
 */
static int
rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
{
	int	shift;
	int	unsatisfied;

	if (crgetuid(cr) == owner)
		shift = 0;
	else if (groupmember(group, cr))
		shift = 3;
	else
		shift = 6;

	unsatisfied = mode & ~(perm << shift);
	if (unsatisfied != 0)
		return (secpolicy_rsm_access(cr, owner, unsatisfied));

	return (0);
}
6090 
6091 
6092 static int
6093 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6094     intptr_t dataptr, int mode)
6095 {
6096 	int e;
6097 	int			recheck_state = 0;
6098 	void			*shared_cookie;
6099 	rsmipc_request_t	request;
6100 	rsmipc_reply_t		reply;
6101 	rsm_permission_t	access;
6102 	adapter_t		*adapter;
6103 	rsm_addr_t		addr = 0;
6104 	rsm_import_share_t	*sharedp;
6105 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6106 
6107 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6108 
6109 	adapter = rsm_getadapter(msg, mode);
6110 	if (adapter == NULL) {
6111 		DBG_PRINTF((category, RSM_ERR,
6112 		    "rsm_connect done:ENODEV adapter=NULL\n"));
6113 		return (RSMERR_CTLR_NOT_PRESENT);
6114 	}
6115 
6116 	if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6117 		rsmka_release_adapter(adapter);
6118 		DBG_PRINTF((category, RSM_ERR,
6119 		    "rsm_connect done:ENODEV loopback\n"));
6120 		return (RSMERR_CTLR_NOT_PRESENT);
6121 	}
6122 
6123 
6124 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6125 	ASSERT(seg->s_state == RSM_STATE_NEW);
6126 
6127 	/*
6128 	 * Translate perm to access
6129 	 */
6130 	if (msg->perm & ~RSM_PERM_RDWR) {
6131 		rsmka_release_adapter(adapter);
6132 		DBG_PRINTF((category, RSM_ERR,
6133 		    "rsm_connect done:EINVAL invalid perms\n"));
6134 		return (RSMERR_BAD_PERMS);
6135 	}
6136 	access = 0;
6137 	if (msg->perm & RSM_PERM_READ)
6138 		access |= RSM_ACCESS_READ;
6139 	if (msg->perm & RSM_PERM_WRITE)
6140 		access |= RSM_ACCESS_WRITE;
6141 
6142 	seg->s_node = msg->nodeid;
6143 
6144 	/*
6145 	 * Adding to the import list locks the segment; release the segment
6146 	 * lock so we can get the reply for the send.
6147 	 */
6148 	e = rsmimport_add(seg, msg->key);
6149 	if (e) {
6150 		rsmka_release_adapter(adapter);
6151 		DBG_PRINTF((category, RSM_ERR,
6152 		    "rsm_connect done:rsmimport_add failed %d\n", e));
6153 		return (e);
6154 	}
6155 	seg->s_state = RSM_STATE_CONNECTING;
6156 
6157 	/*
6158 	 * Set the s_adapter field here so as to have a valid comparison of
6159 	 * the adapter and the s_adapter value during rsmshare_get. For
6160 	 * any error, set s_adapter to NULL before doing a release_adapter
6161 	 */
6162 	seg->s_adapter = adapter;
6163 
6164 	rsmseglock_release(seg);
6165 
6166 	/*
6167 	 * get the pointer to the shared data structure; the
6168 	 * shared data is locked and refcount has been incremented
6169 	 */
6170 	sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6171 
6172 	ASSERT(rsmsharelock_held(seg));
6173 
6174 	do {
6175 		/* flag indicates whether we need to recheck the state */
6176 		recheck_state = 0;
6177 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6178 		    "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6179 		switch (sharedp->rsmsi_state) {
6180 		case RSMSI_STATE_NEW:
6181 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6182 			break;
6183 		case RSMSI_STATE_CONNECTING:
6184 			/* FALLTHRU */
6185 		case RSMSI_STATE_CONN_QUIESCE:
6186 			/* FALLTHRU */
6187 		case RSMSI_STATE_MAP_QUIESCE:
6188 			/* wait for the state to change */
6189 			while ((sharedp->rsmsi_state ==
6190 			    RSMSI_STATE_CONNECTING) ||
6191 			    (sharedp->rsmsi_state ==
6192 			    RSMSI_STATE_CONN_QUIESCE) ||
6193 			    (sharedp->rsmsi_state ==
6194 			    RSMSI_STATE_MAP_QUIESCE)) {
6195 				if (cv_wait_sig(&sharedp->rsmsi_cv,
6196 				    &sharedp->rsmsi_lock) == 0) {
6197 					/* signalled - clean up and return */
6198 					rsmsharelock_release(seg);
6199 					rsmimport_rm(seg);
6200 					seg->s_adapter = NULL;
6201 					rsmka_release_adapter(adapter);
6202 					seg->s_state = RSM_STATE_NEW;
6203 					DBG_PRINTF((category, RSM_ERR,
6204 					    "rsm_connect done: INTERRUPTED\n"));
6205 					return (RSMERR_INTERRUPTED);
6206 				}
6207 			}
6208 			/*
6209 			 * the state changed, loop back and check what it is
6210 			 */
6211 			recheck_state = 1;
6212 			break;
6213 		case RSMSI_STATE_ABORT_CONNECT:
6214 			/* exit the loop and clean up further down */
6215 			break;
6216 		case RSMSI_STATE_CONNECTED:
6217 			/* already connected, good - fall through */
6218 		case RSMSI_STATE_MAPPED:
6219 			/* already mapped, wow - fall through */
6220 			/* access validation etc is done further down */
6221 			break;
6222 		case RSMSI_STATE_DISCONNECTED:
6223 			/* disconnected - so reconnect now */
6224 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6225 			break;
6226 		default:
6227 			ASSERT(0); /* Invalid State */
6228 		}
6229 	} while (recheck_state);
6230 
6231 	if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6232 		/* we are the first to connect */
6233 		rsmsharelock_release(seg);
6234 
6235 		if (msg->nodeid != my_nodeid) {
6236 			addr = get_remote_hwaddr(adapter, msg->nodeid);
6237 
6238 			if ((int64_t)addr < 0) {
6239 				rsmsharelock_acquire(seg);
6240 				rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6241 				    RSMSI_STATE_NEW);
6242 				rsmsharelock_release(seg);
6243 				rsmimport_rm(seg);
6244 				seg->s_adapter = NULL;
6245 				rsmka_release_adapter(adapter);
6246 				seg->s_state = RSM_STATE_NEW;
6247 				DBG_PRINTF((category, RSM_ERR,
6248 				    "rsm_connect done: hwaddr<0\n"));
6249 				return (RSMERR_INTERNAL_ERROR);
6250 			}
6251 		} else {
6252 			addr = adapter->hwaddr;
6253 		}
6254 
6255 		/*
6256 		 * send request to node [src, dest, key, msgid] and get back
6257 		 * [status, msgid, cookie]
6258 		 */
6259 		request.rsmipc_key = msg->key;
6260 		/*
6261 		 * we need the s_mode of the exporter so pass
6262 		 * RSM_ACCESS_TRUSTED
6263 		 */
6264 		request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6265 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6266 		request.rsmipc_adapter_hwaddr = addr;
6267 		request.rsmipc_segment_cookie = sharedp;
6268 
6269 		e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6270 		if (e) {
6271 			rsmsharelock_acquire(seg);
6272 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6273 			    RSMSI_STATE_NEW);
6274 			rsmsharelock_release(seg);
6275 			rsmimport_rm(seg);
6276 			seg->s_adapter = NULL;
6277 			rsmka_release_adapter(adapter);
6278 			seg->s_state = RSM_STATE_NEW;
6279 			DBG_PRINTF((category, RSM_ERR,
6280 			    "rsm_connect done:rsmipc_send failed %d\n", e));
6281 			return (e);
6282 		}
6283 
6284 		if (reply.rsmipc_status != RSM_SUCCESS) {
6285 			rsmsharelock_acquire(seg);
6286 			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6287 			    RSMSI_STATE_NEW);
6288 			rsmsharelock_release(seg);
6289 			rsmimport_rm(seg);
6290 			seg->s_adapter = NULL;
6291 			rsmka_release_adapter(adapter);
6292 			seg->s_state = RSM_STATE_NEW;
6293 			DBG_PRINTF((category, RSM_ERR,
6294 			    "rsm_connect done:rsmipc_send reply err %d\n",
6295 			    reply.rsmipc_status));
6296 			return (reply.rsmipc_status);
6297 		}
6298 
6299 		rsmsharelock_acquire(seg);
6300 		/* store the information recvd into the shared data struct */
6301 		sharedp->rsmsi_mode = reply.rsmipc_mode;
6302 		sharedp->rsmsi_uid = reply.rsmipc_uid;
6303 		sharedp->rsmsi_gid = reply.rsmipc_gid;
6304 		sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6305 		sharedp->rsmsi_cookie = sharedp;
6306 	}
6307 
6308 	rsmsharelock_release(seg);
6309 
6310 	/*
6311 	 * Get the segment lock and check for a force disconnect
6312 	 * from the export side which would have changed the state
6313 	 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6314 	 * force disconnect will be held off until the connection
6315 	 * has completed.
6316 	 */
6317 	rsmseglock_acquire(seg);
6318 	rsmsharelock_acquire(seg);
6319 	ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6320 	    seg->s_state == RSM_STATE_ABORT_CONNECT);
6321 
6322 	shared_cookie = sharedp->rsmsi_cookie;
6323 
6324 	if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6325 	    (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6326 		seg->s_state = RSM_STATE_NEW;
6327 		seg->s_adapter = NULL;
6328 		rsmsharelock_release(seg);
6329 		rsmseglock_release(seg);
6330 		rsmimport_rm(seg);
6331 		rsmka_release_adapter(adapter);
6332 
6333 		rsmsharelock_acquire(seg);
6334 		if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6335 			/*
6336 			 * set a flag indicating abort handling has been
6337 			 * done
6338 			 */
6339 			sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6340 			rsmsharelock_release(seg);
6341 			/* send a message to exporter - only once */
6342 			(void) rsm_send_notimporting(msg->nodeid,
6343 			    msg->key, shared_cookie);
6344 			rsmsharelock_acquire(seg);
6345 			/*
6346 			 * wake up any waiting importers and inform that
6347 			 * connection has been aborted
6348 			 */
6349 			cv_broadcast(&sharedp->rsmsi_cv);
6350 		}
6351 		rsmsharelock_release(seg);
6352 
6353 		DBG_PRINTF((category, RSM_ERR,
6354 		    "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6355 		return (RSMERR_INTERRUPTED);
6356 	}
6357 
6358 
6359 	/*
6360 	 * We need to verify that this process has access
6361 	 */
6362 	e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6363 			    access & sharedp->rsmsi_mode,
6364 			    (int)(msg->perm & RSM_PERM_RDWR), cred);
6365 	if (e) {
6366 		rsmsharelock_release(seg);
6367 		seg->s_state = RSM_STATE_NEW;
6368 		seg->s_adapter = NULL;
6369 		rsmseglock_release(seg);
6370 		rsmimport_rm(seg);
6371 		rsmka_release_adapter(adapter);
6372 		/*
6373 		 * No need to lock segment it has been removed
6374 		 * from the hash table
6375 		 */
6376 		rsmsharelock_acquire(seg);
6377 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6378 			rsmsharelock_release(seg);
6379 			/* this is the first importer */
6380 
6381 			(void) rsm_send_notimporting(msg->nodeid, msg->key,
6382 			    shared_cookie);
6383 			rsmsharelock_acquire(seg);
6384 			sharedp->rsmsi_state = RSMSI_STATE_NEW;
6385 			cv_broadcast(&sharedp->rsmsi_cv);
6386 		}
6387 		rsmsharelock_release(seg);
6388 
6389 		DBG_PRINTF((category, RSM_ERR,
6390 		    "rsm_connect done: ipcaccess failed\n"));
6391 		return (RSMERR_PERM_DENIED);
6392 	}
6393 
6394 	/* update state and cookie */
6395 	seg->s_segid = sharedp->rsmsi_segid;
6396 	seg->s_len = sharedp->rsmsi_seglen;
6397 	seg->s_mode = access & sharedp->rsmsi_mode;
6398 	seg->s_pid = ddi_get_pid();
6399 	seg->s_mapinfo = NULL;
6400 
6401 	if (seg->s_node != my_nodeid) {
6402 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6403 			e = adapter->rsmpi_ops->rsm_connect(
6404 			    adapter->rsmpi_handle,
6405 			    addr, seg->s_segid, &sharedp->rsmsi_handle);
6406 
6407 			if (e != RSM_SUCCESS) {
6408 				seg->s_state = RSM_STATE_NEW;
6409 				seg->s_adapter = NULL;
6410 				rsmsharelock_release(seg);
6411 				rsmseglock_release(seg);
6412 				rsmimport_rm(seg);
6413 				rsmka_release_adapter(adapter);
6414 				/*
6415 				 *  inform the exporter to delete this importer
6416 				 */
6417 				(void) rsm_send_notimporting(msg->nodeid,
6418 				    msg->key, shared_cookie);
6419 
6420 				/*
6421 				 * Now inform any waiting importers to
6422 				 * retry connect. This needs to be done
6423 				 * after sending notimporting so that
6424 				 * the notimporting is sent before a waiting
6425 				 * importer sends a segconnect while retrying
6426 				 *
6427 				 * No need to lock segment it has been removed
6428 				 * from the hash table
6429 				 */
6430 
6431 				rsmsharelock_acquire(seg);
6432 				sharedp->rsmsi_state = RSMSI_STATE_NEW;
6433 				cv_broadcast(&sharedp->rsmsi_cv);
6434 				rsmsharelock_release(seg);
6435 
6436 				DBG_PRINTF((category, RSM_ERR,
6437 				    "rsm_connect error %d\n", e));
6438 				if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6439 					return (
6440 					    RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6441 				else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6442 					(e == RSMERR_UNKNOWN_RSM_ADDR))
6443 					return (RSMERR_REMOTE_NODE_UNREACHABLE);
6444 				else
6445 					return (e);
6446 			}
6447 
6448 		}
6449 		seg->s_handle.in = sharedp->rsmsi_handle;
6450 
6451 	}
6452 
6453 	seg->s_state = RSM_STATE_CONNECT;
6454 
6455 
6456 	seg->s_flags &= ~RSM_IMPORT_DUMMY;	/* clear dummy flag */
6457 	if (bar_va) {
6458 		/* increment generation number on barrier page */
6459 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6460 		/* return user off into barrier page where status will be */
6461 		msg->off = (int)seg->s_hdr.rsmrc_num;
6462 		msg->gnum = bar_va[msg->off]; 	/* gnum race */
6463 	} else {
6464 		msg->off = 0;
6465 		msg->gnum = 0;	/* gnum race */
6466 	}
6467 
6468 	msg->len = (int)sharedp->rsmsi_seglen;
6469 	msg->rnum = seg->s_minor;
6470 	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6471 	rsmsharelock_release(seg);
6472 	rsmseglock_release(seg);
6473 
6474 	/* Return back to user the segment size & perm in case it's needed */
6475 
6476 #ifdef _MULTI_DATAMODEL
6477 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6478 		rsm_ioctlmsg32_t msg32;
6479 
6480 		if (msg->len > UINT_MAX)
6481 			msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6482 		else
6483 			msg32.len = msg->len;
6484 		msg32.off = msg->off;
6485 		msg32.perm = msg->perm;
6486 		msg32.gnum = msg->gnum;
6487 		msg32.rnum = msg->rnum;
6488 
6489 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6490 		    "rsm_connect done\n"));
6491 
6492 		if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6493 		    sizeof (msg32), mode))
6494 			return (RSMERR_BAD_ADDR);
6495 		else
6496 			return (RSM_SUCCESS);
6497 	}
6498 #endif
6499 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6500 
6501 	if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6502 	    mode))
6503 		return (RSMERR_BAD_ADDR);
6504 	else
6505 		return (RSM_SUCCESS);
6506 }
6507 
6508 static int
6509 rsm_unmap(rsmseg_t *seg)
6510 {
6511 	int			err;
6512 	adapter_t		*adapter;
6513 	rsm_import_share_t	*sharedp;
6514 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6515 
6516 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6517 	    "rsm_unmap enter %u\n", seg->s_segid));
6518 
6519 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6520 
6521 	/* assert seg is locked */
6522 	ASSERT(rsmseglock_held(seg));
6523 	ASSERT(seg->s_state != RSM_STATE_MAPPING);
6524 
6525 	if ((seg->s_state != RSM_STATE_ACTIVE) &&
6526 	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6527 		/* segment unmap has already been done */
6528 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6529 		return (RSM_SUCCESS);
6530 	}
6531 
6532 	sharedp = seg->s_share;
6533 
6534 	rsmsharelock_acquire(seg);
6535 
6536 	/*
6537 	 *	- shared data struct is in MAPPED or MAP_QUIESCE state
6538 	 */
6539 
6540 	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6541 	    sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6542 
6543 	/*
6544 	 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6545 	 * the segment cookie list was NULL; but it is always NULL when
6546 	 * called from rsmmap_unmap and won't be NULL when called for
6547 	 * a force disconnect - so the check for NULL cookie list was removed
6548 	 */
6549 
6550 	ASSERT(sharedp->rsmsi_mapcnt > 0);
6551 
6552 	sharedp->rsmsi_mapcnt--;
6553 
6554 	if (sharedp->rsmsi_mapcnt == 0) {
6555 		if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6556 			/* unmap the shared RSMPI mapping */
6557 			adapter = seg->s_adapter;
6558 			if (seg->s_node != my_nodeid) {
6559 				ASSERT(sharedp->rsmsi_handle != NULL);
6560 				err = adapter->rsmpi_ops->
6561 				    rsm_unmap(sharedp->rsmsi_handle);
6562 				DBG_PRINTF((category, RSM_DEBUG,
6563 				    "rsm_unmap: rsmpi unmap %d\n", err));
6564 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6565 				sharedp->rsmsi_mapinfo = NULL;
6566 			}
6567 			sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6568 		} else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6569 			sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6570 		}
6571 	}
6572 
6573 	rsmsharelock_release(seg);
6574 
6575 	/*
6576 	 * The s_cookie field is used to store the cookie returned from the
6577 	 * ddi_umem_lock when binding the pages for an export segment. This
6578 	 * is the primary use of the s_cookie field and does not normally
6579 	 * pertain to any importing segment except in the loopback case.
6580 	 * For the loopback case, the import segment and export segment are
6581 	 * on the same node, the s_cookie field of the segment structure for
6582 	 * the importer is initialized to the s_cookie field in the exported
6583 	 * segment during the map operation and is used during the call to
6584 	 * devmap_umem_setup for the import mapping.
6585 	 * Thus, during unmap, we simply need to set s_cookie to NULL to
6586 	 * indicate that the mapping no longer exists.
6587 	 */
6588 	seg->s_cookie = NULL;
6589 
6590 	seg->s_mapinfo = NULL;
6591 
6592 	if (seg->s_state == RSM_STATE_ACTIVE)
6593 		seg->s_state = RSM_STATE_CONNECT;
6594 	else
6595 		seg->s_state = RSM_STATE_CONN_QUIESCE;
6596 
6597 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6598 
6599 	return (RSM_SUCCESS);
6600 }
6601 
6602 /*
6603  * cookie returned here if not null indicates that it is
6604  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6605  * message.
6606  */
6607 static int
6608 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6609 {
6610 	int			e;
6611 	adapter_t		*adapter;
6612 	rsm_import_share_t	*sharedp;
6613 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6614 
6615 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6616 					"rsm_closeconnection enter\n"));
6617 
6618 	*cookie = (void *)NULL;
6619 
6620 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6621 
6622 	/* assert seg is locked */
6623 	ASSERT(rsmseglock_held(seg));
6624 
6625 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6626 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6627 		    "rsm_closeconnection done: already disconnected\n"));
6628 		return (RSM_SUCCESS);
6629 	}
6630 
6631 	/* wait for all putv/getv ops to get done */
6632 	while (seg->s_rdmacnt > 0) {
6633 		cv_wait(&seg->s_cv, &seg->s_lock);
6634 	}
6635 
6636 	(void) rsm_unmap(seg);
6637 
6638 	ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6639 	    seg->s_state == RSM_STATE_CONN_QUIESCE);
6640 
6641 	adapter = seg->s_adapter;
6642 	sharedp = seg->s_share;
6643 
6644 	ASSERT(sharedp != NULL);
6645 
6646 	rsmsharelock_acquire(seg);
6647 
6648 	/*
6649 	 * Disconnect on adapter
6650 	 *
6651 	 * The current algorithm is stateless, I don't have to contact
6652 	 * server when I go away. He only gives me permissions. Of course,
6653 	 * the adapters will talk to terminate the connect.
6654 	 *
6655 	 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6656 	 */
6657 	if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6658 	    (sharedp->rsmsi_node != my_nodeid)) {
6659 
6660 		if (sharedp->rsmsi_refcnt == 1) {
6661 			/* this is the last importer */
6662 			ASSERT(sharedp->rsmsi_mapcnt == 0);
6663 
6664 			e = adapter->rsmpi_ops->
6665 			    rsm_disconnect(sharedp->rsmsi_handle);
6666 			if (e != RSM_SUCCESS) {
6667 				DBG_PRINTF((category, RSM_DEBUG,
6668 				    "rsm:disconnect failed seg=%x:err=%d\n",
6669 				    seg->s_key, e));
6670 			}
6671 		}
6672 	}
6673 
6674 	seg->s_handle.in = NULL;
6675 
6676 	sharedp->rsmsi_refcnt--;
6677 
6678 	if (sharedp->rsmsi_refcnt == 0) {
6679 		*cookie = (void *)sharedp->rsmsi_cookie;
6680 		sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6681 		sharedp->rsmsi_handle = NULL;
6682 		rsmsharelock_release(seg);
6683 
6684 		/* clean up the shared data structure */
6685 		mutex_destroy(&sharedp->rsmsi_lock);
6686 		cv_destroy(&sharedp->rsmsi_cv);
6687 		kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6688 
6689 	} else {
6690 		rsmsharelock_release(seg);
6691 	}
6692 
6693 	/* increment generation number on barrier page */
6694 	if (bar_va) {
6695 		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
6696 	}
6697 
6698 	/*
6699 	 * The following needs to be done after any
6700 	 * rsmsharelock calls which use seg->s_share.
6701 	 */
6702 	seg->s_share = NULL;
6703 
6704 	seg->s_state = RSM_STATE_DISCONNECT;
6705 	/* signal anyone waiting in the CONN_QUIESCE state */
6706 	cv_broadcast(&seg->s_cv);
6707 
6708 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6709 	    "rsm_closeconnection done\n"));
6710 
6711 	return (RSM_SUCCESS);
6712 }
6713 
6714 int
6715 rsm_disconnect(rsmseg_t *seg)
6716 {
6717 	rsmipc_request_t	request;
6718 	void			*shared_cookie;
6719 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6720 
6721 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6722 
6723 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6724 
6725 	/* assert seg isn't locked */
6726 	ASSERT(!rsmseglock_held(seg));
6727 
6728 
6729 	/* Remove segment from imported list */
6730 	rsmimport_rm(seg);
6731 
6732 	/* acquire the segment */
6733 	rsmseglock_acquire(seg);
6734 
6735 	/* wait until segment leaves the mapping state */
6736 	while (seg->s_state == RSM_STATE_MAPPING)
6737 		cv_wait(&seg->s_cv, &seg->s_lock);
6738 
6739 	if (seg->s_state == RSM_STATE_DISCONNECT) {
6740 		seg->s_state = RSM_STATE_NEW;
6741 		rsmseglock_release(seg);
6742 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6743 		    "rsm_disconnect done: already disconnected\n"));
6744 		return (RSM_SUCCESS);
6745 	}
6746 
6747 	(void) rsm_closeconnection(seg, &shared_cookie);
6748 
6749 	/* update state */
6750 	seg->s_state = RSM_STATE_NEW;
6751 
6752 	if (shared_cookie != NULL) {
6753 		/*
6754 		 *  This is the last importer so inform the exporting node
6755 		 *  so this import can be deleted from the list of importers.
6756 		 */
6757 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6758 		request.rsmipc_key = seg->s_segid;
6759 		request.rsmipc_segment_cookie = shared_cookie;
6760 		rsmseglock_release(seg);
6761 		(void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6762 	} else {
6763 		rsmseglock_release(seg);
6764 	}
6765 
6766 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6767 
6768 	return (DDI_SUCCESS);
6769 }
6770 
6771 /*ARGSUSED*/
6772 static int
6773 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6774     struct pollhead **phpp)
6775 {
6776 	minor_t		rnum;
6777 	rsmresource_t	*res;
6778 	rsmseg_t 	*seg;
6779 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6780 
6781 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6782 
6783 	/* find minor, no lock */
6784 	rnum = getminor(dev);
6785 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
6786 
6787 	/* poll is supported only for export/import segments */
6788 	if ((res == NULL) || (res == RSMRC_RESERVED) ||
6789 	    (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6790 		return (ENXIO);
6791 	}
6792 
6793 	*reventsp = 0;
6794 
6795 	/*
6796 	 * An exported segment must be in state RSM_STATE_EXPORT; an
6797 	 * imported segment must be in state RSM_STATE_ACTIVE.
6798 	 */
6799 	seg = (rsmseg_t *)res;
6800 
6801 	if (seg->s_pollevent) {
6802 		*reventsp = POLLRDNORM;
6803 	} else if (!anyyet) {
6804 		/* cannot take segment lock here */
6805 		*phpp = &seg->s_poll;
6806 		seg->s_pollflag |= RSM_SEGMENT_POLL;
6807 	}
6808 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6809 	return (0);
6810 }
6811 
6812 
6813 
6814 /* ************************* IOCTL Commands ********************* */
6815 
6816 static rsmseg_t *
6817 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6818     rsm_resource_type_t type)
6819 {
6820 	/* get segment from resource handle */
6821 	rsmseg_t *seg;
6822 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6823 
6824 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6825 
6826 
6827 	if (res != RSMRC_RESERVED) {
6828 		seg = (rsmseg_t *)res;
6829 	} else {
6830 		/* Allocate segment now and bind it */
6831 		seg = rsmseg_alloc(rnum, credp);
6832 
6833 		/*
6834 		 * if DR pre-processing is going on or DR is in progress
6835 		 * then the new export segments should be in the NEW_QSCD state
6836 		 */
6837 		if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6838 			mutex_enter(&rsm_drv_data.drv_lock);
6839 			if ((rsm_drv_data.drv_state ==
6840 			    RSM_DRV_PREDEL_STARTED) ||
6841 			    (rsm_drv_data.drv_state ==
6842 			    RSM_DRV_PREDEL_COMPLETED) ||
6843 			    (rsm_drv_data.drv_state ==
6844 			    RSM_DRV_DR_IN_PROGRESS)) {
6845 				seg->s_state = RSM_STATE_NEW_QUIESCED;
6846 			}
6847 			mutex_exit(&rsm_drv_data.drv_lock);
6848 		}
6849 
6850 		rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6851 	}
6852 
6853 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6854 
6855 	return (seg);
6856 }
6857 
6858 static int
6859 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6860     int mode, cred_t *credp)
6861 {
6862 	int error;
6863 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6864 
6865 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6866 
6867 	arg = arg;
6868 	credp = credp;
6869 
6870 	ASSERT(seg != NULL);
6871 
6872 	switch (cmd) {
6873 	case RSM_IOCTL_BIND:
6874 		error = rsm_bind(seg, msg, arg, mode);
6875 		break;
6876 	case RSM_IOCTL_REBIND:
6877 		error = rsm_rebind(seg, msg);
6878 		break;
6879 	case RSM_IOCTL_UNBIND:
6880 		error = ENOTSUP;
6881 		break;
6882 	case RSM_IOCTL_PUBLISH:
6883 		error = rsm_publish(seg, msg, arg, mode);
6884 		break;
6885 	case RSM_IOCTL_REPUBLISH:
6886 		error = rsm_republish(seg, msg, mode);
6887 		break;
6888 	case RSM_IOCTL_UNPUBLISH:
6889 		error = rsm_unpublish(seg, 1);
6890 		break;
6891 	default:
6892 		error = EINVAL;
6893 		break;
6894 	}
6895 
6896 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6897 	    error));
6898 
6899 	return (error);
6900 }
6901 static int
6902 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6903     int mode, cred_t *credp)
6904 {
6905 	int error;
6906 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6907 
6908 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6909 
6910 	ASSERT(seg);
6911 
6912 	switch (cmd) {
6913 	case RSM_IOCTL_CONNECT:
6914 		error = rsm_connect(seg, msg, credp, arg, mode);
6915 		break;
6916 	default:
6917 		error = EINVAL;
6918 		break;
6919 	}
6920 
6921 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6922 	    error));
6923 	return (error);
6924 }
6925 
6926 static int
6927 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6928     int mode)
6929 {
6930 	int e;
6931 	adapter_t *adapter;
6932 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6933 
6934 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6935 
6936 
6937 	if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6938 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6939 		    "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6940 		return (RSMERR_CONN_ABORTED);
6941 	} else if (seg->s_node == my_nodeid) {
6942 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6943 		    "rsmbar_ioctl done: loopback\n"));
6944 		return (RSM_SUCCESS);
6945 	}
6946 
6947 	adapter = seg->s_adapter;
6948 
6949 	switch (cmd) {
6950 	case RSM_IOCTL_BAR_CHECK:
6951 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6952 		    "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6953 		return (bar_va ? RSM_SUCCESS : EINVAL);
6954 	case RSM_IOCTL_BAR_OPEN:
6955 		e = adapter->rsmpi_ops->
6956 		    rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6957 		break;
6958 	case RSM_IOCTL_BAR_ORDER:
6959 		e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6960 		break;
6961 	case RSM_IOCTL_BAR_CLOSE:
6962 		e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6963 		break;
6964 	default:
6965 		e = EINVAL;
6966 		break;
6967 	}
6968 
6969 	if (e == RSM_SUCCESS) {
6970 #ifdef _MULTI_DATAMODEL
6971 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6972 			rsm_ioctlmsg32_t msg32;
6973 			int i;
6974 
6975 			for (i = 0; i < 4; i++) {
6976 				msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6977 			}
6978 
6979 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6980 			    "rsmbar_ioctl done\n"));
6981 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6982 			    sizeof (msg32), mode))
6983 				return (RSMERR_BAD_ADDR);
6984 			else
6985 				return (RSM_SUCCESS);
6986 		}
6987 #endif
6988 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6989 		    "rsmbar_ioctl done\n"));
6990 		if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6991 		    sizeof (*msg), mode))
6992 			return (RSMERR_BAD_ADDR);
6993 		else
6994 			return (RSM_SUCCESS);
6995 	}
6996 
6997 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6998 	    "rsmbar_ioctl done: error=%d\n", e));
6999 
7000 	return (e);
7001 }
7002 
7003 /*
7004  * Ring the doorbell of the export segment to which this segment is
7005  * connected.
7006  */
7007 static int
7008 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7009 {
7010 	int e = 0;
7011 	rsmipc_request_t request;
7012 
7013 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7014 
7015 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7016 
7017 	request.rsmipc_key = seg->s_segid;
7018 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7019 	request.rsmipc_segment_cookie = NULL;
7020 	e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7021 
7022 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7023 	    "exportbell_ioctl done: %d\n", e));
7024 
7025 	return (e);
7026 }
7027 
7028 /*
7029  * Ring the doorbells of all segments importing this segment
7030  */
7031 static int
7032 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7033 {
7034 	importing_token_t	*token = NULL;
7035 	rsmipc_request_t	request;
7036 	int			index;
7037 
7038 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7039 
7040 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7041 
7042 	ASSERT(seg->s_state != RSM_STATE_NEW &&
7043 	    seg->s_state != RSM_STATE_NEW_QUIESCED);
7044 
7045 	request.rsmipc_key = seg->s_segid;
7046 	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7047 
7048 	index = rsmhash(seg->s_segid);
7049 
7050 	token = importer_list.bucket[index];
7051 
7052 	while (token != NULL) {
7053 		if (seg->s_key == token->key) {
7054 			request.rsmipc_segment_cookie =
7055 			    token->import_segment_cookie;
7056 			(void) rsmipc_send(token->importing_node,
7057 				    &request, RSM_NO_REPLY);
7058 		}
7059 		token = token->next;
7060 	}
7061 
7062 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7063 	    "importbell_ioctl done\n"));
7064 	return (RSM_SUCCESS);
7065 }
7066 
7067 static int
7068 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7069     rsm_poll_event_t **eventspp, int mode)
7070 {
7071 	rsm_poll_event_t	*evlist = NULL;
7072 	size_t			evlistsz;
7073 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7074 
7075 #ifdef _MULTI_DATAMODEL
7076 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7077 		int i;
7078 		rsm_consume_event_msg32_t cemsg32 = {0};
7079 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7080 		rsm_poll_event32_t	*evlist32;
7081 		size_t			evlistsz32;
7082 
7083 		/* copyin the ioctl message */
7084 		if (ddi_copyin(arg, (caddr_t)&cemsg32,
7085 		    sizeof (rsm_consume_event_msg32_t), mode)) {
7086 			DBG_PRINTF((category, RSM_ERR,
7087 			    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7088 			return (RSMERR_BAD_ADDR);
7089 		}
7090 		msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7091 		msgp->numents = (int)cemsg32.numents;
7092 
7093 		evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7094 		/*
7095 		 * If numents is large alloc events list on heap otherwise
7096 		 * use the address of array that was passed in.
7097 		 */
7098 		if (msgp->numents > RSM_MAX_POLLFDS) {
7099 			if (msgp->numents > max_segs) { /* validate numents */
7100 				DBG_PRINTF((category, RSM_ERR,
7101 				    "consumeevent_copyin: "
7102 				    "RSMERR_BAD_ARGS_ERRORS\n"));
7103 				return (RSMERR_BAD_ARGS_ERRORS);
7104 			}
7105 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7106 		} else {
7107 			evlist32 = event32;
7108 		}
7109 
7110 		/* copyin the seglist into the rsm_poll_event32_t array */
7111 		if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7112 		    evlistsz32, mode)) {
7113 			if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7114 				kmem_free(evlist32, evlistsz32);
7115 			}
7116 			DBG_PRINTF((category, RSM_ERR,
7117 			    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7118 			return (RSMERR_BAD_ADDR);
7119 		}
7120 
7121 		/* evlist and evlistsz are based on rsm_poll_event_t type */
7122 		evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7123 
7124 		if (msgp->numents > RSM_MAX_POLLFDS) {
7125 			evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7126 			*eventspp = evlist;
7127 		} else {
7128 			evlist = *eventspp;
7129 		}
7130 		/*
7131 		 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7132 		 * array
7133 		 */
7134 		for (i = 0; i < msgp->numents; i++) {
7135 			evlist[i].rnum = evlist32[i].rnum;
7136 			evlist[i].fdsidx = evlist32[i].fdsidx;
7137 			evlist[i].revent = evlist32[i].revent;
7138 		}
7139 		/* free the temp 32-bit event list */
7140 		if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7141 			kmem_free(evlist32, evlistsz32);
7142 		}
7143 
7144 		return (RSM_SUCCESS);
7145 	}
7146 #endif
7147 	/* copyin the ioctl message */
7148 	if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7149 	    mode)) {
7150 		DBG_PRINTF((category, RSM_ERR,
7151 		    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7152 		return (RSMERR_BAD_ADDR);
7153 	}
7154 	/*
7155 	 * If numents is large alloc events list on heap otherwise
7156 	 * use the address of array that was passed in.
7157 	 */
7158 	if (msgp->numents > RSM_MAX_POLLFDS) {
7159 		if (msgp->numents > max_segs) { /* validate numents */
7160 			DBG_PRINTF((category, RSM_ERR,
7161 			    "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7162 			return (RSMERR_BAD_ARGS_ERRORS);
7163 		}
7164 		evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7165 		evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7166 		*eventspp  = evlist;
7167 	}
7168 
7169 	/* copyin the seglist */
7170 	if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7171 	    sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7172 		if (evlist) {
7173 			kmem_free(evlist, evlistsz);
7174 			*eventspp = NULL;
7175 		}
7176 		DBG_PRINTF((category, RSM_ERR,
7177 		    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7178 		return (RSMERR_BAD_ADDR);
7179 	}
7180 
7181 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7182 	    "consumeevent_copyin done\n"));
7183 	return (RSM_SUCCESS);
7184 }
7185 
7186 static int
7187 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7188     rsm_poll_event_t *eventsp, int mode)
7189 {
7190 	size_t			evlistsz;
7191 	int			err = RSM_SUCCESS;
7192 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7193 
7194 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7195 	    "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7196 	    msgp->numents, eventsp));
7197 
7198 #ifdef _MULTI_DATAMODEL
7199 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7200 		int i;
7201 		rsm_poll_event32_t	event32[RSM_MAX_POLLFDS];
7202 		rsm_poll_event32_t	*evlist32;
7203 		size_t			evlistsz32;
7204 
7205 		evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7206 		if (msgp->numents > RSM_MAX_POLLFDS) {
7207 			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7208 		} else {
7209 			evlist32 = event32;
7210 		}
7211 
7212 		/*
7213 		 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7214 		 * array
7215 		 */
7216 		for (i = 0; i < msgp->numents; i++) {
7217 			evlist32[i].rnum = eventsp[i].rnum;
7218 			evlist32[i].fdsidx = eventsp[i].fdsidx;
7219 			evlist32[i].revent = eventsp[i].revent;
7220 		}
7221 
7222 		if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7223 		    evlistsz32, mode)) {
7224 			err = RSMERR_BAD_ADDR;
7225 		}
7226 
7227 		if (msgp->numents > RSM_MAX_POLLFDS) {
7228 			if (evlist32) {	/* free the temp 32-bit event list */
7229 				kmem_free(evlist32, evlistsz32);
7230 			}
7231 			/*
7232 			 * eventsp and evlistsz are based on rsm_poll_event_t
7233 			 * type
7234 			 */
7235 			evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7236 			/* event list on the heap and needs to be freed here */
7237 			if (eventsp) {
7238 				kmem_free(eventsp, evlistsz);
7239 			}
7240 		}
7241 
7242 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7243 		    "consumeevent_copyout done: err=%d\n", err));
7244 		return (err);
7245 	}
7246 #endif
7247 	evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7248 
7249 	if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7250 	    mode)) {
7251 		err = RSMERR_BAD_ADDR;
7252 	}
7253 
7254 	if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7255 		/* event list on the heap and needs to be freed here */
7256 		kmem_free(eventsp, evlistsz);
7257 	}
7258 
7259 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7260 	    "consumeevent_copyout done: err=%d\n", err));
7261 	return (err);
7262 }
7263 
7264 static int
7265 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7266 {
7267 	int	rc;
7268 	int	i;
7269 	minor_t	rnum;
7270 	rsm_consume_event_msg_t	msg = {0};
7271 	rsmseg_t		*seg;
7272 	rsm_poll_event_t	*event_list;
7273 	rsm_poll_event_t	events[RSM_MAX_POLLFDS];
7274 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7275 
7276 	event_list = events;
7277 
7278 	if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7279 	    RSM_SUCCESS) {
7280 		return (rc);
7281 	}
7282 
7283 	for (i = 0; i < msg.numents; i++) {
7284 		rnum = event_list[i].rnum;
7285 		event_list[i].revent = 0;
7286 		/* get the segment structure */
7287 		seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7288 		if (seg) {
7289 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7290 			    "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7291 			    seg));
7292 			if (seg->s_pollevent) {
7293 				/* consume the event */
7294 				atomic_add_32(&seg->s_pollevent, -1);
7295 				event_list[i].revent = POLLRDNORM;
7296 			}
7297 			rsmseglock_release(seg);
7298 		}
7299 	}
7300 
7301 	if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7302 	    RSM_SUCCESS) {
7303 		return (rc);
7304 	}
7305 
7306 	return (RSM_SUCCESS);
7307 }
7308 
7309 static int
7310 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7311 {
7312 	int size;
7313 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7314 
7315 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7316 
7317 #ifdef _MULTI_DATAMODEL
7318 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7319 		rsmka_iovec32_t	*iovec32, *iovec32_base;
7320 		int i;
7321 
7322 		size = count * sizeof (rsmka_iovec32_t);
7323 		iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7324 		if (ddi_copyin((caddr_t)user_vec,
7325 		    (caddr_t)iovec32, size, mode)) {
7326 			kmem_free(iovec32, size);
7327 			DBG_PRINTF((category, RSM_DEBUG,
7328 			    "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7329 			return (RSMERR_BAD_ADDR);
7330 		}
7331 
7332 		for (i = 0; i < count; i++, iovec++, iovec32++) {
7333 			iovec->io_type = (int)iovec32->io_type;
7334 			if (iovec->io_type == RSM_HANDLE_TYPE)
7335 				iovec->local.segid = (rsm_memseg_id_t)
7336 							iovec32->local;
7337 			else
7338 				iovec->local.vaddr =
7339 				    (caddr_t)(uintptr_t)iovec32->local;
7340 			iovec->local_offset = (size_t)iovec32->local_offset;
7341 			iovec->remote_offset = (size_t)iovec32->remote_offset;
7342 			iovec->transfer_len = (size_t)iovec32->transfer_len;
7343 
7344 		}
7345 		kmem_free(iovec32_base, size);
7346 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7347 		    "iovec_copyin done\n"));
7348 		return (DDI_SUCCESS);
7349 	}
7350 #endif
7351 
7352 	size = count * sizeof (rsmka_iovec_t);
7353 	if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7354 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7355 		    "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7356 		return (RSMERR_BAD_ADDR);
7357 	}
7358 
7359 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7360 
7361 	return (DDI_SUCCESS);
7362 }
7363 
7364 
7365 static int
7366 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7367 {
7368 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7369 
7370 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7371 
7372 #ifdef _MULTI_DATAMODEL
7373 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7374 		rsmka_scat_gath32_t sg_io32;
7375 
7376 		if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7377 		    mode)) {
7378 			DBG_PRINTF((category, RSM_DEBUG,
7379 			    "sgio_copyin done: returning EFAULT\n"));
7380 			return (RSMERR_BAD_ADDR);
7381 		}
7382 		sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7383 		sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7384 		sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7385 		sg_io->flags = (size_t)sg_io32.flags;
7386 		sg_io->remote_handle = (rsm_memseg_import_handle_t)
7387 		    (uintptr_t)sg_io32.remote_handle;
7388 		sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7389 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7390 		    "sgio_copyin done\n"));
7391 		return (DDI_SUCCESS);
7392 	}
7393 #endif
7394 	if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7395 	    mode)) {
7396 		DBG_PRINTF((category, RSM_DEBUG,
7397 		    "sgio_copyin done: returning EFAULT\n"));
7398 		return (RSMERR_BAD_ADDR);
7399 	}
7400 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7401 	return (DDI_SUCCESS);
7402 }
7403 
7404 static int
7405 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7406 {
7407 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7408 
7409 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7410 	    "sgio_resid_copyout enter\n"));
7411 
7412 #ifdef _MULTI_DATAMODEL
7413 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7414 		rsmka_scat_gath32_t sg_io32;
7415 
7416 		sg_io32.io_residual_count = sg_io->io_residual_count;
7417 		sg_io32.flags = sg_io->flags;
7418 
7419 		if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7420 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7421 		    sizeof (uint32_t), mode)) {
7422 
7423 			DBG_PRINTF((category, RSM_ERR,
7424 			    "sgio_resid_copyout error: rescnt\n"));
7425 			return (RSMERR_BAD_ADDR);
7426 		}
7427 
7428 		if (ddi_copyout((caddr_t)&sg_io32.flags,
7429 		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7430 		    sizeof (uint32_t), mode)) {
7431 
7432 			DBG_PRINTF((category, RSM_ERR,
7433 			    "sgio_resid_copyout error: flags\n"));
7434 			return (RSMERR_BAD_ADDR);
7435 		}
7436 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7437 		    "sgio_resid_copyout done\n"));
7438 		return (DDI_SUCCESS);
7439 	}
7440 #endif
7441 	if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7442 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7443 	    sizeof (ulong_t), mode)) {
7444 
7445 		DBG_PRINTF((category, RSM_ERR,
7446 		    "sgio_resid_copyout error:rescnt\n"));
7447 		return (RSMERR_BAD_ADDR);
7448 	}
7449 
7450 	if (ddi_copyout((caddr_t)&sg_io->flags,
7451 	    (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7452 	    sizeof (uint_t), mode)) {
7453 
7454 		DBG_PRINTF((category, RSM_ERR,
7455 		    "sgio_resid_copyout error:flags\n"));
7456 		return (RSMERR_BAD_ADDR);
7457 	}
7458 
7459 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7460 	return (DDI_SUCCESS);
7461 }
7462 
7463 
7464 static int
7465 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7466 {
7467 	rsmka_scat_gath_t	sg_io;
7468 	rsmka_iovec_t		ka_iovec_arr[RSM_MAX_IOVLEN];
7469 	rsmka_iovec_t		*ka_iovec;
7470 	rsmka_iovec_t		*ka_iovec_start;
7471 	rsmpi_scat_gath_t	rsmpi_sg_io;
7472 	rsmpi_iovec_t		iovec_arr[RSM_MAX_IOVLEN];
7473 	rsmpi_iovec_t		*iovec;
7474 	rsmpi_iovec_t		*iovec_start = NULL;
7475 	rsmapi_access_entry_t	*acl;
7476 	rsmresource_t		*res;
7477 	minor_t			rnum;
7478 	rsmseg_t		*im_seg, *ex_seg;
7479 	int			e;
7480 	int			error = 0;
7481 	uint_t			i;
7482 	uint_t			iov_proc = 0; /* num of iovecs processed */
7483 	size_t			size = 0;
7484 	size_t			ka_size;
7485 
7486 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7487 
7488 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7489 
7490 	credp = credp;
7491 
7492 	/*
7493 	 * Copyin the scatter/gather structure  and build new structure
7494 	 * for rsmpi.
7495 	 */
7496 	e = sgio_copyin(arg, &sg_io, mode);
7497 	if (e != DDI_SUCCESS) {
7498 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7499 		    "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7500 		return (e);
7501 	}
7502 
7503 	if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7504 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7505 		    "rsm_iovec_ioctl done: request_count(%d) too large\n",
7506 		    sg_io.io_request_count));
7507 		return (RSMERR_BAD_SGIO);
7508 	}
7509 
7510 	rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7511 	rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7512 	rsmpi_sg_io.io_segflg = 0;
7513 
7514 	/* Allocate memory and copyin io vector array  */
7515 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7516 		ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7517 		ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7518 	} else {
7519 		ka_iovec_start = ka_iovec = ka_iovec_arr;
7520 	}
7521 	e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7522 	    sg_io.io_request_count, mode);
7523 	if (e != DDI_SUCCESS) {
7524 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7525 			kmem_free(ka_iovec, ka_size);
7526 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7527 		    "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7528 		return (e);
7529 	}
7530 
7531 	/* get the import segment descriptor */
7532 	rnum = getminor(dev);
7533 	res = rsmresource_lookup(rnum, RSM_LOCK);
7534 
7535 	/*
7536 	 * The following sequence of locking may (or MAY NOT) cause a
7537 	 * deadlock but this is currently not addressed here since the
7538 	 * implementation will be changed to incorporate the use of
7539 	 * reference counting for both the import and the export segments.
7540 	 */
7541 
7542 	/* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7543 
7544 	im_seg = (rsmseg_t *)res;
7545 
7546 	if (im_seg == NULL) {
7547 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7548 			kmem_free(ka_iovec, ka_size);
7549 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7550 		    "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7551 		return (EINVAL);
7552 	}
7553 	/* putv/getv supported is supported only on import segments */
7554 	if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7555 		rsmseglock_release(im_seg);
7556 		if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7557 			kmem_free(ka_iovec, ka_size);
7558 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7559 		    "rsm_iovec_ioctl done: not an import segment\n"));
7560 		return (EINVAL);
7561 	}
7562 
7563 	/*
7564 	 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7565 	 * as well as wait for a local DR to complete.
7566 	 */
7567 	while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7568 	    (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7569 	    (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7570 		if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7571 			DBG_PRINTF((category, RSM_DEBUG,
7572 			    "rsm_iovec_ioctl done: cv_wait INTR"));
7573 			rsmseglock_release(im_seg);
7574 			return (RSMERR_INTERRUPTED);
7575 		}
7576 	}
7577 
7578 	if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7579 	    (im_seg->s_state != RSM_STATE_ACTIVE)) {
7580 
7581 		ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7582 		    im_seg->s_state == RSM_STATE_NEW);
7583 
7584 		DBG_PRINTF((category, RSM_DEBUG,
7585 		    "rsm_iovec_ioctl done: im_seg not conn/map"));
7586 		rsmseglock_release(im_seg);
7587 		e = RSMERR_BAD_SGIO;
7588 		goto out;
7589 	}
7590 
7591 	im_seg->s_rdmacnt++;
7592 	rsmseglock_release(im_seg);
7593 
7594 	/*
7595 	 * Allocate and set up the io vector for rsmpi
7596 	 */
7597 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7598 		size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7599 		iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7600 	} else {
7601 		iovec_start = iovec = iovec_arr;
7602 	}
7603 
7604 	rsmpi_sg_io.iovec = iovec;
7605 	for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7606 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7607 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7608 
7609 			if (ex_seg == NULL) {
7610 				e = RSMERR_BAD_SGIO;
7611 				break;
7612 			}
7613 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7614 
7615 			acl = ex_seg->s_acl;
7616 			if (acl[0].ae_permission == 0) {
7617 				struct buf *xbuf;
7618 				dev_t sdev = 0;
7619 
7620 				xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7621 				    0, ex_seg->s_len, B_WRITE,
7622 				    sdev, 0, NULL, DDI_UMEM_SLEEP);
7623 
7624 				ASSERT(xbuf != NULL);
7625 
7626 				iovec->local_mem.ms_type = RSM_MEM_BUF;
7627 				iovec->local_mem.ms_memory.bp = xbuf;
7628 			} else {
7629 				iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7630 				iovec->local_mem.ms_memory.handle =
7631 					ex_seg->s_handle.out;
7632 			}
7633 			ex_seg->s_rdmacnt++; /* refcnt the handle */
7634 			rsmseglock_release(ex_seg);
7635 		} else {
7636 			iovec->local_mem.ms_type = RSM_MEM_VADDR;
7637 			iovec->local_mem.ms_memory.vr.vaddr =
7638 			    ka_iovec->local.vaddr;
7639 		}
7640 
7641 		iovec->local_offset = ka_iovec->local_offset;
7642 		iovec->remote_handle = im_seg->s_handle.in;
7643 		iovec->remote_offset = ka_iovec->remote_offset;
7644 		iovec->transfer_length = ka_iovec->transfer_len;
7645 		iovec++;
7646 		ka_iovec++;
7647 	}
7648 
7649 	if (iov_proc <  sg_io.io_request_count) {
7650 		/* error while processing handle */
7651 		rsmseglock_acquire(im_seg);
7652 		im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7653 		if (im_seg->s_rdmacnt == 0) {
7654 			cv_broadcast(&im_seg->s_cv);
7655 		}
7656 		rsmseglock_release(im_seg);
7657 		goto out;
7658 	}
7659 
7660 	/* call rsmpi */
7661 	if (cmd == RSM_IOCTL_PUTV)
7662 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7663 		    im_seg->s_adapter->rsmpi_handle,
7664 		    &rsmpi_sg_io);
7665 	else if (cmd == RSM_IOCTL_GETV)
7666 		e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7667 		    im_seg->s_adapter->rsmpi_handle,
7668 		    &rsmpi_sg_io);
7669 	else {
7670 		e = EINVAL;
7671 		DBG_PRINTF((category, RSM_DEBUG,
7672 		    "iovec_ioctl: bad command = %x\n", cmd));
7673 	}
7674 
7675 
7676 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7677 	    "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7678 
7679 	sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7680 
7681 	/*
7682 	 * Check for implicit signal post flag and do the signal
7683 	 * post if needed
7684 	 */
7685 	if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7686 	    e == RSM_SUCCESS) {
7687 		rsmipc_request_t request;
7688 
7689 		request.rsmipc_key = im_seg->s_segid;
7690 		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7691 		request.rsmipc_segment_cookie = NULL;
7692 		e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7693 		/*
7694 		 * Reset the implicit signal post flag to 0 to indicate
7695 		 * that the signal post has been done and need not be
7696 		 * done in the RSMAPI library
7697 		 */
7698 		sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7699 	}
7700 
7701 	rsmseglock_acquire(im_seg);
7702 	im_seg->s_rdmacnt--;
7703 	if (im_seg->s_rdmacnt == 0) {
7704 		cv_broadcast(&im_seg->s_cv);
7705 	}
7706 	rsmseglock_release(im_seg);
7707 	error = sgio_resid_copyout(arg, &sg_io, mode);
7708 out:
7709 	iovec = iovec_start;
7710 	ka_iovec = ka_iovec_start;
7711 	for (i = 0; i < iov_proc; i++) {
7712 		if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7713 			ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7714 
7715 			ASSERT(ex_seg != NULL);
7716 			ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7717 
7718 			ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7719 			if (ex_seg->s_rdmacnt == 0) {
7720 				cv_broadcast(&ex_seg->s_cv);
7721 			}
7722 			rsmseglock_release(ex_seg);
7723 		}
7724 
7725 		ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7726 
7727 		/*
7728 		 * At present there is no dependency on the existence of xbufs
7729 		 * created by ddi_umem_iosetup for each of the iovecs. So we
7730 		 * can these xbufs here.
7731 		 */
7732 		if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7733 			freerbuf(iovec->local_mem.ms_memory.bp);
7734 		}
7735 
7736 		iovec++;
7737 		ka_iovec++;
7738 	}
7739 
7740 	if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7741 		if (iovec_start)
7742 			kmem_free(iovec_start, size);
7743 		kmem_free(ka_iovec_start, ka_size);
7744 	}
7745 
7746 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7747 	    "rsm_iovec_ioctl done %d\n", e));
7748 	/* if RSMPI call fails return that else return copyout's retval */
7749 	return ((e != RSM_SUCCESS) ? e : error);
7750 
7751 }
7752 
7753 
7754 static int
7755 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7756 {
7757 	adapter_t	*adapter;
7758 	rsm_addr_t	addr;
7759 	rsm_node_id_t	node;
7760 	int		rval = DDI_SUCCESS;
7761 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7762 
7763 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7764 
7765 	adapter =  rsm_getadapter(msg, mode);
7766 	if (adapter == NULL) {
7767 		DBG_PRINTF((category, RSM_DEBUG,
7768 		    "rsmaddr_ioctl done: adapter not found\n"));
7769 		return (RSMERR_CTLR_NOT_PRESENT);
7770 	}
7771 
7772 	switch (cmd) {
7773 	case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7774 		/* returns the hwaddr in msg->hwaddr */
7775 		if (msg->nodeid == my_nodeid) {
7776 			msg->hwaddr = adapter->hwaddr;
7777 		} else {
7778 			addr = get_remote_hwaddr(adapter, msg->nodeid);
7779 			if ((int64_t)addr < 0) {
7780 				rval = RSMERR_INTERNAL_ERROR;
7781 			} else {
7782 				msg->hwaddr = addr;
7783 			}
7784 		}
7785 		break;
7786 	case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7787 		/* returns the nodeid in msg->nodeid */
7788 		if (msg->hwaddr == adapter->hwaddr) {
7789 			msg->nodeid = my_nodeid;
7790 		} else {
7791 			node = get_remote_nodeid(adapter, msg->hwaddr);
7792 			if ((int)node < 0) {
7793 				rval = RSMERR_INTERNAL_ERROR;
7794 			} else {
7795 				msg->nodeid = (rsm_node_id_t)node;
7796 			}
7797 		}
7798 		break;
7799 	default:
7800 		rval = EINVAL;
7801 		break;
7802 	}
7803 
7804 	rsmka_release_adapter(adapter);
7805 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7806 	    "rsmaddr_ioctl done: %d\n", rval));
7807 	return (rval);
7808 }
7809 
7810 static int
7811 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7812 {
7813 	DBG_DEFINE(category,
7814 	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7815 
7816 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7817 
7818 #ifdef _MULTI_DATAMODEL
7819 
7820 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7821 		rsm_ioctlmsg32_t msg32;
7822 		int i;
7823 
7824 		if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7825 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7826 			    "rsm_ddi_copyin done: EFAULT\n"));
7827 			return (RSMERR_BAD_ADDR);
7828 		}
7829 		msg->len = msg32.len;
7830 		msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7831 		msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7832 		msg->key = msg32.key;
7833 		msg->acl_len = msg32.acl_len;
7834 		msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7835 		msg->cnum = msg32.cnum;
7836 		msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7837 		msg->cname_len = msg32.cname_len;
7838 		msg->nodeid = msg32.nodeid;
7839 		msg->hwaddr = msg32.hwaddr;
7840 		msg->perm = msg32.perm;
7841 		for (i = 0; i < 4; i++) {
7842 			msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7843 		}
7844 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7845 		    "rsm_ddi_copyin done\n"));
7846 		return (RSM_SUCCESS);
7847 	}
7848 #endif
7849 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7850 	if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7851 		return (RSMERR_BAD_ADDR);
7852 	else
7853 		return (RSM_SUCCESS);
7854 }
7855 
7856 static int
7857 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7858 {
7859 	rsmka_int_controller_attr_t	rsm_cattr;
7860 	DBG_DEFINE(category,
7861 		RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7862 
7863 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7864 	    "rsmattr_ddi_copyout enter\n"));
7865 	/*
7866 	 * need to copy appropriate data from rsm_controller_attr_t
7867 	 * to rsmka_int_controller_attr_t
7868 	 */
7869 #ifdef	_MULTI_DATAMODEL
7870 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7871 		rsmka_int_controller_attr32_t rsm_cattr32;
7872 
7873 		rsm_cattr32.attr_direct_access_sizes =
7874 		    adapter->rsm_attr.attr_direct_access_sizes;
7875 		rsm_cattr32.attr_atomic_sizes =
7876 		    adapter->rsm_attr.attr_atomic_sizes;
7877 		rsm_cattr32.attr_page_size =
7878 		    adapter->rsm_attr.attr_page_size;
7879 		if (adapter->rsm_attr.attr_max_export_segment_size >
7880 		    UINT_MAX)
7881 			rsm_cattr32.attr_max_export_segment_size =
7882 			    RSM_MAXSZ_PAGE_ALIGNED;
7883 		else
7884 			rsm_cattr32.attr_max_export_segment_size =
7885 			    adapter->rsm_attr.attr_max_export_segment_size;
7886 		if (adapter->rsm_attr.attr_tot_export_segment_size >
7887 		    UINT_MAX)
7888 			rsm_cattr32.attr_tot_export_segment_size =
7889 			    RSM_MAXSZ_PAGE_ALIGNED;
7890 		else
7891 			rsm_cattr32.attr_tot_export_segment_size =
7892 			    adapter->rsm_attr.attr_tot_export_segment_size;
7893 		if (adapter->rsm_attr.attr_max_export_segments >
7894 		    UINT_MAX)
7895 			rsm_cattr32.attr_max_export_segments =
7896 			    UINT_MAX;
7897 		else
7898 			rsm_cattr32.attr_max_export_segments =
7899 			    adapter->rsm_attr.attr_max_export_segments;
7900 		if (adapter->rsm_attr.attr_max_import_map_size >
7901 		    UINT_MAX)
7902 			rsm_cattr32.attr_max_import_map_size =
7903 			    RSM_MAXSZ_PAGE_ALIGNED;
7904 		else
7905 			rsm_cattr32.attr_max_import_map_size =
7906 			    adapter->rsm_attr.attr_max_import_map_size;
7907 		if (adapter->rsm_attr.attr_tot_import_map_size >
7908 		    UINT_MAX)
7909 			rsm_cattr32.attr_tot_import_map_size =
7910 			    RSM_MAXSZ_PAGE_ALIGNED;
7911 		else
7912 			rsm_cattr32.attr_tot_import_map_size =
7913 			    adapter->rsm_attr.attr_tot_import_map_size;
7914 		if (adapter->rsm_attr.attr_max_import_segments >
7915 		    UINT_MAX)
7916 			rsm_cattr32.attr_max_import_segments =
7917 			    UINT_MAX;
7918 		else
7919 			rsm_cattr32.attr_max_import_segments =
7920 			    adapter->rsm_attr.attr_max_import_segments;
7921 		rsm_cattr32.attr_controller_addr =
7922 		    adapter->rsm_attr.attr_controller_addr;
7923 
7924 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7925 		    "rsmattr_ddi_copyout done\n"));
7926 		if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7927 		    sizeof (rsmka_int_controller_attr32_t), mode)) {
7928 			return (RSMERR_BAD_ADDR);
7929 		}
7930 		else
7931 			return (RSM_SUCCESS);
7932 	}
7933 #endif
7934 	rsm_cattr.attr_direct_access_sizes =
7935 	    adapter->rsm_attr.attr_direct_access_sizes;
7936 	rsm_cattr.attr_atomic_sizes =
7937 	    adapter->rsm_attr.attr_atomic_sizes;
7938 	rsm_cattr.attr_page_size =
7939 	    adapter->rsm_attr.attr_page_size;
7940 	rsm_cattr.attr_max_export_segment_size =
7941 	    adapter->rsm_attr.attr_max_export_segment_size;
7942 	rsm_cattr.attr_tot_export_segment_size =
7943 	    adapter->rsm_attr.attr_tot_export_segment_size;
7944 	rsm_cattr.attr_max_export_segments =
7945 	    adapter->rsm_attr.attr_max_export_segments;
7946 	rsm_cattr.attr_max_import_map_size =
7947 	    adapter->rsm_attr.attr_max_import_map_size;
7948 	rsm_cattr.attr_tot_import_map_size =
7949 	    adapter->rsm_attr.attr_tot_import_map_size;
7950 	rsm_cattr.attr_max_import_segments =
7951 	    adapter->rsm_attr.attr_max_import_segments;
7952 	rsm_cattr.attr_controller_addr =
7953 	    adapter->rsm_attr.attr_controller_addr;
7954 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7955 	    "rsmattr_ddi_copyout done\n"));
7956 	if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7957 	    sizeof (rsmka_int_controller_attr_t), mode)) {
7958 		return (RSMERR_BAD_ADDR);
7959 	}
7960 	else
7961 		return (RSM_SUCCESS);
7962 }
7963 
7964 /*ARGSUSED*/
7965 static int
7966 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7967     int *rvalp)
7968 {
7969 	rsmseg_t *seg;
7970 	rsmresource_t	*res;
7971 	minor_t		rnum;
7972 	rsm_ioctlmsg_t msg = {0};
7973 	int error;
7974 	adapter_t *adapter;
7975 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7976 
7977 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7978 
7979 	if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7980 		error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7981 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7982 		    "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7983 		return (error);
7984 	}
7985 
7986 	/* topology cmd does not use the arg common to other cmds */
7987 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7988 		error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7989 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7990 		    "rsm_ioctl done: %d\n", error));
7991 		return (error);
7992 	}
7993 
7994 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7995 		error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7996 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7997 		    "rsm_ioctl done: %d\n", error));
7998 		return (error);
7999 	}
8000 
8001 	/*
8002 	 * try to load arguments
8003 	 */
8004 	if (cmd != RSM_IOCTL_RING_BELL &&
8005 	    rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
8006 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8007 		    "rsm_ioctl done: EFAULT\n"));
8008 		return (RSMERR_BAD_ADDR);
8009 	}
8010 
8011 	if (cmd == RSM_IOCTL_ATTR) {
8012 		adapter =  rsm_getadapter(&msg, mode);
8013 		if (adapter == NULL) {
8014 			DBG_PRINTF((category, RSM_DEBUG,
8015 			    "rsm_ioctl done: ENODEV\n"));
8016 			return (RSMERR_CTLR_NOT_PRESENT);
8017 		}
8018 		error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8019 		rsmka_release_adapter(adapter);
8020 		DBG_PRINTF((category, RSM_DEBUG,
8021 		    "rsm_ioctl:after copyout %d\n", error));
8022 		return (error);
8023 	}
8024 
8025 	if (cmd == RSM_IOCTL_BAR_INFO) {
8026 		/* Return library off,len of barrier page */
8027 		msg.off = barrier_offset;
8028 		msg.len = (int)barrier_size;
8029 #ifdef _MULTI_DATAMODEL
8030 		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8031 			rsm_ioctlmsg32_t msg32;
8032 
8033 			if (msg.len > UINT_MAX)
8034 				msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8035 			else
8036 				msg32.len = (int32_t)msg.len;
8037 			msg32.off = (int32_t)msg.off;
8038 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8039 			    "rsm_ioctl done\n"));
8040 			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8041 			    sizeof (msg32), mode))
8042 				return (RSMERR_BAD_ADDR);
8043 			else
8044 				return (RSM_SUCCESS);
8045 		}
8046 #endif
8047 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8048 		    "rsm_ioctl done\n"));
8049 		if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8050 		    sizeof (msg), mode))
8051 			return (RSMERR_BAD_ADDR);
8052 		else
8053 			return (RSM_SUCCESS);
8054 	}
8055 
8056 	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8057 		/* map the nodeid or hwaddr */
8058 		error = rsmaddr_ioctl(cmd, &msg, mode);
8059 		if (error == RSM_SUCCESS) {
8060 #ifdef _MULTI_DATAMODEL
8061 			if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8062 				rsm_ioctlmsg32_t msg32;
8063 
8064 				msg32.hwaddr = (uint64_t)msg.hwaddr;
8065 				msg32.nodeid = (uint32_t)msg.nodeid;
8066 
8067 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8068 				    "rsm_ioctl done\n"));
8069 				if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8070 				    sizeof (msg32), mode))
8071 					return (RSMERR_BAD_ADDR);
8072 				else
8073 					return (RSM_SUCCESS);
8074 			}
8075 #endif
8076 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8077 			    "rsm_ioctl done\n"));
8078 			if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8079 			    sizeof (msg), mode))
8080 				return (RSMERR_BAD_ADDR);
8081 			else
8082 				return (RSM_SUCCESS);
8083 		}
8084 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8085 		    "rsm_ioctl done: %d\n", error));
8086 		return (error);
8087 	}
8088 
8089 	/* Find resource and look it in read mode */
8090 	rnum = getminor(dev);
8091 	res = rsmresource_lookup(rnum, RSM_NOLOCK);
8092 	ASSERT(res != NULL);
8093 
8094 	/*
8095 	 * Find command group
8096 	 */
8097 	switch (RSM_IOCTL_CMDGRP(cmd)) {
8098 	case RSM_IOCTL_EXPORT_SEG:
8099 		/*
8100 		 * Export list is searched during publish, loopback and
8101 		 * remote lookup call.
8102 		 */
8103 		seg = rsmresource_seg(res, rnum, credp,
8104 		    RSM_RESOURCE_EXPORT_SEGMENT);
8105 		if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8106 			error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8107 			    credp);
8108 		} else { /* export ioctl on an import/barrier resource */
8109 			error = RSMERR_BAD_SEG_HNDL;
8110 		}
8111 		break;
8112 	case RSM_IOCTL_IMPORT_SEG:
8113 		/* Import list is searched during remote unmap call. */
8114 		seg = rsmresource_seg(res, rnum, credp,
8115 		    RSM_RESOURCE_IMPORT_SEGMENT);
8116 		if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8117 			error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8118 			    credp);
8119 		} else  { /* import ioctl on an export/barrier resource */
8120 			error = RSMERR_BAD_SEG_HNDL;
8121 		}
8122 		break;
8123 	case RSM_IOCTL_BAR:
8124 		if (res != RSMRC_RESERVED &&
8125 		    res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8126 			error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8127 			    mode);
8128 		} else { /* invalid res value */
8129 			error = RSMERR_BAD_SEG_HNDL;
8130 		}
8131 		break;
8132 	case RSM_IOCTL_BELL:
8133 		if (res != RSMRC_RESERVED) {
8134 			if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8135 				error = exportbell_ioctl((rsmseg_t *)res, cmd);
8136 			else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8137 				error = importbell_ioctl((rsmseg_t *)res, cmd);
8138 			else /* RSM_RESOURCE_BAR */
8139 				error = RSMERR_BAD_SEG_HNDL;
8140 		} else { /* invalid res value */
8141 			error = RSMERR_BAD_SEG_HNDL;
8142 		}
8143 		break;
8144 	default:
8145 		error = EINVAL;
8146 	}
8147 
8148 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8149 	    error));
8150 	return (error);
8151 }
8152 
8153 
8154 /* **************************** Segment Mapping Operations ********* */
8155 static rsm_mapinfo_t *
8156 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8157     size_t *map_len)
8158 {
8159 	rsm_mapinfo_t	*p;
8160 	/*
8161 	 * Find the correct mapinfo structure to use during the mapping
8162 	 * from the seg->s_mapinfo list.
8163 	 * The seg->s_mapinfo list contains in reverse order the mappings
8164 	 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8165 	 * access the correct entry within this list for the mapping
8166 	 * requested.
8167 	 *
8168 	 * The algorithm for selecting a list entry is as follows:
8169 	 *
8170 	 * When start_offset of an entry <= off we have found the entry
8171 	 * we were looking for. Adjust the dev_offset and map_len (needs
8172 	 * to be PAGESIZE aligned).
8173 	 */
8174 	p = seg->s_mapinfo;
8175 	for (; p; p = p->next) {
8176 		if (p->start_offset <= off) {
8177 			*dev_offset = p->dev_offset + off - p->start_offset;
8178 			*map_len = (len > p->individual_len) ?
8179 			    p->individual_len : ptob(btopr(len));
8180 			return (p);
8181 		}
8182 		p = p->next;
8183 	}
8184 
8185 	return (NULL);
8186 }
8187 
8188 static void
8189 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8190 {
8191 	rsm_mapinfo_t *p;
8192 
8193 	while (mapinfo != NULL) {
8194 		p = mapinfo;
8195 		mapinfo = mapinfo->next;
8196 		kmem_free(p, sizeof (*p));
8197 	}
8198 }
8199 
8200 static int
8201 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8202     size_t len, void **pvtp)
8203 {
8204 	rsmcookie_t	*p;
8205 	rsmresource_t	*res;
8206 	rsmseg_t	*seg;
8207 	minor_t rnum;
8208 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8209 
8210 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8211 
8212 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8213 	    "rsmmap_map: dhp = %x\n", dhp));
8214 
8215 	flags = flags;
8216 
8217 	rnum = getminor(dev);
8218 	res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8219 	ASSERT(res != NULL);
8220 
8221 	seg = (rsmseg_t *)res;
8222 
8223 	rsmseglock_acquire(seg);
8224 
8225 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8226 
8227 	/*
8228 	 * Allocate structure and add cookie to segment list
8229 	 */
8230 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8231 
8232 	p->c_dhp = dhp;
8233 	p->c_off = off;
8234 	p->c_len = len;
8235 	p->c_next = seg->s_ckl;
8236 	seg->s_ckl = p;
8237 
8238 	*pvtp = (void *)seg;
8239 
8240 	rsmseglock_release(seg);
8241 
8242 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8243 	return (DDI_SUCCESS);
8244 }
8245 
8246 /*
8247  * Page fault handling is done here. The prerequisite mapping setup
8248  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8249  * ddi_umem_setup
8250  */
8251 static int
8252 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8253     uint_t type, uint_t rw)
8254 {
8255 	int e;
8256 	rsmseg_t *seg = (rsmseg_t *)pvt;
8257 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8258 
8259 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8260 
8261 	rsmseglock_acquire(seg);
8262 
8263 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8264 
8265 	while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8266 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8267 			DBG_PRINTF((category, RSM_DEBUG,
8268 			    "rsmmap_access done: cv_wait INTR"));
8269 			rsmseglock_release(seg);
8270 			return (RSMERR_INTERRUPTED);
8271 		}
8272 	}
8273 
8274 	ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8275 	    seg->s_state == RSM_STATE_ACTIVE);
8276 
8277 	if (seg->s_state == RSM_STATE_DISCONNECT)
8278 		seg->s_flags |= RSM_IMPORT_DUMMY;
8279 
8280 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8281 	    "rsmmap_access: dhp = %x\n", dhp));
8282 
8283 	rsmseglock_release(seg);
8284 
8285 	if (e = devmap_load(dhp, offset, len, type, rw)) {
8286 		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8287 	}
8288 
8289 
8290 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8291 
8292 	return (e);
8293 }
8294 
8295 static int
8296 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8297 	void **newpvt)
8298 {
8299 	rsmseg_t	*seg = (rsmseg_t *)oldpvt;
8300 	rsmcookie_t	*p, *old;
8301 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8302 
8303 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8304 
8305 	/*
8306 	 * Same as map, create an entry to hold cookie and add it to
8307 	 * connect segment list. The oldpvt is a pointer to segment.
8308 	 * Return segment pointer in newpvt.
8309 	 */
8310 	rsmseglock_acquire(seg);
8311 
8312 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8313 
8314 	/*
8315 	 * Find old cookie
8316 	 */
8317 	for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8318 		if (old->c_dhp == dhp) {
8319 			break;
8320 		}
8321 	}
8322 	if (old == NULL) {
8323 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8324 		    "rsmmap_dup done: EINVAL\n"));
8325 		rsmseglock_release(seg);
8326 		return (EINVAL);
8327 	}
8328 
8329 	p = kmem_alloc(sizeof (*p), KM_SLEEP);
8330 
8331 	p->c_dhp = new_dhp;
8332 	p->c_off = old->c_off;
8333 	p->c_len = old->c_len;
8334 	p->c_next = seg->s_ckl;
8335 	seg->s_ckl = p;
8336 
8337 	*newpvt = (void *)seg;
8338 
8339 	rsmseglock_release(seg);
8340 
8341 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8342 
8343 	return (DDI_SUCCESS);
8344 }
8345 
8346 static void
8347 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8348 	devmap_cookie_t new_dhp1, void **pvtp1,
8349 	devmap_cookie_t new_dhp2, void **pvtp2)
8350 {
8351 	/*
8352 	 * Remove pvtp structure from segment list.
8353 	 */
8354 	rsmseg_t	*seg = (rsmseg_t *)pvtp;
8355 	int freeflag;
8356 
8357 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8358 
8359 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8360 
8361 	off = off; len = len;
8362 	pvtp1 = pvtp1; pvtp2 = pvtp2;
8363 
8364 	rsmseglock_acquire(seg);
8365 
8366 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8367 
8368 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8369 	    "rsmmap_unmap: dhp = %x\n", dhp));
8370 	/*
8371 	 * We can go ahead and remove the dhps even if we are in
8372 	 * the MAPPING state because the dhps being removed here
8373 	 * belong to a different mmap and we are holding the segment
8374 	 * lock.
8375 	 */
8376 	if (new_dhp1 == NULL && new_dhp2 == NULL) {
8377 		/* find and remove dhp handle */
8378 		rsmcookie_t *tmp, **back = &seg->s_ckl;
8379 
8380 		while (*back != NULL) {
8381 			tmp = *back;
8382 			if (tmp->c_dhp == dhp) {
8383 				*back = tmp->c_next;
8384 				kmem_free(tmp, sizeof (*tmp));
8385 				break;
8386 			}
8387 			back = &tmp->c_next;
8388 		}
8389 	} else {
8390 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8391 		    "rsmmap_unmap:parital unmap"
8392 		    "new_dhp1 %lx, new_dhp2 %lx\n",
8393 		    (size_t)new_dhp1, (size_t)new_dhp2));
8394 	}
8395 
8396 	/*
8397 	 * rsmmap_unmap is called for each mapping cookie on the list.
8398 	 * When the list becomes empty and we are not in the MAPPING
8399 	 * state then unmap in the rsmpi driver.
8400 	 */
8401 	if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8402 		(void) rsm_unmap(seg);
8403 
8404 	if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8405 		freeflag = 1;
8406 	} else {
8407 		freeflag = 0;
8408 	}
8409 
8410 	rsmseglock_release(seg);
8411 
8412 	if (freeflag) {
8413 		/* Free the segment structure */
8414 		rsmseg_free(seg);
8415 	}
8416 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8417 
8418 }
8419 
/*
 * devmap callback vector used for import segment mappings; rsm_devmap
 * passes it to devmap_devmem_setup and devmap_umem_setup.
 */
static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* devmap_ops map routine */
	rsmmap_access,	/* devmap_ops access routine */
	rsmmap_dup,		/* devmap_ops dup routine */
	rsmmap_unmap,	/* devmap_ops unmap routine */
};
8427 
8428 static int
8429 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8430     size_t *maplen, uint_t model /*ARGSUSED*/)
8431 {
8432 	struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8433 	int		err;
8434 	uint_t		maxprot;
8435 	minor_t		rnum;
8436 	rsmseg_t	*seg;
8437 	off_t		dev_offset;
8438 	size_t		cur_len;
8439 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8440 
8441 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8442 
8443 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8444 	    "rsm_devmap: off = %lx, len = %lx\n", off, len));
8445 	rnum = getminor(dev);
8446 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8447 	ASSERT(seg != NULL);
8448 
8449 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8450 		if ((off == barrier_offset) &&
8451 		    (len == barrier_size)) {
8452 
8453 			ASSERT(bar_va != NULL && bar_cookie != NULL);
8454 
8455 			/*
8456 			 * The offset argument in devmap_umem_setup represents
8457 			 * the offset within the kernel memory defined by the
8458 			 * cookie. We use this offset as barrier_offset.
8459 			 */
8460 			err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8461 			    barrier_offset, len, PROT_USER|PROT_READ,
8462 			    DEVMAP_DEFAULTS, 0);
8463 
8464 			if (err != 0) {
8465 				DBG_PRINTF((category, RSM_ERR,
8466 				    "rsm_devmap done: %d\n", err));
8467 				return (RSMERR_MAP_FAILED);
8468 			}
8469 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8470 			    "rsm_devmap done: %d\n", err));
8471 
8472 			*maplen = barrier_size;
8473 
8474 			return (err);
8475 		} else {
8476 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8477 			    "rsm_devmap done: %d\n", err));
8478 			return (RSMERR_MAP_FAILED);
8479 		}
8480 	}
8481 
8482 	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8483 	ASSERT(seg->s_state == RSM_STATE_MAPPING);
8484 
8485 	/*
8486 	 * Make sure we still have permission for the map operation.
8487 	 */
8488 	maxprot = PROT_USER;
8489 	if (seg->s_mode & RSM_PERM_READ) {
8490 		maxprot |= PROT_READ;
8491 	}
8492 
8493 	if (seg->s_mode & RSM_PERM_WRITE) {
8494 		maxprot |= PROT_WRITE;
8495 	}
8496 
8497 	/*
8498 	 * For each devmap call, rsmmap_map is called. This maintains driver
8499 	 * private information for the mapping. Thus, if there are multiple
8500 	 * devmap calls there will be multiple rsmmap_map calls and for each
8501 	 * call, the mapping information will be stored.
8502 	 * In case of an error during the processing of the devmap call, error
8503 	 * will be returned. This error return causes the caller of rsm_devmap
8504 	 * to undo all the mappings by calling rsmmap_unmap for each one.
8505 	 * rsmmap_unmap will free up the private information for the requested
8506 	 * mapping.
8507 	 */
8508 	if (seg->s_node != my_nodeid) {
8509 		rsm_mapinfo_t *p;
8510 
8511 		p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8512 		if (p == NULL) {
8513 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8514 			    "rsm_devmap: incorrect mapping info\n"));
8515 			return (RSMERR_MAP_FAILED);
8516 		}
8517 		err = devmap_devmem_setup(dhc, p->dip,
8518 		    callbackops, p->dev_register,
8519 		    dev_offset, cur_len, maxprot,
8520 		    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8521 
8522 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8523 		    "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8524 		    "off=%lx,len=%lx\n",
8525 		    p->dip, p->dev_register, dev_offset, off, cur_len));
8526 
8527 		if (err != 0) {
8528 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8529 			    "rsm_devmap: devmap_devmem_setup failed %d\n",
8530 			    err));
8531 			return (RSMERR_MAP_FAILED);
8532 		}
8533 		/* cur_len is always an integral multiple pagesize */
8534 		ASSERT((cur_len & (PAGESIZE-1)) == 0);
8535 		*maplen = cur_len;
8536 		return (err);
8537 
8538 	} else {
8539 		err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8540 		    seg->s_cookie, off, len, maxprot,
8541 		    DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8542 		if (err != 0) {
8543 			DBG_PRINTF((category, RSM_DEBUG,
8544 			    "rsm_devmap: devmap_umem_setup failed %d\n",
8545 				err));
8546 			return (RSMERR_MAP_FAILED);
8547 		}
8548 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8549 		    "rsm_devmap: loopback done\n"));
8550 
8551 		*maplen = ptob(btopr(len));
8552 
8553 		return (err);
8554 	}
8555 }
8556 
8557 /*
8558  * We can use the devmap framework for mapping device memory to user space by
8559  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8560  * processing calls this entry point and devmap_setup is called within this
8561  * function, which eventually calls rsm_devmap
8562  */
8563 static int
8564 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8565     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8566 {
8567 	int			error = 0;
8568 	int			old_state;
8569 	minor_t			rnum;
8570 	rsmseg_t		*seg, *eseg;
8571 	adapter_t		*adapter;
8572 	rsm_import_share_t	*sharedp;
8573 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8574 
8575 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8576 
8577 	/*
8578 	 * find segment
8579 	 */
8580 	rnum = getminor(dev);
8581 	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8582 
8583 	if (seg == NULL) {
8584 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8585 		    "rsm_segmap done: invalid segment\n"));
8586 		return (EINVAL);
8587 	}
8588 
8589 	/*
8590 	 * the user is trying to map a resource that has not been
8591 	 * defined yet. The library uses this to map in the
8592 	 * barrier page.
8593 	 */
8594 	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8595 		rsmseglock_release(seg);
8596 
8597 		/*
8598 		 * The mapping for the barrier page is identified
8599 		 * by the special offset barrier_offset
8600 		 */
8601 
8602 		if (off == (off_t)barrier_offset ||
8603 		    len == (off_t)barrier_size) {
8604 			if (bar_cookie == NULL || bar_va == NULL) {
8605 				DBG_PRINTF((category, RSM_DEBUG,
8606 				    "rsm_segmap: bar cookie/va is NULL\n"));
8607 				return (EINVAL);
8608 			}
8609 
8610 			error = devmap_setup(dev, (offset_t)off, as, addrp,
8611 			    (size_t)len, prot, maxprot, flags,  cred);
8612 
8613 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8614 			    "rsm_segmap done: %d\n", error));
8615 			return (error);
8616 		} else {
8617 			DBG_PRINTF((category, RSM_DEBUG,
8618 			    "rsm_segmap: bad offset/length\n"));
8619 			return (EINVAL);
8620 		}
8621 	}
8622 
8623 	/* Make sure you can only map imported segments */
8624 	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8625 		rsmseglock_release(seg);
8626 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8627 		    "rsm_segmap done: not an import segment\n"));
8628 		return (EINVAL);
8629 	}
8630 	/* check means library is broken */
8631 	ASSERT(seg->s_hdr.rsmrc_num == rnum);
8632 
8633 	/* wait for the segment to become unquiesced */
8634 	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8635 		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8636 			rsmseglock_release(seg);
8637 			DBG_PRINTF((category, RSM_DEBUG,
8638 			    "rsm_segmap done: cv_wait INTR"));
8639 			return (ENODEV);
8640 		}
8641 	}
8642 
8643 	/* wait until segment leaves the mapping state */
8644 	while (seg->s_state == RSM_STATE_MAPPING)
8645 		cv_wait(&seg->s_cv, &seg->s_lock);
8646 
8647 	/*
8648 	 * we allow multiple maps of the same segment in the KA
8649 	 * and it works because we do an rsmpi map of the whole
8650 	 * segment during the first map and all the device mapping
8651 	 * information needed in rsm_devmap is in the mapinfo list.
8652 	 */
8653 	if ((seg->s_state != RSM_STATE_CONNECT) &&
8654 	    (seg->s_state != RSM_STATE_ACTIVE)) {
8655 		rsmseglock_release(seg);
8656 		DBG_PRINTF((category, RSM_DEBUG,
8657 		    "rsm_segmap done: segment not connected\n"));
8658 		return (ENODEV);
8659 	}
8660 
8661 	/*
8662 	 * Make sure we are not mapping a larger segment than what's
8663 	 * exported
8664 	 */
8665 	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8666 		rsmseglock_release(seg);
8667 		DBG_PRINTF((category, RSM_DEBUG,
8668 		    "rsm_segmap done: off+len>seg size\n"));
8669 		return (ENXIO);
8670 	}
8671 
8672 	/*
8673 	 * Make sure we still have permission for the map operation.
8674 	 */
8675 	maxprot = PROT_USER;
8676 	if (seg->s_mode & RSM_PERM_READ) {
8677 		maxprot |= PROT_READ;
8678 	}
8679 
8680 	if (seg->s_mode & RSM_PERM_WRITE) {
8681 		maxprot |= PROT_WRITE;
8682 	}
8683 
8684 	if ((prot & maxprot) != prot) {
8685 		/* No permission */
8686 		rsmseglock_release(seg);
8687 		DBG_PRINTF((category, RSM_DEBUG,
8688 		    "rsm_segmap done: no permission\n"));
8689 		return (EACCES);
8690 	}
8691 
8692 	old_state = seg->s_state;
8693 
8694 	ASSERT(seg->s_share != NULL);
8695 
8696 	rsmsharelock_acquire(seg);
8697 
8698 	sharedp = seg->s_share;
8699 
8700 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8701 	    "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8702 
8703 	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8704 	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8705 		rsmsharelock_release(seg);
8706 		rsmseglock_release(seg);
8707 		DBG_PRINTF((category, RSM_DEBUG,
8708 		    "rsm_segmap done:RSMSI_STATE %d invalid\n",
8709 		    sharedp->rsmsi_state));
8710 		return (ENODEV);
8711 	}
8712 
8713 	/*
8714 	 * Do the map - since we want importers to share mappings
8715 	 * we do the rsmpi map for the whole segment
8716 	 */
8717 	if (seg->s_node != my_nodeid) {
8718 		uint_t dev_register;
8719 		off_t dev_offset;
8720 		dev_info_t *dip;
8721 		size_t tmp_len;
8722 		size_t total_length_mapped = 0;
8723 		size_t length_to_map = seg->s_len;
8724 		off_t tmp_off = 0;
8725 		rsm_mapinfo_t *p;
8726 
8727 		/*
8728 		 * length_to_map = seg->s_len is always an integral
8729 		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8730 		 * list is a multiple of PAGESIZE - RSMPI map ensures this
8731 		 */
8732 
8733 		adapter = seg->s_adapter;
8734 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8735 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8736 
8737 		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8738 			error = 0;
8739 			/* map the whole segment */
8740 			while (total_length_mapped < seg->s_len) {
8741 				tmp_len = 0;
8742 
8743 				error = adapter->rsmpi_ops->rsm_map(
8744 				    seg->s_handle.in, tmp_off,
8745 				    length_to_map, &tmp_len,
8746 				    &dip, &dev_register, &dev_offset,
8747 				    NULL, NULL);
8748 
8749 				if (error != 0)
8750 					break;
8751 
8752 				/*
8753 				 * Store the mapping info obtained from rsm_map
8754 				 */
8755 				p = kmem_alloc(sizeof (*p), KM_SLEEP);
8756 				p->dev_register = dev_register;
8757 				p->dev_offset = dev_offset;
8758 				p->dip = dip;
8759 				p->individual_len = tmp_len;
8760 				p->start_offset = tmp_off;
8761 				p->next = sharedp->rsmsi_mapinfo;
8762 				sharedp->rsmsi_mapinfo = p;
8763 
8764 				total_length_mapped += tmp_len;
8765 				length_to_map -= tmp_len;
8766 				tmp_off += tmp_len;
8767 			}
8768 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8769 
8770 			if (error != RSM_SUCCESS) {
8771 				/* Check if this is the the first rsm_map */
8772 				if (sharedp->rsmsi_mapinfo != NULL) {
8773 					/*
8774 					 * A single rsm_unmap undoes
8775 					 * multiple rsm_maps.
8776 					 */
8777 					(void) seg->s_adapter->rsmpi_ops->
8778 					    rsm_unmap(sharedp->rsmsi_handle);
8779 					rsm_free_mapinfo(sharedp->
8780 					    rsmsi_mapinfo);
8781 				}
8782 				sharedp->rsmsi_mapinfo = NULL;
8783 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8784 				rsmsharelock_release(seg);
8785 				rsmseglock_release(seg);
8786 				DBG_PRINTF((category, RSM_DEBUG,
8787 				    "rsm_segmap done: rsmpi map err %d\n",
8788 				    error));
8789 				ASSERT(error != RSMERR_BAD_LENGTH &&
8790 				    error != RSMERR_BAD_MEM_ALIGNMENT &&
8791 				    error != RSMERR_BAD_SEG_HNDL);
8792 				if (error == RSMERR_UNSUPPORTED_OPERATION)
8793 					return (ENOTSUP);
8794 				else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8795 					return (EAGAIN);
8796 				else if (error == RSMERR_CONN_ABORTED)
8797 					return (ENODEV);
8798 				else
8799 					return (error);
8800 			} else {
8801 				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8802 			}
8803 		} else {
8804 			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8805 		}
8806 
8807 		sharedp->rsmsi_mapcnt++;
8808 
8809 		rsmsharelock_release(seg);
8810 
8811 		/* move to an intermediate mapping state */
8812 		seg->s_state = RSM_STATE_MAPPING;
8813 		rsmseglock_release(seg);
8814 
8815 		error = devmap_setup(dev, (offset_t)off, as, addrp,
8816 		    len, prot, maxprot, flags, cred);
8817 
8818 		rsmseglock_acquire(seg);
8819 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8820 
8821 		if (error == DDI_SUCCESS) {
8822 			seg->s_state = RSM_STATE_ACTIVE;
8823 		} else {
8824 			rsmsharelock_acquire(seg);
8825 
8826 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8827 
8828 			sharedp->rsmsi_mapcnt--;
8829 			if (sharedp->rsmsi_mapcnt == 0) {
8830 				/* unmap the shared RSMPI mapping */
8831 				ASSERT(sharedp->rsmsi_handle != NULL);
8832 				(void) adapter->rsmpi_ops->
8833 					    rsm_unmap(sharedp->rsmsi_handle);
8834 				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8835 				sharedp->rsmsi_mapinfo = NULL;
8836 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8837 			}
8838 
8839 			rsmsharelock_release(seg);
8840 			seg->s_state = old_state;
8841 			DBG_PRINTF((category, RSM_ERR,
8842 			    "rsm: devmap_setup failed %d\n", error));
8843 		}
8844 		cv_broadcast(&seg->s_cv);
8845 		rsmseglock_release(seg);
8846 		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8847 		    error));
8848 		return (error);
8849 	} else {
8850 		/*
8851 		 * For loopback, the export segment mapping cookie (s_cookie)
8852 		 * is also used as the s_cookie value for its import segments
8853 		 * during mapping.
8854 		 * Note that reference counting for s_cookie of the export
8855 		 * segment is not required due to the following:
8856 		 * We never have a case of the export segment being destroyed,
8857 		 * leaving the import segments with a stale value for the
8858 		 * s_cookie field, since a force disconnect is done prior to a
8859 		 * destroy of an export segment. The force disconnect causes
8860 		 * the s_cookie value to be reset to NULL. Also for the
8861 		 * rsm_rebind operation, we change the s_cookie value of the
8862 		 * export segment as well as of all its local (loopback)
8863 		 * importers.
8864 		 */
8865 		DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8866 
8867 		rsmsharelock_release(seg);
8868 		/*
8869 		 * In order to maintain the lock ordering between the export
8870 		 * and import segment locks, we need to acquire the export
8871 		 * segment lock first and only then acquire the import
8872 		 * segment lock.
8873 		 * The above is necessary to avoid any deadlock scenarios
8874 		 * with rsm_rebind which also acquires both the export
8875 		 * and import segment locks in the above mentioned order.
8876 		 * Based on code inspection, there seem to be no other
8877 		 * situations in which both the export and import segment
8878 		 * locks are acquired either in the same or opposite order
8879 		 * as mentioned above.
8880 		 * Thus in order to conform to the above lock order, we
8881 		 * need to change the state of the import segment to
8882 		 * RSM_STATE_MAPPING, release the lock. Once this is done we
8883 		 * can now safely acquire the export segment lock first
8884 		 * followed by the import segment lock which is as per
8885 		 * the lock order mentioned above.
8886 		 */
8887 		/* move to an intermediate mapping state */
8888 		seg->s_state = RSM_STATE_MAPPING;
8889 		rsmseglock_release(seg);
8890 
8891 		eseg = rsmexport_lookup(seg->s_key);
8892 
8893 		if (eseg == NULL) {
8894 			rsmseglock_acquire(seg);
8895 			/*
8896 			 * Revert to old_state and signal any waiters
8897 			 * The shared state is not changed
8898 			 */
8899 
8900 			seg->s_state = old_state;
8901 			cv_broadcast(&seg->s_cv);
8902 			rsmseglock_release(seg);
8903 			DBG_PRINTF((category, RSM_DEBUG,
8904 			    "rsm_segmap done: key %d not found\n", seg->s_key));
8905 			return (ENODEV);
8906 		}
8907 
8908 		rsmsharelock_acquire(seg);
8909 		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8910 		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8911 
8912 		sharedp->rsmsi_mapcnt++;
8913 		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8914 		rsmsharelock_release(seg);
8915 
8916 		ASSERT(eseg->s_cookie != NULL);
8917 
8918 		/*
8919 		 * It is not required or necessary to acquire the import
8920 		 * segment lock here to change the value of s_cookie since
8921 		 * no one will touch the import segment as long as it is
8922 		 * in the RSM_STATE_MAPPING state.
8923 		 */
8924 		seg->s_cookie = eseg->s_cookie;
8925 
8926 		rsmseglock_release(eseg);
8927 
8928 		error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8929 		    prot, maxprot, flags, cred);
8930 
8931 		rsmseglock_acquire(seg);
8932 		ASSERT(seg->s_state == RSM_STATE_MAPPING);
8933 		if (error == 0) {
8934 			seg->s_state = RSM_STATE_ACTIVE;
8935 		} else {
8936 			rsmsharelock_acquire(seg);
8937 
8938 			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8939 
8940 			sharedp->rsmsi_mapcnt--;
8941 			if (sharedp->rsmsi_mapcnt == 0) {
8942 				sharedp->rsmsi_mapinfo = NULL;
8943 				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8944 			}
8945 			rsmsharelock_release(seg);
8946 			seg->s_state = old_state;
8947 			seg->s_cookie = NULL;
8948 		}
8949 		cv_broadcast(&seg->s_cv);
8950 		rsmseglock_release(seg);
8951 		DBG_PRINTF((category, RSM_DEBUG_LVL2,
8952 		    "rsm_segmap done: %d\n", error));
8953 		return (error);
8954 	}
8955 }
8956 
8957 int
8958 rsmka_null_seg_create(
8959     rsm_controller_handle_t argcp,
8960     rsm_memseg_export_handle_t *handle,
8961     size_t size,
8962     uint_t flags,
8963     rsm_memory_local_t *memory,
8964     rsm_resource_callback_t callback,
8965     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8966 {
8967 	return (RSM_SUCCESS);
8968 }
8969 
8970 
8971 int
8972 rsmka_null_seg_destroy(
8973     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
8974 {
8975 	return (RSM_SUCCESS);
8976 }
8977 
8978 
8979 int
8980 rsmka_null_bind(
8981     rsm_memseg_export_handle_t argmemseg,
8982     off_t offset,
8983     rsm_memory_local_t *argmemory,
8984     rsm_resource_callback_t callback,
8985     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
8986 {
8987 	return (RSM_SUCCESS);
8988 }
8989 
8990 
8991 int
8992 rsmka_null_unbind(
8993     rsm_memseg_export_handle_t argmemseg,
8994     off_t offset,
8995     size_t length	/*ARGSUSED*/)
8996 {
8997 	return (DDI_SUCCESS);
8998 }
8999 
9000 int
9001 rsmka_null_rebind(
9002     rsm_memseg_export_handle_t argmemseg,
9003     off_t offset,
9004     rsm_memory_local_t *memory,
9005     rsm_resource_callback_t callback,
9006     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9007 {
9008 	return (RSM_SUCCESS);
9009 }
9010 
9011 int
9012 rsmka_null_publish(
9013     rsm_memseg_export_handle_t argmemseg,
9014     rsm_access_entry_t access_list[],
9015     uint_t access_list_length,
9016     rsm_memseg_id_t segment_id,
9017     rsm_resource_callback_t callback,
9018     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9019 {
9020 	return (RSM_SUCCESS);
9021 }
9022 
9023 
9024 int
9025 rsmka_null_republish(
9026     rsm_memseg_export_handle_t memseg,
9027     rsm_access_entry_t access_list[],
9028     uint_t access_list_length,
9029     rsm_resource_callback_t callback,
9030     rsm_resource_callback_arg_t callback_arg	/*ARGSUSED*/)
9031 {
9032 	return (RSM_SUCCESS);
9033 }
9034 
9035 int
9036 rsmka_null_unpublish(
9037     rsm_memseg_export_handle_t argmemseg	/*ARGSUSED*/)
9038 {
9039 	return (RSM_SUCCESS);
9040 }
9041 
9042 
9043 void
9044 rsmka_init_loopback()
9045 {
9046 	rsm_ops_t	*ops = &null_rsmpi_ops;
9047 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9048 
9049 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9050 	    "rsmka_init_loopback enter\n"));
9051 
9052 	/* initialize null ops vector */
9053 	ops->rsm_seg_create = rsmka_null_seg_create;
9054 	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9055 	ops->rsm_bind = rsmka_null_bind;
9056 	ops->rsm_unbind = rsmka_null_unbind;
9057 	ops->rsm_rebind = rsmka_null_rebind;
9058 	ops->rsm_publish = rsmka_null_publish;
9059 	ops->rsm_unpublish = rsmka_null_unpublish;
9060 	ops->rsm_republish = rsmka_null_republish;
9061 
9062 	/* initialize attributes for loopback adapter */
9063 	loopback_attr.attr_name = loopback_str;
9064 	loopback_attr.attr_page_size = 0x8; /* 8K */
9065 
9066 	/* initialize loopback adapter */
9067 	loopback_adapter.rsm_attr = loopback_attr;
9068 	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9069 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9070 	    "rsmka_init_loopback done\n"));
9071 }
9072 
9073 /* ************** DR functions ********************************** */
9074 static void
9075 rsm_quiesce_exp_seg(rsmresource_t *resp)
9076 {
9077 	int		recheck_state;
9078 	rsmseg_t	*segp = (rsmseg_t *)resp;
9079 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9080 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9081 
9082 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9083 	    "%s enter: key=%u\n", function, segp->s_key));
9084 
9085 	rsmseglock_acquire(segp);
9086 	do {
9087 		recheck_state = 0;
9088 		if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9089 		    (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9090 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9091 		    (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9092 			rsmseglock_release(segp);
9093 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9094 			    "%s done:state =%d\n", function,
9095 			    segp->s_state));
9096 			return;
9097 		}
9098 
9099 		if (segp->s_state == RSM_STATE_NEW) {
9100 			segp->s_state = RSM_STATE_NEW_QUIESCED;
9101 			rsmseglock_release(segp);
9102 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9103 			    "%s done:state =%d\n", function,
9104 			    segp->s_state));
9105 			return;
9106 		}
9107 
9108 		if (segp->s_state == RSM_STATE_BIND) {
9109 			/* unbind */
9110 			(void) rsm_unbind_pages(segp);
9111 			segp->s_state = RSM_STATE_BIND_QUIESCED;
9112 			rsmseglock_release(segp);
9113 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9114 			    "%s done:state =%d\n", function,
9115 			    segp->s_state));
9116 			return;
9117 		}
9118 
9119 		if (segp->s_state == RSM_STATE_EXPORT) {
9120 			/*
9121 			 * wait for putv/getv to complete if the segp is
9122 			 * a local memory handle
9123 			 */
9124 			while ((segp->s_state == RSM_STATE_EXPORT) &&
9125 			    (segp->s_rdmacnt != 0)) {
9126 				cv_wait(&segp->s_cv, &segp->s_lock);
9127 			}
9128 
9129 			if (segp->s_state != RSM_STATE_EXPORT) {
9130 				/*
9131 				 * state changed need to see what it
9132 				 * should be changed to.
9133 				 */
9134 				recheck_state = 1;
9135 				continue;
9136 			}
9137 
9138 			segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9139 			rsmseglock_release(segp);
9140 			/*
9141 			 * send SUSPEND messages - currently it will be
9142 			 * done at the end
9143 			 */
9144 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9145 			    "%s done:state =%d\n", function,
9146 			    segp->s_state));
9147 			return;
9148 		}
9149 	} while (recheck_state);
9150 
9151 	rsmseglock_release(segp);
9152 }
9153 
9154 static void
9155 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9156 {
9157 	int			ret;
9158 	rsmseg_t		*segp = (rsmseg_t *)resp;
9159 	rsmapi_access_entry_t	*acl;
9160 	rsm_access_entry_t	*rsmpi_acl;
9161 	int			acl_len;
9162 	int			create_flags = 0;
9163 	struct buf		*xbuf;
9164 	rsm_memory_local_t	mem;
9165 	adapter_t		*adapter;
9166 	dev_t			sdev = 0;
9167 	rsm_resource_callback_t callback_flag;
9168 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9169 	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9170 
9171 	rsmseglock_acquire(segp);
9172 
9173 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9174 	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
9175 	    segp->s_state));
9176 
9177 	if ((segp->s_state == RSM_STATE_NEW) ||
9178 	    (segp->s_state == RSM_STATE_BIND) ||
9179 	    (segp->s_state == RSM_STATE_EXPORT)) {
9180 		rsmseglock_release(segp);
9181 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9182 		    function, segp->s_state));
9183 		return;
9184 	}
9185 
9186 	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9187 		segp->s_state = RSM_STATE_NEW;
9188 		cv_broadcast(&segp->s_cv);
9189 		rsmseglock_release(segp);
9190 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9191 		    function, segp->s_state));
9192 		return;
9193 	}
9194 
9195 	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9196 		/* bind the segment */
9197 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9198 		    segp->s_len, segp->s_proc);
9199 		if (ret == RSM_SUCCESS) { /* bind successful */
9200 			segp->s_state = RSM_STATE_BIND;
9201 		} else { /* bind failed - resource unavailable */
9202 			segp->s_state = RSM_STATE_NEW;
9203 		}
9204 		cv_broadcast(&segp->s_cv);
9205 		rsmseglock_release(segp);
9206 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9207 		    "%s done: bind_qscd bind = %d\n", function, ret));
9208 		return;
9209 	}
9210 
9211 	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9212 		/* wait for the segment to move to EXPORT_QUIESCED state */
9213 		cv_wait(&segp->s_cv, &segp->s_lock);
9214 	}
9215 
9216 	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9217 		/* bind the segment */
9218 		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9219 		    segp->s_len, segp->s_proc);
9220 
9221 		if (ret != RSM_SUCCESS) {
9222 			/* bind failed - resource unavailable */
9223 			acl_len = segp->s_acl_len;
9224 			acl = segp->s_acl;
9225 			rsmpi_acl = segp->s_acl_in;
9226 			segp->s_acl_len = 0;
9227 			segp->s_acl = NULL;
9228 			segp->s_acl_in = NULL;
9229 			rsmseglock_release(segp);
9230 
9231 			rsmexport_rm(segp);
9232 			rsmacl_free(acl, acl_len);
9233 			rsmpiacl_free(rsmpi_acl, acl_len);
9234 
9235 			rsmseglock_acquire(segp);
9236 			segp->s_state = RSM_STATE_NEW;
9237 			cv_broadcast(&segp->s_cv);
9238 			rsmseglock_release(segp);
9239 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9240 			    "%s done: exp_qscd bind failed = %d\n",
9241 			    function, ret));
9242 			return;
9243 		}
9244 		/*
9245 		 * publish the segment
9246 		 * if  successful
9247 		 *   segp->s_state = RSM_STATE_EXPORT;
9248 		 * else failed
9249 		 *   segp->s_state = RSM_STATE_BIND;
9250 		 */
9251 
9252 		/* check whether it is a local_memory_handle */
9253 		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9254 			if ((segp->s_acl[0].ae_node == my_nodeid) &&
9255 			    (segp->s_acl[0].ae_permission == 0)) {
9256 				segp->s_state = RSM_STATE_EXPORT;
9257 				cv_broadcast(&segp->s_cv);
9258 				rsmseglock_release(segp);
9259 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9260 				    "%s done:exp_qscd\n", function));
9261 				return;
9262 			}
9263 		}
9264 		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9265 		    sdev, 0, NULL, DDI_UMEM_SLEEP);
9266 		ASSERT(xbuf != NULL);
9267 
9268 		mem.ms_type = RSM_MEM_BUF;
9269 		mem.ms_bp = xbuf;
9270 
9271 		adapter = segp->s_adapter;
9272 
9273 		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9274 			create_flags = RSM_ALLOW_UNBIND_REBIND;
9275 		}
9276 
9277 		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9278 			callback_flag  = RSM_RESOURCE_DONTWAIT;
9279 		} else {
9280 			callback_flag  = RSM_RESOURCE_SLEEP;
9281 		}
9282 
9283 		ret = adapter->rsmpi_ops->rsm_seg_create(
9284 		    adapter->rsmpi_handle, &segp->s_handle.out,
9285 		    segp->s_len, create_flags, &mem,
9286 		    callback_flag, NULL);
9287 
9288 		if (ret != RSM_SUCCESS) {
9289 			acl_len = segp->s_acl_len;
9290 			acl = segp->s_acl;
9291 			rsmpi_acl = segp->s_acl_in;
9292 			segp->s_acl_len = 0;
9293 			segp->s_acl = NULL;
9294 			segp->s_acl_in = NULL;
9295 			rsmseglock_release(segp);
9296 
9297 			rsmexport_rm(segp);
9298 			rsmacl_free(acl, acl_len);
9299 			rsmpiacl_free(rsmpi_acl, acl_len);
9300 
9301 			rsmseglock_acquire(segp);
9302 			segp->s_state = RSM_STATE_BIND;
9303 			cv_broadcast(&segp->s_cv);
9304 			rsmseglock_release(segp);
9305 			DBG_PRINTF((category, RSM_ERR,
9306 			    "%s done: exp_qscd create failed = %d\n",
9307 			    function, ret));
9308 			return;
9309 		}
9310 
9311 		ret = adapter->rsmpi_ops->rsm_publish(
9312 		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9313 		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9314 
9315 		if (ret != RSM_SUCCESS) {
9316 			acl_len = segp->s_acl_len;
9317 			acl = segp->s_acl;
9318 			rsmpi_acl = segp->s_acl_in;
9319 			segp->s_acl_len = 0;
9320 			segp->s_acl = NULL;
9321 			segp->s_acl_in = NULL;
9322 			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9323 			rsmseglock_release(segp);
9324 
9325 			rsmexport_rm(segp);
9326 			rsmacl_free(acl, acl_len);
9327 			rsmpiacl_free(rsmpi_acl, acl_len);
9328 
9329 			rsmseglock_acquire(segp);
9330 			segp->s_state = RSM_STATE_BIND;
9331 			cv_broadcast(&segp->s_cv);
9332 			rsmseglock_release(segp);
9333 			DBG_PRINTF((category, RSM_ERR,
9334 			    "%s done: exp_qscd publish failed = %d\n",
9335 			    function, ret));
9336 			return;
9337 		}
9338 
9339 		segp->s_state = RSM_STATE_EXPORT;
9340 		cv_broadcast(&segp->s_cv);
9341 		rsmseglock_release(segp);
9342 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9343 		    function));
9344 		return;
9345 	}
9346 
9347 	rsmseglock_release(segp);
9348 
9349 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9350 }
9351 
9352 static void
9353 rsm_quiesce_imp_seg(rsmresource_t *resp)
9354 {
9355 	rsmseg_t	*segp = (rsmseg_t *)resp;
9356 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9357 	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9358 
9359 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9360 	    "%s enter: key=%u\n", function, segp->s_key));
9361 
9362 	rsmseglock_acquire(segp);
9363 	segp->s_flags |= RSM_DR_INPROGRESS;
9364 
9365 	while (segp->s_rdmacnt != 0) {
9366 		/* wait for the RDMA to complete */
9367 		cv_wait(&segp->s_cv, &segp->s_lock);
9368 	}
9369 
9370 	rsmseglock_release(segp);
9371 
9372 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9373 
9374 }
9375 
9376 static void
9377 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9378 {
9379 	rsmseg_t	*segp = (rsmseg_t *)resp;
9380 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9381 	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9382 
9383 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9384 	    "%s enter: key=%u\n", function, segp->s_key));
9385 
9386 	rsmseglock_acquire(segp);
9387 
9388 	segp->s_flags &= ~RSM_DR_INPROGRESS;
9389 	/* wake up any waiting putv/getv ops */
9390 	cv_broadcast(&segp->s_cv);
9391 
9392 	rsmseglock_release(segp);
9393 
9394 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9395 
9396 
9397 }
9398 
9399 static void
9400 rsm_process_exp_seg(rsmresource_t *resp, int event)
9401 {
9402 	if (event == RSM_DR_QUIESCE)
9403 		rsm_quiesce_exp_seg(resp);
9404 	else /* UNQUIESCE */
9405 		rsm_unquiesce_exp_seg(resp);
9406 }
9407 
9408 static void
9409 rsm_process_imp_seg(rsmresource_t *resp, int event)
9410 {
9411 	if (event == RSM_DR_QUIESCE)
9412 		rsm_quiesce_imp_seg(resp);
9413 	else /* UNQUIESCE */
9414 		rsm_unquiesce_imp_seg(resp);
9415 }
9416 
9417 static void
9418 rsm_dr_process_local_segments(int event)
9419 {
9420 
9421 	int i, j;
9422 	rsmresource_blk_t	*blk;
9423 	rsmresource_t		*p;
9424 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9425 
9426 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9427 	    "rsm_dr_process_local_segments enter\n"));
9428 
9429 	/* iterate through the resource structure */
9430 
9431 	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9432 
9433 	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9434 		blk = rsm_resource.rsmrc_root[i];
9435 		if (blk != NULL) {
9436 			for (j = 0; j < RSMRC_BLKSZ; j++) {
9437 				p = blk->rsmrcblk_blks[j];
9438 				if ((p != NULL) && (p != RSMRC_RESERVED)) {
9439 					/* valid resource */
9440 					if (p->rsmrc_type ==
9441 					    RSM_RESOURCE_EXPORT_SEGMENT)
9442 						rsm_process_exp_seg(p, event);
9443 					else if (p->rsmrc_type ==
9444 					    RSM_RESOURCE_IMPORT_SEGMENT)
9445 						rsm_process_imp_seg(p, event);
9446 				}
9447 			}
9448 		}
9449 	}
9450 
9451 	rw_exit(&rsm_resource.rsmrc_lock);
9452 
9453 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9454 	    "rsm_dr_process_local_segments done\n"));
9455 }
9456 
9457 /* *************** DR callback functions ************ */
9458 static void
9459 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9460 {
9461 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9462 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9463 	    "rsm_dr_callback_post_add is a no-op\n"));
9464 	/* Noop */
9465 }
9466 
9467 static int
9468 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9469 {
9470 	int	recheck_state = 0;
9471 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9472 
9473 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9474 	    "rsm_dr_callback_pre_del enter\n"));
9475 
9476 	mutex_enter(&rsm_drv_data.drv_lock);
9477 
9478 	do {
9479 		recheck_state = 0;
9480 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9481 		    "rsm_dr_callback_pre_del:state=%d\n",
9482 		    rsm_drv_data.drv_state));
9483 
9484 		switch (rsm_drv_data.drv_state) {
9485 		case RSM_DRV_NEW:
9486 			/*
9487 			 * The state should usually never be RSM_DRV_NEW
9488 			 * since in this state the callbacks have not yet
9489 			 * been registered. So, ASSERT.
9490 			 */
9491 			ASSERT(0);
9492 			return (0);
9493 		case RSM_DRV_REG_PROCESSING:
9494 			/*
9495 			 * The driver is in the process of registering
9496 			 * with the DR framework. So, wait till the
9497 			 * registration process is complete.
9498 			 */
9499 			recheck_state = 1;
9500 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9501 			break;
9502 		case RSM_DRV_UNREG_PROCESSING:
9503 			/*
9504 			 * If the state is RSM_DRV_UNREG_PROCESSING, the
9505 			 * module is in the process of detaching and
9506 			 * unregistering the callbacks from the DR
9507 			 * framework. So, simply return.
9508 			 */
9509 			mutex_exit(&rsm_drv_data.drv_lock);
9510 			DBG_PRINTF((category, RSM_DEBUG,
9511 			    "rsm_dr_callback_pre_del:"
9512 			    "pre-del on NEW/UNREG\n"));
9513 			return (0);
9514 		case RSM_DRV_OK:
9515 			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9516 			break;
9517 		case RSM_DRV_PREDEL_STARTED:
9518 			/* FALLTHRU */
9519 		case RSM_DRV_PREDEL_COMPLETED:
9520 			/* FALLTHRU */
9521 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9522 			recheck_state = 1;
9523 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9524 			break;
9525 		case RSM_DRV_DR_IN_PROGRESS:
9526 			rsm_drv_data.drv_memdel_cnt++;
9527 			mutex_exit(&rsm_drv_data.drv_lock);
9528 			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9529 			    "rsm_dr_callback_pre_del done\n"));
9530 			return (0);
9531 			/* break; */
9532 		default:
9533 			ASSERT(0);
9534 			break;
9535 		}
9536 
9537 	} while (recheck_state);
9538 
9539 	rsm_drv_data.drv_memdel_cnt++;
9540 
9541 	mutex_exit(&rsm_drv_data.drv_lock);
9542 
9543 	/* Do all the quiescing stuff here */
9544 	DBG_PRINTF((category, RSM_DEBUG,
9545 	    "rsm_dr_callback_pre_del: quiesce things now\n"));
9546 
9547 	rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9548 
9549 	/*
9550 	 * now that all local segments have been quiesced lets inform
9551 	 * the importers
9552 	 */
9553 	rsm_send_suspend();
9554 
9555 	/*
9556 	 * In response to the suspend message the remote node(s) will process
9557 	 * the segments and send a suspend_complete message. Till all
9558 	 * the nodes send the suspend_complete message we wait in the
9559 	 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9560 	 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9561 	 */
9562 	mutex_enter(&rsm_drv_data.drv_lock);
9563 
9564 	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9565 		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9566 	}
9567 
9568 	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9569 
9570 	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9571 	cv_broadcast(&rsm_drv_data.drv_cv);
9572 
9573 	mutex_exit(&rsm_drv_data.drv_lock);
9574 
9575 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9576 	    "rsm_dr_callback_pre_del done\n"));
9577 
9578 	return (0);
9579 }
9580 
9581 static void
9582 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9583 {
9584 	int	recheck_state = 0;
9585 	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9586 
9587 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9588 	    "rsm_dr_callback_post_del enter\n"));
9589 
9590 	mutex_enter(&rsm_drv_data.drv_lock);
9591 
9592 	do {
9593 		recheck_state = 0;
9594 		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9595 		    "rsm_dr_callback_post_del:state=%d\n",
9596 		    rsm_drv_data.drv_state));
9597 
9598 		switch (rsm_drv_data.drv_state) {
9599 		case RSM_DRV_NEW:
9600 			/*
9601 			 * The driver state cannot not be RSM_DRV_NEW
9602 			 * since in this state the callbacks have not
9603 			 * yet been registered.
9604 			 */
9605 			ASSERT(0);
9606 			return;
9607 		case RSM_DRV_REG_PROCESSING:
9608 			/*
9609 			 * The driver is in the process of registering with
9610 			 * the DR framework. Wait till the registration is
9611 			 * complete.
9612 			 */
9613 			recheck_state = 1;
9614 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9615 			break;
9616 		case RSM_DRV_UNREG_PROCESSING:
9617 			/*
9618 			 * RSM_DRV_UNREG_PROCESSING state means the module
9619 			 * is detaching and unregistering the callbacks
9620 			 * from the DR framework. So simply return.
9621 			 */
9622 			/* FALLTHRU */
9623 		case RSM_DRV_OK:
9624 			/*
9625 			 * RSM_DRV_OK means we missed the pre-del
9626 			 * corresponding to this post-del coz we had not
9627 			 * registered yet, so simply return.
9628 			 */
9629 			mutex_exit(&rsm_drv_data.drv_lock);
9630 			DBG_PRINTF((category, RSM_DEBUG,
9631 			    "rsm_dr_callback_post_del:"
9632 			    "post-del on OK/UNREG\n"));
9633 			return;
9634 			/* break; */
9635 		case RSM_DRV_PREDEL_STARTED:
9636 			/* FALLTHRU */
9637 		case RSM_DRV_PREDEL_COMPLETED:
9638 			/* FALLTHRU */
9639 		case RSM_DRV_POSTDEL_IN_PROGRESS:
9640 			recheck_state = 1;
9641 			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9642 			break;
9643 		case RSM_DRV_DR_IN_PROGRESS:
9644 			rsm_drv_data.drv_memdel_cnt--;
9645 			if (rsm_drv_data.drv_memdel_cnt > 0) {
9646 				mutex_exit(&rsm_drv_data.drv_lock);
9647 				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9648 				    "rsm_dr_callback_post_del done:\n"));
9649 				return;
9650 			}
9651 			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9652 			break;
9653 		default:
9654 			ASSERT(0);
9655 			return;
9656 			/* break; */
9657 		}
9658 	} while (recheck_state);
9659 
9660 	mutex_exit(&rsm_drv_data.drv_lock);
9661 
9662 	/* Do all the unquiescing stuff here */
9663 	DBG_PRINTF((category, RSM_DEBUG,
9664 	    "rsm_dr_callback_post_del: unquiesce things now\n"));
9665 
9666 	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9667 
9668 	/*
9669 	 * now that all local segments have been unquiesced lets inform
9670 	 * the importers
9671 	 */
9672 	rsm_send_resume();
9673 
9674 	mutex_enter(&rsm_drv_data.drv_lock);
9675 
9676 	rsm_drv_data.drv_state = RSM_DRV_OK;
9677 
9678 	cv_broadcast(&rsm_drv_data.drv_cv);
9679 
9680 	mutex_exit(&rsm_drv_data.drv_lock);
9681 
9682 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9683 	    "rsm_dr_callback_post_del done\n"));
9684 
9685 	return;
9686 
9687 }
9688