xref: /titanic_52/usr/src/uts/common/avs/ns/dsw/dsw_dev.c (revision ad9a7bd3532cf0ef931ded51a5ffe5d0496aad88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/time.h>
28 #include <sys/ksynch.h>
29 #include <sys/kmem.h>
30 #include <sys/errno.h>
31 #include <sys/cmn_err.h>
32 #include <sys/debug.h>
33 #include <sys/ddi.h>
34 #include <sys/nsc_thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/unistat/spcs_s.h>
37 #include <sys/unistat/spcs_errors.h>
38 
39 #include <sys/unistat/spcs_s_k.h>
40 #include <sys/nsctl/nsctl.h>
41 #include "dsw.h"
42 #include "dsw_dev.h"
43 #include "../rdc/rdc_update.h"
44 #include <sys/nskernd.h>
45 
46 #include <sys/sdt.h>		/* dtrace is S10 or later */
47 
48 #ifdef DS_DDICT
49 #include "../contract.h"
50 #endif
51 
52 /*
53  * Instant Image
54  *
55  * This file contains the core implementation of II.
56  *
57  * II is implemented as a simple filter module that pushes itself between
58  * user (SV, STE, etc.) and SDBC or NET.
59  *
60  */
61 
62 
63 #define	REMOTE_VOL(s, ip)	(((s) && ((ip->bi_flags)&DSW_SHDEXPORT)) || \
64 				    (!(s)&&((ip->bi_flags)&DSW_SHDIMPORT)))
65 
66 #define	total_ref(ip)	((ip->bi_shdref + ip->bi_shdrref + ip->bi_bmpref) + \
67 			    (NSHADOWS(ip) ? 0 : ip->bi_mstref + ip->bi_mstrref))
68 
69 
70 #define	II_TAIL_COPY(d, s, m, t)	bcopy(&(s.m), &(d.m), \
71 					sizeof (d) - (uintptr_t)&((t *)0)->m)
72 extern dev_info_t *ii_dip;
73 
74 #define	II_LINK_CLUSTER(ip, cluster) \
75 	_ii_ll_add(ip, &_ii_cluster_mutex, &_ii_cluster_top, cluster, \
76 	    &ip->bi_cluster)
77 #define	II_UNLINK_CLUSTER(ip) \
78 	_ii_ll_remove(ip, &_ii_cluster_mutex, &_ii_cluster_top, &ip->bi_cluster)
79 
80 #define	II_LINK_GROUP(ip, group) \
81 	_ii_ll_add(ip, &_ii_group_mutex, &_ii_group_top, group, &ip->bi_group)
82 #define	II_UNLINK_GROUP(ip) \
83 	_ii_ll_remove(ip, &_ii_group_mutex, &_ii_group_top, &ip->bi_group)
84 
/*
 * Heads of the driver's global lists.  Within this file _ii_info_top is
 * walked through bi_next and _ii_overflow_top through ii_next (see
 * ii_volume()); the cluster and group heads are handed to
 * _ii_ll_add()/_ii_ll_remove() by the II_LINK_* / II_UNLINK_* macros.
 */
_ii_info_t *_ii_info_top;
_ii_info_t *_ii_mst_top = 0;
_ii_overflow_t	*_ii_overflow_top;
_ii_lsthead_t *_ii_cluster_top;
_ii_lsthead_t *_ii_group_top;

/* Global tunables; each trailing comment describes the variable next to it. */
int	ii_debug;		/* level of cmn_err noise */
int	ii_bitmap;		/* bitmap operations switch */
uint_t	ii_header = 16;		/* Undocumented tunable (with adb!), start */
				/* of area cleared in volume when a dependent */
				/* shadow is disabled. */
				/* ii_throttle_unit (next line): */
				/* max # of chunks in copy loop before delay */
int	ii_throttle_unit = MIN_THROTTLE_UNIT;
				/* ii_throttle_delay (next line): */
				/* length of delay during update loop */
int	ii_throttle_delay = MIN_THROTTLE_DELAY;
int	ii_copy_direct = 1;
int	ii_nconcopy = 10;	/* default value when starting with no cache */
/* Mutexes passed to the list helpers by the II_LINK_* / II_UNLINK_* macros. */
kmutex_t _ii_cluster_mutex;
kmutex_t _ii_group_mutex;

static int _ii_shutting_down = 0;
/* nsctl handles obtained in _ii_init_dev() and dropped in _ii_deinit_dev() */
static nsc_io_t *_ii_io, *_ii_ior;
static nsc_mem_t *_ii_local_mem;
static nsc_def_t _ii_fd_def[], _ii_io_def[], _ii_ior_def[];
/*
 * ii_volume() holds _ii_info_mutex while walking _ii_info_top (unless its
 * caller says it is already locked) and _ii_overflow_mutex while walking
 * _ii_overflow_top.
 */
static kmutex_t	_ii_info_mutex;
static kmutex_t	_ii_overflow_mutex;
static kmutex_t _ii_config_mutex;
static _ii_bmp_ops_t alloc_buf_bmp, kmem_buf_bmp;
/* Service tokens registered in _ii_init_dev(). */
static nsc_svc_t *ii_volume_update;	/* IIVolumeUpdate token */
static nsc_svc_t *ii_report_luns;	/* IIReportLuns token */
static nsc_svc_t *ii_get_initiators;	/* IIGetInitiators token */
static ksema_t	_ii_concopy_sema;
/* non-zero makes _ii_deinit_dev() sema_destroy() _ii_concopy_sema */
static int	_ii_concopy_init = 0;
static int	_ii_instance = 0;
119 
120 void _ii_deinit_dev();
121 
122 static void _ii_info_free(_ii_info_t *ip);
123 static void _ii_info_freeshd(_ii_info_t *ip);
124 static void ii_sibling_free(_ii_info_t *ip);
125 ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp);
126 int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip,
127     nsc_buf_t *tmp);
128 static void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip,
129     nsc_buf_t *tmp);
130 static int _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int);
131 static void _ii_stopvol(_ii_info_t *ip);
132 static int _ii_stopcopy(_ii_info_t *ip);
133 static _ii_info_t *_ii_find_set(char *volume);
134 static _ii_info_t *_ii_find_vol(char *, int);
135 static _ii_overflow_t *_ii_find_overflow(char *volume);
136 static void _ii_ioctl_done(_ii_info_t *ip);
137 static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t);
138 static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t, int);
139 void _ii_error(_ii_info_t *ip, int error_type);
140 static nsc_buf_t *_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(),
141     void (*w_cb)(), ii_fd_t *bfd);
142 static int _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd);
143 extern nsc_size_t ii_btsize(nsc_size_t);
144 extern int ii_tinit(_ii_info_t *);
145 extern chunkid_t ii_tsearch(_ii_info_t *, chunkid_t);
146 extern void ii_tdelete(_ii_info_t *, chunkid_t);
147 extern void ii_reclaim_overflow(_ii_info_t *);
148 static void ii_overflow_free(_ii_info_t *ip, int disable);
149 static int ii_overflow_attach(_ii_info_t *, char *, int);
150 int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, unsigned char *,
151 	nsc_size_t);
152 static nsc_path_t *_ii_register_path(char *path, int type, nsc_io_t *io);
153 static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type);
154 static int _ii_reserve_begin(_ii_info_t *ip);
155 static int _ii_wait_for_it(_ii_info_t *ip);
156 static void _ii_reserve_end(_ii_info_t *ip);
157 static kstat_t *_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op);
158 static int _ii_ll_add(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char *,
159     char **);
160 static int _ii_ll_remove(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char **);
161 #define	_ii_unlock_chunk(ip, chunk)	_ii_unlock_chunks(ip, chunk, 1)
162 extern const int dsw_major_rev;
163 extern const int dsw_minor_rev;
164 extern const int dsw_micro_rev;
165 extern const int dsw_baseline_rev;
166 
167 /*
168  * These constants are used by ii_overflow_free() to indicate how the
169  * reclamation should take place.
170  *	NO_RECLAIM: just detach the overflow from the set; do not
171  *		attempt to reclaim chunks, do not decrement the
172  *		used-by count
173  *	RECLAIM: reclaim all chunks before decrementing the used-by count
174  *	INIT_OVR: decrement the used-by count only; do not reclaim chunks
175  */
176 
177 #define	NO_RECLAIM 0
178 #define	RECLAIM 1
179 #define	INIT_OVR 2
180 
/*
 * NOTE(review): the members look like the parameters of
 * _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int) plus a slot for
 * its result code -- confirm against the code that fills this in.
 */
struct	copy_args {			/* arguments passed to copy process */
	_ii_info_t *ip;
	int flag;
	int rtype;
	int wait;
	spcs_s_info_t kstatus;
	int rc;
};
189 
/*
 * set-specific kstats info
 *
 * Name/type pairs for the per-set named kstats.  The initialisers are
 * positional, so their order has to follow the member order of
 * ii_kstat_set_t.  The four-entry A..D groups are all KSTAT_DATA_CHAR;
 * presumably they are the mst_a..d / set_a..d / bmp_a..d / ovr_a..d members
 * that ii_set_stats_update() fills through ii_str_kstat_copy() -- verify
 * against dsw_dev.h.
 */
ii_kstat_set_t ii_kstat_set = {
	{ DSW_SKSTAT_SIZE, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_MTIME, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_FLAGS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_THROTTLE_UNIT, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_THROTTLE_DELAY, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_SHDCHKS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_SHDCHKUSED, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_SHDBITS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_COPYBITS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_MSTA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTD, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETD, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPD, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRD, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTIO, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SHDIO, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPIO, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRIO, KSTAT_DATA_CHAR },
};
222 
223 /*
224  * _ii_init_dev
225  *	Initialise the shadow driver
226  *
227  */
228 
229 int
230 _ii_init_dev()
231 {
232 	_ii_io = nsc_register_io("ii", NSC_II_ID|NSC_REFCNT|NSC_FILTER,
233 	    _ii_io_def);
234 	if (_ii_io == NULL)
235 		cmn_err(CE_WARN, "!ii: nsc_register_io failed.");
236 
237 	_ii_ior = nsc_register_io("ii-raw", NSC_IIR_ID|NSC_REFCNT|NSC_FILTER,
238 	    _ii_ior_def);
239 	if (_ii_ior == NULL)
240 		cmn_err(CE_WARN, "!ii: nsc_register_io r failed.");
241 
242 	_ii_local_mem = nsc_register_mem("ii:kmem", NSC_MEM_LOCAL, 0);
243 	if (_ii_local_mem == NULL)
244 		cmn_err(CE_WARN, "!ii: nsc_register_mem failed.");
245 
246 
247 	if (!_ii_io || !_ii_ior || !_ii_local_mem) {
248 		_ii_deinit_dev();
249 		return (ENOMEM);
250 	}
251 
252 	mutex_init(&_ii_info_mutex, NULL, MUTEX_DRIVER, NULL);
253 	mutex_init(&_ii_overflow_mutex, NULL, MUTEX_DRIVER, NULL);
254 	mutex_init(&_ii_config_mutex, NULL, MUTEX_DRIVER, NULL);
255 	mutex_init(&_ii_cluster_mutex, NULL, MUTEX_DRIVER, NULL);
256 	mutex_init(&_ii_group_mutex, NULL, MUTEX_DRIVER, NULL);
257 
258 	ii_volume_update = nsc_register_svc("RDCVolumeUpdated", 0);
259 	ii_report_luns = nsc_register_svc("IIReportLuns", 0);
260 	ii_get_initiators = nsc_register_svc("IIGetInitiators", 0);
261 
262 	if (!ii_volume_update || !ii_report_luns || !ii_get_initiators) {
263 		_ii_deinit_dev();
264 		return (ENOMEM);
265 	}
266 
267 	return (0);
268 }
269 
270 
271 /*
272  * _ii_deinit_dev
273  *	De-initialise the shadow driver
274  *
275  */
276 
277 void
278 _ii_deinit_dev()
279 {
280 
281 	if (_ii_io)
282 		(void) nsc_unregister_io(_ii_io, 0);
283 
284 	if (_ii_ior)
285 		(void) nsc_unregister_io(_ii_ior, 0);
286 
287 	if (_ii_local_mem)
288 		(void) nsc_unregister_mem(_ii_local_mem);
289 
290 	if (ii_volume_update)
291 		(void) nsc_unregister_svc(ii_volume_update);
292 
293 	if (ii_report_luns)
294 		(void) nsc_unregister_svc(ii_report_luns);
295 
296 	if (ii_get_initiators)
297 		(void) nsc_unregister_svc(ii_get_initiators);
298 
299 	mutex_destroy(&_ii_info_mutex);
300 	mutex_destroy(&_ii_overflow_mutex);
301 	mutex_destroy(&_ii_config_mutex);
302 	mutex_destroy(&_ii_cluster_mutex);
303 	mutex_destroy(&_ii_group_mutex);
304 	if (_ii_concopy_init)
305 		sema_destroy(&_ii_concopy_sema);
306 	_ii_concopy_init = 0;
307 
308 }
309 
310 static char *
311 ii_pathname(nsc_fd_t *fd)
312 {
313 	char *rc;
314 
315 	if (fd == NULL || (rc = nsc_pathname(fd)) == NULL)
316 		return ("");
317 	else
318 		return (rc);
319 }
320 
321 
322 /*
323  * _ii_rlse_d
324  *	Internal mechanics of _ii_rlse_devs().  Takes care of
325  *	resetting the ownership information as required.
326  */
327 
328 static void
329 _ii_rlse_d(ip, mst, raw)
330 _ii_info_t *ip;
331 int mst, raw;
332 {
333 	_ii_info_dev_t *cip;
334 	_ii_info_dev_t *rip;
335 
336 	rip = mst ? (ip->bi_mstrdev) : &(ip->bi_shdrdev);
337 	cip = mst ? (ip->bi_mstdev) : &(ip->bi_shddev);
338 
339 	DTRACE_PROBE2(_ii_rlse_d_type,
340 			_ii_info_dev_t *, rip,
341 			_ii_info_dev_t *, cip);
342 
343 
344 	if (RSRV(cip)) {
345 		if (raw) {
346 			ASSERT(cip->bi_orsrv > 0);
347 			cip->bi_orsrv--;
348 		} else {
349 			ASSERT(cip->bi_rsrv > 0);
350 			cip->bi_rsrv--;
351 		}
352 
353 		if (cip->bi_rsrv > 0) {
354 			nsc_set_owner(cip->bi_fd, cip->bi_iodev);
355 		} else if (cip->bi_orsrv > 0) {
356 			nsc_set_owner(cip->bi_fd, rip->bi_iodev);
357 		} else {
358 			nsc_set_owner(cip->bi_fd, NULL);
359 		}
360 
361 		if (!RSRV(cip)) {
362 			nsc_release(cip->bi_fd);
363 		}
364 	} else {
365 		if (raw) {
366 			ASSERT(rip->bi_rsrv > 0);
367 			rip->bi_rsrv--;
368 		} else {
369 			ASSERT(rip->bi_orsrv > 0);
370 			rip->bi_orsrv--;
371 		}
372 
373 		if (rip->bi_rsrv > 0) {
374 			nsc_set_owner(rip->bi_fd, rip->bi_iodev);
375 		} else if (rip->bi_orsrv > 0) {
376 			nsc_set_owner(rip->bi_fd, cip->bi_iodev);
377 		} else {
378 			nsc_set_owner(rip->bi_fd, NULL);
379 		}
380 
381 		if (!RSRV(rip)) {
382 			rip->bi_flag = 0;
383 			nsc_release(rip->bi_fd);
384 			cv_broadcast(&ip->bi_releasecv);
385 		}
386 	}
387 
388 }
389 
390 
391 /*
392  * _ii_rlse_devs
393  *	Release named underlying devices.
394  *
395  *	NOTE: the 'devs' argument must be the same as that passed to
396  *	the preceding _ii_rsrv_devs call.
397  */
398 
399 void
400 _ii_rlse_devs(ip, devs)
401 _ii_info_t *ip;
402 int devs;
403 {
404 
405 	ASSERT(!(devs & (MST|SHD)));
406 
407 	ASSERT(ip->bi_head != (_ii_info_t *)0xdeadbeef);
408 	if (!ip) {
409 		cmn_err(CE_WARN, "!ii: _ii_rlse_devs null ip");
410 		return;
411 	}
412 
413 	mutex_enter(&ip->bi_rsrvmutex);
414 
415 	DTRACE_PROBE(_ii_rlse_devs_mutex);
416 
417 	if ((devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
418 		if (NSHADOWS(ip) && ip != ip->bi_master)
419 			_ii_rlse_devs(ip->bi_master, devs&(MST|MSTR));
420 		else
421 			_ii_rlse_d(ip, 1, (devs&MSTR));
422 	}
423 
424 	if ((devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) {
425 		_ii_rlse_d(ip, 0, (devs&SHDR));
426 	}
427 
428 	if ((devs&BMP) != 0 && ip->bi_bmpfd) {
429 		if (--(ip->bi_bmprsrv) == 0)
430 			nsc_release(ip->bi_bmpfd);
431 	}
432 
433 	ASSERT(ip->bi_bmprsrv >= 0);
434 	ASSERT(ip->bi_shdrsrv >= 0);
435 	ASSERT(ip->bi_shdrrsrv >= 0);
436 	mutex_exit(&ip->bi_rsrvmutex);
437 
438 }
439 
440 
441 /*
442  * _ii_rsrv_d
443  *	Reserve device flagged, unless its companion is already reserved,
444  *	in that case increase the reserve on the companion.
445  */
446 
447 static int
448 _ii_rsrv_d(int raw, _ii_info_dev_t *rid, _ii_info_dev_t *cid, int flag,
449     _ii_info_t *ip)
450 {
451 	_ii_info_dev_t *p = NULL;
452 	int other = 0;
453 	int rc;
454 
455 	/*
456 	 * If user wants to do a cache reserve and it's already
457 	 * raw reserved, we need to do a real nsc_reserve, so wait
458 	 * until the release has been done.
459 	 */
460 	if (RSRV(rid) && (flag == II_EXTERNAL) &&
461 	    (raw == 0) && (rid->bi_flag != II_EXTERNAL)) {
462 		ip->bi_release++;
463 		while (RSRV(rid)) {
464 			DTRACE_PROBE1(_ii_rsrv_d_wait, _ii_info_dev_t *, rid);
465 			cv_wait(&ip->bi_releasecv, &ip->bi_rsrvmutex);
466 			DTRACE_PROBE1(_ii_rsrv_d_resume, _ii_info_dev_t *, rid);
467 		}
468 		ip->bi_release--;
469 	}
470 
471 	if (RSRV(rid)) {
472 		p = rid;
473 		if (!raw) {
474 			other = 1;
475 		}
476 	} else if (RSRV(cid)) {
477 		p = cid;
478 		if (raw) {
479 			other = 1;
480 		}
481 	}
482 
483 	if (p) {
484 		if (other) {
485 			p->bi_orsrv++;
486 		} else {
487 			p->bi_rsrv++;
488 		}
489 
490 		if (p->bi_iodev) {
491 			nsc_set_owner(p->bi_fd, p->bi_iodev);
492 		}
493 
494 		return (0);
495 	}
496 	p = raw ? rid : cid;
497 
498 	if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) {
499 		if (p->bi_iodev) {
500 			nsc_set_owner(p->bi_fd, p->bi_iodev);
501 		}
502 		p->bi_rsrv++;
503 		if (raw)
504 			p->bi_flag = flag;
505 	}
506 
507 	return (rc);
508 }
509 
510 /*
511  * _ii_rsrv_devs
512  *	Reserve named underlying devices.
513  *
514  */
515 
516 int
517 _ii_rsrv_devs(_ii_info_t *ip, int devs, int flag)
518 {
519 	int rc = 0;
520 	int got = 0;
521 
522 	ASSERT(!(devs & (MST|SHD)));
523 
524 	if (!ip) {
525 		cmn_err(CE_WARN, "!ii: _ii_rsrv_devs null ip");
526 		return (EINVAL);
527 	}
528 
529 	mutex_enter(&ip->bi_rsrvmutex);
530 
531 	DTRACE_PROBE(_ii_rsrv_devs_mutex);
532 
533 	if (rc == 0 && (devs&(MST|MSTR)) != 0 &&
534 	    (ip->bi_flags&DSW_SHDIMPORT) == 0) {
535 		DTRACE_PROBE(_ii_rsrv_devs_master);
536 		if (NSHADOWS(ip) && ip != ip->bi_master) {
537 			if ((rc = _ii_rsrv_devs(ip->bi_master, devs&(MST|MSTR),
538 			    flag)) != 0) {
539 				cmn_err(CE_WARN,
540 				    "!ii: nsc_reserve multi-master failed");
541 			} else {
542 				got |= devs&(MST|MSTR);
543 			}
544 		} else {
545 			if ((rc = _ii_rsrv_d((devs&MSTR) != 0, ip->bi_mstrdev,
546 			    ip->bi_mstdev, flag, ip)) != 0) {
547 				cmn_err(CE_WARN,
548 				    "!ii: nsc_reserve master failed %d", rc);
549 			} else {
550 				got |= (devs&(MST|MSTR));
551 			}
552 		}
553 	}
554 
555 	if (rc == 0 && (devs&(SHD|SHDR)) != 0 &&
556 	    (ip->bi_flags&DSW_SHDEXPORT) == 0) {
557 		DTRACE_PROBE(_ii_rsrv_devs_shadow);
558 		if ((rc = _ii_rsrv_d((devs&SHDR) != 0, &ip->bi_shdrdev,
559 		    &ip->bi_shddev, flag, ip)) != 0) {
560 			cmn_err(CE_WARN,
561 			    "!ii: nsc_reserve shadow failed %d", rc);
562 		} else {
563 			got |= (devs&(SHD|SHDR));
564 		}
565 	}
566 
567 	if (rc == 0 && (devs&BMP) != 0 && ip->bi_bmpfd) {
568 		DTRACE_PROBE(_ii_rsrv_devs_bitmap);
569 		if ((ip->bi_bmprsrv == 0) &&
570 		    (rc = nsc_reserve(ip->bi_bmpfd, 0)) != 0) {
571 			cmn_err(CE_WARN,
572 			    "!ii: nsc_reserve bitmap failed %d", rc);
573 		} else {
574 			(ip->bi_bmprsrv)++;
575 			got |= BMP;
576 		}
577 	}
578 	mutex_exit(&ip->bi_rsrvmutex);
579 	if (rc != 0 && got != 0)
580 		_ii_rlse_devs(ip, got);
581 
582 	return (rc);
583 }
584 
585 static int
586 _ii_reserve_begin(_ii_info_t *ip)
587 {
588 	int rc;
589 
590 	mutex_enter(&ip->bi_rlsemutex);
591 	if ((rc = _ii_wait_for_it(ip)) == 0) {
592 		++ip->bi_rsrvcnt;
593 	}
594 	mutex_exit(&ip->bi_rlsemutex);
595 
596 	return (rc);
597 }
598 
599 static int
600 _ii_wait_for_it(_ii_info_t *ip)
601 {
602 	int nosig;
603 
604 	nosig = 1;
605 	while (ip->bi_rsrvcnt > 0) {
606 		nosig = cv_wait_sig(&ip->bi_reservecv, &ip->bi_rlsemutex);
607 		if (!nosig) {
608 			break;
609 		}
610 	}
611 
612 	return (nosig? 0 : EINTR);
613 }
614 
615 static void
616 _ii_reserve_end(_ii_info_t *ip)
617 {
618 	mutex_enter(&ip->bi_rlsemutex);
619 	if (ip->bi_rsrvcnt <= 0) {
620 		mutex_exit(&ip->bi_rlsemutex);
621 		return;
622 	}
623 	--ip->bi_rsrvcnt;
624 	mutex_exit(&ip->bi_rlsemutex);
625 	cv_broadcast(&ip->bi_reservecv);
626 
627 }
628 
629 static int
630 ii_fill_copy_bmp(_ii_info_t *ip)
631 {
632 	int rc;
633 	chunkid_t max_chunk, chunk_num;
634 
635 	if ((rc = II_FILL_COPY_BMP(ip)) != 0)
636 		return (rc);
637 	/*
638 	 * make certain that the last bits of the last byte of the bitmap
639 	 * aren't filled as they may be copied out to the user.
640 	 */
641 
642 	chunk_num = ip->bi_size / DSW_SIZE;
643 	if ((ip->bi_size % DSW_SIZE) != 0)
644 		++chunk_num;
645 
646 	max_chunk = chunk_num;
647 	if ((max_chunk & 0x7) != 0)
648 		max_chunk = (max_chunk + 7) & ~7;
649 
650 	DTRACE_PROBE2(_ii_fill_copy_bmp_chunks, chunkid_t, chunk_num,
651 	    chunkid_t, max_chunk);
652 
653 	for (; chunk_num < max_chunk; chunk_num++) {
654 		(void) II_CLR_COPY_BIT(ip, chunk_num);
655 	}
656 
657 	return (0);
658 }
659 
660 static int
661 ii_update_denied(_ii_info_t *ip, spcs_s_info_t kstatus,
662 				int direction, int all)
663 {
664 	rdc_update_t update;
665 	int size;
666 	unsigned char *bmp;
667 
668 	update.volume = direction == CV_SHD2MST ? ii_pathname(MSTFD(ip)) :
669 	    ip->bi_keyname;
670 	update.denied = 0;
671 	update.protocol = RDC_SVC_ONRETURN;
672 	update.size = size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
673 	update.status = kstatus;
674 	update.bitmap = bmp = kmem_alloc(update.size, KM_SLEEP);
675 	if (bmp == NULL) {
676 		spcs_s_add(kstatus, ENOMEM);
677 		return (1);
678 	}
679 
680 	DTRACE_PROBE2(_ii_update_denied, int, all, int, size);
681 
682 	if (all) {
683 		while (size-- > 0)
684 			*bmp++ = (unsigned char)0xff;
685 	} else {
686 		if (II_CHANGE_BMP(ip, update.bitmap) != 0) {
687 			/* failed to read bitmap */
688 			spcs_s_add(kstatus, EIO);
689 			update.denied = 1;
690 		}
691 	}
692 
693 	/* check that no user of volume objects */
694 	if (update.denied == 0) {
695 		(void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
696 	}
697 	kmem_free(update.bitmap, FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)));
698 
699 	return (update.denied);
700 }
701 
702 static int
703 ii_need_same_size(_ii_info_t *ip)
704 {
705 	rdc_update_t update;
706 
707 	update.volume = ip->bi_keyname;
708 	update.denied = 0;
709 	update.protocol = RDC_SVC_VOL_ENABLED;
710 
711 	(void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
712 
713 	return (update.denied);
714 }
715 
716 /*
717  * ii_volume:	check if vol is already known to Instant Image and return
718  *	volume type if it is.
719  */
720 
721 static int
722 ii_volume(char *vol, int locked)
723 {
724 	_ii_info_t *ip;
725 	_ii_overflow_t	*op;
726 	int rc = NONE;
727 
728 	/* scan overflow volume list */
729 	mutex_enter(&_ii_overflow_mutex);
730 
731 	DTRACE_PROBE(_ii_volume_mutex);
732 
733 	for (op = _ii_overflow_top; op; op = op->ii_next) {
734 		if (strcmp(vol, op->ii_volname) == 0)
735 			break;
736 	}
737 	mutex_exit(&_ii_overflow_mutex);
738 	if (op) {
739 		return (OVR);
740 	}
741 
742 	if (!locked) {
743 		mutex_enter(&_ii_info_mutex);
744 	}
745 
746 	DTRACE_PROBE(_ii_volume_mutex2);
747 
748 	for (ip = _ii_info_top; ip; ip = ip->bi_next) {
749 		if (strcmp(vol, ii_pathname(ip->bi_mstfd)) == 0) {
750 			rc = MST;
751 			break;
752 		}
753 		if (strcmp(vol, ip->bi_keyname)  == 0) {
754 			rc = SHD;
755 			break;
756 		}
757 		if (strcmp(vol, ii_pathname(ip->bi_bmpfd)) == 0) {
758 			rc = BMP;
759 			break;
760 		}
761 	}
762 	DTRACE_PROBE1(_ii_volume_data, int, rc);
763 
764 	if (!locked) {
765 		mutex_exit(&_ii_info_mutex);
766 	}
767 
768 	return (rc);
769 }
770 
771 /*
772  * ii_open_shadow: open shadow volume for both cached and raw access,
773  *	if the normal device open fails attempt a file open to allow
774  *	shadowing into a file.
775  */
776 
777 static int
778 ii_open_shadow(_ii_info_t *ip, char *shadow_vol)
779 {
780 	int rc = 0;
781 	int file_rc = 0;
782 
783 	ip->bi_shdfd = nsc_open(shadow_vol,
784 	    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
785 	    (blind_t)&(ip->bi_shddev), &rc);
786 	if (!ip->bi_shdfd) {
787 		ip->bi_shdfd = nsc_open(shadow_vol,
788 		    NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
789 		    (blind_t)&(ip->bi_shddev), &file_rc);
790 		file_rc = 1;
791 		if (!ip->bi_shdfd) {
792 			return (rc);
793 		}
794 		DTRACE_PROBE(_ii_open_shadow);
795 	}
796 	else
797 		DTRACE_PROBE(_ii_open_shadow);
798 
799 	if (file_rc == 0) {
800 		ip->bi_shdrfd = nsc_open(shadow_vol,
801 		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
802 		    (blind_t)&(ip->bi_shdrdev), &rc);
803 		DTRACE_PROBE(_ii_open_shadow);
804 	} else {
805 		ip->bi_shdrfd = nsc_open(shadow_vol,
806 		    NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
807 		    (blind_t)&(ip->bi_shdrdev), &rc);
808 		DTRACE_PROBE(_ii_open_shadow);
809 	}
810 
811 	if (!ip->bi_shdrfd) {
812 		(void) nsc_close(ip->bi_shdfd);
813 		DTRACE_PROBE(_ii_open_shadow);
814 		return (rc);
815 	}
816 
817 	return (0);
818 }
819 
820 static void
821 ii_register_shd(_ii_info_t *ip)
822 {
823 	ip->bi_shd_tok = _ii_register_path(ip->bi_keyname,
824 	    NSC_CACHE, _ii_io);
825 	ip->bi_shdr_tok = _ii_register_path(ip->bi_keyname,
826 	    NSC_DEVICE, _ii_ior);
827 
828 }
829 
830 static void
831 ii_register_mst(_ii_info_t *ip)
832 {
833 	ip->bi_mst_tok = _ii_register_path(ii_pathname(ip->bi_mstfd),
834 	    NSC_CACHE, _ii_io);
835 	ip->bi_mstr_tok = _ii_register_path(ii_pathname(ip->bi_mstrfd),
836 	    NSC_DEVICE, _ii_ior);
837 
838 }
839 
840 static int
841 ii_register_ok(_ii_info_t *ip)
842 {
843 	int rc;
844 	int sibling;
845 	int exported;
846 
847 	rc = 1;
848 	sibling = NSHADOWS(ip) && ip != ip->bi_head;
849 	exported = ip->bi_flags & DSW_SHDEXPORT;
850 
851 	if ((ip->bi_bmpfd && !ip->bi_bmp_tok) || (!exported && (
852 	    !ip->bi_shd_tok || !ip->bi_shdr_tok)))
853 		rc = 0;
854 	else if (!sibling && (!ip->bi_mst_tok || !ip->bi_mstr_tok))
855 		rc = 0;
856 
857 	return (rc);
858 }
859 
860 #ifndef DISABLE_KSTATS
861 
862 /*
863  * _ii_kstat_create
864  *	Create and install kstat_io data
865  *
866  * Calling/Exit State:
867  *	Returns 0 if kstats couldn't be created, otherwise it returns
868  *	a pointer to the created kstat_t.
869  */
870 
871 static kstat_t *
872 _ii_kstat_create(_ii_info_t *ip, char *type)
873 {
874 	kstat_t *result;
875 	char name[ IOSTAT_NAME_LEN ];
876 	int setnum;
877 	char *nptr;
878 	static int mstnum = 0;
879 	static int shdbmpnum = -1;
880 
881 	switch (*type) {
882 	case 'm':
883 		setnum = mstnum++;
884 		nptr = ip->bi_kstat_io.mstio;
885 		break;
886 	case 's':
887 		/* assumption: shadow kstats created before bitmap */
888 		setnum = ++shdbmpnum;
889 		nptr = ip->bi_kstat_io.shdio;
890 		break;
891 	case 'b':
892 		setnum = shdbmpnum;
893 		nptr = ip->bi_kstat_io.bmpio;
894 		break;
895 	default:
896 		cmn_err(CE_WARN, "!Unable to determine kstat type (%c)", *type);
897 		setnum = -1;
898 		break;
899 	}
900 	/*
901 	 * The name of the kstat, defined below, is designed to work
902 	 * with the 'iostat -x' command.  This command leaves only
903 	 * 9 characters for the name, and the kstats built in to Solaris
904 	 * all seem to be of the form <service><number>.  For that
905 	 * reason, we have chosen ii<type><number>, where <type> is
906 	 * m, s, b, or o (for master, shadow, bitmap, and overflow
907 	 * respectively), and the number is monotonically increasing from
908 	 * 0 for each time one of those <type>s are created.  Note that
909 	 * the shadow and bitmap are always created in pairs and so, for
910 	 * any given set, they will have the same <number>.
911 	 */
912 	(void) sprintf(name, "ii%c%d", *type, setnum);
913 	(void) strncpy(nptr, name, IOSTAT_NAME_LEN);
914 	result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0);
915 	if (result) {
916 		result->ks_private = ip;
917 		result->ks_lock = &ip->bi_kstat_io.statmutex;
918 		kstat_install(result);
919 	} else {
920 		cmn_err(CE_WARN, "!Unable to create %s kstats for set %s", type,
921 		    ip->bi_keyname);
922 	}
923 
924 	return (result);
925 }
926 
927 /*
928  * _ii_overflow_kstat_create
929  *	Create and install kstat_io data for an overflow volume
930  *
931  * Calling/Exit State:
932  *	Returns 0 if kstats couldn't be created, otherwise it returns
933  *	a pointer to the created kstat_t.
934  *
935  * See comments in _ii_kstat_create for additional information.
936  *
937  */
938 static kstat_t *
939 _ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op)
940 {
941 	kstat_t *result;
942 	char *nptr;
943 	char name [IOSTAT_NAME_LEN];
944 	static int ovrnum = 0;
945 	int setnum = ovrnum++;
946 
947 	nptr = ip->bi_kstat_io.ovrio;
948 
949 	(void) sprintf(name, "iio%d", setnum);
950 	(void) strncpy(nptr, name, IOSTAT_NAME_LEN);
951 
952 	mutex_init(&op->ii_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
953 
954 	if ((result =
955 	    kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0))) {
956 		result->ks_private = ip;
957 		result->ks_lock = &op->ii_kstat_mutex;
958 		kstat_install(result);
959 	} else {
960 		mutex_destroy(&op->ii_kstat_mutex);
961 		cmn_err(CE_WARN, "!Unabled to create overflow kstat for set "
962 		    "%s", ip->bi_keyname);
963 	}
964 
965 	return (result);
966 }
967 
968 #endif
969 
970 static void
971 ii_str_kstat_copy(char *str, char *p1, char *p2, char *p3, char *p4)
972 {
973 	static int whinged = 0;
974 	char *part[ 4 ];
975 	char fulldata[ DSW_NAMELEN ];
976 	int i, offset, remain;
977 	int num_parts;
978 	int leftover;
979 	int kscharsize = KSTAT_DATA_CHAR_LEN - 1;
980 
981 	/*
982 	 * NOTE: the following lines must be changed if DSW_NAMELEN
983 	 * ever changes.  You'll need a part[] for every kscharsize
984 	 * characters (or fraction thereof).  The ii_kstat_set_t
985 	 * definition in dsw_dev.h will also need new ovr_? entries.
986 	 */
987 	part[ 0 ] = p1;
988 	part[ 1 ] = p2;
989 	part[ 2 ] = p3;
990 	part[ 3 ] = p4;
991 
992 	bzero(fulldata, DSW_NAMELEN);
993 	if (str) {
994 		(void) strncpy(fulldata, str, DSW_NAMELEN);
995 	}
996 
997 	num_parts = DSW_NAMELEN / kscharsize;
998 	leftover = DSW_NAMELEN % kscharsize;
999 	if (leftover) {
1000 		++num_parts;
1001 	}
1002 
1003 	if (num_parts > sizeof (part) / sizeof (part[0])) {
1004 		/*
1005 		 * DSW_NAMELEN is 64 and kscharsize is 15.
1006 		 * It's always "whinged"
1007 		 */
1008 		if (!whinged) {
1009 #ifdef DEBUG
1010 			cmn_err(CE_WARN, "!May not have enough room "
1011 			    "to store volume name in kstats");
1012 #endif
1013 			whinged = 1;
1014 		}
1015 		num_parts = sizeof (part) / sizeof (part[0]);
1016 	}
1017 
1018 	offset = 0;
1019 	remain = DSW_NAMELEN;
1020 	for (i = 0; i < num_parts; i++) {
1021 		int to_copy = remain > kscharsize? kscharsize : remain;
1022 		bcopy(&fulldata[ offset ], part[ i ], to_copy);
1023 		offset += to_copy;
1024 		remain -= to_copy;
1025 	}
1026 }
1027 
1028 static int
1029 ii_set_stats_update(kstat_t *ksp, int rw)
1030 {
1031 	_ii_info_t *ip = (_ii_info_t *)ksp->ks_private;
1032 	ii_kstat_set_t *kp = (ii_kstat_set_t *)ksp->ks_data;
1033 
1034 	if (KSTAT_WRITE == rw) {
1035 		return (EACCES);
1036 	}
1037 
1038 	/* copy values over */
1039 	kp->size.value.ul = ip->bi_size;
1040 	kp->flags.value.ul = ip->bi_flags;
1041 	kp->unit.value.ul = ip->bi_throttle_unit;
1042 	kp->delay.value.ul = ip->bi_throttle_delay;
1043 	kp->mtime.value.ul = ip->bi_mtime;
1044 
1045 	/* update bitmap counters if necessary */
1046 	if (ip->bi_state & DSW_CNTCPYBITS) {
1047 		ip->bi_copybits = 0;
1048 		if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
1049 			ip->bi_state &= ~DSW_CNTCPYBITS;
1050 			II_CNT_BITS(ip, ip->bi_copyfba,
1051 			    &ip->bi_copybits,
1052 			    DSW_BM_SIZE_BYTES(ip));
1053 			_ii_rlse_devs(ip, BMP);
1054 		}
1055 	}
1056 
1057 	if (ip->bi_state & DSW_CNTSHDBITS) {
1058 		ip->bi_shdbits = 0;
1059 		if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
1060 			ip->bi_state &= ~DSW_CNTSHDBITS;
1061 			II_CNT_BITS(ip, ip->bi_shdfba,
1062 			    &ip->bi_shdbits,
1063 			    DSW_BM_SIZE_BYTES(ip));
1064 			_ii_rlse_devs(ip, BMP);
1065 		}
1066 	}
1067 
1068 	kp->copybits.value.ul = ip->bi_copybits;
1069 	kp->shdbits.value.ul = ip->bi_shdbits;
1070 
1071 	/* copy volume names */
1072 	ii_str_kstat_copy(ii_pathname(MSTFD(ip)),
1073 	    kp->mst_a.value.c, kp->mst_b.value.c,
1074 	    kp->mst_c.value.c, kp->mst_d.value.c);
1075 
1076 	ii_str_kstat_copy(ip->bi_keyname, kp->set_a.value.c, kp->set_b.value.c,
1077 	    kp->set_c.value.c, kp->set_d.value.c);
1078 
1079 	ii_str_kstat_copy(ii_pathname(ip->bi_bmpfd),
1080 	    kp->bmp_a.value.c, kp->bmp_b.value.c,
1081 	    kp->bmp_c.value.c, kp->bmp_d.value.c);
1082 
1083 	if (ip->bi_overflow) {
1084 		ii_str_kstat_copy(ip->bi_overflow->ii_volname,
1085 		    kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c,
1086 		    kp->ovr_d.value.c);
1087 		(void) strlcpy(kp->ovr_io.value.c, ip->bi_kstat_io.ovrio,
1088 		    KSTAT_DATA_CHAR_LEN);
1089 	} else {
1090 		ii_str_kstat_copy("", kp->ovr_a.value.c, kp->ovr_b.value.c,
1091 		    kp->ovr_c.value.c, kp->ovr_d.value.c);
1092 		bzero(kp->ovr_io.value.c, KSTAT_DATA_CHAR_LEN);
1093 	}
1094 	if ((ip->bi_flags) & DSW_TREEMAP) {
1095 		kp->shdchks.value.ul = ip->bi_shdchks;
1096 		kp->shdchkused.value.ul = ip->bi_shdchkused;
1097 	} else {
1098 		kp->shdchks.value.ul = 0;
1099 		kp->shdchkused.value.ul = 0;
1100 	}
1101 	/* make sure value.c are always null terminated */
1102 	(void) strlcpy(kp->mst_io.value.c, ip->bi_kstat_io.mstio,
1103 	    KSTAT_DATA_CHAR_LEN);
1104 	(void) strlcpy(kp->shd_io.value.c, ip->bi_kstat_io.shdio,
1105 	    KSTAT_DATA_CHAR_LEN);
1106 	(void) strlcpy(kp->bmp_io.value.c, ip->bi_kstat_io.bmpio,
1107 	    KSTAT_DATA_CHAR_LEN);
1108 
1109 	return (0);
1110 }
1111 
1112 /*
1113  * _ii_config
1114  *	Configure an II device pair
1115  *
1116  * Calling/Exit State:
1117  *	Returns 0 if the pairing was configured, otherwise an
1118  *	error code. The ioctl data stucture is copied out to the user
1119  *	and contains any additional error information, and the master
1120  *	and shadow volume names if not supplied by the user.
1121  *
1122  * Description:
1123  *	Reads the user configuration structure and attempts
1124  *	to establish an II pairing. The snapshot of the master
1125  *	device is established at this point in time.
1126  */
1127 
1128 int
1129 _ii_config(intptr_t arg, int ilp32, int *rvp, int iflags)
1130 {
1131 	dsw_config_t uconf;
1132 	dsw_config32_t *uconf32;
1133 	_ii_info_t *ip, *hip, **ipp;
1134 	int rc;
1135 	int type;
1136 	int nshadows;
1137 	int add_to_mst_top;
1138 	int import;
1139 	int existing;
1140 	int resized;
1141 	nsc_size_t mst_size, shd_size, bmp_size;
1142 	nsc_off_t shdfba;
1143 	nsc_off_t copyfba;
1144 	int keylen, keyoffset;
1145 	ii_header_t *bm_header;
1146 	nsc_buf_t *tmp;
1147 	spcs_s_info_t kstatus;
1148 	spcs_s_info32_t ustatus32;
1149 	int rtype;
1150 	uint_t hints;
1151 
1152 	/* Import is a once only operation like an enable */
1153 	ASSERT((iflags&(II_EXISTING|II_IMPORT)) != (II_EXISTING|II_IMPORT));
1154 	existing = (iflags&II_EXISTING) != 0;
1155 	import = (iflags&II_IMPORT) != 0;
1156 	*rvp = 0;
1157 	if (ilp32) {
1158 		uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP);
1159 		if (uconf32 == NULL) {
1160 			return (ENOMEM);
1161 		}
1162 		if (copyin((void *)arg, uconf32, sizeof (*uconf32)) < 0)
1163 			return (EFAULT);
1164 		II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t);
1165 		uconf.status = (spcs_s_info_t)uconf32->status;
1166 		ustatus32 = uconf32->status;
1167 		kmem_free(uconf32, sizeof (dsw_config32_t));
1168 	} else if (copyin((void *)arg, &uconf, sizeof (uconf)) < 0)
1169 		return (EFAULT);
1170 
1171 	DTRACE_PROBE3(_ii_config_info, char *, uconf.master_vol,
1172 	    char *, uconf.shadow_vol, char *, uconf.bitmap_vol);
1173 
1174 	kstatus = spcs_s_kcreate();
1175 	if (kstatus == NULL)
1176 		return (ENOMEM);
1177 
1178 	if (_ii_shutting_down)
1179 		return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1180 		    DSW_ESHUTDOWN));
1181 
1182 	if (uconf.bitmap_vol[0] == 0)
1183 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
1184 
1185 	mutex_enter(&_ii_config_mutex);
1186 	ip = nsc_kmem_zalloc(sizeof (*ip), KM_SLEEP, _ii_local_mem);
1187 	if (!ip) {
1188 		mutex_exit(&_ii_config_mutex);
1189 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
1190 	}
1191 	ip->bi_mstdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
1192 	    _ii_local_mem);
1193 	ip->bi_mstrdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
1194 	    _ii_local_mem);
1195 	if (ip->bi_mstdev == NULL || ip->bi_mstrdev == NULL) {
1196 		mutex_exit(&_ii_config_mutex);
1197 		_ii_info_free(ip);
1198 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
1199 	}
1200 
1201 	ip->bi_disabled = 1;	/* mark as disabled until we are ready to go */
1202 	mutex_init(&ip->bi_mutex, NULL, MUTEX_DRIVER, NULL);
1203 	mutex_init(&ip->bi_bmpmutex, NULL, MUTEX_DRIVER, NULL);
1204 	mutex_init(&ip->bi_rsrvmutex, NULL, MUTEX_DRIVER, NULL);
1205 	mutex_init(&ip->bi_rlsemutex, NULL, MUTEX_DRIVER, NULL);
1206 	mutex_init(&ip->bi_chksmutex, NULL, MUTEX_DRIVER, NULL);
1207 	cv_init(&ip->bi_copydonecv, NULL, CV_DRIVER, NULL);
1208 	cv_init(&ip->bi_reservecv, NULL, CV_DRIVER, NULL);
1209 	cv_init(&ip->bi_releasecv, NULL, CV_DRIVER, NULL);
1210 	cv_init(&ip->bi_ioctlcv, NULL, CV_DRIVER, NULL);
1211 	cv_init(&ip->bi_closingcv, NULL, CV_DRIVER, NULL);
1212 	cv_init(&ip->bi_busycv, NULL, CV_DRIVER, NULL);
1213 	rw_init(&ip->bi_busyrw, NULL, RW_DRIVER, NULL);
1214 	rw_init(&ip->bi_linkrw, NULL, RW_DRIVER, NULL);
1215 	(void) strncpy(ip->bi_keyname, uconf.shadow_vol, DSW_NAMELEN);
1216 	ip->bi_keyname[DSW_NAMELEN-1] = '\0';
1217 	ip->bi_throttle_unit = ii_throttle_unit;
1218 	ip->bi_throttle_delay = ii_throttle_delay;
1219 
1220 	/* First check the list to see if uconf.bitmap_vol's already there */
1221 
1222 	if (ii_volume(uconf.bitmap_vol, 0) != NONE) {
1223 		DTRACE_PROBE(_ii_config_bmp_found);
1224 		mutex_exit(&_ii_config_mutex);
1225 		_ii_info_free(ip);
1226 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1227 	}
1228 
1229 	ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
1230 	    NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc);
1231 	if (!ip->bi_bmpfd)
1232 		ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
1233 		    NSC_IIR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, NULL,
1234 		    (blind_t)&(ip->bi_bmpdev), &rc);
1235 	if (!ip->bi_bmpfd && !existing) {
1236 		mutex_exit(&_ii_config_mutex);
1237 		_ii_info_free(ip);
1238 		spcs_s_add(kstatus, rc);
1239 		DTRACE_PROBE(_ii_config_no_bmp);
1240 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1241 	}
1242 
1243 	if (import) {
1244 		uconf.flag = DSW_GOLDEN;
1245 		II_FLAG_SETX(DSW_SHDIMPORT|DSW_GOLDEN, ip);
1246 	}
1247 
1248 	if (existing) {
1249 
1250 		DTRACE_PROBE(_ii_config_existing);
1251 		/*
1252 		 * ii_config is used by enable, import and resume (existing)
1253 		 * If not importing or resuming, then this must be enable.
1254 		 * Indicate this fact for SNMP use.
1255 		 */
1256 
1257 		if (!ip->bi_bmpfd) {
1258 			/*
1259 			 * Couldn't read bitmap, mark master and shadow as
1260 			 * unusable.
1261 			 */
1262 			II_FLAG_ASSIGN(DSW_BMPOFFLINE|DSW_MSTOFFLINE|
1263 			    DSW_SHDOFFLINE, ip);
1264 
1265 			/*
1266 			 * Set cluster tag for this element so it can
1267 			 * be suspended later
1268 			 */
1269 			(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1270 
1271 			/* need to check on master, might be shared */
1272 			goto header_checked;
1273 		}
1274 		/* check the header */
1275 		(void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
1276 
1277 		/* get first block of bit map */
1278 		mutex_enter(&ip->bi_mutex);
1279 		bm_header = _ii_bm_header_get(ip, &tmp);
1280 		mutex_exit(&ip->bi_mutex);
1281 		if (bm_header == NULL) {
1282 			if (ii_debug > 0)
1283 				cmn_err(CE_WARN,
1284 				    "!ii: _ii_bm_header_get returned NULL");
1285 			mutex_exit(&_ii_config_mutex);
1286 			_ii_info_free(ip);
1287 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1288 			    DSW_EHDRBMP));
1289 		}
1290 
1291 		if (bm_header->ii_magic != DSW_DIRTY &&
1292 		    bm_header->ii_magic != DSW_CLEAN) {
1293 			mutex_exit(&_ii_config_mutex);
1294 			_ii_bm_header_free(bm_header, ip, tmp);
1295 			_ii_info_free(ip);
1296 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1297 			    DSW_EINVALBMP));
1298 		}
1299 
1300 		II_FLAG_ASSIGN(bm_header->ii_state, ip);
1301 		/* Restore copy throttle parameters, if header version is 3 */
1302 		if (bm_header->ii_version >= 3) {	/* II_HEADER_VERSION */
1303 			ip->bi_throttle_delay = bm_header->ii_throttle_delay;
1304 			ip->bi_throttle_unit  = bm_header->ii_throttle_unit;
1305 		}
1306 
1307 		/* Restore cluster & group names, if header version is 4 */
1308 		if (bm_header->ii_version >= 4) {
1309 			/* cluster */
1310 			if (*bm_header->clstr_name) {
1311 				(void) strncpy(uconf.cluster_tag,
1312 				    bm_header->clstr_name, DSW_NAMELEN);
1313 				(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1314 			}
1315 
1316 			/* group */
1317 			if (*bm_header->group_name) {
1318 				(void) strncpy(uconf.group_name,
1319 				    bm_header->group_name, DSW_NAMELEN);
1320 				(void) II_LINK_GROUP(ip, uconf.group_name);
1321 			}
1322 		}
1323 		/* restore latest modification time, if header version >= 5 */
1324 		if (bm_header->ii_version >= 5) {
1325 			ip->bi_mtime = bm_header->ii_mtime;
1326 		}
1327 
1328 		/* Fetch master and shadow names from bitmap header */
1329 		if (uconf.master_vol[0] == 0)
1330 			(void) strncpy(uconf.master_vol, bm_header->master_vol,
1331 			    DSW_NAMELEN);
1332 		if (uconf.shadow_vol[0] == 0)
1333 			(void) strncpy(uconf.shadow_vol, bm_header->shadow_vol,
1334 			    DSW_NAMELEN);
1335 
1336 		/* return the fetched names to the user */
1337 		if (ilp32) {
1338 			uconf32 = kmem_zalloc(sizeof (dsw_config32_t),
1339 			    KM_SLEEP);
1340 			if (uconf32 == NULL) {
1341 				mutex_exit(&_ii_config_mutex);
1342 				_ii_bm_header_free(bm_header, ip, tmp);
1343 				_ii_rlse_devs(ip, BMP);
1344 				_ii_info_free(ip);
1345 				return (ENOMEM);
1346 			}
1347 			uconf32->status = ustatus32;
1348 			II_TAIL_COPY((*uconf32), uconf, master_vol,
1349 			    dsw_config32_t);
1350 			rc = copyout(uconf32, (void *)arg, sizeof (*uconf32));
1351 			kmem_free(uconf32, sizeof (dsw_config32_t));
1352 		} else {
1353 			rc = copyout(&uconf, (void *)arg, sizeof (uconf));
1354 		}
1355 		if (rc) {
1356 			mutex_exit(&_ii_config_mutex);
1357 			_ii_bm_header_free(bm_header, ip, tmp);
1358 			_ii_rlse_devs(ip, BMP);
1359 			_ii_info_free(ip);
1360 			return (EFAULT);
1361 		}
1362 
1363 		if (strncmp(bm_header->bitmap_vol, uconf.bitmap_vol,
1364 		    DSW_NAMELEN) || ((!(ip->bi_flags&DSW_SHDIMPORT)) &&
1365 		    strncmp(bm_header->master_vol, uconf.master_vol,
1366 		    DSW_NAMELEN)) || strncmp(bm_header->shadow_vol,
1367 		    uconf.shadow_vol, DSW_NAMELEN)) {
1368 			mutex_exit(&_ii_config_mutex);
1369 			_ii_bm_header_free(bm_header, ip, tmp);
1370 			_ii_rlse_devs(ip, BMP);
1371 			_ii_info_free(ip);
1372 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1373 			    DSW_EMISMATCH));
1374 		}
1375 		shdfba = bm_header->ii_shdfba;
1376 		copyfba = bm_header->ii_copyfba;
1377 		if ((ip->bi_flags)&DSW_TREEMAP) {
1378 			if (ii_debug > 0)
1379 				cmn_err(CE_NOTE,
1380 				    "!II: Resuming short shadow volume");
1381 
1382 			ip->bi_mstchks = bm_header->ii_mstchks;
1383 			ip->bi_shdchks = bm_header->ii_shdchks;
1384 			ip->bi_shdchkused = bm_header->ii_shdchkused;
1385 			ip->bi_shdfchk = bm_header->ii_shdfchk;
1386 
1387 			if (bm_header->overflow_vol[0] != 0)
1388 				if ((rc = ii_overflow_attach(ip,
1389 				    bm_header->overflow_vol, 0)) != 0) {
1390 					mutex_exit(&_ii_config_mutex);
1391 					_ii_bm_header_free(bm_header, ip, tmp);
1392 					_ii_rlse_devs(ip, BMP);
1393 					_ii_info_free(ip);
1394 					return (spcs_s_ocopyoutf(&kstatus,
1395 					    uconf.status, rc));
1396 			}
1397 		}
1398 		_ii_bm_header_free(bm_header, ip, tmp);
1399 		_ii_rlse_devs(ip, BMP);
1400 	}
1401 header_checked:
1402 
1403 	if (ip->bi_flags&DSW_SHDIMPORT)
1404 		(void) strcpy(uconf.master_vol, "<imported shadow>");
1405 	if (!uconf.master_vol[0] || !uconf.shadow_vol[0]) {
1406 		mutex_exit(&_ii_config_mutex);
1407 		_ii_info_free(ip);
1408 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
1409 	}
1410 
1411 	/* check that no volume has been given twice */
1412 	if (strncmp(uconf.master_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
1413 		mutex_exit(&_ii_config_mutex);
1414 		_ii_info_free(ip);
1415 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1416 	}
1417 
1418 	if (strncmp(uconf.master_vol, uconf.bitmap_vol, DSW_NAMELEN) == 0) {
1419 		mutex_exit(&_ii_config_mutex);
1420 		_ii_info_free(ip);
1421 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1422 	}
1423 
1424 	if (strncmp(uconf.bitmap_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
1425 		mutex_exit(&_ii_config_mutex);
1426 		_ii_info_free(ip);
1427 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1428 	}
1429 
1430 	/* check that master is not already a bitmap, shadow or overflow */
1431 	type = ii_volume(uconf.master_vol, 1);
1432 	if (type != NONE && type != MST) {
1433 		mutex_exit(&_ii_config_mutex);
1434 		_ii_info_free(ip);
1435 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1436 	}
1437 
1438 	/* check that shadow is not used as anything else */
1439 	type = ii_volume(uconf.shadow_vol, 1);
1440 	if (type != NONE && type != SHD) {
1441 		mutex_exit(&_ii_config_mutex);
1442 		_ii_info_free(ip);
1443 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1444 	}
1445 
1446 	/* Setup the table bitmap operations table */
1447 	switch (ii_bitmap) {
1448 	case II_KMEM:
1449 		if (ii_debug > 0)
1450 			cmn_err(CE_NOTE, "!ii: using volatile bitmaps");
1451 		ip->bi_bitmap_ops = &kmem_buf_bmp;
1452 		break;
1453 	case II_FWC:
1454 		hints = 0;
1455 		(void) nsc_node_hints(&hints);
1456 		if ((hints & NSC_FORCED_WRTHRU) == 0)
1457 			ip->bi_bitmap_ops = &kmem_buf_bmp;
1458 		else
1459 			ip->bi_bitmap_ops = &alloc_buf_bmp;
1460 		if (ii_debug > 0) {
1461 			cmn_err(CE_NOTE, "!ii: chosen to use %s bitmaps",
1462 			    ip->bi_bitmap_ops == &kmem_buf_bmp ?
1463 			    "volatile" : "persistent");
1464 		}
1465 		break;
1466 	case II_WTHRU:
1467 	default:
1468 		if (ii_debug > 0)
1469 			cmn_err(CE_NOTE, "!ii: using persistent bitmaps");
1470 		ip->bi_bitmap_ops = &alloc_buf_bmp;
1471 		break;
1472 	}
1473 
1474 	/*
1475 	 * If we found aother shadow volume with the same name,
1476 	 * If this is an resume operation,
1477 	 * If this shadow is in the exported state
1478 	 * then try an on the fly join instead
1479 	 */
1480 	for (hip = _ii_info_top; hip; hip = hip->bi_next)
1481 		if (strcmp(uconf.shadow_vol, hip->bi_keyname) == 0)
1482 				break;
1483 	if ((hip) && (type == SHD) && existing &&
1484 	    (ip->bi_flags & DSW_SHDEXPORT)) {
1485 
1486 		/*
1487 		 * Stop any copy in progress
1488 		 */
1489 		while (_ii_stopcopy(hip) == EINTR)
1490 			;
1491 
1492 		/*
1493 		 * Start the imported shadow teardown
1494 		 */
1495 		mutex_enter(&hip->bi_mutex);
1496 
1497 		/* disable accesss to imported shadow */
1498 		hip->bi_disabled = 1;
1499 
1500 		/* Wait for any I/O's to complete */
1501 		while (hip->bi_ioctl) {
1502 			hip->bi_state |= DSW_IOCTL;
1503 			cv_wait(&hip->bi_ioctlcv, &hip->bi_mutex);
1504 		}
1505 		mutex_exit(&hip->bi_mutex);
1506 
1507 		/* this rw_enter forces us to drain all active IO */
1508 		rw_enter(&hip->bi_linkrw, RW_WRITER);
1509 		rw_exit(&hip->bi_linkrw);
1510 
1511 		/* remove ip from _ii_info_top linked list */
1512 		mutex_enter(&_ii_info_mutex);
1513 		for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
1514 			if (hip == *ipp) {
1515 				*ipp = hip->bi_next;
1516 				break;
1517 			}
1518 		}
1519 		if (hip->bi_kstat) {
1520 			kstat_delete(hip->bi_kstat);
1521 			hip->bi_kstat = NULL;
1522 		}
1523 		mutex_exit(&_ii_info_mutex);
1524 
1525 		/* Gain access to both bitmap volumes */
1526 		rtype = BMP;
1527 		if (((rc = _ii_rsrv_devs(hip, rtype, II_INTERNAL)) != 0) ||
1528 		    ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0)) {
1529 			mutex_exit(&_ii_config_mutex);
1530 			_ii_info_free(ip);
1531 			return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
1532 		}
1533 
1534 		/* Merge imported bitmap */
1535 		rc = II_JOIN_BMP(ip, hip);
1536 
1537 		/* Release access to bitmap volume */
1538 		_ii_rlse_devs(hip, rtype);
1539 		ii_sibling_free(hip);
1540 
1541 		/* Clear the fact that we are exported */
1542 		mutex_enter(&ip->bi_mutex);
1543 		II_FLAG_CLR(DSW_SHDEXPORT, ip);
1544 
1545 		/* Release resources */
1546 		mutex_exit(&ip->bi_mutex);
1547 		_ii_rlse_devs(ip, BMP);
1548 
1549 	} else if (type != NONE) {
1550 		mutex_exit(&_ii_config_mutex);
1551 		_ii_info_free(ip);
1552 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1553 	}
1554 
1555 	/*
1556 	 * Handle non-exported shadow
1557 	 */
1558 	if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
1559 		if ((rc = ii_open_shadow(ip, uconf.shadow_vol)) != 0) {
1560 			mutex_exit(&_ii_config_mutex);
1561 			_ii_info_free(ip);
1562 			spcs_s_add(kstatus, rc);
1563 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1564 			    DSW_EOPEN));
1565 		}
1566 	}
1567 
1568 	/*
1569 	 * allocate _ii_concopy_sema and set to a value that won't allow
1570 	 * all cache to be allocated by copy loops.
1571 	 */
1572 
1573 	if (_ii_concopy_init == 0 && ip->bi_bmpfd != NULL) {
1574 		int asize = 0, wsize;
1575 		nsc_size_t cfbas, maxfbas;
1576 
1577 		(void) nsc_cache_sizes(&asize, &wsize);
1578 
1579 		if (asize > 0) {
1580 			cfbas = FBA_NUM(asize);
1581 			(void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
1582 			rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas);
1583 			_ii_rlse_devs(ip, BMP);
1584 			if (!II_SUCCESS(rc))
1585 				maxfbas = 1024;		/* i.e. _SD_MAX_FBAS */
1586 			ii_nconcopy = cfbas / (maxfbas * 2) / 3;
1587 		}
1588 		if (ii_nconcopy < 2)
1589 			ii_nconcopy = 2;
1590 		ASSERT(ii_nconcopy > 0);
1591 		sema_init(&_ii_concopy_sema, ii_nconcopy, NULL,
1592 		    SEMA_DRIVER, NULL);
1593 		_ii_concopy_init = 1;
1594 	}
1595 
1596 	/* check for shared master volume */
1597 	for (hip = _ii_mst_top; hip; hip = hip->bi_nextmst)
1598 		if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0)
1599 			break;
1600 	add_to_mst_top = (hip == NULL);
1601 	if (!hip)
1602 		for (hip = _ii_info_top; hip; hip = hip->bi_next)
1603 			if (strcmp(uconf.master_vol,
1604 			    ii_pathname(hip->bi_mstfd)) == 0)
1605 				break;
1606 	nshadows = (hip != NULL);
1607 
1608 	/* Check if master is offline */
1609 	if (hip) {
1610 		if (hip->bi_flags & DSW_MSTOFFLINE) {
1611 			mutex_exit(&_ii_config_mutex);
1612 			_ii_info_free(ip);
1613 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1614 			    DSW_EOFFLINE));
1615 		}
1616 	}
1617 
1618 	if (!nshadows && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
1619 		ip->bi_mstfd = nsc_open(uconf.master_vol,
1620 		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
1621 		    (blind_t)(ip->bi_mstdev), &rc);
1622 		if (!ip->bi_mstfd) {
1623 			mutex_exit(&_ii_config_mutex);
1624 			_ii_info_free(ip);
1625 			spcs_s_add(kstatus, rc);
1626 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1627 			    DSW_EOPEN));
1628 		}
1629 
1630 		ip->bi_mstrfd = nsc_open(uconf.master_vol,
1631 		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
1632 		    (blind_t)(ip->bi_mstrdev), &rc);
1633 		if (!ip->bi_mstrfd) {
1634 			mutex_exit(&_ii_config_mutex);
1635 			_ii_info_free(ip);
1636 			spcs_s_add(kstatus, rc);
1637 			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1638 			    DSW_EOPEN));
1639 		}
1640 	}
1641 
1642 	ip->bi_head = ip;
1643 	ip->bi_master = ip;
1644 
1645 	mutex_enter(&_ii_info_mutex);
1646 	ip->bi_next = _ii_info_top;
1647 	_ii_info_top = ip;
1648 	if (nshadows) {
1649 		/* link new shadow group together with others sharing master */
1650 		if (ii_debug > 0)
1651 			cmn_err(CE_NOTE,
1652 			    "!II: shadow %s shares master %s with other shadow"
1653 			    " groups", uconf.shadow_vol, uconf.master_vol);
1654 		hip = hip->bi_head;
1655 		nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
1656 		nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
1657 		ip->bi_mstrdev = hip->bi_mstrdev;
1658 		ip->bi_mstdev = hip->bi_mstdev;
1659 		ip->bi_head = hip;
1660 		ip->bi_sibling = hip->bi_sibling;
1661 		if (add_to_mst_top) {
1662 			hip->bi_nextmst = _ii_mst_top;
1663 			_ii_mst_top = hip;
1664 		}
1665 		hip->bi_sibling = ip;
1666 		ip->bi_master = ip->bi_head->bi_master;
1667 	}
1668 	mutex_exit(&_ii_info_mutex);
1669 	mutex_exit(&_ii_config_mutex);
1670 
1671 	keylen = strlen(ip->bi_keyname);
1672 	if (keylen > KSTAT_STRLEN - 1) {
1673 		keyoffset = keylen + 1 - KSTAT_STRLEN;
1674 	} else {
1675 		keyoffset = 0;
1676 	}
1677 	ip->bi_kstat = kstat_create("ii", _ii_instance++,
1678 	    &ip->bi_keyname[ keyoffset ], "iiset", KSTAT_TYPE_NAMED,
1679 	    sizeof (ii_kstat_set) / sizeof (kstat_named_t),
1680 	    KSTAT_FLAG_VIRTUAL);
1681 	if (ip->bi_kstat) {
1682 		ip->bi_kstat->ks_data = &ii_kstat_set;
1683 		ip->bi_kstat->ks_update = ii_set_stats_update;
1684 		ip->bi_kstat->ks_private = ip;
1685 		kstat_install(ip->bi_kstat);
1686 	} else {
1687 		cmn_err(CE_WARN, "!Unable to create set-specific kstats");
1688 	}
1689 
1690 #ifndef DISABLE_KSTATS
1691 	/* create kstats information */
1692 	mutex_init(&ip->bi_kstat_io.statmutex, NULL, MUTEX_DRIVER, NULL);
1693 	if (ip == ip->bi_master) {
1694 		ip->bi_kstat_io.master = _ii_kstat_create(ip, "master");
1695 	} else {
1696 		ip->bi_kstat_io.master = ip->bi_master->bi_kstat_io.master;
1697 		(void) strlcpy(ip->bi_kstat_io.mstio,
1698 		    ip->bi_master->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN);
1699 	}
1700 	ip->bi_kstat_io.shadow = _ii_kstat_create(ip, "shadow");
1701 	ip->bi_kstat_io.bitmap = _ii_kstat_create(ip, "bitmap");
1702 #endif
1703 
1704 	(void) _ii_reserve_begin(ip);
1705 	rtype = MSTR|SHDR|BMP;
1706 	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
1707 		spcs_s_add(kstatus, rc);
1708 		rc = DSW_ERSRVFAIL;
1709 		goto fail;
1710 	}
1711 
1712 	if (ip->bi_flags&DSW_SHDIMPORT) {
1713 		rc = 0;		/* no master for imported volumes */
1714 		mst_size = 0;
1715 	} else
1716 		rc = nsc_partsize(MSTFD(ip), &mst_size);
1717 	if (rc == 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0)
1718 		rc = nsc_partsize(SHDFD(ip), &shd_size);
1719 	if (!ip->bi_bmpfd)
1720 		rc = EINVAL;
1721 	if (rc == 0)
1722 		rc = nsc_partsize(ip->bi_bmpfd, &bmp_size);
1723 
1724 	if (ip->bi_flags&DSW_SHDIMPORT)
1725 		ip->bi_size = shd_size;
1726 	else
1727 		ip->bi_size = mst_size;
1728 
1729 	if ((((ip->bi_flags&DSW_SHDIMPORT) != DSW_SHDIMPORT) &&
1730 	    (mst_size < 1)) ||
1731 	    (((ip->bi_flags&DSW_SHDEXPORT) != DSW_SHDEXPORT) &&
1732 	    (shd_size < 1)) ||
1733 	    ((rc == 0) && (bmp_size < 1))) {
1734 		/* could be really zero, or could be > 1 TB; fail the enable */
1735 		rc = EINVAL;
1736 	}
1737 
1738 	if (rc != 0) {	/* rc set means an nsc_partsize() failed */
1739 		/*
1740 		 * If existing group, mark bitmap as offline and set
1741 		 * bmp_size to "right size".
1742 		 */
1743 		if (existing) {
1744 			bmp_size = 2 * DSW_BM_FBA_LEN(mst_size) +
1745 			    DSW_SHD_BM_OFFSET;
1746 			goto no_more_bmp_tests;
1747 		}
1748 		spcs_s_add(kstatus, rc);
1749 		rc = DSW_EPARTSIZE;
1750 		_ii_rlse_devs(ip, rtype);
1751 		_ii_reserve_end(ip);
1752 		goto fail;
1753 	}
1754 
1755 	if (ip->bi_flags&DSW_SHDIMPORT)
1756 		mst_size = shd_size;
1757 	if (ip->bi_flags&DSW_SHDEXPORT)
1758 		shd_size = mst_size;
1759 	/*
1760 	 * Check with RDC if the master & shadow sizes are different.
1761 	 * Once II is enabled, the shadow size will be made to appear
1762 	 * the same as the master, and this will panic RDC if we're
1763 	 * changing sizes on it.
1764 	 */
1765 	resized = (shd_size != mst_size);
1766 	if (resized && ii_need_same_size(ip)) {
1767 		cmn_err(CE_WARN, "!Cannot enable II set: would change volume "
1768 		    "size on RDC");
1769 		rc = DSW_EOPACKAGE;
1770 		_ii_rlse_devs(ip, rtype);
1771 		_ii_reserve_end(ip);
1772 		goto fail;
1773 	}
1774 	if (bmp_size < 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET) {
1775 		/* bitmap volume too small */
1776 		if (ii_debug > 0)
1777 			cmn_err(CE_NOTE,
1778 			    "!ii: invalid sizes: bmp %" NSC_SZFMT " mst %"
1779 			    NSC_SZFMT " %" NSC_SZFMT "",
1780 			    bmp_size, mst_size, DSW_BM_FBA_LEN(mst_size));
1781 		rc = DSW_EBMPSIZE;
1782 		_ii_rlse_devs(ip, rtype);
1783 		_ii_reserve_end(ip);
1784 		goto fail;
1785 	}
1786 	if ((shd_size < mst_size) && (uconf.flag&DSW_GOLDEN) != 0) {
1787 		/* shadow volume too small */
1788 		if (ii_debug > 0)
1789 			cmn_err(CE_NOTE, "!shd size too small (%" NSC_SZFMT
1790 			    ") for independent set's master (%" NSC_SZFMT ")",
1791 			    shd_size, mst_size);
1792 		rc = DSW_ESHDSIZE;
1793 		_ii_rlse_devs(ip, rtype);
1794 		_ii_reserve_end(ip);
1795 		goto fail;
1796 	}
1797 
1798 	ip->bi_busy = kmem_zalloc(1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)),
1799 	    KM_SLEEP);
1800 	if (!ip->bi_busy) {
1801 		rc = ENOMEM;
1802 		_ii_rlse_devs(ip, rtype);
1803 		_ii_reserve_end(ip);
1804 		goto fail;
1805 	}
1806 
1807 	if (existing == 0) {
1808 
1809 		DTRACE_PROBE(_ii_config);
1810 
1811 		/* first time this shadow has been set up */
1812 		mutex_enter(&ip->bi_mutex);
1813 		bm_header = _ii_bm_header_get(ip, &tmp);
1814 		mutex_exit(&ip->bi_mutex);
1815 		if (bm_header == NULL) {
1816 			if (ii_debug > 0)
1817 				cmn_err(CE_WARN,
1818 				    "!ii: _ii_bm_header_get returned NULL");
1819 			rc = DSW_EHDRBMP;
1820 			_ii_rlse_devs(ip, rtype);
1821 			_ii_reserve_end(ip);
1822 			goto fail;
1823 		}
1824 		bzero(bm_header, sizeof (*bm_header));
1825 		/* copy pathnames into it */
1826 		(void) strncpy(bm_header->master_vol, uconf.master_vol,
1827 		    DSW_NAMELEN);
1828 		(void) strncpy(bm_header->shadow_vol, uconf.shadow_vol,
1829 		    DSW_NAMELEN);
1830 		(void) strncpy(bm_header->bitmap_vol, uconf.bitmap_vol,
1831 		    DSW_NAMELEN);
1832 		(void) strncpy(bm_header->clstr_name, uconf.cluster_tag,
1833 		    DSW_NAMELEN);
1834 		(void) strncpy(bm_header->group_name, uconf.group_name,
1835 		    DSW_NAMELEN);
1836 
1837 		if (uconf.cluster_tag[0] != 0)
1838 			(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1839 
1840 		if (uconf.group_name[0] != 0)
1841 			(void) II_LINK_GROUP(ip, uconf.group_name);
1842 
1843 
1844 		bm_header->ii_state = (uconf.flag & DSW_GOLDEN);
1845 		II_FLAG_ASSIGN(bm_header->ii_state, ip);
1846 
1847 		if (import) {
1848 			II_FLAG_SETX(DSW_SHDIMPORT, ip);
1849 			bm_header->ii_state |= DSW_SHDIMPORT;
1850 		}
1851 		if (resized) {
1852 			II_FLAG_SETX(DSW_RESIZED, ip);
1853 			bm_header->ii_state |= DSW_RESIZED;
1854 		}
1855 		bm_header->ii_type = (uconf.flag & DSW_GOLDEN) ?
1856 		    DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
1857 		bm_header->ii_magic = DSW_DIRTY;
1858 		bm_header->ii_version = II_HEADER_VERSION;
1859 		bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
1860 		bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
1861 		bm_header->ii_throttle_delay = ip->bi_throttle_delay;
1862 		bm_header->ii_throttle_unit = ip->bi_throttle_unit;
1863 		ip->bi_shdfba = bm_header->ii_shdfba;
1864 		ip->bi_copyfba = bm_header->ii_copyfba;
1865 		ip->bi_mtime = ddi_get_time();
1866 
1867 		/* write it to disk */
1868 		mutex_enter(&ip->bi_mutex);
1869 		rc = _ii_bm_header_put(bm_header, ip, tmp);
1870 		mutex_exit(&ip->bi_mutex);
1871 		if (!II_SUCCESS(rc)) {
1872 			spcs_s_add(kstatus, rc);
1873 			rc = DSW_EHDRBMP;
1874 			_ii_rlse_devs(ip, rtype);
1875 			_ii_reserve_end(ip);
1876 			goto fail;
1877 		}
1878 		if ((shd_size < mst_size) && (uconf.flag & DSW_GOLDEN) == 0) {
1879 		/*
1880 		 * shadow volume smaller than master, must use a dependent
1881 		 * copy with a bitmap file stored mapping for chunk locations.
1882 		 */
1883 					/* number of chunks in shadow volume */
1884 			nsc_size_t shd_chunks;
1885 			nsc_size_t bmp_chunks;
1886 			nsc_size_t tmp_chunks;
1887 
1888 			if (ii_debug > 1)
1889 				cmn_err(CE_NOTE, "!ii: using tree index on %s",
1890 				    uconf.master_vol);
1891 			shd_chunks = shd_size / DSW_SIZE;
1892 			/* do not add in partial chunk at end */
1893 
1894 			ip->bi_mstchks = mst_size / DSW_SIZE;
1895 			if (mst_size % DSW_SIZE != 0)
1896 				ip->bi_mstchks++;
1897 			bmp_chunks = ii_btsize(bmp_size - ip->bi_copyfba -
1898 			    DSW_BM_FBA_LEN(ip->bi_size));
1899 			tmp_chunks = ip->bi_copyfba +
1900 			    DSW_BM_FBA_LEN(ip->bi_size);
1901 			if (bmp_chunks < (nsc_size_t)ip->bi_mstchks) {
1902 				if (ii_debug > -1) {
1903 					cmn_err(CE_NOTE, "!ii: bitmap vol too"
1904 					    "small: %" NSC_SZFMT " vs. %"
1905 					    NSC_SZFMT, bmp_size,
1906 					    tmp_chunks);
1907 				}
1908 				spcs_s_add(kstatus, rc);
1909 				rc = DSW_EHDRBMP;
1910 				_ii_rlse_devs(ip, rtype);
1911 				_ii_reserve_end(ip);
1912 				goto fail;
1913 			}
1914 			mutex_enter(&ip->bi_mutex);
1915 			II_FLAG_SET(DSW_TREEMAP, ip);
1916 			mutex_exit(&ip->bi_mutex);
1917 
1918 			/* following values are written to header by ii_tinit */
1919 #if (defined(NSC_MULTI_TERABYTE) && !defined(II_MULTIMULTI_TERABYTE))
1920 			ASSERT(shd_chunks <= INT32_MAX);
1921 			ASSERT(mst_size / DSW_SIZE <= INT32_MAX);
1922 #endif
1923 			ip->bi_mstchks = mst_size / DSW_SIZE;
1924 			if (mst_size % DSW_SIZE != 0)
1925 				ip->bi_mstchks++;
1926 #ifdef	II_MULTIMULTI_TERABYTE
1927 			ip->bi_shdchks = shd_chunks;
1928 #else
1929 			/* still have 31 bit chunkid's */
1930 			ip->bi_shdchks = (chunkid_t)shd_chunks;
1931 #endif
1932 			ip->bi_shdchkused = 0;
1933 			rc = ii_tinit(ip);
1934 		} else {
1935 			ip->bi_shdchks = shd_size / DSW_SIZE;
1936 			ip->bi_shdchkused = 0;
1937 		}
1938 		if (rc == 0)
1939 			rc = II_LOAD_BMP(ip, 1);
1940 		if (rc == 0)
1941 			rc = II_ZEROBM(ip);
1942 		if (rc == 0)
1943 			rc = II_COPYBM(ip);	/* also clear copy bitmap */
1944 		if (rc == 0 && (uconf.flag & DSW_GOLDEN) && !import)
1945 			rc = ii_fill_copy_bmp(ip);
1946 		if (rc) {
1947 			spcs_s_add(kstatus, rc);
1948 			rc = DSW_EHDRBMP;
1949 			_ii_rlse_devs(ip, rtype);
1950 			goto fail;
1951 		}
1952 		/* check that changing shadow won't upset RDC */
1953 		if (ii_update_denied(ip, kstatus, 0, 1)) {
1954 			rc = DSW_EOPACKAGE;
1955 			_ii_rlse_devs(ip, rtype);
1956 			_ii_reserve_end(ip);
1957 			goto fail;
1958 		}
1959 		ip->bi_disabled = 0;	/* all okay and ready, we can go now */
1960 		_ii_rlse_devs(ip, rtype);
1961 		/* no _ii_reserve_end() here - we must register first */
1962 		ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
1963 		    NSC_CACHE|NSC_DEVICE, _ii_io);
1964 		if (!nshadows)
1965 			ii_register_mst(ip);
1966 		ii_register_shd(ip);
1967 
1968 		if (!ii_register_ok(ip)) {
1969 			ip->bi_disabled = 1;	/* argh */
1970 			rc = DSW_EREGISTER;
1971 			goto fail;
1972 		}
1973 		/* no _ii_reserve_begin() here -- we're still in process */
1974 		(void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
1975 
1976 		if (ii_debug > 0)
1977 			cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
1978 			    uconf.master_vol, uconf.shadow_vol);
1979 		rc = 0;
1980 		if ((uconf.flag & DSW_GOLDEN) && !import) {
1981 			mutex_enter(&ip->bi_mutex);
1982 			II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
1983 			ip->bi_ioctl++;	/* we are effectively in an ioctl */
1984 			mutex_exit(&ip->bi_mutex);
1985 			rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
1986 		}
1987 		_ii_rlse_devs(ip, rtype);
1988 		_ii_reserve_end(ip);
1989 
1990 		++iigkstat.num_sets.value.ul;
1991 
1992 		return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
1993 	}
1994 
1995 	ip->bi_shdchks = shd_size / DSW_SIZE;
1996 	ip->bi_shdfba = shdfba;
1997 	ip->bi_copyfba = copyfba;
1998 	rc = II_LOAD_BMP(ip, 0);		/* reload saved bitmap */
1999 	mutex_enter(&ip->bi_mutex);
2000 	if (rc == 0)
2001 		bm_header = _ii_bm_header_get(ip, &tmp);
2002 	mutex_exit(&ip->bi_mutex);
2003 	if (rc || bm_header == NULL) {
2004 		if (existing) {
2005 			goto no_more_bmp_tests;
2006 		}
2007 		rc = DSW_EHDRBMP;
2008 		goto fail;
2009 	}
2010 
2011 	/*
2012 	 * If the header is dirty and it wasn't kept on persistent storage
2013 	 * then the bitmaps must be assumed to be bad.
2014 	 */
2015 	if (bm_header->ii_magic == DSW_DIRTY &&
2016 	    ip->bi_bitmap_ops != &alloc_buf_bmp) {
2017 		type = bm_header->ii_type;
2018 		_ii_bm_header_free(bm_header, ip, tmp);
2019 		if (type == DSW_GOLDEN_TYPE) {
2020 			if ((ip->bi_flags & DSW_COPYINGM) != 0)
2021 				_ii_error(ip, DSW_SHDOFFLINE);
2022 			else if ((ip->bi_flags & DSW_COPYINGS) != 0)
2023 				_ii_error(ip, DSW_MSTOFFLINE);
2024 			else {
2025 				/* No copying, so they're just different */
2026 				rc = ii_fill_copy_bmp(ip);
2027 				if (rc) {
2028 					spcs_s_add(kstatus, rc);
2029 					rc = DSW_EHDRBMP;
2030 					goto fail;
2031 				}
2032 			}
2033 		} else
2034 			_ii_error(ip, DSW_SHDOFFLINE);
2035 
2036 		mutex_enter(&ip->bi_mutex);
2037 		bm_header = _ii_bm_header_get(ip, &tmp);
2038 		mutex_exit(&ip->bi_mutex);
2039 		if (bm_header == NULL) {
2040 			rc = DSW_EHDRBMP;
2041 			goto fail;
2042 		}
2043 	}
2044 
2045 	bm_header->ii_magic = DSW_DIRTY;
2046 	mutex_enter(&ip->bi_mutex);
2047 	rc = _ii_bm_header_put(bm_header, ip, tmp);
2048 	mutex_exit(&ip->bi_mutex);
2049 	if (!II_SUCCESS(rc)) {
2050 		spcs_s_add(kstatus, rc);
2051 		rc = DSW_EHDRBMP;
2052 		goto fail;
2053 	}
2054 
2055 	ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
2056 	    NSC_CACHE|NSC_DEVICE, _ii_io);
2057 no_more_bmp_tests:
2058 	_ii_rlse_devs(ip, rtype);
2059 	ip->bi_disabled = 0;	/* all okay and ready, we can go now */
2060 	if (!nshadows)
2061 		ii_register_mst(ip);
2062 	if ((ip->bi_flags & DSW_SHDEXPORT) == 0)
2063 		ii_register_shd(ip);
2064 
2065 	if (!ii_register_ok(ip)) {
2066 		rc = DSW_EREGISTER;
2067 		goto fail;
2068 	}
2069 	_ii_reserve_end(ip);
2070 
2071 	if (ii_debug > 0)
2072 		cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
2073 		    uconf.master_vol, uconf.shadow_vol);
2074 
2075 	rc = 0;
2076 	if (ip->bi_flags & DSW_COPYINGP) {
2077 		/* Copy was in progress, so continue it */
2078 		(void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
2079 		mutex_enter(&ip->bi_mutex);
2080 		ip->bi_ioctl++;		/* we are effectively in an ioctl */
2081 		mutex_exit(&ip->bi_mutex);
2082 		rc = _ii_copyvol(ip, ((ip->bi_flags & DSW_COPYINGS) != 0) ?
2083 		    CV_SHD2MST : 0, rtype, kstatus, 0);
2084 	}
2085 
2086 	++iigkstat.num_sets.value.ul;
2087 
2088 	return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
2089 
2090 fail:
2091 	/* remove ip from _ii_info_top linked list */
2092 	mutex_enter(&_ii_info_mutex);
2093 	for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
2094 		if (ip == *ipp) {
2095 			*ipp = ip->bi_next;
2096 			break;
2097 		}
2098 	}
2099 	mutex_exit(&_ii_info_mutex);
2100 	ii_sibling_free(ip);
2101 
2102 	return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
2103 }
2104 
/*
 * _ii_perform_disable
 *	Disable one II set, looked up by name.
 *
 * Arguments:
 *	setname  - name passed to _ii_find_set() to locate the set
 *	kstatusp - caller's status block; a device reserve failure code is
 *		   appended to it with spcs_s_add()
 *	reclaim  - handed on to ii_overflow_free(); if it is -1 and the set
 *		   has an overflow volume, NO_RECLAIM is chosen when the
 *		   volume's ii_drefcnt is 1 and RECLAIM otherwise
 *
 * Calling/Exit State:
 *	Returns 0 once the set has been unlinked from _ii_info_top and
 *	passed to ii_sibling_free(). Otherwise returns DSW_ENOTFOUND,
 *	DSW_EDEPENDENCY, DSW_EOPACKAGE or DSW_ERSRVFAIL.
 *
 *	NOTE(review): ip->bi_mutex is dropped on every path after
 *	_ii_find_set() without being taken in this function, so
 *	_ii_find_set() presumably returns with it held - confirm.
 */
static int
_ii_perform_disable(char *setname, spcs_s_info_t *kstatusp, int reclaim)
{
	_ii_info_t **xip, *ip;
	_ii_overflow_t *op;
	nsc_buf_t *tmp = NULL;
	int rc;
	ii_header_t *bm_header;
	int rtype;

	mutex_enter(&_ii_info_mutex);
	ip = _ii_find_set(setname);
	if (ip == NULL) {
		mutex_exit(&_ii_info_mutex);
		return (DSW_ENOTFOUND);
	}

	if ((ip->bi_flags & DSW_GOLDEN) &&
	    ((ip->bi_flags & DSW_COPYINGP) != 0)) {
		/*
		 * Cannot disable an independent copy while still copying
		 * as it means that a data dependency exists.
		 */
		mutex_exit(&_ii_info_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		DTRACE_PROBE(_ii_perform_disable_end_DSW_EDEPENDENCY);
		return (DSW_EDEPENDENCY);
	}

	if ((ip->bi_flags & DSW_GOLDEN) == 0 &&
	    ii_update_denied(ip, *kstatusp, 0, 1)) {
		/* Cannot disable a dependent shadow while RDC is unsure */
		mutex_exit(&_ii_info_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		DTRACE_PROBE(DSW_EOPACKAGE);
		return (DSW_EOPACKAGE);
	}

	if (((ip->bi_flags & DSW_RESIZED) == DSW_RESIZED) &&
	    ii_need_same_size(ip)) {
		/* We can't disable the set whilst RDC is using it */
		mutex_exit(&_ii_info_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		cmn_err(CE_WARN, "!Cannot disable II set: would change "
		    "volume size on RDC");
		DTRACE_PROBE(DSW_EOPACKAGE_resize);
		return (DSW_EOPACKAGE);
	}

	/*
	 * All checks passed: mark the set disabled. _ii_list() and
	 * _ii_listlen() skip sets whose bi_disabled is non-zero.
	 */
	ip->bi_disabled = 1;
	if (NSHADOWS(ip) && (ip->bi_master == ip)) {
		/* this set is the master entry of a multi-shadow group */
		ip->bi_flags &= (~DSW_COPYING);
		ip->bi_state |= DSW_MULTIMST;
	}
	mutex_exit(&_ii_info_mutex);

	_ii_ioctl_done(ip);
	mutex_exit(&ip->bi_mutex);

	_ii_stopvol(ip);

	rtype = SHDR|BMP;
	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
		/*
		 * NOTE(review): bi_disabled was set and _ii_stopvol() has
		 * already run, and neither is undone on this error return
		 * - confirm that callers expect the set in this state.
		 */
		spcs_s_add(*kstatusp, rc);
		DTRACE_PROBE(DSW_ERSRVFAIL);
		return (DSW_ERSRVFAIL);
	}

	if ((ii_header < 128) &&
	    (((ip->bi_flags & DSW_GOLDEN) == 0) ||
	    (ip->bi_flags & DSW_COPYING))) {
		/*
		 * Not a full copy so attempt to prevent use of partial copy
		 * by clearing where the first ufs super-block would be
		 * located. Solaris often incorporates the disk header into
		 * the start of the first slice, so avoid clearing the very
		 * first 16 blocks of the volume.
		 */

		if (ii_debug > 1)
			cmn_err(CE_NOTE, "!ii: Shadow copy invalidated");
		/* zero shadow FBAs ii_header .. 127 and write them back */
		II_READ_START(ip, shadow);
		rc = nsc_alloc_buf(SHDFD(ip), ii_header, 128 - ii_header,
		    NSC_RDWRBUF, &tmp);
		II_READ_END(ip, shadow, rc, 128 - ii_header);
		if (II_SUCCESS(rc)) {
			rc = nsc_zero(tmp, ii_header, 128 - ii_header, 0);
			if (II_SUCCESS(rc)) {
				II_NSC_WRITE(ip, shadow, rc, tmp, ii_header,
				    (128 - ii_header), 0);
			}
		}
		if (tmp)
			(void) nsc_free_buf(tmp);
		/* any failure above takes the shadow offline */
		if (!II_SUCCESS(rc))
			_ii_error(ip, DSW_SHDOFFLINE);
	}

	/* this rw_enter forces us to drain all active IO */
	rw_enter(&ip->bi_linkrw, RW_WRITER);
	rw_exit(&ip->bi_linkrw);

	/* remove ip from _ii_info_top linked list */
	mutex_enter(&_ii_info_mutex);
	for (xip = &_ii_info_top; *xip; xip = &((*xip)->bi_next)) {
		if (ip == *xip) {
			*xip = ip->bi_next;
			break;
		}
	}
	if (ip->bi_kstat) {
		kstat_delete(ip->bi_kstat);
		ip->bi_kstat = NULL;
	}
	mutex_exit(&_ii_info_mutex);

	/*
	 * Save the bitmap, then stamp the on-disk header DSW_INVALID.
	 * bm_header is only assigned when the save succeeded; the
	 * "rc == 0 &&" test below keeps it from being read otherwise.
	 */
	rc = II_SAVE_BMP(ip, 1);
	mutex_enter(&ip->bi_mutex);
	if (rc == 0)
		bm_header = _ii_bm_header_get(ip, &tmp);
	if (rc == 0 && bm_header) {
		if (ii_debug > 1)
			cmn_err(CE_NOTE, "!ii: Invalid header written");
		bm_header->ii_magic = DSW_INVALID;
		/* write it to disk */
		(void) _ii_bm_header_put(bm_header, ip, tmp);
	}
	mutex_exit(&ip->bi_mutex);

	/* caller passed -1: pick the reclaim policy from ii_drefcnt */
	op = ip->bi_overflow;
	if (op && (reclaim == -1)) {
		reclaim = (op->ii_drefcnt == 1? NO_RECLAIM : RECLAIM);
	}

	if ((op != NULL) && (op->ii_hversion >= 1) &&
	    (op->ii_hmagic == II_OMAGIC)) {
		mutex_enter(&_ii_overflow_mutex);
		if (ip->bi_flags & DSW_OVRHDRDRTY) {
			/* this set no longer counts as a user of the volume */
			mutex_enter(&ip->bi_mutex);
			ip->bi_flags &= ~DSW_OVRHDRDRTY;
			mutex_exit(&ip->bi_mutex);
			ASSERT(op->ii_urefcnt > 0);
			op->ii_urefcnt--;
		}
		if (op->ii_urefcnt == 0) {
			/* no users left: reset the overflow counters */
			op->ii_flags &= ~IIO_CNTR_INVLD;
			op->ii_unused = op->ii_nchunks - 1;
		}
		mutex_exit(&_ii_overflow_mutex);
	}
	ii_overflow_free(ip, reclaim);
	_ii_rlse_devs(ip, rtype);

	ii_sibling_free(ip);

	--iigkstat.num_sets.value.ul;
	return (0);
}
2266 
2267 /*
2268  * _ii_disable
2269  *	Deconfigures an II pair
2270  *
2271  * Calling/Exit State:
2272  *	Returns 0 if the pair was disabled. Otherwise an error code
2273  *	is returned and any additional error information is copied
2274  *	out to the user.
2275  *
2276  * Description:
2277  *	Reads the user configuration structure and attempts to
2278  *	deconfigure that pairing based on the master device pathname.
2279  */
2280 
2281 int
2282 _ii_disable(intptr_t arg, int ilp32, int *rvp)
2283 {
2284 	dsw_ioctl_t uparms;
2285 	dsw_ioctl32_t uparms32;
2286 	_ii_overflow_t *op;
2287 	int rc, rerr;
2288 	spcs_s_info_t kstatus;
2289 	uint64_t hash;
2290 	int reclaim;
2291 	_ii_lsthead_t *oldhead, **head;
2292 	_ii_lstinfo_t *np, **xnp, *oldp;
2293 
2294 	*rvp = 0;
2295 
2296 	if (ilp32) {
2297 		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2298 			return (EFAULT);
2299 		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2300 		uparms.status = (spcs_s_info_t)uparms32.status;
2301 	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2302 		return (EFAULT);
2303 
2304 	kstatus = spcs_s_kcreate();
2305 	if (kstatus == NULL)
2306 		return (ENOMEM);
2307 
2308 	if (!uparms.shadow_vol[0])
2309 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2310 
2311 	DTRACE_PROBE2(_ii_disable_info, char *, uparms.shadow_vol,
2312 	    int, uparms.flags);
2313 
2314 	/* group or single set? */
2315 	if (uparms.flags & CV_IS_GROUP) {
2316 		hash = nsc_strhash(uparms.shadow_vol);
2317 		mutex_enter(&_ii_group_mutex);
2318 		for (head = &_ii_group_top; *head;
2319 		    head = &((*head)->lst_next)) {
2320 			if ((hash == (*head)->lst_hash) &&
2321 			    strncmp((*head)->lst_name, uparms.shadow_vol,
2322 			    DSW_NAMELEN) == 0)
2323 				break;
2324 		}
2325 
2326 		if (!*head) {
2327 			mutex_exit(&_ii_group_mutex);
2328 			return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2329 			    DSW_EGNOTFOUND));
2330 		}
2331 
2332 		/* clear any overflow vol usage counts */
2333 		for (np = (*head)->lst_start; np; np = np->lst_next) {
2334 			if (np->lst_ip->bi_overflow) {
2335 				np->lst_ip->bi_overflow->ii_detachcnt = 0;
2336 			}
2337 		}
2338 
2339 		/* now increment */
2340 		for (np = (*head)->lst_start; np; np = np->lst_next) {
2341 			if (np->lst_ip->bi_overflow) {
2342 				++np->lst_ip->bi_overflow->ii_detachcnt;
2343 			}
2344 		}
2345 
2346 		/* finally, disable all group members */
2347 		rerr = 0;
2348 		xnp = &(*head)->lst_start;
2349 		while (*xnp) {
2350 			op = (*xnp)->lst_ip->bi_overflow;
2351 			if (op) {
2352 				reclaim = (op->ii_drefcnt == op->ii_detachcnt?
2353 				    NO_RECLAIM : RECLAIM);
2354 				--op->ii_detachcnt;
2355 			}
2356 
2357 			/* clear out the group pointer */
2358 			(*xnp)->lst_ip->bi_group = NULL;
2359 
2360 			rc = _ii_perform_disable((*xnp)->lst_ip->bi_keyname,
2361 			    &kstatus, reclaim);
2362 			if (rc) {
2363 				/* restore group name */
2364 				(*xnp)->lst_ip->bi_group = (*head)->lst_name;
2365 
2366 				/* restore detachcnt */
2367 				if (op) {
2368 					++op->ii_detachcnt;
2369 				}
2370 
2371 				/* don't delete branch */
2372 				++rerr;
2373 				spcs_s_add(kstatus, rc);
2374 
2375 				/* move forward in linked list */
2376 				xnp = &(*xnp)->lst_next;
2377 			} else {
2378 				oldp = (*xnp);
2379 				*xnp = (*xnp)->lst_next;
2380 				kmem_free(oldp, sizeof (_ii_lstinfo_t));
2381 			}
2382 		}
2383 		if (rerr) {
2384 			mutex_exit(&_ii_group_mutex);
2385 			return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2386 			    DSW_EDISABLE));
2387 		}
2388 		/* no errors, all sets disabled, OK to free list head */
2389 		oldhead = *head;
2390 		*head = (*head)->lst_next;
2391 		kmem_free(oldhead, sizeof (_ii_lsthead_t));
2392 		mutex_exit(&_ii_group_mutex);
2393 	} else {
2394 		/* only a single set is being disabled */
2395 		rc = _ii_perform_disable(uparms.shadow_vol, &kstatus, -1);
2396 		if (rc)
2397 			return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
2398 	}
2399 
2400 	spcs_s_kfree(kstatus);
2401 
2402 	return (0);
2403 }
2404 
2405 
2406 /*
2407  * _ii_stat
2408  *	Get state of the shadow.
2409  *
2410  * Calling/Exit State:
2411  *	Returns 0 on success, otherwise an error code is returned
2412  *	and any additional error information is copied out to the user.
2413  *	The size variable in the dsw_stat_t is set to the FBA size
2414  *	of the volume, the stat variable is set to the state, and
2415  *	the structure is copied out.
2416  */
2417 /*ARGSUSED*/
2418 int
2419 _ii_stat(intptr_t arg, int ilp32, int *rvp)
2420 {
2421 	dsw_stat_t ustat;
2422 	dsw_stat32_t ustat32;
2423 	_ii_info_t *ip;
2424 	spcs_s_info_t kstatus;
2425 	char *group, *cluster;
2426 
2427 	if (ilp32) {
2428 		if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
2429 			return (EFAULT);
2430 		II_TAIL_COPY(ustat, ustat32, shadow_vol, dsw_stat_t);
2431 		ustat.status = (spcs_s_info_t)ustat32.status;
2432 	} else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
2433 		return (EFAULT);
2434 
2435 	kstatus = spcs_s_kcreate();
2436 	if (kstatus == NULL)
2437 		return (ENOMEM);
2438 
2439 	if (!ustat.shadow_vol[0])
2440 		return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));
2441 
2442 	mutex_enter(&_ii_info_mutex);
2443 	ip = _ii_find_set(ustat.shadow_vol);
2444 	mutex_exit(&_ii_info_mutex);
2445 	if (ip == NULL)
2446 		return (spcs_s_ocopyoutf(&kstatus, ustat.status,
2447 		    DSW_ENOTFOUND));
2448 
2449 	ustat.stat = ip->bi_flags;
2450 	ustat.size = ip->bi_size;
2451 	ustat.mtime = ip->bi_mtime;
2452 
2453 	if (ilp32)
2454 		bzero(ustat32.overflow_vol, DSW_NAMELEN);
2455 	else
2456 		bzero(ustat.overflow_vol, DSW_NAMELEN);
2457 	if (ip->bi_overflow) {
2458 		(void) strncpy(ilp32 ? ustat32.overflow_vol :
2459 		    ustat.overflow_vol, ip->bi_overflow->ii_volname,
2460 		    DSW_NAMELEN);
2461 	}
2462 
2463 	ustat.shdsize = ip->bi_shdchks;
2464 	if ((ip->bi_flags) & DSW_TREEMAP) {
2465 		ustat.shdused = ip->bi_shdchkused;
2466 	} else {
2467 		ustat.shdused = 0;
2468 	}
2469 
2470 	/* copy over group and cluster associations */
2471 	group = ilp32? ustat32.group_name : ustat.group_name;
2472 	cluster = ilp32? ustat32.cluster_tag : ustat.cluster_tag;
2473 	bzero(group, DSW_NAMELEN);
2474 	bzero(cluster, DSW_NAMELEN);
2475 	if (ip->bi_group)
2476 		(void) strncpy(group, ip->bi_group, DSW_NAMELEN);
2477 	if (ip->bi_cluster)
2478 		(void) strncpy(cluster, ip->bi_cluster, DSW_NAMELEN);
2479 
2480 	_ii_ioctl_done(ip);
2481 	mutex_exit(&ip->bi_mutex);
2482 
2483 	spcs_s_kfree(kstatus);
2484 	if (ilp32) {
2485 		ustat32.stat = ustat.stat;
2486 		ustat32.size = ustat.size;
2487 		ustat32.shdsize = ustat.shdsize;
2488 		ustat32.shdused = ustat.shdused;
2489 		ustat32.mtime = ustat.mtime;
2490 		if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
2491 			return (EFAULT);
2492 	} else if (copyout(&ustat, (void *)arg, sizeof (ustat)))
2493 		return (EFAULT);
2494 
2495 	return (0);
2496 }
2497 
2498 
2499 /*
2500  * _ii_list
2501  *	List what shadow sets are currently configured.
2502  *
2503  * Calling/Exit State:
2504  *	Returns 0 on success, otherwise an error code is returned
2505  *	and any additional error information is copied out to the user.
2506  */
2507 /*ARGSUSED*/
2508 int
2509 _ii_list(intptr_t arg, int ilp32, int *rvp)
2510 {
2511 	dsw_list_t ulist;
2512 	dsw_list32_t ulist32;
2513 	_ii_info_t *ip;
2514 	dsw_config_t cf, *cfp;
2515 	dsw_config32_t cf32, *cf32p;
2516 	int rc;
2517 	int used;
2518 	spcs_s_info_t kstatus;
2519 
2520 	if (ilp32) {
2521 		if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
2522 			return (EFAULT);
2523 		II_TAIL_COPY(ulist, ulist32, list_size, dsw_list_t);
2524 		ulist.status = (spcs_s_info_t)ulist32.status;
2525 	} else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
2526 		return (EFAULT);
2527 
2528 	kstatus = spcs_s_kcreate();
2529 	if (kstatus == NULL)
2530 		return (ENOMEM);
2531 
2532 	cf32p = (dsw_config32_t *)(unsigned long)ulist32.list;
2533 	cfp = ulist.list;
2534 	ulist.list_used = 0;
2535 	mutex_enter(&_ii_info_mutex);
2536 	ip = _ii_info_top;
2537 
2538 	DTRACE_PROBE1(_ii_list_count, int, ulist.list_size);
2539 
2540 	for (rc = used = 0; used < ulist.list_size && ip; ip = ip->bi_next) {
2541 
2542 		if (ip->bi_disabled)
2543 			continue;
2544 
2545 		mutex_enter(&ip->bi_mutex);
2546 		ip->bi_ioctl++;
2547 		if (ilp32) {
2548 			bzero(&cf32, sizeof (cf32));
2549 			cf32.flag = ip->bi_flags;
2550 			(void) strncpy(cf32.master_vol,
2551 			    ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
2552 			(void) strncpy(cf32.shadow_vol,
2553 			    ip->bi_keyname, DSW_NAMELEN);
2554 			(void) strncpy(cf32.bitmap_vol, (ip->bi_bmpfd)
2555 			    ? ii_pathname(ip->bi_bmpfd)
2556 			    : "<offline_bitmap>", DSW_NAMELEN);
2557 			if (copyout(&cf32, (void *)cf32p, sizeof (cf32)))
2558 				rc = EFAULT;
2559 			cf32p++;
2560 		} else {
2561 			bzero(&cf, sizeof (cf));
2562 			cf.flag = ip->bi_flags;
2563 			(void) strncpy(cf.master_vol,
2564 			    ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
2565 			(void) strncpy(cf.shadow_vol,
2566 			    ip->bi_keyname, DSW_NAMELEN);
2567 			(void) strncpy(cf.bitmap_vol, (ip->bi_bmpfd)
2568 			    ? ii_pathname(ip->bi_bmpfd)
2569 			    : "<offline_bitmap>", DSW_NAMELEN);
2570 			if (copyout(&cf, (void *)cfp, sizeof (cf)))
2571 				rc = EFAULT;
2572 			cfp++;
2573 		}
2574 		_ii_ioctl_done(ip);
2575 		mutex_exit(&ip->bi_mutex);
2576 		used++;
2577 	}
2578 	mutex_exit(&_ii_info_mutex);
2579 
2580 	spcs_s_kfree(kstatus);
2581 	if (rc)
2582 		return (rc);
2583 
2584 	ulist.list_used = used;
2585 	if (ilp32) {
2586 		ulist32.list_used = ulist.list_used;
2587 		if (copyout(&ulist32, (void *)arg, sizeof (ulist32)))
2588 			return (EFAULT);
2589 	} else if (copyout(&ulist, (void *)arg, sizeof (ulist)))
2590 		return (EFAULT);
2591 
2592 	return (0);
2593 }
2594 
2595 /*
2596  * _ii_listlen
2597  *	Counts the number of items the DSWIOC_LIST and DSWIOC_OLIST
2598  *	ioctl calls would return.
2599  *
2600  * Calling/Exit State:
2601  *	Returns 0 on success, otherwise an error code is returned.
2602  *	Result is returned as successful ioctl value.
2603  */
2604 /*ARGSUSED*/
2605 int
2606 _ii_listlen(int cmd, int ilp32, int *rvp)
2607 {
2608 	_ii_info_t *ip;
2609 	_ii_overflow_t *op;
2610 	int count = 0;
2611 
2612 	switch (cmd) {
2613 
2614 	case DSWIOC_LISTLEN:
2615 		mutex_enter(&_ii_info_mutex);
2616 		for (ip = _ii_info_top; ip; ip = ip->bi_next) {
2617 			if (ip->bi_disabled == 0) {
2618 				count++;
2619 			}
2620 		}
2621 		mutex_exit(&_ii_info_mutex);
2622 		break;
2623 	case DSWIOC_OLISTLEN:
2624 		mutex_enter(&_ii_overflow_mutex);
2625 		for (op = _ii_overflow_top; op; op = op->ii_next)
2626 			count++;
2627 		mutex_exit(&_ii_overflow_mutex);
2628 		break;
2629 	default:
2630 		return (EINVAL);
2631 	}
2632 	*rvp = count;
2633 
2634 	return (0);
2635 }
2636 
2637 /*
2638  * _ii_report_bmp
2639  *
2640  *	Report to the user daemon that the bitmap has gone bad
2641  */
2642 static int
2643 _ii_report_bmp(_ii_info_t *ip)
2644 {
2645 	int rc;
2646 	struct nskernd *nsk;
2647 
2648 	nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP);
2649 	if (!nsk) {
2650 		return (ENOMEM);
2651 	}
2652 	nsk->command = NSKERND_IIBITMAP;
2653 	nsk->data1 = (int64_t)(ip->bi_flags | DSW_BMPOFFLINE);
2654 	(void) strncpy(nsk->char1, ip->bi_keyname,
2655 	    min(DSW_NAMELEN, NSC_MAXPATH));
2656 
2657 	rc = nskernd_get(nsk);
2658 	if (rc == 0) {
2659 		rc = (int)nsk->data1;
2660 	}
2661 	if (rc == 0) {
2662 		DTRACE_PROBE(_ii_report_bmp_end);
2663 	} else {
2664 		DTRACE_PROBE1(_ii_report_bmp_end_2, int, rc);
2665 	}
2666 	kmem_free(nsk, sizeof (*nsk));
2667 	return (rc);
2668 }
2669 
2670 /*
2671  * _ii_offline
2672  *	Set volume offline flag(s) for a shadow.
2673  *
2674  * Calling/Exit State:
2675  *	Returns 0 on success, otherwise an error code is returned
2676  *	and any additional error information is copied out to the user.
2677  */
2678 /*ARGSUSED*/
2679 int
2680 _ii_offline(intptr_t arg, int ilp32, int *rvp)
2681 {
2682 	dsw_ioctl_t uparms;
2683 	dsw_ioctl32_t uparms32;
2684 	_ii_info_t *ip;
2685 	int rc;
2686 	spcs_s_info_t kstatus;
2687 
2688 	if (ilp32) {
2689 		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2690 			return (EFAULT);
2691 		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2692 		uparms.status = (spcs_s_info_t)uparms32.status;
2693 	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2694 		return (EFAULT);
2695 
2696 	kstatus = spcs_s_kcreate();
2697 	if (kstatus == NULL)
2698 		return (ENOMEM);
2699 
2700 	if (!uparms.shadow_vol[0])
2701 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2702 
2703 	mutex_enter(&_ii_info_mutex);
2704 	ip = _ii_find_set(uparms.shadow_vol);
2705 	mutex_exit(&_ii_info_mutex);
2706 	if (ip == NULL)
2707 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2708 		    DSW_ENOTFOUND));
2709 
2710 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
2711 		_ii_ioctl_done(ip);
2712 		mutex_exit(&ip->bi_mutex);
2713 		spcs_s_add(kstatus, rc);
2714 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2715 		    DSW_ERSRVFAIL));
2716 	}
2717 
2718 	mutex_exit(&ip->bi_mutex);
2719 	_ii_error(ip, uparms.flags & DSW_OFFLINE);
2720 	mutex_enter(&ip->bi_mutex);
2721 	_ii_ioctl_done(ip);
2722 	mutex_exit(&ip->bi_mutex);
2723 
2724 	_ii_rlse_devs(ip, BMP);
2725 
2726 	spcs_s_kfree(kstatus);
2727 
2728 	return (0);
2729 }
2730 
2731 
2732 /*
2733  * _ii_wait
2734  *	Wait for a copy to complete.
2735  *
2736  * Calling/Exit State:
2737  *	Returns 0 if the copy completed, otherwise error code.
2738  *
2739  */
2740 /*ARGSUSED*/
2741 int
2742 _ii_wait(intptr_t arg, int ilp32, int *rvp)
2743 {
2744 	dsw_ioctl_t uparms;
2745 	dsw_ioctl32_t uparms32;
2746 	_ii_info_t *ip;
2747 	int rc = 0;
2748 	spcs_s_info_t kstatus;
2749 
2750 	if (ilp32) {
2751 		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2752 			return (EFAULT);
2753 		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2754 		uparms.status = (spcs_s_info_t)uparms32.status;
2755 		uparms.pid = uparms32.pid;
2756 	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2757 		return (EFAULT);
2758 
2759 	kstatus = spcs_s_kcreate();
2760 	if (kstatus == NULL)
2761 		return (ENOMEM);
2762 
2763 	if (!uparms.shadow_vol[0])
2764 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2765 
2766 	mutex_enter(&_ii_info_mutex);
2767 	ip = _ii_find_set(uparms.shadow_vol);
2768 	mutex_exit(&_ii_info_mutex);
2769 	if (ip == NULL)
2770 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2771 		    DSW_ENOTFOUND));
2772 
2773 	while (ip->bi_flags & DSW_COPYINGP) {
2774 		if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
2775 			/* Awoken by a signal */
2776 			rc = EINTR;
2777 			break;
2778 		}
2779 	}
2780 
2781 	/* Is this an attempt to unlock the copy/update PID? */
2782 	if (uparms.flags & CV_LOCK_PID) {
2783 		if (ip->bi_locked_pid == 0) {
2784 			rc = DSW_ENOTLOCKED;
2785 		} else if (uparms.pid == -1) {
2786 			cmn_err(CE_WARN, "!ii: Copy/Update PID %d, cleared",
2787 			    ip->bi_locked_pid);
2788 			ip->bi_locked_pid = 0;
2789 		} else if (uparms.pid != ip->bi_locked_pid) {
2790 			rc = DSW_EINUSE;
2791 		} else {
2792 			ip->bi_locked_pid = 0;
2793 		}
2794 	}
2795 
2796 	_ii_ioctl_done(ip);
2797 	mutex_exit(&ip->bi_mutex);
2798 
2799 	return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
2800 }
2801 
2802 
2803 static int
2804 _ii_reset_mstvol(_ii_info_t *ip)
2805 {
2806 	_ii_info_t *xip;
2807 
2808 	if (!NSHADOWS(ip))
2809 		return (DSW_COPYINGS | DSW_COPYINGP);
2810 
2811 	/* check for siblings updating master */
2812 	for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
2813 		if (xip == ip)
2814 			continue;
2815 		/* check if master is okay */
2816 		if ((xip->bi_flags & DSW_MSTOFFLINE) == 0) {
2817 			return (0);
2818 		}
2819 	}
2820 
2821 	return (DSW_COPYINGS | DSW_COPYINGP);
2822 }
2823 
2824 /*
2825  * _ii_reset
2826  *	Reset offlined underlying volumes
2827  *
2828  * Calling/Exit State:
2829  *	Returns 0 on success, otherwise an error code is returned
2830  *	and any additional error information is copied out to the user.
2831  */
2832 /*ARGSUSED*/
2833 int
2834 _ii_reset(intptr_t arg, int ilp32, int *rvp)
2835 {
2836 	dsw_ioctl_t uparms;
2837 	dsw_ioctl32_t uparms32;
2838 	_ii_info_t *ip;
2839 	nsc_buf_t *tmp = NULL;
2840 	int rc;
2841 	int flags;
2842 	ii_header_t *bm_header;
2843 	spcs_s_info_t kstatus;
2844 	int rtype;
2845 
2846 	if (ilp32) {
2847 		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2848 			return (EFAULT);
2849 		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2850 		uparms.status = (spcs_s_info_t)uparms32.status;
2851 	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2852 		return (EFAULT);
2853 
2854 	kstatus = spcs_s_kcreate();
2855 	if (kstatus == NULL)
2856 		return (ENOMEM);
2857 
2858 	if (!uparms.shadow_vol[0])
2859 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2860 
2861 	mutex_enter(&_ii_info_mutex);
2862 	ip = _ii_find_set(uparms.shadow_vol);
2863 	mutex_exit(&_ii_info_mutex);
2864 	if (ip == NULL)
2865 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2866 		    DSW_ENOTFOUND));
2867 
2868 	mutex_exit(&ip->bi_mutex);
2869 
2870 	/* Figure out what to do according to what was flagged as  */
2871 
2872 	if ((ip->bi_flags & DSW_OFFLINE) == 0) {
2873 		/* Nothing offline, so no op */
2874 		mutex_enter(&ip->bi_mutex);
2875 		_ii_ioctl_done(ip);
2876 		mutex_exit(&ip->bi_mutex);
2877 		spcs_s_kfree(kstatus);
2878 		return (0);
2879 	}
2880 
2881 	if (!ip->bi_bmpfd) {
2882 		/* No bitmap fd, can't do anything */
2883 		mutex_enter(&ip->bi_mutex);
2884 		_ii_ioctl_done(ip);
2885 		mutex_exit(&ip->bi_mutex);
2886 		spcs_s_kfree(kstatus);
2887 		return (DSW_EHDRBMP);
2888 	}
2889 
2890 	rtype = MSTR|SHDR|BMP;
2891 	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
2892 		mutex_enter(&ip->bi_mutex);
2893 		_ii_ioctl_done(ip);
2894 		mutex_exit(&ip->bi_mutex);
2895 		spcs_s_add(kstatus, rc);
2896 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2897 		    DSW_ERSRVFAIL));
2898 	}
2899 
2900 	/*
2901 	 * Cannot use _ii_bm_header_get as it will fail if DSW_BMPOFFLINE
2902 	 */
2903 	II_READ_START(ip, bitmap);
2904 	rc = nsc_alloc_buf(ip->bi_bmpfd, 0, FBA_LEN(sizeof (ii_header_t)),
2905 	    NSC_RDWRBUF, &tmp);
2906 	II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
2907 	if (!II_SUCCESS(rc)) {
2908 		_ii_rlse_devs(ip, rtype);
2909 		mutex_enter(&ip->bi_mutex);
2910 		_ii_ioctl_done(ip);
2911 		mutex_exit(&ip->bi_mutex);
2912 		if (tmp)
2913 			(void) nsc_free_buf(tmp);
2914 		_ii_error(ip, DSW_BMPOFFLINE);
2915 		spcs_s_add(kstatus, rc);
2916 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
2917 	}
2918 
2919 	bm_header = (ii_header_t *)(tmp)->sb_vec[0].sv_addr;
2920 	if (bm_header == NULL) {
2921 		_ii_rlse_devs(ip, rtype);
2922 		mutex_enter(&ip->bi_mutex);
2923 		_ii_ioctl_done(ip);
2924 		mutex_exit(&ip->bi_mutex);
2925 		if (tmp)
2926 			(void) nsc_free_buf(tmp);
2927 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
2928 	}
2929 
2930 	flags = ip->bi_flags & ~DSW_COPY_FLAGS;
2931 	if ((flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) == 0) {
2932 		if (((flags & DSW_SHDOFFLINE) == 0) &&
2933 		    ((flags & DSW_MSTOFFLINE) == DSW_MSTOFFLINE)) {
2934 			/* Shadow was OK but master was offline */
2935 			flags |= _ii_reset_mstvol(ip);
2936 		} else if ((flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
2937 			/* Shadow was offline, don't care what the master was */
2938 			flags |= (DSW_COPYINGM | DSW_COPYINGP);
2939 		}
2940 	}
2941 	if (ip->bi_flags & DSW_VOVERFLOW) {
2942 		ip->bi_flags &= ~DSW_VOVERFLOW;
2943 		ip->bi_flags |= DSW_FRECLAIM;
2944 	}
2945 	flags &= ~(DSW_OFFLINE | DSW_CFGOFFLINE | DSW_VOVERFLOW | DSW_OVERFLOW);
2946 	if ((ip->bi_flags & DSW_BMPOFFLINE) == DSW_BMPOFFLINE) {
2947 		/* free any overflow allocation */
2948 		ii_overflow_free(ip, INIT_OVR);
2949 		/* Bitmap now OK, so set up new bitmap header */
2950 		(void) strncpy(bm_header->master_vol, ii_pathname(ip->bi_mstfd),
2951 		    DSW_NAMELEN);
2952 		(void) strncpy(bm_header->shadow_vol, ii_pathname(ip->bi_shdfd),
2953 		    DSW_NAMELEN);
2954 		(void) strncpy(bm_header->bitmap_vol, ii_pathname(ip->bi_bmpfd),
2955 		    DSW_NAMELEN);
2956 		if (ip->bi_cluster) {
2957 			(void) strncpy(bm_header->clstr_name, ip->bi_cluster,
2958 			    DSW_NAMELEN);
2959 		}
2960 		if (ip->bi_group) {
2961 			(void) strncpy(bm_header->group_name, ip->bi_group,
2962 			    DSW_NAMELEN);
2963 		}
2964 		bm_header->ii_type = (flags & DSW_GOLDEN) ?
2965 		    DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
2966 		bm_header->ii_magic = DSW_DIRTY;
2967 		bm_header->ii_version = II_HEADER_VERSION;
2968 		bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
2969 		bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
2970 		bm_header->ii_throttle_delay = ip->bi_throttle_delay;
2971 		bm_header->ii_throttle_unit = ip->bi_throttle_unit;
2972 		ip->bi_shdfba = bm_header->ii_shdfba;
2973 		ip->bi_copyfba = bm_header->ii_copyfba;
2974 	} else if ((ip->bi_flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
2975 		/* bitmap didn't go offline, but shadow did */
2976 		if (ip->bi_overflow) {
2977 			ii_overflow_free(ip, RECLAIM);
2978 		}
2979 	}
2980 	_ii_lock_chunk(ip, II_NULLCHUNK);
2981 	mutex_enter(&ip->bi_mutex);
2982 	II_FLAG_ASSIGN(flags, ip);
2983 
2984 	mutex_exit(&ip->bi_mutex);
2985 	rc = ii_fill_copy_bmp(ip);
2986 	if (rc == 0)
2987 		rc = II_ZEROBM(ip);
2988 	if (rc == 0) {
2989 		if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
2990 			/* just clear bitmaps for dependent copy */
2991 			if (ip->bi_flags & DSW_TREEMAP) {
2992 				bm_header->ii_state = ip->bi_flags;
2993 				mutex_enter(&ip->bi_mutex);
2994 				rc = _ii_bm_header_put(bm_header, ip, tmp);
2995 				mutex_exit(&ip->bi_mutex);
2996 				tmp = NULL;
2997 				if (rc == 0) {
2998 					rc = ii_tinit(ip);
2999 					if (rc == 0) {
3000 						mutex_enter(&ip->bi_mutex);
3001 						bm_header =
3002 						    _ii_bm_header_get(ip, &tmp);
3003 						mutex_exit(&ip->bi_mutex);
3004 					}
3005 				}
3006 			}
3007 
3008 			if (rc == 0)
3009 				II_FLAG_CLRX(DSW_COPY_FLAGS, ip);
3010 			/*
3011 			 * if copy flags were set, another process may be
3012 			 * waiting
3013 			 */
3014 			if (rc == 0 && (flags & DSW_COPYINGP))
3015 				cv_broadcast(&ip->bi_copydonecv);
3016 
3017 			if (rc == 0)
3018 				rc = II_COPYBM(ip);
3019 		}
3020 	}
3021 	_ii_unlock_chunk(ip, II_NULLCHUNK);
3022 	if (rc) {
3023 		if (tmp)
3024 			_ii_bm_header_free(bm_header, ip, tmp);
3025 		mutex_enter(&ip->bi_mutex);
3026 		_ii_ioctl_done(ip);
3027 		mutex_exit(&ip->bi_mutex);
3028 		_ii_rlse_devs(ip, rtype);
3029 		spcs_s_add(kstatus, rc);
3030 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
3031 	}
3032 	bm_header->ii_state = ip->bi_flags;
3033 	mutex_enter(&ip->bi_mutex);
3034 	rc = _ii_bm_header_put(bm_header, ip, tmp);
3035 	if (!II_SUCCESS(rc)) {
3036 		_ii_ioctl_done(ip);
3037 		mutex_exit(&ip->bi_mutex);
3038 		_ii_rlse_devs(ip, rtype);
3039 		spcs_s_add(kstatus, rc);
3040 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
3041 	}
3042 
3043 	/* check with RDC */
3044 	if (ii_update_denied(ip, kstatus, (ip->bi_flags & DSW_COPYINGS) ?
3045 	    CV_SHD2MST : 0, 1)) {
3046 		_ii_ioctl_done(ip);
3047 		mutex_exit(&ip->bi_mutex);
3048 		_ii_rlse_devs(ip, rtype);
3049 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3050 	}
3051 
3052 	/* don't perform copy for dependent shadows */
3053 	if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
3054 		_ii_ioctl_done(ip);
3055 		mutex_exit(&ip->bi_mutex);
3056 		_ii_rlse_devs(ip, rtype);
3057 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3058 	}
3059 
3060 	mutex_exit(&ip->bi_mutex);
3061 	/* _ii_copyvol calls _ii_ioctl_done() */
3062 	if (ip->bi_flags & DSW_COPYINGS)
3063 		rc = _ii_copyvol(ip, CV_SHD2MST, rtype, kstatus, 1);
3064 	else if (ip->bi_flags & DSW_COPYINGM)
3065 		rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
3066 	else {
3067 		mutex_enter(&ip->bi_mutex);
3068 		_ii_ioctl_done(ip);
3069 		mutex_exit(&ip->bi_mutex);
3070 	}
3071 
3072 	_ii_rlse_devs(ip, rtype);
3073 
3074 	return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3075 }
3076 
3077 
3078 /*
3079  * _ii_version
3080  *	Get version of the InstantImage module.
3081  *
3082  * Calling/Exit State:
3083  *	Returns 0 on success, otherwise EFAULT is returned.
3084  *	The major and minor revisions are copied out to the user if
3085  *	successful.
3086  */
3087 /*ARGSUSED*/
3088 int
3089 _ii_version(intptr_t arg, int ilp32, int *rvp)
3090 {
3091 	dsw_version_t uversion;
3092 	dsw_version32_t uversion32;
3093 
3094 	if (ilp32) {
3095 		if (copyin((void *)arg, &uversion32, sizeof (uversion32)) < 0)
3096 			return (EFAULT);
3097 
3098 		uversion32.major = dsw_major_rev;
3099 		uversion32.minor = dsw_minor_rev;
3100 		uversion32.micro = dsw_micro_rev;
3101 		uversion32.baseline = dsw_baseline_rev;
3102 
3103 		if (copyout(&uversion32, (void *)arg, sizeof (uversion32)))
3104 			return (EFAULT);
3105 	} else {
3106 		if (copyin((void *)arg, &uversion, sizeof (uversion)) < 0)
3107 			return (EFAULT);
3108 
3109 		uversion.major = dsw_major_rev;
3110 		uversion.minor = dsw_minor_rev;
3111 		uversion.micro = dsw_micro_rev;
3112 		uversion.baseline = dsw_baseline_rev;
3113 
3114 		if (copyout(&uversion, (void *)arg, sizeof (uversion)))
3115 			return (EFAULT);
3116 	}
3117 
3118 	return (0);
3119 }
3120 
3121 /*
3122  * _ii_copyparm
3123  *	Get and set copy parameters.
3124  *
3125  * Calling/Exit State:
3126  *	Returns 0 on success, otherwise EFAULT is returned.
3127  *	The previous values are returned to the user.
3128  */
3129 /*ARGSUSED*/
3130 int
3131 _ii_copyparm(intptr_t arg, int ilp32, int *rvp)
3132 {
3133 	dsw_copyp_t copyp;
3134 	dsw_copyp32_t copyp32;
3135 	spcs_s_info_t kstatus;
3136 	_ii_info_t *ip;
3137 	int rc = 0;
3138 	int tmp;
3139 
3140 	if (ilp32) {
3141 		if (copyin((void *)arg, &copyp32, sizeof (copyp32)) < 0)
3142 			return (EFAULT);
3143 		II_TAIL_COPY(copyp, copyp32, shadow_vol, dsw_copyp_t);
3144 		copyp.status = (spcs_s_info_t)copyp32.status;
3145 	} else if (copyin((void *)arg, &copyp, sizeof (copyp)) < 0)
3146 			return (EFAULT);
3147 
3148 	kstatus = spcs_s_kcreate();
3149 	if (kstatus == NULL)
3150 		return (ENOMEM);
3151 
3152 	if (!copyp.shadow_vol[0])
3153 		return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_EEMPTY));
3154 
3155 	mutex_enter(&_ii_info_mutex);
3156 	ip = _ii_find_set(copyp.shadow_vol);
3157 	mutex_exit(&_ii_info_mutex);
3158 	if (ip == NULL)
3159 		return (spcs_s_ocopyoutf(&kstatus, copyp.status,
3160 		    DSW_ENOTFOUND));
3161 
3162 	tmp = ip->bi_throttle_delay;
3163 	if (copyp.copy_delay != -1) {
3164 		if (copyp.copy_delay >= MIN_THROTTLE_DELAY &&
3165 		    copyp.copy_delay <= MAX_THROTTLE_DELAY)
3166 			ip->bi_throttle_delay = copyp.copy_delay;
3167 		else {
3168 			cmn_err(CE_WARN, "!ii: delay out of range %d",
3169 			    copyp.copy_delay);
3170 			rc = EINVAL;
3171 		}
3172 	}
3173 	copyp.copy_delay = tmp;
3174 
3175 	tmp = ip->bi_throttle_unit;
3176 	if (copyp.copy_unit != -1) {
3177 		if (copyp.copy_unit >= MIN_THROTTLE_UNIT &&
3178 		    copyp.copy_unit <= MAX_THROTTLE_UNIT) {
3179 			if (rc != EINVAL)
3180 				ip->bi_throttle_unit = copyp.copy_unit;
3181 		} else {
3182 			cmn_err(CE_WARN, "!ii: unit out of range %d",
3183 			    copyp.copy_unit);
3184 			if (rc != EINVAL) {
3185 				rc = EINVAL;
3186 				ip->bi_throttle_delay = copyp.copy_delay;
3187 			}
3188 		}
3189 	}
3190 	copyp.copy_unit = tmp;
3191 
3192 	_ii_ioctl_done(ip);
3193 	mutex_exit(&ip->bi_mutex);
3194 
3195 	if (ilp32) {
3196 		copyp32.copy_delay = copyp.copy_delay;
3197 		copyp32.copy_unit = copyp.copy_unit;
3198 		if (copyout(&copyp32, (void *)arg, sizeof (copyp32)) < 0)
3199 			return (EFAULT);
3200 	} else if (copyout(&copyp, (void *)arg, sizeof (copyp)))
3201 			return (EFAULT);
3202 
3203 	return (spcs_s_ocopyoutf(&kstatus, copyp.status, rc));
3204 }
3205 
3206 
3207 /*
3208  * _ii_suspend_vol
3209  *	suspend an individual InstantImage group
3210  *
3211  * Calling/Exit State:
3212  *	Returns 0 on success, nonzero otherwise
3213  */
3214 
3215 int
3216 _ii_suspend_vol(_ii_info_t *ip)
3217 {
3218 	_ii_info_t **xip;
3219 	int copy_flag;
3220 	int rc;
3221 	nsc_buf_t *tmp = NULL;
3222 	ii_header_t *bm_header;
3223 
3224 	copy_flag = ip->bi_flags & DSW_COPY_FLAGS;
3225 
3226 	_ii_stopvol(ip);
3227 	ASSERT(total_ref(ip) == 0);
3228 
3229 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0)
3230 		return (rc);
3231 
3232 	/* this rw_enter forces us to drain all active IO */
3233 	rw_enter(&ip->bi_linkrw, RW_WRITER);
3234 	rw_exit(&ip->bi_linkrw);
3235 
3236 	mutex_enter(&_ii_info_mutex);
3237 	for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
3238 		if (ip == *xip)
3239 			break;
3240 	}
3241 	*xip = ip->bi_next;
3242 	mutex_exit(&_ii_info_mutex);
3243 
3244 	rc = II_SAVE_BMP(ip, 1);
3245 	mutex_enter(&ip->bi_mutex);
3246 	if (rc == 0)
3247 		bm_header = _ii_bm_header_get(ip, &tmp);
3248 	if (rc == 0 && bm_header) {
3249 		bm_header->ii_magic = DSW_CLEAN;
3250 		bm_header->ii_state |= copy_flag;
3251 		bm_header->ii_throttle_delay = ip->bi_throttle_delay;
3252 		bm_header->ii_throttle_unit = ip->bi_throttle_unit;
3253 		/* copy over the mtime */
3254 		bm_header->ii_mtime = ip->bi_mtime;
3255 		/* write it to disk */
3256 		rc = _ii_bm_header_put(bm_header, ip, tmp);
3257 	}
3258 	--iigkstat.num_sets.value.ul;
3259 	mutex_exit(&ip->bi_mutex);
3260 
3261 	ii_overflow_free(ip, NO_RECLAIM);
3262 	_ii_rlse_devs(ip, BMP);
3263 
3264 	ii_sibling_free(ip);
3265 
3266 	return (rc);
3267 }
3268 
3269 /*
3270  * _ii_suspend_cluster
3271  *	Cluster resource group is switching over to another node, so
3272  *	all shadowed volumes in that group are suspended.
3273  *
3274  * Returns 0 on success, or ESRCH if the name of the cluster resource
3275  * group couldn't be found.
3276  */
3277 int
3278 _ii_suspend_cluster(char *shadow_vol)
3279 {
3280 	int found, last;
3281 	uint64_t hash;
3282 	_ii_info_t *ip;
3283 	_ii_lsthead_t **cp, *xcp;
3284 	_ii_lstinfo_t **np, *xnp;
3285 
3286 	/* find appropriate cluster list */
3287 	mutex_enter(&_ii_cluster_mutex);
3288 	hash = nsc_strhash(shadow_vol);
3289 	for (cp = &_ii_cluster_top; *cp; cp = &((*cp)->lst_next)) {
3290 		if ((hash == (*cp)->lst_hash) && strncmp(shadow_vol,
3291 		    (*cp)->lst_name, DSW_NAMELEN) == 0)
3292 			break;
3293 	}
3294 
3295 	if (!*cp) {
3296 		mutex_exit(&_ii_cluster_mutex);
3297 		return (DSW_ECNOTFOUND);
3298 	}
3299 
3300 	found = 1;
3301 	last = 0;
3302 	while (found && !last) {
3303 		found = 0;
3304 
3305 		mutex_enter(&_ii_info_mutex);
3306 		for (np = &(*cp)->lst_start; *np; np = &((*np)->lst_next)) {
3307 			ip = (*np)->lst_ip;
3308 
3309 			if (ip->bi_disabled)
3310 				continue;
3311 
3312 			found++;
3313 
3314 			ip->bi_disabled = 1;
3315 			if (NSHADOWS(ip) && (ip->bi_master == ip)) {
3316 				ip->bi_flags &= (~DSW_COPYING);
3317 				ip->bi_state |= DSW_MULTIMST;
3318 			}
3319 			mutex_exit(&_ii_info_mutex);
3320 
3321 			xnp = *np;
3322 			*np = (*np)->lst_next;
3323 			kmem_free(xnp, sizeof (_ii_lstinfo_t));
3324 			ip->bi_cluster = NULL;
3325 
3326 			(void) _ii_suspend_vol(ip);
3327 			break;
3328 		}
3329 		if (found == 0)
3330 			mutex_exit(&_ii_info_mutex);
3331 		else if (!(*cp)->lst_start) {
3332 			xcp = *cp;
3333 			*cp = (*cp)->lst_next;
3334 			kmem_free(xcp, sizeof (_ii_lsthead_t));
3335 			last = 1;
3336 		}
3337 	}
3338 	mutex_exit(&_ii_cluster_mutex);
3339 
3340 	return (0);
3341 }
3342 
3343 /*
3344  * _ii_shutdown
3345  *	System is shutting down, so all shadowed volumes are suspended.
3346  *
3347  *	This always succeeds, so always returns 0.
3348  */
3349 
3350 /* ARGSUSED */
3351 
3352 int
3353 _ii_shutdown(intptr_t arg, int *rvp)
3354 {
3355 	_ii_info_t **xip, *ip;
3356 	int found;
3357 
3358 	*rvp = 0;
3359 
3360 	_ii_shutting_down = 1;
3361 
3362 	/* Go through the list until only disabled entries are found */
3363 
3364 	found = 1;
3365 	while (found) {
3366 		found = 0;
3367 
3368 		mutex_enter(&_ii_info_mutex);
3369 		for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
3370 			ip = *xip;
3371 			if (ip->bi_disabled) {
3372 				/* Also covers not fully configured yet */
3373 				continue;
3374 			}
3375 			found++;
3376 
3377 			ip->bi_disabled = 1;
3378 			mutex_exit(&_ii_info_mutex);
3379 
3380 			(void) _ii_suspend_vol(ip);
3381 
3382 			break;
3383 		}
3384 		if (found == 0)
3385 			mutex_exit(&_ii_info_mutex);
3386 	}
3387 
3388 	_ii_shutting_down = 0;
3389 
3390 	return (0);
3391 }
3392 
3393 /*
3394  * _ii_suspend
3395  *	Suspend an InstantImage, saving its state to allow a subsequent resume.
3396  *
3397  * Calling/Exit State:
3398  *	Returns 0 if the pair was suspended. Otherwise an error code
3399  *	is returned and any additional error information is copied
3400  *	out to the user.
3401  */
3402 
3403 /* ARGSUSED */
3404 
3405 int
3406 _ii_suspend(intptr_t arg, int ilp32, int *rvp)
3407 {
3408 	dsw_ioctl_t uparms;
3409 	dsw_ioctl32_t uparms32;
3410 	_ii_info_t *ip;
3411 	int rc;
3412 	spcs_s_info_t kstatus;
3413 
3414 	*rvp = 0;
3415 
3416 	if (ilp32) {
3417 		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
3418 			return (EFAULT);
3419 		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
3420 		uparms.status = (spcs_s_info_t)uparms32.status;
3421 	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
3422 		return (EFAULT);
3423 
3424 	kstatus = spcs_s_kcreate();
3425 	if (kstatus == NULL)
3426 		return (ENOMEM);
3427 
3428 	if (!uparms.shadow_vol[0])
3429 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
3430 
3431 	if ((uparms.flags & CV_IS_CLUSTER) != 0) {
3432 		rc = _ii_suspend_cluster(uparms.shadow_vol);
3433 	} else {
3434 		mutex_enter(&_ii_info_mutex);
3435 		ip = _ii_find_set(uparms.shadow_vol);
3436 		if (ip == NULL) {
3437 			mutex_exit(&_ii_info_mutex);
3438 			return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3439 			    DSW_ENOTFOUND));
3440 		}
3441 
3442 		ip->bi_disabled = 1;
3443 		if (NSHADOWS(ip) && (ip->bi_master == ip)) {
3444 			ip->bi_flags &= (~DSW_COPYING);
3445 			ip->bi_state |= DSW_MULTIMST;
3446 		}
3447 		mutex_exit(&_ii_info_mutex);
3448 
3449 		_ii_ioctl_done(ip);
3450 		mutex_exit(&ip->bi_mutex);
3451 
3452 		rc = _ii_suspend_vol(ip);
3453 	}
3454 
3455 	return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3456 }
3457 
3458 
3459 /*
3460  * _ii_abort
3461  *	Stop any copying process for shadow.
3462  *
3463  * Calling/Exit State:
3464  *	Returns 0 if the abort succeeded. Otherwise an error code
3465  *	is returned and any additional error information is copied
3466  *	out to the user.
3467  */
3468 
3469 /* ARGSUSED */
3470 
3471 int
3472 _ii_abort(intptr_t arg, int ilp32, int *rvp)
3473 {
3474 	dsw_ioctl_t uabort;
3475 	dsw_ioctl32_t uabort32;
3476 	_ii_info_t *ip;
3477 	int rc;
3478 	spcs_s_info_t kstatus;
3479 
3480 	if (ilp32) {
3481 		if (copyin((void *)arg, &uabort32, sizeof (uabort32)) < 0)
3482 			return (EFAULT);
3483 		II_TAIL_COPY(uabort, uabort32, shadow_vol, dsw_ioctl_t);
3484 		uabort.status = (spcs_s_info_t)uabort32.status;
3485 	} else if (copyin((void *)arg, &uabort, sizeof (uabort)) < 0)
3486 		return (EFAULT);
3487 
3488 	kstatus = spcs_s_kcreate();
3489 	if (kstatus == NULL)
3490 		return (ENOMEM);
3491 
3492 	if (!uabort.shadow_vol[0])
3493 		return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_EEMPTY));
3494 
3495 	mutex_enter(&_ii_info_mutex);
3496 	ip = _ii_find_set(uabort.shadow_vol);
3497 	mutex_exit(&_ii_info_mutex);
3498 	if (ip == NULL)
3499 		return (spcs_s_ocopyoutf(&kstatus, uabort.status,
3500 		    DSW_ENOTFOUND));
3501 
3502 	mutex_exit(&ip->bi_mutex);
3503 
3504 	rc = _ii_stopcopy(ip);
3505 
3506 	mutex_enter(&ip->bi_mutex);
3507 	_ii_ioctl_done(ip);
3508 	mutex_exit(&ip->bi_mutex);
3509 
3510 	return (spcs_s_ocopyoutf(&kstatus, uabort.status, rc));
3511 }
3512 
3513 
3514 /*
3515  * _ii_segment
3516  *	Copy out II pair bitmaps (cpy, shd, idx) in segments
3517  *
3518  * Calling/Exit State:
3519  *	Returns 0 if the operation succeeded. Otherwise an error code
3520  *	is returned and any additional error information is copied
3521  *	out to the user.
3522  *
3523  */
3524 int
3525 _ii_segment(intptr_t arg, int ilp32, int *rvp)
3526 {
3527 	dsw_segment_t usegment;
3528 	dsw_segment32_t usegment32;
3529 	_ii_info_t *ip;
3530 	int rc, size;
3531 	spcs_s_info_t kstatus;
3532 	int32_t bi_idxfba;
3533 
3534 	*rvp = 0;
3535 
3536 	if (ilp32) {
3537 		if (copyin((void *)arg, &usegment32, sizeof (usegment32)))
3538 			return (EFAULT);
3539 		usegment.status = (spcs_s_info_t)usegment32.status;
3540 		bcopy(usegment32.shadow_vol, usegment.shadow_vol, DSW_NAMELEN);
3541 		usegment.seg_number = (unsigned)usegment32.seg_number;
3542 		usegment.shd_bitmap =
3543 		    (unsigned char   *)(unsigned long)usegment32.shd_bitmap;
3544 		usegment.shd_size = usegment32.shd_size;
3545 		usegment.cpy_bitmap =
3546 		    (unsigned char   *)(unsigned long)usegment32.cpy_bitmap;
3547 		usegment.cpy_size = usegment32.cpy_size;
3548 		usegment.idx_bitmap =
3549 		    (unsigned char   *)(unsigned long)usegment32.idx_bitmap;
3550 		usegment.idx_size = usegment32.idx_size;
3551 	} else if (copyin((void *)arg, &usegment, sizeof (usegment)))
3552 		return (EFAULT);
3553 
3554 	kstatus = spcs_s_kcreate();
3555 	if (kstatus == NULL)
3556 		return (ENOMEM);
3557 
3558 	if (usegment.shadow_vol[0]) {
3559 		mutex_enter(&_ii_info_mutex);
3560 		ip = _ii_find_set(usegment.shadow_vol);
3561 		mutex_exit(&_ii_info_mutex);
3562 		if (ip == NULL)
3563 			return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3564 			    DSW_ENOTFOUND));
3565 	} else
3566 		return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3567 		    DSW_EEMPTY));
3568 
3569 	mutex_exit(&ip->bi_mutex);
3570 
3571 	size = ((((ip->bi_size + (DSW_SIZE-1))
3572 	    / DSW_SIZE) + (DSW_BITS-1))) / DSW_BITS;
3573 	bi_idxfba = ip->bi_copyfba + (ip->bi_copyfba - ip->bi_shdfba);
3574 	if (((nsc_size_t)usegment.seg_number > DSW_BM_FBA_LEN(ip->bi_size)) ||
3575 	    (usegment.shd_size > size) ||
3576 	    (usegment.cpy_size > size) ||
3577 	    (!(ip->bi_flags & DSW_GOLDEN) && (usegment.idx_size > size*32))) {
3578 		_ii_ioctl_done(ip);
3579 		return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3580 		    DSW_EMISMATCH));
3581 	}
3582 
3583 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3584 		mutex_enter(&ip->bi_mutex);
3585 		_ii_ioctl_done(ip);
3586 		mutex_exit(&ip->bi_mutex);
3587 		spcs_s_add(kstatus, rc);
3588 		return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3589 		    DSW_ERSRVFAIL));
3590 	}
3591 
3592 	if (usegment.shd_bitmap && usegment.shd_size > 0)
3593 		rc = II_CO_BMP(ip, ip->bi_shdfba+usegment.seg_number,
3594 		    usegment.shd_bitmap, usegment.shd_size);
3595 	if (rc == 0 && usegment.cpy_bitmap && usegment.cpy_size > 0)
3596 		rc = II_CO_BMP(ip, ip->bi_copyfba+usegment.seg_number,
3597 		    usegment.cpy_bitmap, usegment.cpy_size);
3598 	if (!(ip->bi_flags & DSW_GOLDEN)) {
3599 		if (rc == 0 && usegment.idx_bitmap && usegment.idx_size > 0)
3600 			rc = II_CO_BMP(ip, bi_idxfba+usegment.seg_number*32,
3601 			    usegment.idx_bitmap, usegment.idx_size);
3602 	}
3603 
3604 	_ii_rlse_devs(ip, BMP);
3605 	mutex_enter(&ip->bi_mutex);
3606 	_ii_ioctl_done(ip);
3607 	mutex_exit(&ip->bi_mutex);
3608 	if (rc) {
3609 		spcs_s_add(kstatus, rc);
3610 		return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EIO));
3611 	}
3612 
3613 	spcs_s_kfree(kstatus);
3614 	return (0);
3615 }
3616 
3617 
3618 /*
3619  * _ii_bitmap
3620  *	Copy out II pair bitmaps to user program
3621  *
3622  * Calling/Exit State:
3623  *	Returns 0 if the operation succeeded. Otherwise an error code
3624  *	is returned and any additional error information is copied
3625  *	out to the user.
3626  */
3627 
3628 int
3629 _ii_bitmap(intptr_t arg, int ilp32, int *rvp)
3630 {
3631 	dsw_bitmap_t ubitmap;
3632 	dsw_bitmap32_t ubitmap32;
3633 	_ii_info_t *ip;
3634 	int rc;
3635 	spcs_s_info_t kstatus;
3636 
3637 	*rvp = 0;
3638 
3639 	if (ilp32) {
3640 		if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)))
3641 			return (EFAULT);
3642 		ubitmap.status = (spcs_s_info_t)ubitmap32.status;
3643 		bcopy(ubitmap32.shadow_vol, ubitmap.shadow_vol, DSW_NAMELEN);
3644 		ubitmap.shd_bitmap =
3645 		    (unsigned char   *)(unsigned long)ubitmap32.shd_bitmap;
3646 		ubitmap.shd_size = ubitmap32.shd_size;
3647 		ubitmap.copy_bitmap =
3648 		    (unsigned char   *)(unsigned long)ubitmap32.copy_bitmap;
3649 		ubitmap.copy_size = ubitmap32.copy_size;
3650 	} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)))
3651 		return (EFAULT);
3652 
3653 	kstatus = spcs_s_kcreate();
3654 	if (kstatus == NULL)
3655 		return (ENOMEM);
3656 
3657 	if (!ubitmap.shadow_vol[0])
3658 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
3659 
3660 	mutex_enter(&_ii_info_mutex);
3661 	ip = _ii_find_set(ubitmap.shadow_vol);
3662 	mutex_exit(&_ii_info_mutex);
3663 	if (ip == NULL)
3664 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3665 		    DSW_ENOTFOUND));
3666 
3667 	mutex_exit(&ip->bi_mutex);
3668 
3669 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3670 		mutex_enter(&ip->bi_mutex);
3671 		_ii_ioctl_done(ip);
3672 		mutex_exit(&ip->bi_mutex);
3673 		spcs_s_add(kstatus, rc);
3674 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3675 		    DSW_ERSRVFAIL));
3676 	}
3677 
3678 	if (ubitmap.shd_bitmap && ubitmap.shd_size > 0)
3679 		rc = II_CO_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
3680 		    ubitmap.shd_size);
3681 	if (rc == 0 && ubitmap.copy_bitmap && ubitmap.copy_size > 0)
3682 		rc = II_CO_BMP(ip, ip->bi_copyfba, ubitmap.copy_bitmap,
3683 		    ubitmap.copy_size);
3684 	_ii_rlse_devs(ip, BMP);
3685 	mutex_enter(&ip->bi_mutex);
3686 	_ii_ioctl_done(ip);
3687 	mutex_exit(&ip->bi_mutex);
3688 	if (rc) {
3689 		spcs_s_add(kstatus, rc);
3690 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
3691 	}
3692 
3693 	spcs_s_kfree(kstatus);
3694 
3695 	return (0);
3696 }
3697 
3698 /*
3699  * _ii_export
3700  *	Exports the shadow volume
3701  *
3702  * Calling/Exit State:
3703  *	Returns 0 if the shadow was exported. Otherwise an error code
3704  *	is returned and any additional error information is copied
3705  *	out to the user.
3706  *
3707  * Description:
3708  */
3709 
3710 int
3711 _ii_export(intptr_t arg, int ilp32, int *rvp)
3712 {
3713 	dsw_ioctl_t uparms;
3714 	dsw_ioctl32_t uparms32;
3715 	_ii_info_t *ip;
3716 	nsc_fd_t *fd;
3717 	int rc = 0;
3718 	spcs_s_info_t kstatus;
3719 
3720 	*rvp = 0;
3721 
3722 	if (ilp32) {
3723 		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
3724 			return (EFAULT);
3725 		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
3726 		uparms.status = (spcs_s_info_t)uparms32.status;
3727 	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
3728 		return (EFAULT);
3729 
3730 	kstatus = spcs_s_kcreate();
3731 	if (kstatus == NULL)
3732 		return (ENOMEM);
3733 
3734 	if (!uparms.shadow_vol[0])
3735 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
3736 
3737 	mutex_enter(&_ii_info_mutex);
3738 	ip = _ii_find_set(uparms.shadow_vol);
3739 	mutex_exit(&_ii_info_mutex);
3740 	if (ip == NULL)
3741 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3742 		    DSW_ENOTFOUND));
3743 
3744 	if ((ip->bi_flags & DSW_GOLDEN) == 0 ||
3745 	    ((ip->bi_flags & (DSW_COPYING|DSW_SHDEXPORT|DSW_SHDIMPORT)) != 0)) {
3746 		/*
3747 		 * Cannot export a dependent copy or while still copying or
3748 		 * the shadow is already in an exported state
3749 		 */
3750 		rc = ip->bi_flags & (DSW_SHDEXPORT|DSW_SHDIMPORT)
3751 		    ? DSW_EALREADY : DSW_EDEPENDENCY;
3752 		_ii_ioctl_done(ip);
3753 		mutex_exit(&ip->bi_mutex);
3754 		return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3755 	}
3756 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3757 		_ii_ioctl_done(ip);
3758 		mutex_exit(&ip->bi_mutex);
3759 		spcs_s_add(kstatus, rc);
3760 		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3761 		    DSW_ERSRVFAIL));
3762 	}
3763 	II_FLAG_SET(DSW_SHDEXPORT, ip);
3764 
3765 	mutex_exit(&ip->bi_mutex);
3766 
3767 	/* this rw_enter forces us to drain all active IO */
3768 	rw_enter(&ip->bi_linkrw, RW_WRITER);
3769 	rw_exit(&ip->bi_linkrw);
3770 
3771 	mutex_enter(&ip->bi_mutex);
3772 
3773 	_ii_rlse_devs(ip, BMP);
3774 
3775 	/* Shut shadow volume. */
3776 	if (ip->bi_shdfd) {
3777 		if (ip->bi_shdrsrv) {
3778 			nsc_release(ip->bi_shdfd);
3779 			ip->bi_shdrsrv = NULL;
3780 		}
3781 		fd = ip->bi_shdfd;
3782 		ip->bi_shdfd = NULL;
3783 		mutex_exit(&ip->bi_mutex);
3784 		(void) nsc_close(fd);
3785 		mutex_enter(&ip->bi_mutex);
3786 	}
3787 
3788 	if (ip->bi_shdrfd) {
3789 		if (ip->bi_shdrrsrv) {
3790 			nsc_release(ip->bi_shdrfd);
3791 			ip->bi_shdrrsrv = NULL;
3792 		}
3793 		fd = ip->bi_shdrfd;
3794 		ip->bi_shdrfd = NULL;
3795 		mutex_exit(&ip->bi_mutex);
3796 		(void) nsc_close(fd);
3797 		mutex_enter(&ip->bi_mutex);
3798 	}
3799 	_ii_ioctl_done(ip);
3800 	mutex_exit(&ip->bi_mutex);
3801 
3802 	(void) _ii_reserve_begin(ip);
3803 	if (ip->bi_shd_tok) {
3804 		(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
3805 		ip->bi_shd_tok = NULL;
3806 	}
3807 
3808 	if (ip->bi_shdr_tok) {
3809 		(void) _ii_unregister_path(ip->bi_shdr_tok, 0,
3810 		    "raw shadow");
3811 		ip->bi_shdr_tok = NULL;
3812 	}
3813 	_ii_reserve_end(ip);
3814 
3815 	spcs_s_kfree(kstatus);
3816 
3817 	return (0);
3818 }
3819 
3820 /*
3821  * _ii_join
3822  *	Rejoins the shadow volume
3823  *
3824  * Calling/Exit State:
3825  *	Returns 0 if the shadow was exported. Otherwise an error code
3826  *	is returned and any additional error information is copied
3827  *	out to the user.
3828  *
3829  * Description:
3830  */
3831 
3832 int
3833 _ii_join(intptr_t arg, int ilp32, int *rvp)
3834 {
3835 	dsw_bitmap_t ubitmap;
3836 	dsw_bitmap32_t ubitmap32;
3837 	_ii_info_t *ip;
3838 	uint64_t bm_size;
3839 	int rc = 0;
3840 	int rtype = 0;
3841 	spcs_s_info_t kstatus;
3842 
3843 	*rvp = 0;
3844 
3845 	if (ilp32) {
3846 		if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
3847 			return (EFAULT);
3848 		II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
3849 		ubitmap.status = (spcs_s_info_t)ubitmap32.status;
3850 		ubitmap.shd_bitmap =
3851 		    (unsigned char   *)(unsigned long)ubitmap32.shd_bitmap;
3852 		ubitmap.shd_size = ubitmap32.shd_size;
3853 	} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
3854 		return (EFAULT);
3855 
3856 	kstatus = spcs_s_kcreate();
3857 	if (kstatus == NULL)
3858 		return (ENOMEM);
3859 
3860 	if (!ubitmap.shadow_vol[0])
3861 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
3862 
3863 	mutex_enter(&_ii_info_mutex);
3864 	ip = _ii_find_set(ubitmap.shadow_vol);
3865 	mutex_exit(&_ii_info_mutex);
3866 	if (ip == NULL)
3867 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3868 		    DSW_ENOTFOUND));
3869 
3870 	/*
3871 	 * Check that group has shadow exported.
3872 	 */
3873 	if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
3874 		/*
3875 		 * Cannot join if the shadow isn't exported.
3876 		 */
3877 		_ii_ioctl_done(ip);
3878 		mutex_exit(&ip->bi_mutex);
3879 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3880 		    DSW_ENOTEXPORTED));
3881 	}
3882 	/* check bitmap is at least large enough for master volume size */
3883 	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
3884 	if (ubitmap.shd_size < bm_size) {
3885 		/* bitmap is to small */
3886 		_ii_ioctl_done(ip);
3887 		mutex_exit(&ip->bi_mutex);
3888 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3889 		    DSW_EINVALBMP));
3890 	}
3891 	/* read in bitmap and or with differences bitmap */
3892 	rtype = BMP;
3893 	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
3894 		_ii_ioctl_done(ip);
3895 		mutex_exit(&ip->bi_mutex);
3896 		spcs_s_add(kstatus, rc);
3897 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3898 		    DSW_ERSRVFAIL));
3899 	}
3900 	rc = II_CI_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
3901 	    ubitmap.shd_size);
3902 	/* open up shadow */
3903 	if ((rc = ii_open_shadow(ip, ip->bi_keyname)) != 0) {
3904 		_ii_ioctl_done(ip);
3905 		mutex_exit(&ip->bi_mutex);
3906 		spcs_s_add(kstatus, rc);
3907 		_ii_rlse_devs(ip, rtype);
3908 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EOPEN));
3909 	}
3910 	ii_register_shd(ip);
3911 	if (!rc)
3912 		II_FLAG_CLR(DSW_SHDEXPORT, ip);
3913 	_ii_ioctl_done(ip);
3914 	mutex_exit(&ip->bi_mutex);
3915 	_ii_rlse_devs(ip, rtype);
3916 
3917 	if (rc) {
3918 		spcs_s_add(kstatus, rc);
3919 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
3920 	}
3921 
3922 	spcs_s_kfree(kstatus);
3923 
3924 	return (0);
3925 }
3926 
3927 
3928 /*
3929  * _ii_ocreate
3930  *	Configures a volume suitable for use as an overflow volume.
3931  *
3932  * Calling/Exit State:
3933  *	Returns 0 if the volume was configured successfully. Otherwise
3934  *	 an error code is returned and any additional error information
3935  *	is copied out to the user.
3936  *
3937  * Description:
3938  */
3939 
3940 int
3941 _ii_ocreate(intptr_t arg, int ilp32, int *rvp)
3942 {
3943 	dsw_ioctl_t uioctl;
3944 	dsw_ioctl32_t uioctl32;
3945 	_ii_overflow_t	ov;
3946 	_ii_overflow_t	*op = &ov;
3947 	int rc = 0;
3948 	nsc_fd_t	*fd;
3949 	nsc_iodev_t	*iodev;
3950 	nsc_size_t vol_size;
3951 	char *overflow_vol;
3952 	spcs_s_info_t kstatus;
3953 
3954 	*rvp = 0;
3955 
3956 	if (ilp32) {
3957 		if (copyin((void *)arg, &uioctl32, sizeof (uioctl32)) < 0)
3958 			return (EFAULT);
3959 		II_TAIL_COPY(uioctl, uioctl32, shadow_vol, dsw_ioctl_t);
3960 		uioctl.status = (spcs_s_info_t)uioctl32.status;
3961 	} else if (copyin((void *)arg, &uioctl, sizeof (uioctl)) < 0)
3962 		return (EFAULT);
3963 
3964 	overflow_vol = uioctl.shadow_vol;
3965 	kstatus = spcs_s_kcreate();
3966 	if (kstatus == NULL)
3967 		return (ENOMEM);
3968 
3969 	if (!overflow_vol[0])
3970 		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EEMPTY));
3971 
3972 	if (ii_volume(overflow_vol, 0) != NONE)
3973 		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EINUSE));
3974 
3975 	fd = nsc_open(overflow_vol,
3976 	    NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc);
3977 	if (!fd)
3978 		fd = nsc_open(uioctl.shadow_vol,
3979 		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
3980 		    (blind_t)&(iodev), &rc);
3981 	if (fd == NULL) {
3982 		spcs_s_add(kstatus, rc);
3983 		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
3984 	}
3985 	if ((rc = nsc_reserve(fd, 0)) != 0) {
3986 		spcs_s_add(kstatus, rc);
3987 		(void) nsc_close(fd);
3988 		return (spcs_s_ocopyoutf(&kstatus, uioctl.status,
3989 		    DSW_ERSRVFAIL));
3990 	}
3991 	/* setup magic number etc; */
3992 	rc = nsc_partsize(fd, &vol_size);
3993 	if (rc) {
3994 		spcs_s_add(kstatus, rc);
3995 		(void) nsc_close(fd);
3996 		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
3997 	}
3998 	op->ii_hmagic = II_OMAGIC;
3999 		/* take 1 off as chunk 0 contains header */
4000 	op->ii_nchunks = (vol_size / DSW_SIZE) -1;
4001 	op->ii_drefcnt = 0;
4002 	op->ii_used = 1;			/* we have used the header */
4003 	op->ii_unused = op->ii_nchunks - op->ii_used;
4004 	op->ii_freehead = II_NULLNODE;
4005 	op->ii_hversion = OV_HEADER_VERSION;
4006 	op->ii_flags = 0;
4007 	op->ii_urefcnt = 0;
4008 	(void) strncpy(op->ii_volname, uioctl.shadow_vol, DSW_NAMELEN);
4009 	rc = _ii_nsc_io(0, KS_NA, fd, NSC_WRBUF, II_OHEADER_FBA,
4010 	    (unsigned char *)&op->ii_do, sizeof (op->ii_do));
4011 	(void) nsc_release(fd);
4012 	(void) nsc_close(fd);
4013 	if (rc) {
4014 		spcs_s_add(kstatus, rc);
4015 		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
4016 	}
4017 
4018 	spcs_s_kfree(kstatus);
4019 
4020 	return (0);
4021 }
4022 
4023 
4024 /*
4025  * _ii_oattach
4026  *	Attaches the volume in the "bitmap_vol" field as an overflow volume.
4027  *
4028  * Calling/Exit State:
4029  *	Returns 0 if the volume was attached. Fails if the shadow group
4030  *	is of the wrong type (eg independent) or already has an overflow
4031  *	volume attached.
4032  *
4033  * Description:
4034  */
4035 
4036 int
4037 _ii_oattach(intptr_t arg, int ilp32, int *rvp)
4038 {
4039 	dsw_config_t uconfig;
4040 	dsw_config32_t uconfig32;
4041 	_ii_info_t *ip;
4042 	int rc = 0;
4043 	int rtype = 0;
4044 	ii_header_t *bm_header;
4045 	nsc_buf_t *tmp = NULL;
4046 	spcs_s_info_t kstatus;
4047 
4048 	*rvp = 0;
4049 
4050 	if (ilp32) {
4051 		if (copyin((void *)arg, &uconfig32, sizeof (uconfig32)) < 0)
4052 			return (EFAULT);
4053 		II_TAIL_COPY(uconfig, uconfig32, shadow_vol, dsw_config_t);
4054 		uconfig.status = (spcs_s_info_t)uconfig32.status;
4055 	} else if (copyin((void *)arg, &uconfig, sizeof (uconfig)) < 0)
4056 		return (EFAULT);
4057 
4058 	kstatus = spcs_s_kcreate();
4059 	if (kstatus == NULL)
4060 		return (ENOMEM);
4061 
4062 	if (!uconfig.shadow_vol[0])
4063 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EEMPTY));
4064 
4065 	switch (ii_volume(uconfig.bitmap_vol, 0)) {
4066 	case NONE:
4067 	case OVR:
4068 		break;
4069 	default:
4070 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EINUSE));
4071 	}
4072 	mutex_enter(&_ii_info_mutex);
4073 	ip = _ii_find_set(uconfig.shadow_vol);
4074 	mutex_exit(&_ii_info_mutex);
4075 	if (ip == NULL)
4076 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4077 		    DSW_ENOTFOUND));
4078 
4079 	/* check shadow doesn't already have an overflow volume */
4080 	if (ip->bi_overflow) {
4081 		_ii_ioctl_done(ip);
4082 		mutex_exit(&ip->bi_mutex);
4083 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4084 		    DSW_EALREADY));
4085 	}
4086 	/* check shadow is mapped so can have an overflow */
4087 	if ((ip->bi_flags&DSW_TREEMAP) == 0) {
4088 		_ii_ioctl_done(ip);
4089 		mutex_exit(&ip->bi_mutex);
4090 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4091 		    DSW_EWRONGTYPE));
4092 	}
4093 	rtype = BMP;
4094 	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4095 		_ii_ioctl_done(ip);
4096 		mutex_exit(&ip->bi_mutex);
4097 		spcs_s_add(kstatus, rc);
4098 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4099 		    DSW_ERSRVFAIL));
4100 	}
4101 	/* attach volume */
4102 	if ((rc = ii_overflow_attach(ip, uconfig.bitmap_vol, 1)) != 0) {
4103 		_ii_ioctl_done(ip);
4104 		mutex_exit(&ip->bi_mutex);
4105 		_ii_rlse_devs(ip, rtype);
4106 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status, rc));
4107 	}
4108 
4109 	/* re-write header so shadow can be restarted with overflow volume */
4110 
4111 	bm_header = _ii_bm_header_get(ip, &tmp);
4112 	if (bm_header == NULL) {
4113 		/* detach volume */
4114 		ii_overflow_free(ip, RECLAIM);
4115 		_ii_ioctl_done(ip);
4116 		mutex_exit(&ip->bi_mutex);
4117 		_ii_rlse_devs(ip, rtype);
4118 		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4119 		    DSW_EHDRBMP));
4120 	}
4121 	(void) strncpy(bm_header->overflow_vol, uconfig.bitmap_vol,
4122 	    DSW_NAMELEN);
4123 	(void) _ii_bm_header_put(bm_header, ip, tmp);
4124 	_ii_rlse_devs(ip, rtype);
4125 	_ii_ioctl_done(ip);
4126 	mutex_exit(&ip->bi_mutex);
4127 
4128 	spcs_s_kfree(kstatus);
4129 
4130 	return (0);
4131 }
4132 
4133 
4134 /*
4135  * _ii_odetach
4136  *	Breaks the link with the overflow volume.
4137  *
4138  * Calling/Exit State:
4139  *	Returns 0 if the overflow volume was detached. Otherwise an error code
4140  *	is returned and any additional error information is copied
4141  *	out to the user.
4142  *
4143  * Description:
4144  */
4145 
4146 int
4147 _ii_odetach(intptr_t arg, int ilp32, int *rvp)
4148 {
4149 	dsw_bitmap_t ubitmap;
4150 	dsw_bitmap32_t ubitmap32;
4151 	_ii_info_t *ip;
4152 	int rc = 0;
4153 	int rtype = 0;
4154 	ii_header_t *bm_header;
4155 	nsc_buf_t *tmp = NULL;
4156 	spcs_s_info_t kstatus;
4157 
4158 	*rvp = 0;
4159 
4160 	if (ilp32) {
4161 		if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
4162 			return (EFAULT);
4163 		II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
4164 		ubitmap.status = (spcs_s_info_t)ubitmap32.status;
4165 	} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
4166 		return (EFAULT);
4167 
4168 	kstatus = spcs_s_kcreate();
4169 	if (kstatus == NULL)
4170 		return (ENOMEM);
4171 
4172 	if (!ubitmap.shadow_vol[0])
4173 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
4174 
4175 	mutex_enter(&_ii_info_mutex);
4176 	ip = _ii_find_set(ubitmap.shadow_vol);
4177 	mutex_exit(&_ii_info_mutex);
4178 	if (ip == NULL)
4179 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4180 		    DSW_ENOTFOUND));
4181 
4182 	if ((ip->bi_flags&DSW_VOVERFLOW) != 0) {
4183 		_ii_ioctl_done(ip);
4184 		mutex_exit(&ip->bi_mutex);
4185 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4186 		    DSW_EODEPENDENCY));
4187 	}
4188 	rtype = BMP;
4189 	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4190 		_ii_ioctl_done(ip);
4191 		mutex_exit(&ip->bi_mutex);
4192 		spcs_s_add(kstatus, rc);
4193 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4194 		    DSW_ERSRVFAIL));
4195 	}
4196 	ii_overflow_free(ip, RECLAIM);
4197 	/* re-write header to break link with overflow volume */
4198 
4199 	bm_header = _ii_bm_header_get(ip, &tmp);
4200 	if (bm_header == NULL) {
4201 		_ii_rlse_devs(ip, rtype);
4202 		_ii_ioctl_done(ip);
4203 		mutex_exit(&ip->bi_mutex);
4204 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4205 		    DSW_EHDRBMP));
4206 	}
4207 	bzero(bm_header->overflow_vol, DSW_NAMELEN);
4208 	(void) _ii_bm_header_put(bm_header, ip, tmp);
4209 
4210 	_ii_rlse_devs(ip, rtype);
4211 	_ii_ioctl_done(ip);
4212 
4213 	mutex_exit(&ip->bi_mutex);
4214 	if (rc) {
4215 		spcs_s_add(kstatus, rc);
4216 		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
4217 	}
4218 
4219 	spcs_s_kfree(kstatus);
4220 
4221 	--iigkstat.assoc_over.value.ul;
4222 
4223 	return (0);
4224 }
4225 
4226 
4227 /*
4228  * _ii_gc_list
4229  *	Returns a list of all lists, or all entries in a list
4230  *
4231  */
4232 int
4233 _ii_gc_list(intptr_t arg, int ilp32, int *rvp, kmutex_t *mutex,
4234     _ii_lsthead_t *lst)
4235 {
4236 	dsw_aioctl_t ulist;
4237 	dsw_aioctl32_t ulist32;
4238 	size_t name_offset;
4239 	int i;
4240 	spcs_s_info_t kstatus;
4241 	char *carg = (char *)arg;
4242 	uint64_t hash;
4243 	_ii_lsthead_t *cp;
4244 	_ii_lstinfo_t *np;
4245 
4246 	*rvp = 0;
4247 	name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]);
4248 	if (ilp32) {
4249 		if (copyin((void *) arg, &ulist32, sizeof (ulist32)) < 0)
4250 			return (EFAULT);
4251 		II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
4252 		ulist.status = (spcs_s_info_t)ulist32.status;
4253 		name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]);
4254 	} else if (copyin((void *) arg, &ulist, sizeof (ulist)) < 0)
4255 		return (EFAULT);
4256 
4257 	kstatus = spcs_s_kcreate();
4258 	if (kstatus == NULL)
4259 		return (ENOMEM);
4260 
4261 	mutex_enter(mutex);
4262 	if (ulist.shadow_vol[ 0 ] != 0) {
4263 		/* search for specific list */
4264 		hash = nsc_strhash(ulist.shadow_vol);
4265 		for (cp = lst; cp; cp = cp->lst_next) {
4266 			if ((hash == cp->lst_hash) && strncmp(ulist.shadow_vol,
4267 			    cp->lst_name, DSW_NAMELEN) == 0) {
4268 				break;
4269 			}
4270 		}
4271 		if (cp) {
4272 			for (i = 0, np = cp->lst_start; i < ulist.count && np;
4273 			    np = np->lst_next, carg += DSW_NAMELEN, i++) {
4274 				if (copyout(np->lst_ip->bi_keyname,
4275 				    carg + name_offset, DSW_NAMELEN)) {
4276 					mutex_exit(mutex);
4277 					return (spcs_s_ocopyoutf(&kstatus,
4278 					    ulist.status, EFAULT));
4279 				}
4280 			}
4281 		} else {
4282 			i = 0;
4283 		}
4284 	} else {
4285 		/* return full list */
4286 		for (i = 0, cp = lst; i < ulist.count && cp;
4287 		    carg += DSW_NAMELEN, i++, cp = cp->lst_next) {
4288 			if (copyout(cp->lst_name, carg + name_offset,
4289 			    DSW_NAMELEN)) {
4290 				mutex_exit(mutex);
4291 				return (spcs_s_ocopyoutf(&kstatus, ulist.status,
4292 				    EFAULT));
4293 			}
4294 		}
4295 	}
4296 	mutex_exit(mutex);
4297 	ulist32.count = ulist.count = i;
4298 
4299 	if (ilp32) {
4300 		if (copyout(&ulist32, (void *) arg, name_offset))
4301 			return (EFAULT);
4302 	} else {
4303 		if (copyout(&ulist, (void*) arg, name_offset))
4304 			return (EFAULT);
4305 	}
4306 
4307 	return (spcs_s_ocopyoutf(&kstatus, ulist.status, 0));
4308 }
4309 
4310 /*
4311  * _ii_olist
4312  *	Breaks the link with the overflow volume.
4313  *
4314  * Calling/Exit State:
4315  *	Returns 0 if the overflow volume was detached. Otherwise an error code
4316  *	is returned and any additional error information is copied
4317  *	out to the user.
4318  *
4319  * Description:
4320  */
4321 
4322 int
4323 _ii_olist(intptr_t arg, int ilp32, int *rvp)
4324 {
4325 	dsw_aioctl_t ulist;
4326 	dsw_aioctl32_t ulist32;
4327 	_ii_overflow_t *op;
4328 	size_t name_offset;
4329 	int rc = 0;
4330 	int i;
4331 	char *carg = (char *)arg;
4332 	spcs_s_info_t kstatus;
4333 
4334 	*rvp = 0;
4335 
4336 	name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]);
4337 	if (ilp32) {
4338 		if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
4339 			return (EFAULT);
4340 		II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
4341 		ulist.status = (spcs_s_info_t)ulist32.status;
4342 		name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]);
4343 	} else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
4344 		return (EFAULT);
4345 
4346 	kstatus = spcs_s_kcreate();
4347 	if (kstatus == NULL)
4348 		return (ENOMEM);
4349 
4350 	i = 0;
4351 
4352 	mutex_enter(&_ii_overflow_mutex);
4353 	for (op = _ii_overflow_top; i < ulist.count && op;
4354 	    carg += DSW_NAMELEN) {
4355 		if (copyout(op->ii_volname, carg+name_offset, DSW_NAMELEN)) {
4356 			mutex_exit(&_ii_overflow_mutex);
4357 			return (spcs_s_ocopyoutf(&kstatus, ulist.status,
4358 			    EFAULT));
4359 		}
4360 		i++;
4361 		op = op->ii_next;
4362 	}
4363 	mutex_exit(&_ii_overflow_mutex);
4364 	ulist32.count = ulist.count = i;
4365 	/* return count of items listed to user */
4366 	if (ilp32) {
4367 		if (copyout(&ulist32, (void *)arg, name_offset))
4368 			return (EFAULT);
4369 	} else {
4370 		if (copyout(&ulist, (void *)arg, name_offset))
4371 			return (EFAULT);
4372 	}
4373 
4374 	return (spcs_s_ocopyoutf(&kstatus, ulist.status, rc));
4375 }
4376 
4377 /*
4378  * _ii_ostat
4379  *	Breaks the link with the overflow volume.
4380  *
4381  * Calling/Exit State:
4382  *	Returns 0 if the overflow volume was detached. Otherwise an error code
4383  *	is returned and any additional error information is copied
4384  *	out to the user.
4385  *
4386  * Description:
4387  */
4388 
4389 int
4390 _ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2)
4391 {
4392 	dsw_ostat_t ustat;
4393 	dsw_ostat32_t ustat32;
4394 	_ii_overflow_t *op;
4395 	spcs_s_info_t kstatus;
4396 
4397 	*rvp = 0;
4398 
4399 	if (ilp32) {
4400 		if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
4401 			return (EFAULT);
4402 		II_TAIL_COPY(ustat, ustat32, overflow_vol, dsw_ostat_t);
4403 		ustat.status = (spcs_s_info_t)ustat32.status;
4404 	} else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
4405 		return (EFAULT);
4406 
4407 	kstatus = spcs_s_kcreate();
4408 	if (kstatus == NULL)
4409 		return (ENOMEM);
4410 	if (!ustat.overflow_vol[0])
4411 		return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));
4412 
4413 	op = _ii_find_overflow(ustat.overflow_vol);
4414 	if (op == NULL)
4415 		return (spcs_s_ocopyoutf(&kstatus, ustat.status,
4416 		    DSW_ENOTFOUND));
4417 
4418 	ustat.nchunks = op->ii_nchunks;
4419 	ustat.used = op->ii_used;
4420 	ustat.unused = op->ii_unused;
4421 	ustat.drefcnt = op->ii_drefcnt;
4422 	ustat.crefcnt = op->ii_crefcnt;
4423 	if (is_iost_2) {
4424 		ustat.hversion = op->ii_hversion;
4425 		ustat.flags = op->ii_flags;
4426 		ustat.hmagic = op->ii_hmagic;
4427 	}
4428 
4429 	spcs_s_kfree(kstatus);
4430 	if (ilp32) {
4431 		ustat32.nchunks = ustat.nchunks;
4432 		ustat32.used = ustat.used;
4433 		ustat32.unused = ustat.unused;
4434 		ustat32.drefcnt = ustat.drefcnt;
4435 		ustat32.crefcnt = ustat.crefcnt;
4436 		if (is_iost_2) {
4437 			ustat32.hversion = ustat.hversion;
4438 			ustat32.flags = ustat.flags;
4439 			ustat32.hmagic = ustat.hmagic;
4440 		}
4441 		if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
4442 			return (EFAULT);
4443 	} else {
4444 		if (copyout(&ustat, (void *)arg, sizeof (ustat)))
4445 			return (EFAULT);
4446 	}
4447 	return (0);
4448 }
4449 
4450 /*
4451  * _ii_move_grp()
4452  *	Move a set from one group to another, possibly creating the new
4453  *	group.
4454  */
4455 
4456 int
4457 _ii_move_grp(intptr_t arg, int ilp32, int *rvp)
4458 {
4459 	dsw_movegrp_t umove;
4460 	dsw_movegrp32_t umove32;
4461 	spcs_s_info_t kstatus;
4462 	_ii_info_t *ip;
4463 	int rc = 0;
4464 	nsc_buf_t *tmp;
4465 	ii_header_t *bm_header;
4466 
4467 	*rvp = 0;
4468 
4469 	if (ilp32) {
4470 		if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
4471 			return (EFAULT);
4472 		II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
4473 		umove.status = (spcs_s_info_t)umove32.status;
4474 	} else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
4475 		return (EFAULT);
4476 
4477 	kstatus = spcs_s_kcreate();
4478 	if (kstatus == NULL)
4479 		return (ENOMEM);
4480 
4481 	if (!umove.shadow_vol[0])
4482 		return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));
4483 
4484 	mutex_enter(&_ii_info_mutex);
4485 	ip = _ii_find_set(umove.shadow_vol);
4486 	mutex_exit(&_ii_info_mutex);
4487 
4488 	if (!ip)
4489 		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4490 		    DSW_ENOTFOUND));
4491 
4492 	if (!umove.new_group[0]) {
4493 		/* are we clearing the group association? */
4494 		if (ip->bi_group) {
4495 			DTRACE_PROBE2(_ii_move_grp1, char *, ip->bi_keyname,
4496 			    char *, ip->bi_group);
4497 			rc = II_UNLINK_GROUP(ip);
4498 		}
4499 	} else if (!ip->bi_group) {
4500 		rc = II_LINK_GROUP(ip, umove.new_group);
4501 		DTRACE_PROBE2(_ii_move_grp2, char *, ip->bi_keyname,
4502 		    char *, ip->bi_group);
4503 	} else {
4504 		/* remove it from one group and add it to the other */
4505 		DTRACE_PROBE3(_ii_move_grp, char *, ip->bi_keyname,
4506 		    char *, ip->bi_group, char *, umove.new_group);
4507 		rc = II_UNLINK_GROUP(ip);
4508 		if (!rc)
4509 			rc = II_LINK_GROUP(ip, umove.new_group);
4510 	}
4511 
4512 	/* ** BEGIN UPDATE BITMAP HEADER ** */
4513 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
4514 		_ii_ioctl_done(ip);
4515 		mutex_exit(&ip->bi_mutex);
4516 		spcs_s_add(kstatus, rc);
4517 		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4518 		    DSW_ERSRVFAIL));
4519 	}
4520 	bm_header = _ii_bm_header_get(ip, &tmp);
4521 	if (bm_header) {
4522 		(void) strncpy(bm_header->group_name, umove.new_group,
4523 		    DSW_NAMELEN);
4524 		(void) _ii_bm_header_put(bm_header, ip, tmp);
4525 	}
4526 	_ii_rlse_devs(ip, BMP);
4527 	/* ** END UPDATE BITMAP HEADER ** */
4528 
4529 	_ii_ioctl_done(ip);
4530 	mutex_exit(&ip->bi_mutex);
4531 
4532 	return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
4533 }
4534 
4535 /*
4536  * _ii_change_tag()
4537  *	Move a set from one group to another, possibly creating the new
4538  *	group.
4539  */
4540 
4541 int
4542 _ii_change_tag(intptr_t arg, int ilp32, int *rvp)
4543 {
4544 	dsw_movegrp_t umove;
4545 	dsw_movegrp32_t umove32;
4546 	spcs_s_info_t kstatus;
4547 	_ii_info_t *ip;
4548 	int rc = 0;
4549 	nsc_buf_t *tmp;
4550 	ii_header_t *bm_header;
4551 
4552 	*rvp = 0;
4553 
4554 	if (ilp32) {
4555 		if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
4556 			return (EFAULT);
4557 		II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
4558 		umove.status = (spcs_s_info_t)umove32.status;
4559 	} else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
4560 		return (EFAULT);
4561 
4562 	kstatus = spcs_s_kcreate();
4563 	if (kstatus == NULL)
4564 		return (ENOMEM);
4565 
4566 	if (!umove.shadow_vol[0])
4567 		return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));
4568 
4569 	mutex_enter(&_ii_info_mutex);
4570 	ip = _ii_find_set(umove.shadow_vol);
4571 	mutex_exit(&_ii_info_mutex);
4572 
4573 	if (!ip)
4574 		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4575 		    DSW_ENOTFOUND));
4576 
4577 	if (!umove.new_group[0]) {
4578 		/* are we clearing the group association? */
4579 		if (ip->bi_cluster) {
4580 			DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
4581 			    char *, ip->bi_cluster);
4582 			rc = II_UNLINK_CLUSTER(ip);
4583 		}
4584 	} else if (!ip->bi_cluster) {
4585 		/* are we adding it to a group for the first time? */
4586 		rc = II_LINK_CLUSTER(ip, umove.new_group);
4587 		DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
4588 		    char *, ip->bi_cluster);
4589 	} else {
4590 		/* remove it from one group and add it to the other */
4591 		DTRACE_PROBE3(_ii_change_tag_2, char *, ip->bi_keyname,
4592 		    char *, ip->bi_cluster, char *, umove.new_group);
4593 		rc = II_UNLINK_CLUSTER(ip);
4594 		if (!rc)
4595 			rc = II_LINK_CLUSTER(ip, umove.new_group);
4596 	}
4597 
4598 	/* ** BEGIN UPDATE BITMAP HEADER ** */
4599 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
4600 		_ii_ioctl_done(ip);
4601 		mutex_exit(&ip->bi_mutex);
4602 		spcs_s_add(kstatus, rc);
4603 		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4604 		    DSW_ERSRVFAIL));
4605 	}
4606 	bm_header = _ii_bm_header_get(ip, &tmp);
4607 	if (bm_header) {
4608 		(void) strncpy(bm_header->clstr_name, umove.new_group,
4609 		    DSW_NAMELEN);
4610 		(void) _ii_bm_header_put(bm_header, ip, tmp);
4611 	}
4612 	_ii_rlse_devs(ip, BMP);
4613 	/* ** END UPDATE BITMAP HEADER ** */
4614 
4615 	_ii_ioctl_done(ip);
4616 	mutex_exit(&ip->bi_mutex);
4617 
4618 	return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
4619 }
4620 
4621 
4622 /*
4623  * _ii_spcs_s_ocopyoutf()
4624  * Wrapper for spcs_s_ocopyoutf() used by _ii_chk_copy() which permits
4625  * the spcs_s_info_t argument to be NULL. _ii_chk_copy() requires this
4626  * functionality as it is sometimes called by _ii_control_copy() which
4627  * has no user context to copy any errors into. At all other times a NULL
4628  * spcs_s_info_t argument would indicate a bug in the calling function.
4629  */
4630 
4631 static int
4632 _ii_spcs_s_ocopyoutf(spcs_s_info_t *kstatusp, spcs_s_info_t ustatus, int err)
4633 {
4634 	if (ustatus)
4635 		return (spcs_s_ocopyoutf(kstatusp, ustatus, err));
4636 	spcs_s_kfree(*kstatusp);
4637 	return (err);
4638 }
4639 
4640 static int
4641 _ii_chk_copy(_ii_info_t *ip, int flags, spcs_s_info_t *kstatusp, pid_t pid,
4642     spcs_s_info_t ustatus)
4643 {
4644 	_ii_info_t *xip;
4645 	int rc;
4646 	int rtype;
4647 
4648 	if ((ip->bi_flags & DSW_COPYINGP) != 0) {
4649 		_ii_ioctl_done(ip);
4650 		mutex_exit(&ip->bi_mutex);
4651 		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
4652 	}
4653 
4654 	if (ip->bi_flags & DSW_OFFLINE) {
4655 		_ii_ioctl_done(ip);
4656 		mutex_exit(&ip->bi_mutex);
4657 		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOFFLINE));
4658 	}
4659 
4660 	if ((ip->bi_flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) != 0) {
4661 		_ii_ioctl_done(ip);
4662 		mutex_exit(&ip->bi_mutex);
4663 		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4664 		    DSW_EISEXPORTED));
4665 	}
4666 
4667 	if ((flags & CV_SHD2MST) == CV_SHD2MST) {
4668 		if ((ip->bi_flags & DSW_COPYINGM) != 0) {
4669 				_ii_ioctl_done(ip);
4670 				mutex_exit(&ip->bi_mutex);
4671 				return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4672 				    DSW_ECOPYING));
4673 		}
4674 		/* check if any sibling shadow is copying towards this master */
4675 		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
4676 			if (ip != xip && (xip->bi_flags & DSW_COPYINGS) != 0) {
4677 				_ii_ioctl_done(ip);
4678 				mutex_exit(&ip->bi_mutex);
4679 				return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4680 				    DSW_ECOPYING));
4681 			}
4682 		}
4683 	}
4684 
4685 	if (((flags & CV_SHD2MST) == 0) &&
4686 	    ((ip->bi_flags & DSW_COPYINGS) != 0)) {
4687 		_ii_ioctl_done(ip);
4688 		mutex_exit(&ip->bi_mutex);
4689 		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
4690 	}
4691 
4692 	if (ip->bi_flags & DSW_TREEMAP) {
4693 		if ((ip->bi_flags & DSW_OVERFLOW) && (flags & CV_SHD2MST)) {
4694 			_ii_ioctl_done(ip);
4695 			mutex_exit(&ip->bi_mutex);
4696 			return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4697 			    DSW_EINCOMPLETE));
4698 		}
4699 	}
4700 
4701 	/* Assure that no other PID owns this copy/update */
4702 	if (ip->bi_locked_pid == 0) {
4703 		if (flags & CV_LOCK_PID)
4704 			ip->bi_locked_pid = pid;
4705 	} else if (ip->bi_locked_pid != pid) {
4706 		_ii_ioctl_done(ip);
4707 		mutex_exit(&ip->bi_mutex);
4708 		return (spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINUSE));
4709 	}
4710 
4711 	mutex_exit(&ip->bi_mutex);
4712 
4713 	rtype = MSTR|SHDR|BMP;
4714 	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4715 		mutex_enter(&ip->bi_mutex);
4716 		_ii_ioctl_done(ip);
4717 		mutex_exit(&ip->bi_mutex);
4718 		spcs_s_add(*kstatusp, rc);
4719 		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4720 		    DSW_ERSRVFAIL));
4721 	}
4722 
4723 	if (ii_update_denied(ip, *kstatusp, flags & CV_SHD2MST, 0)) {
4724 		mutex_enter(&ip->bi_mutex);
4725 		_ii_ioctl_done(ip);
4726 		mutex_exit(&ip->bi_mutex);
4727 		_ii_rlse_devs(ip, rtype);
4728 		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4729 		    DSW_EOPACKAGE));
4730 	}
4731 
4732 	return (0);
4733 }
4734 
4735 static int
4736 _ii_do_copy(_ii_info_t *ip, int flags, spcs_s_info_t kstatus, int waitflag)
4737 {
4738 	int rc = 0;
4739 	int rtype = MSTR|SHDR|BMP;
4740 	_ii_overflow_t *op;
4741 	int quick_update = 0;
4742 
4743 	waitflag = (waitflag != 0);
4744 	/*
4745 	 * a copy of a tree-mapped device must be downgraded to
4746 	 * an update.
4747 	 */
4748 	if (ip->bi_flags & DSW_TREEMAP)
4749 		flags |= CV_BMP_ONLY;
4750 
4751 	/*
4752 	 * If we want to update the dependent shadow we only need to zero
4753 	 * the shadow bitmap.
4754 	 */
4755 
4756 	if (((ip->bi_flags & DSW_GOLDEN) == 0) &&
4757 	    (flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) {
4758 
4759 		DTRACE_PROBE(DEPENDENT);
4760 
4761 		/* assign updating time */
4762 		ip->bi_mtime = ddi_get_time();
4763 
4764 		if (ip->bi_flags & DSW_TREEMAP) {
4765 			DTRACE_PROBE(COMPACT_DEPENDENT);
4766 
4767 			if (ip->bi_overflow &&
4768 			    (ip->bi_overflow->ii_flags & IIO_VOL_UPDATE) == 0) {
4769 				/* attempt to do a quick update */
4770 				quick_update = 1;
4771 				ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
4772 				ip->bi_overflow->ii_detachcnt = 1;
4773 			}
4774 
4775 			rc = ii_tinit(ip);
4776 
4777 			if (quick_update && ip->bi_overflow) {
4778 				/* clean up */
4779 				ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
4780 				ip->bi_overflow->ii_detachcnt = 0;
4781 			}
4782 		}
4783 
4784 		if (rc == 0)
4785 			rc = II_ZEROBM(ip);	/* update copy of shadow */
4786 		if (((op = ip->bi_overflow) != NULL) &&
4787 		    (op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) {
4788 			mutex_enter(&_ii_overflow_mutex);
4789 			if (ip->bi_flags & DSW_OVRHDRDRTY) {
4790 				mutex_enter(&ip->bi_mutex);
4791 				ip->bi_flags &= ~DSW_OVRHDRDRTY;
4792 				mutex_exit(&ip->bi_mutex);
4793 				ASSERT(op->ii_urefcnt > 0);
4794 				op->ii_urefcnt--;
4795 			}
4796 			if (op->ii_urefcnt == 0) {
4797 				op->ii_flags &= ~IIO_CNTR_INVLD;
4798 				op->ii_unused = op->ii_nchunks - 1;
4799 			}
4800 			mutex_exit(&_ii_overflow_mutex);
4801 		}
4802 		mutex_enter(&ip->bi_mutex);
4803 		II_FLAG_CLR(DSW_OVERFLOW, ip);
4804 		mutex_exit(&ip->bi_mutex);
4805 
4806 		_ii_unlock_chunk(ip, II_NULLCHUNK);
4807 		mutex_enter(&ip->bi_mutex);
4808 		_ii_ioctl_done(ip);
4809 		mutex_exit(&ip->bi_mutex);
4810 		_ii_rlse_devs(ip, rtype);
4811 		if (rc) {
4812 			spcs_s_add(kstatus, rc);
4813 			return (DSW_EIO);
4814 		} else {
4815 			DTRACE_PROBE(_ii_do_copy_end);
4816 			return (0);
4817 		}
4818 	}
4819 
4820 	/*
4821 	 * need to perform an actual copy.
4822 	 */
4823 
4824 	/*
4825 	 * Perform bitmap copy if asked or from dependent shadow to master.
4826 	 */
4827 	if ((flags & CV_BMP_ONLY) ||
4828 	    ((flags & CV_SHD2MST) &&
4829 	    ((ip->bi_flags & DSW_GOLDEN) == 0))) {
4830 		DTRACE_PROBE(INDEPENDENT_fast);
4831 		rc = II_ORBM(ip);		/* save shadow bits for copy */
4832 	} else {
4833 		DTRACE_PROBE(INDEPENDENT_slow);
4834 		rc = ii_fill_copy_bmp(ip); /* set bits for independent copy */
4835 	}
4836 	if (rc == 0)
4837 		rc = II_ZEROBM(ip);
4838 	_ii_unlock_chunk(ip, II_NULLCHUNK);
4839 	if (rc == 0) {
4840 		mutex_enter(&ip->bi_mutex);
4841 		if (ip->bi_flags & (DSW_COPYINGP | DSW_SHDEXPORT)) {
4842 			rc = (ip->bi_flags & DSW_COPYINGP)
4843 			    ? DSW_ECOPYING : DSW_EISEXPORTED;
4844 
4845 			_ii_ioctl_done(ip);
4846 			mutex_exit(&ip->bi_mutex);
4847 			_ii_rlse_devs(ip, rtype);
4848 			return (rc);
4849 		}
4850 
4851 		/* assign copying time */
4852 		ip->bi_mtime = ddi_get_time();
4853 
4854 		if (flags & CV_SHD2MST)
4855 			II_FLAG_SET(DSW_COPYINGS | DSW_COPYINGP, ip);
4856 		else
4857 			II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
4858 		mutex_exit(&ip->bi_mutex);
4859 		rc = _ii_copyvol(ip, (flags & CV_SHD2MST),
4860 		    rtype, kstatus, waitflag);
4861 	} else {
4862 		mutex_enter(&ip->bi_mutex);
4863 		_ii_ioctl_done(ip);
4864 		mutex_exit(&ip->bi_mutex);
4865 	}
4866 
4867 	if (waitflag)
4868 		_ii_rlse_devs(ip, rtype);
4869 
4870 	return (rc);
4871 }
4872 
4873 /*
4874  * _ii_copy
4875  *	Copy or update (take snapshot) II volume.
4876  *
4877  * Calling/Exit State:
4878  *	Returns 0 if the operation succeeded. Otherwise an error code
4879  *	is returned and any additional error information is copied
4880  *	out to the user.
4881  */
4882 
4883 int
4884 _ii_copy(intptr_t arg, int ilp32, int *rvp)
4885 {
4886 	dsw_ioctl_t ucopy;
4887 	dsw_ioctl32_t ucopy32;
4888 	_ii_info_t *ip;
4889 	int rc = 0;
4890 	spcs_s_info_t kstatus;
4891 
4892 	*rvp = 0;
4893 
4894 	if (ilp32) {
4895 		if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
4896 			return (EFAULT);
4897 		II_TAIL_COPY(ucopy, ucopy32, shadow_vol, dsw_ioctl_t);
4898 		ucopy.status = (spcs_s_info_t)ucopy32.status;
4899 	} else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
4900 		return (EFAULT);
4901 
4902 	kstatus = spcs_s_kcreate();
4903 	if (kstatus == NULL)
4904 		return (ENOMEM);
4905 
4906 	if (!ucopy.shadow_vol[0])
4907 		return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_EEMPTY));
4908 
4909 	mutex_enter(&_ii_info_mutex);
4910 	ip = _ii_find_set(ucopy.shadow_vol);
4911 	mutex_exit(&_ii_info_mutex);
4912 	if (ip == NULL)
4913 		return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
4914 		    DSW_ENOTFOUND));
4915 
4916 	/* Check that the copy/update makes sense */
4917 	if ((rc = _ii_chk_copy(ip, ucopy.flags, &kstatus, ucopy.pid,
4918 	    ucopy.status)) == 0) {
4919 		/* perform the copy */
4920 		_ii_lock_chunk(ip, II_NULLCHUNK);
4921 		/* _ii_do_copy() calls _ii_ioctl_done() */
4922 		rc = _ii_do_copy(ip, ucopy.flags, kstatus, 1);
4923 		return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
4924 	}
4925 
4926 	return (rc);
4927 }
4928 
4929 /*
4930  * _ii_mass_copy
4931  * Copies/updates the sets pointed to in the ipa array.
4932  *
4933  * Calling/Exit State:
4934  * Returns 0 if the operations was successful.  Otherwise an
4935  * error code.
4936  */
4937 int
4938 _ii_mass_copy(_ii_info_t **ipa, dsw_aioctl_t *ucopy, int wait)
4939 {
4940 	int i;
4941 	int rc = 0;
4942 	int failed;
4943 	int rtype = MSTR|SHDR|BMP;
4944 	_ii_info_t *ip;
4945 	spcs_s_info_t kstatus;
4946 
4947 	kstatus = spcs_s_kcreate();
4948 	if (kstatus == NULL)
4949 		return (ENOMEM);
4950 
4951 	/* Check copy validitity */
4952 	for (i = 0; i < ucopy->count; i++) {
4953 		ip = ipa[i];
4954 
4955 		rc = _ii_chk_copy(ip, ucopy->flags, &kstatus, ucopy->pid,
4956 		    ucopy->status);
4957 
4958 		if (rc) {
4959 			/* Clean up the mess */
4960 
4961 			DTRACE_PROBE1(_ii_mass_copy_end1, int, rc);
4962 
4963 			/*
4964 			 * The array ipa now looks like:
4965 			 *    0..(i-1): needs mutex_enter/ioctl_done/mutex_exit
4966 			 *    i: needs nothing (_ii_chk_copy does cleanup)
4967 			 *    (i+1)..n: needs just ioctl_done/mutex_exit
4968 			 */
4969 
4970 			failed = i;
4971 
4972 			for (i = 0; i < failed; i++) {
4973 				mutex_enter(&(ipa[i]->bi_mutex));
4974 				_ii_ioctl_done(ipa[i]);
4975 				mutex_exit(&(ipa[i]->bi_mutex));
4976 				_ii_rlse_devs(ipa[i], rtype);
4977 			}
4978 
4979 			/* skip 'failed', start with failed + 1 */
4980 
4981 			for (i = failed + 1; i < ucopy->count; i++) {
4982 				_ii_ioctl_done(ipa[i]);
4983 				mutex_exit(&(ipa[i]->bi_mutex));
4984 			}
4985 
4986 			return (rc);
4987 		}
4988 	}
4989 
4990 	/* Check for duplicate shadows in same II group */
4991 	if (ucopy->flags & CV_SHD2MST) {
4992 		/* Reset the state of all masters */
4993 		for (i = 0; i < ucopy->count; i++) {
4994 			ip = ipa[i];
4995 			ip->bi_master->bi_state &= ~DSW_MSTTARGET;
4996 		}
4997 
4998 		for (i = 0; i < ucopy->count; i++) {
4999 			ip = ipa[i];
5000 			/*
5001 			 * Check the state of the master.  If DSW_MSTTARGET is
5002 			 * set, it's because this master is attached to another
5003 			 * shadow within this set.
5004 			 */
5005 			if (ip->bi_master->bi_state & DSW_MSTTARGET) {
5006 				rc = EINVAL;
5007 				break;
5008 			}
5009 
5010 			/*
5011 			 * Set the DSW_MSTTARGET bit on the master associated
5012 			 * with this shadow.  This will allow us to detect
5013 			 * multiple shadows pointing to this master within
5014 			 * this loop.
5015 			 */
5016 			ip->bi_master->bi_state |= DSW_MSTTARGET;
5017 		}
5018 	}
5019 
5020 	/* Handle error */
5021 	if (rc) {
5022 		DTRACE_PROBE1(_ii_mass_copy_end2, int, rc);
5023 		for (i = 0; i < ucopy->count; i++) {
5024 			ip = ipa[i];
5025 
5026 			_ii_rlse_devs(ip, rtype);
5027 
5028 			mutex_enter(&ip->bi_mutex);
5029 			_ii_ioctl_done(ip);
5030 			mutex_exit(&ip->bi_mutex);
5031 		}
5032 
5033 		return (spcs_s_ocopyoutf(&kstatus, ucopy->status, rc));
5034 	}
5035 
5036 	/* Lock bitmaps & prepare counts */
5037 	for (i = 0; i < ucopy->count; i++) {
5038 		ip = ipa[i];
5039 		_ii_lock_chunk(ip, II_NULLCHUNK);
5040 		if (ip->bi_overflow) {
5041 			ip->bi_overflow->ii_detachcnt = 0;
5042 		}
5043 	}
5044 
5045 	/* determine which volumes we're dealing with */
5046 	for (i = 0; i < ucopy->count; i++) {
5047 		ip = ipa[i];
5048 		if (ip->bi_overflow) {
5049 			ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
5050 			if ((ucopy->flags & (CV_BMP_ONLY|CV_SHD2MST)) ==
5051 			    CV_BMP_ONLY) {
5052 				++ip->bi_overflow->ii_detachcnt;
5053 			}
5054 		}
5055 	}
5056 
5057 	/* Perform copy */
5058 	for (i = 0; i < ucopy->count; i++) {
5059 		ip = ipa[i];
5060 		rc = _ii_do_copy(ip, ucopy->flags, kstatus, wait);
5061 		/* Hum... what to do if one of these fails? */
5062 	}
5063 
5064 	/* clear out flags so as to prevent any accidental reuse */
5065 	for (i = 0; i < ucopy->count; i++) {
5066 		ip = ipa[i];
5067 		if (ip->bi_overflow)
5068 			ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
5069 	}
5070 
5071 	/*
5072 	 * We can only clean up the kstatus structure if there are
5073 	 * no waiters.  If someone's waiting for the information,
5074 	 * _ii_copyvolp() uses spcs_s_add to write to kstatus.  Panic
5075 	 * would ensue if we freed it up now.
5076 	 */
5077 	if (!wait)
5078 		rc = spcs_s_ocopyoutf(&kstatus, ucopy->status, rc);
5079 
5080 	return (rc);
5081 }
5082 
5083 /*
5084  * _ii_list_copy
5085  * Retrieve a list from a character array and use _ii_mass_copy to
5086  * initiate a copy/update operation on all of the specified sets.
5087  *
5088  * Calling/Exit State:
5089  * Returns 0 if the operations was successful.  Otherwise an
5090  * error code.
5091  */
5092 int
5093 _ii_list_copy(char *list, dsw_aioctl_t *ucopy, int wait)
5094 {
5095 	int i;
5096 	int rc = 0;
5097 	char *name;
5098 	_ii_info_t *ip;
5099 	_ii_info_t **ipa;
5100 
5101 	ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
5102 
5103 	/* Reserve devices */
5104 	name = list;
5105 	mutex_enter(&_ii_info_mutex);
5106 	for (i = 0; i < ucopy->count; i++, name += DSW_NAMELEN) {
5107 		ip = _ii_find_set(name);
5108 
5109 		if (ip == NULL) {
5110 			rc = DSW_ENOTFOUND;
5111 			break;
5112 		}
5113 
5114 		ipa[i] = ip;
5115 	}
5116 
5117 	if (rc != 0) {
5118 		/* Failed to find all sets, release those we do have */
5119 		while (i-- > 0) {
5120 			ip = ipa[i];
5121 			mutex_enter(&ip->bi_mutex);
5122 			_ii_ioctl_done(ip);
5123 			mutex_exit(&ip->bi_mutex);
5124 		}
5125 	} else {
5126 		/* Begin copy operation */
5127 		rc = _ii_mass_copy(ipa, ucopy, wait);
5128 	}
5129 
5130 	mutex_exit(&_ii_info_mutex);
5131 
5132 	kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
5133 
5134 	return (rc);
5135 }
5136 
5137 /*
5138  * _ii_group_copy
5139  * Retrieve list of sets in a group and use _ii_mass_copy to initiate
5140  * a copy/update of all of them.
5141  *
5142  * Calling/Exit State:
5143  * Returns 0 if the operations was successful.  Otherwise an
5144  * error code.
5145  */
5146 int
5147 _ii_group_copy(char *name, dsw_aioctl_t *ucopy, int wait)
5148 {
5149 	int		i;
5150 	int		rc;
5151 	uint64_t	hash;
5152 	_ii_info_t	**ipa;
5153 	_ii_lsthead_t	*head;
5154 	_ii_lstinfo_t	*np;
5155 
5156 	/* find group */
5157 	hash = nsc_strhash(name);
5158 
5159 	mutex_enter(&_ii_group_mutex);
5160 
5161 	for (head = _ii_group_top; head; head = head->lst_next) {
5162 		if (hash == head->lst_hash && strncmp(head->lst_name,
5163 		    name, DSW_NAMELEN) == 0)
5164 			break;
5165 	}
5166 
5167 	if (!head) {
5168 		mutex_exit(&_ii_group_mutex);
5169 		DTRACE_PROBE(_ii_group_copy);
5170 		return (DSW_EGNOTFOUND);
5171 	}
5172 
5173 	/* Count entries */
5174 	for (ucopy->count = 0, np = head->lst_start; np; np = np->lst_next)
5175 		++ucopy->count;
5176 
5177 	if (ucopy->count == 0) {
5178 		mutex_exit(&_ii_group_mutex);
5179 		return (DSW_EGNOTFOUND);
5180 	}
5181 
5182 	ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
5183 	if (ipa == NULL) {
5184 		mutex_exit(&_ii_group_mutex);
5185 		return (ENOMEM);
5186 	}
5187 
5188 	/* Create list */
5189 	mutex_enter(&_ii_info_mutex);
5190 	np = head->lst_start;
5191 	for (i = 0; i < ucopy->count; i++) {
5192 		ASSERT(np != 0);
5193 
5194 		ipa[i] = np->lst_ip;
5195 
5196 		mutex_enter(&ipa[i]->bi_mutex);
5197 		ipa[i]->bi_ioctl++;
5198 
5199 		np = np->lst_next;
5200 	}
5201 
5202 	/* Begin copy operation */
5203 	rc = _ii_mass_copy(ipa, ucopy, wait);
5204 
5205 	mutex_exit(&_ii_info_mutex);
5206 	mutex_exit(&_ii_group_mutex);
5207 
5208 	kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
5209 
5210 	return (rc);
5211 }
5212 
5213 /*
5214  * _ii_acopy
5215  *	Copy or update (take snapshot) II multiple volumes.
5216  *
5217  * Calling/Exit State:
5218  *	Returns 0 if the operation succeeded. Otherwise an error code
5219  *	is returned and any additional error information is copied
5220  *	out to the user.
5221  */
5222 int
5223 _ii_acopy(intptr_t arg, int ilp32, int *rvp)
5224 {
5225 	int rc;
5226 	size_t name_offset;
5227 	char *list;
5228 	char *nptr;
5229 	char name[DSW_NAMELEN];
5230 	dsw_aioctl_t ucopy;
5231 	dsw_aioctl32_t ucopy32;
5232 	spcs_s_info_t kstatus;
5233 
5234 	*rvp = 0;
5235 
5236 	name_offset = offsetof(dsw_aioctl_t, shadow_vol[0]);
5237 
5238 	if (ilp32) {
5239 		if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
5240 			return (EFAULT);
5241 		II_TAIL_COPY(ucopy, ucopy32, flags, dsw_ioctl_t);
5242 		ucopy.status = (spcs_s_info_t)ucopy32.status;
5243 		name_offset = offsetof(dsw_aioctl32_t, shadow_vol[0]);
5244 	} else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
5245 		return (EFAULT);
5246 
5247 	kstatus = spcs_s_kcreate();
5248 
5249 	if (kstatus == NULL)
5250 		return (ENOMEM);
5251 
5252 	nptr = (char *)arg + name_offset;
5253 	rc = 0;
5254 
5255 	if (ucopy.flags & CV_IS_GROUP) {
5256 		if (copyin(nptr, name, DSW_NAMELEN) < 0)
5257 			return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5258 			    EFAULT));
5259 
5260 		/* kstatus information is handled within _ii_group_copy */
5261 		rc = _ii_group_copy(name, &ucopy, 0);
5262 	} else if (ucopy.count > 0) {
5263 		list = kmem_alloc(DSW_NAMELEN * ucopy.count, KM_SLEEP);
5264 
5265 		if (list == NULL)
5266 			return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5267 			    ENOMEM));
5268 
5269 		if (copyin(nptr, list, DSW_NAMELEN * ucopy.count) < 0)
5270 			return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5271 			    EFAULT));
5272 
5273 		rc = _ii_list_copy(list, &ucopy, 0);
5274 		kmem_free(list, DSW_NAMELEN * ucopy.count);
5275 	}
5276 
5277 	return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
5278 }
5279 
5280 /*
5281  * _ii_bitsset
5282  *	Copy out II pair bitmaps to user program
5283  *
5284  * Calling/Exit State:
5285  *	Returns 0 if the operation succeeded. Otherwise an error code
5286  *	is returned and any additional error information is copied
5287  *	out to the user.
5288  */
5289 int
5290 _ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp)
5291 {
5292 	dsw_bitsset_t ubitsset;
5293 	dsw_bitsset32_t ubitsset32;
5294 	nsc_size_t nbitsset;
5295 	_ii_info_t *ip;
5296 	int rc;
5297 	spcs_s_info_t kstatus;
5298 	int bitmap_size;
5299 
5300 	*rvp = 0;
5301 
5302 	if (ilp32) {
5303 		if (copyin((void *)arg, &ubitsset32, sizeof (ubitsset32)))
5304 			return (EFAULT);
5305 		ubitsset.status = (spcs_s_info_t)ubitsset32.status;
5306 		bcopy(ubitsset32.shadow_vol, ubitsset.shadow_vol, DSW_NAMELEN);
5307 	} else if (copyin((void *)arg, &ubitsset, sizeof (ubitsset)))
5308 		return (EFAULT);
5309 
5310 	kstatus = spcs_s_kcreate();
5311 	if (kstatus == NULL)
5312 		return (ENOMEM);
5313 
5314 	if (!ubitsset.shadow_vol[0])
5315 		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5316 		    DSW_EEMPTY));
5317 
5318 	mutex_enter(&_ii_info_mutex);
5319 	ip = _ii_find_set(ubitsset.shadow_vol);
5320 	mutex_exit(&_ii_info_mutex);
5321 	if (ip == NULL)
5322 		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5323 		    DSW_ENOTFOUND));
5324 
5325 	mutex_exit(&ip->bi_mutex);
5326 
5327 	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
5328 		mutex_enter(&ip->bi_mutex);
5329 		_ii_ioctl_done(ip);
5330 		mutex_exit(&ip->bi_mutex);
5331 		spcs_s_add(kstatus, rc);
5332 		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5333 		    DSW_ERSRVFAIL));
5334 	}
5335 
5336 	ubitsset.tot_size = ip->bi_size / DSW_SIZE;
5337 	if ((ip->bi_size % DSW_SIZE) != 0)
5338 		++ubitsset.tot_size;
5339 	bitmap_size = (ubitsset.tot_size + 7) / 8;
5340 	if (cmd == DSWIOC_SBITSSET)
5341 		rc = II_CNT_BITS(ip, ip->bi_shdfba, &nbitsset, bitmap_size);
5342 	else
5343 		rc = II_CNT_BITS(ip, ip->bi_copyfba, &nbitsset, bitmap_size);
5344 	ubitsset.tot_set = nbitsset;
5345 	_ii_rlse_devs(ip, BMP);
5346 	mutex_enter(&ip->bi_mutex);
5347 	_ii_ioctl_done(ip);
5348 	mutex_exit(&ip->bi_mutex);
5349 	if (rc) {
5350 		spcs_s_add(kstatus, rc);
5351 		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EIO));
5352 	}
5353 
5354 	spcs_s_kfree(kstatus);
5355 	/* return the fetched names to the user */
5356 	if (ilp32) {
5357 		ubitsset32.status = (spcs_s_info32_t)ubitsset.status;
5358 		ubitsset32.tot_size = ubitsset.tot_size;
5359 		ubitsset32.tot_set = ubitsset.tot_set;
5360 		rc = copyout(&ubitsset32, (void *)arg, sizeof (ubitsset32));
5361 	} else {
5362 		rc = copyout(&ubitsset, (void *)arg, sizeof (ubitsset));
5363 	}
5364 
5365 	return (rc);
5366 }
5367 
5368 /*
5369  * _ii_stopvol
5370  *	Stop any copying process for shadow, and stop shadowing
5371  *
5372  */
5373 
5374 static void
5375 _ii_stopvol(_ii_info_t *ip)
5376 {
5377 	nsc_path_t *mst_tok;
5378 	nsc_path_t *mstr_tok;
5379 	nsc_path_t *shd_tok;
5380 	nsc_path_t *shdr_tok;
5381 	nsc_path_t *bmp_tok;
5382 	int rc;
5383 
5384 	while (_ii_stopcopy(ip) == EINTR)
5385 		;
5386 
5387 	DTRACE_PROBE(_ii_stopvol);
5388 
5389 	mutex_enter(&ip->bi_mutex);
5390 	mst_tok = ip->bi_mst_tok;
5391 	mstr_tok = ip->bi_mstr_tok;
5392 	shd_tok = ip->bi_shd_tok;
5393 	shdr_tok = ip->bi_shdr_tok;
5394 	bmp_tok = ip->bi_bmp_tok;
5395 	ip->bi_shd_tok = 0;
5396 	ip->bi_shdr_tok = 0;
5397 	if (!NSHADOWS(ip)) {
5398 		ip->bi_mst_tok = 0;
5399 		ip->bi_mstr_tok = 0;
5400 	}
5401 	ip->bi_bmp_tok = 0;
5402 
5403 	/* Wait for any _ii_open() calls to complete */
5404 
5405 	while (ip->bi_ioctl) {
5406 		ip->bi_state |= DSW_IOCTL;
5407 		cv_wait(&ip->bi_ioctlcv, &ip->bi_mutex);
5408 	}
5409 	mutex_exit(&ip->bi_mutex);
5410 
5411 	rc = _ii_reserve_begin(ip);
5412 	if (rc) {
5413 		cmn_err(CE_WARN, "!_ii_stopvol: _ii_reserve_begin %d", rc);
5414 	}
5415 	if (!NSHADOWS(ip)) {
5416 		if (mst_tok) {
5417 			rc = _ii_unregister_path(mst_tok, NSC_PCATCH,
5418 			    "master");
5419 			if (rc)
5420 				cmn_err(CE_WARN, "!ii: unregister master %d",
5421 				    rc);
5422 		}
5423 
5424 		if (mstr_tok) {
5425 			rc = _ii_unregister_path(mstr_tok, NSC_PCATCH,
5426 			    "raw master");
5427 			if (rc)
5428 				cmn_err(CE_WARN, "!ii: unregister raw "
5429 				    "master %d", rc);
5430 		}
5431 	}
5432 
5433 	if (shd_tok) {
5434 		rc = _ii_unregister_path(shd_tok, NSC_PCATCH, "shadow");
5435 		if (rc)
5436 			cmn_err(CE_WARN, "!ii: unregister shadow %d", rc);
5437 	}
5438 
5439 	if (shdr_tok) {
5440 		rc = _ii_unregister_path(shdr_tok, NSC_PCATCH, "raw shadow");
5441 		if (rc)
5442 			cmn_err(CE_WARN, "!ii: unregister raw shadow %d", rc);
5443 	}
5444 
5445 	if (bmp_tok) {
5446 		rc = _ii_unregister_path(bmp_tok, NSC_PCATCH, "bitmap");
5447 		if (rc)
5448 			cmn_err(CE_WARN, "!ii: unregister bitmap %d", rc);
5449 	}
5450 	_ii_reserve_end(ip);
5451 
5452 	/* Wait for all necessary _ii_close() calls to complete */
5453 	mutex_enter(&ip->bi_mutex);
5454 
5455 	while (total_ref(ip) != 0) {
5456 		ip->bi_state |= DSW_CLOSING;
5457 		cv_wait(&ip->bi_closingcv, &ip->bi_mutex);
5458 	}
5459 	if (!NSHADOWS(ip)) {
5460 		nsc_set_owner(ip->bi_mstfd, NULL);
5461 		nsc_set_owner(ip->bi_mstrfd, NULL);
5462 	}
5463 	nsc_set_owner(ip->bi_shdfd, NULL);
5464 	nsc_set_owner(ip->bi_shdrfd, NULL);
5465 	mutex_exit(&ip->bi_mutex);
5466 
5467 }
5468 
5469 
5470 /*
5471  * _ii_ioctl_done
5472  *	If this is the last one to complete, wakeup all processes waiting
5473  *	for ioctls to complete
5474  *
5475  */
5476 
5477 static void
5478 _ii_ioctl_done(_ii_info_t *ip)
5479 {
5480 	ASSERT(ip->bi_ioctl > 0);
5481 	ip->bi_ioctl--;
5482 	if (ip->bi_ioctl == 0 && (ip->bi_state & DSW_IOCTL)) {
5483 		ip->bi_state &= ~DSW_IOCTL;
5484 		cv_broadcast(&ip->bi_ioctlcv);
5485 	}
5486 
5487 }
5488 
5489 /*
5490  * _ii_find_vol
5491  *	Search the configured shadows list for the supplied volume.
5492  *	If found, flag an ioctl in progress and return the locked _ii_info_t.
5493  *
5494  *	The caller must check to see if the bi_disable flag is set and
5495  *	treat it appropriately.
5496  *
5497  * ASSUMPTION:
5498  *	_ii_info_mutex must be locked prior to calling this function
5499  *
5500  */
5501 
5502 static _ii_info_t *
5503 _ii_find_vol(char *volume, int vol)
5504 {
5505 	_ii_info_t **xip, *ip;
5506 
5507 	for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
5508 		if ((*xip)->bi_disabled)
5509 			continue;
5510 		if (strcmp(volume, vol == MST ? ii_pathname((*xip)->bi_mstfd) :
5511 		    (*xip)->bi_keyname) == 0) {
5512 			break;
5513 		}
5514 	}
5515 
5516 	if (!*xip) {
5517 		DTRACE_PROBE(VolNotFound);
5518 		return (NULL);
5519 	}
5520 
5521 	ip = *xip;
5522 	if (!ip->bi_shd_tok && ((ip->bi_flags & DSW_SHDEXPORT) == 0)) {
5523 		/* Not fully configured until bi_shd_tok is set */
5524 		DTRACE_PROBE(SetNotConfiged);
5525 		return (NULL);
5526 
5527 	}
5528 	mutex_enter(&ip->bi_mutex);
5529 	ip->bi_ioctl++;
5530 
5531 	return (ip);
5532 }
5533 
5534 static _ii_info_t *
5535 _ii_find_set(char *volume)
5536 {
5537 	return (_ii_find_vol(volume, SHD));
5538 }
5539 
5540 /*
5541  * _ii_find_overflow
5542  *	Search the configured shadows list for the supplied overflow volume.
5543  *
5544  */
5545 
5546 static _ii_overflow_t *
5547 _ii_find_overflow(char *volume)
5548 {
5549 	_ii_overflow_t **xop, *op;
5550 
5551 	mutex_enter(&_ii_overflow_mutex);
5552 
5553 	DTRACE_PROBE(_ii_find_overflowmutex);
5554 
5555 	for (xop = &_ii_overflow_top; *xop; xop = &(*xop)->ii_next) {
5556 		if (strcmp(volume, (*xop)->ii_volname) == 0) {
5557 			break;
5558 		}
5559 	}
5560 
5561 	if (!*xop) {
5562 		mutex_exit(&_ii_overflow_mutex);
5563 		return (NULL);
5564 	}
5565 
5566 	op = *xop;
5567 	mutex_exit(&_ii_overflow_mutex);
5568 
5569 	return (op);
5570 }
5571 
5572 /*
5573  * _ii_bm_header_get
5574  *	Fetch the bitmap volume header
5575  *
5576  */
5577 
5578 ii_header_t *
5579 _ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp)
5580 {
5581 	ii_header_t *hdr;
5582 	nsc_off_t read_fba;
5583 	int rc;
5584 
5585 	ASSERT(ip->bi_bmprsrv);		/* assert bitmap is reserved */
5586 	ASSERT(MUTEX_HELD(&ip->bi_mutex));
5587 
5588 	if ((ip->bi_flags & DSW_BMPOFFLINE) != 0)
5589 		return (NULL);
5590 
5591 	*tmp = NULL;
5592 	read_fba = 0;
5593 
5594 	II_READ_START(ip, bitmap);
5595 	rc = nsc_alloc_buf(ip->bi_bmpfd, read_fba,
5596 	    FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, tmp);
5597 	II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
5598 	if (!II_SUCCESS(rc)) {
5599 		if (ii_debug > 2)
5600 			cmn_err(CE_WARN, "!ii: nsc_alloc_buf returned 0x%x",
5601 			    rc);
5602 		if (*tmp)
5603 			(void) nsc_free_buf(*tmp);
5604 		*tmp = NULL;
5605 		mutex_exit(&ip->bi_mutex);
5606 		_ii_error(ip, DSW_BMPOFFLINE);
5607 		mutex_enter(&ip->bi_mutex);
5608 		return (NULL);
5609 	}
5610 
5611 	hdr = (ii_header_t *)(*tmp)->sb_vec[0].sv_addr;
5612 
5613 	return (hdr);
5614 }
5615 
5616 
5617 /*
5618  * _ii_bm_header_free
5619  *	Free the bitmap volume header
5620  *
5621  */
5622 
5623 /* ARGSUSED */
5624 
5625 void
5626 _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
5627 {
5628 	(void) nsc_free_buf(tmp);
5629 
5630 }
5631 
5632 /*
5633  * _ii_bm_header_put
5634  *	Write out the modified bitmap volume header and free it
5635  *
5636  */
5637 
5638 /* ARGSUSED */
5639 
5640 int
5641 _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
5642 {
5643 	nsc_off_t write_fba;
5644 	int rc;
5645 
5646 	ASSERT(MUTEX_HELD(&ip->bi_mutex));
5647 
5648 	write_fba = 0;
5649 
5650 	II_NSC_WRITE(ip, bitmap, rc, tmp, write_fba,
5651 	    FBA_LEN(sizeof (ii_header_t)), 0);
5652 
5653 	(void) nsc_free_buf(tmp);
5654 	if (!II_SUCCESS(rc)) {
5655 		mutex_exit(&ip->bi_mutex);
5656 		_ii_error(ip, DSW_BMPOFFLINE);
5657 		mutex_enter(&ip->bi_mutex);
5658 		DTRACE_PROBE(_ii_bm_header_put);
5659 		return (rc);
5660 	} else {
5661 		DTRACE_PROBE(_ii_bm_header_put_end);
5662 		return (0);
5663 	}
5664 }
5665 
5666 /*
5667  * _ii_flag_op
5668  *	Clear or set a flag in bi_flags and dsw_state.
5669  *	This relies on the ownership of the header block's nsc_buf
5670  *	for locking.
5671  *
5672  */
5673 
5674 void
5675 _ii_flag_op(and, or, ip, update)
5676 int	and, or;
5677 _ii_info_t *ip;
5678 int update;
5679 {
5680 	ii_header_t *bm_header;
5681 	nsc_buf_t *tmp;
5682 
5683 	ip->bi_flags &= and;
5684 	ip->bi_flags |= or;
5685 
5686 	if (update == TRUE) {
5687 
5688 		/*
5689 		 * No point trying to access bitmap header if it's offline
5690 		 * or has been disassociated from set via DSW_HANGING
5691 		 */
5692 		if ((ip->bi_flags & (DSW_BMPOFFLINE|DSW_HANGING)) == 0) {
5693 			bm_header = _ii_bm_header_get(ip, &tmp);
5694 			if (bm_header == NULL) {
5695 				if (tmp)
5696 					(void) nsc_free_buf(tmp);
5697 				DTRACE_PROBE(_ii_flag_op_end);
5698 				return;
5699 			}
5700 			bm_header->ii_state &= and;
5701 			bm_header->ii_state |= or;
5702 			/* copy over the mtime */
5703 			bm_header->ii_mtime = ip->bi_mtime;
5704 			(void) _ii_bm_header_put(bm_header, ip, tmp);
5705 		}
5706 	}
5707 
5708 }
5709 
5710 /*
5711  * _ii_nsc_io
5712  *	Perform read or write on an underlying nsc device
5713  * fd		- nsc file descriptor
5714  * flag		- nsc io direction and characteristics flag
5715  * fba_pos	- offset from beginning of device in FBAs
5716  * io_addr	- pointer to data buffer
5717  * io_len	- length of io in bytes
5718  */
5719 
5720 int
5721 _ii_nsc_io(_ii_info_t *ip, int ks, nsc_fd_t *fd, int flag, nsc_off_t fba_pos,
5722     unsigned char *io_addr, nsc_size_t io_len)
5723 {
5724 	nsc_buf_t *tmp = NULL;
5725 	nsc_vec_t *vecp;
5726 	uchar_t	*vaddr;
5727 	size_t	copy_len;
5728 	int64_t	vlen;
5729 	int	rc;
5730 	nsc_size_t	fba_req, fba_len;
5731 	nsc_size_t	maxfbas = 0;
5732 	nsc_size_t	tocopy;
5733 	unsigned char *toaddr;
5734 
5735 	rc = nsc_maxfbas(fd, 0, &maxfbas);
5736 	if (!II_SUCCESS(rc)) {
5737 #ifdef DEBUG
5738 		cmn_err(CE_WARN, "!_ii_nsc_io: maxfbas failed (%d)", rc);
5739 #endif
5740 		maxfbas = DSW_CBLK_FBA;
5741 	}
5742 
5743 	toaddr = io_addr;
5744 	fba_req = FBA_LEN(io_len);
5745 
5746 #ifdef DEBUG_SPLIT_IO
5747 	cmn_err(CE_NOTE, "!_ii_nsc_io: maxfbas = %08x", maxfbas);
5748 	cmn_err(CE_NOTE, "!_ii_nsc_io: toaddr=%08x, io_len=%08x, fba_req=%08x",
5749 	    toaddr, io_len, fba_req);
5750 #endif
5751 
5752 loop:
5753 	tmp = NULL;
5754 	fba_len = min(fba_req, maxfbas);
5755 	tocopy = min(io_len, FBA_SIZE(fba_len));
5756 
5757 	DTRACE_PROBE2(_ii_nsc_io_buffer, nsc_off_t, fba_pos,
5758 	    nsc_size_t, fba_len);
5759 
5760 #ifdef DEBUG_SPLIT_IO
5761 	cmn_err(CE_NOTE, "!_ii_nsc_io: fba_pos=%08x, fba_len=%08x",
5762 	    fba_pos, fba_len);
5763 #endif
5764 
5765 #ifndef DISABLE_KSTATS
5766 	if (flag & NSC_READ) {
5767 		switch (ks) {
5768 		case KS_MST:
5769 			II_READ_START(ip, master);
5770 			break;
5771 		case KS_SHD:
5772 			II_READ_START(ip, shadow);
5773 			break;
5774 		case KS_BMP:
5775 			II_READ_START(ip, bitmap);
5776 			break;
5777 		case KS_OVR:
5778 			II_READ_START(ip, overflow);
5779 			break;
5780 		default:
5781 			cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5782 			break;
5783 		}
5784 	}
5785 #endif
5786 
5787 	rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp);
5788 
5789 #ifndef DISABLE_KSTATS
5790 	if (flag & NSC_READ) {
5791 		switch (ks) {
5792 		case KS_MST:
5793 			II_READ_END(ip, master, rc, fba_len);
5794 			break;
5795 		case KS_SHD:
5796 			II_READ_END(ip, shadow, rc, fba_len);
5797 			break;
5798 		case KS_BMP:
5799 			II_READ_END(ip, bitmap, rc, fba_len);
5800 			break;
5801 		case KS_OVR:
5802 			II_READ_END(ip, overflow, rc, fba_len);
5803 			break;
5804 		}
5805 	}
5806 #endif
5807 
5808 	if (!II_SUCCESS(rc)) {
5809 		if (tmp) {
5810 			(void) nsc_free_buf(tmp);
5811 		}
5812 
5813 		return (EIO);
5814 	}
5815 
5816 	if ((flag & (NSC_WRITE|NSC_READ)) == NSC_WRITE &&
5817 	    (FBA_OFF(io_len) != 0)) {
5818 		/*
5819 		 * Not overwriting all of the last FBA, so read in the
5820 		 * old contents now before we overwrite it with the new
5821 		 * data.
5822 		 */
5823 #ifdef DEBUG_SPLIT_IO
5824 		cmn_err(CE_NOTE, "!_ii_nsc_io: Read-B4-Write %08x",
5825 		    fba_pos+FBA_NUM(io_len));
5826 #endif
5827 
5828 #ifdef DISABLE_KSTATS
5829 		rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5830 #else
5831 		switch (ks) {
5832 		case KS_MST:
5833 			II_NSC_READ(ip, master, rc, tmp,
5834 			    fba_pos+FBA_NUM(io_len), 1, 0);
5835 			break;
5836 		case KS_SHD:
5837 			II_NSC_READ(ip, shadow, rc, tmp,
5838 			    fba_pos+FBA_NUM(io_len), 1, 0);
5839 			break;
5840 		case KS_BMP:
5841 			II_NSC_READ(ip, bitmap, rc, tmp,
5842 			    fba_pos+FBA_NUM(io_len), 1, 0);
5843 			break;
5844 		case KS_OVR:
5845 			II_NSC_READ(ip, overflow, rc, tmp,
5846 			    fba_pos+FBA_NUM(io_len), 1, 0);
5847 			break;
5848 		case KS_NA:
5849 			rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5850 			break;
5851 		default:
5852 			cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5853 			rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5854 			break;
5855 		}
5856 #endif
5857 		if (!II_SUCCESS(rc)) {
5858 			(void) nsc_free_buf(tmp);
5859 			return (EIO);
5860 		}
5861 	}
5862 
5863 	vecp = tmp->sb_vec;
5864 	vlen = vecp->sv_len;
5865 	vaddr = vecp->sv_addr;
5866 
5867 	while (tocopy > 0) {
5868 		if (vecp->sv_addr == 0 || vecp->sv_len == 0) {
5869 #ifdef DEBUG
5870 			cmn_err(CE_WARN, "!_ii_nsc_io: ran off end of handle");
5871 #endif
5872 			break;
5873 		}
5874 
5875 		copy_len = (size_t)min(vlen, tocopy);
5876 
5877 		DTRACE_PROBE1(_ii_nsc_io_bcopy, size_t, copy_len);
5878 
5879 		if (flag & NSC_WRITE)
5880 			bcopy(io_addr, vaddr, copy_len);
5881 		else
5882 			bcopy(vaddr, io_addr, copy_len);
5883 
5884 		toaddr += copy_len;
5885 		tocopy -= copy_len;
5886 		io_addr += copy_len;
5887 		io_len -= copy_len;
5888 		vaddr += copy_len;
5889 		vlen -= copy_len;
5890 
5891 		if (vlen <= 0) {
5892 			vecp++;
5893 			vaddr = vecp->sv_addr;
5894 			vlen = vecp->sv_len;
5895 		}
5896 	}
5897 
5898 	if (flag & NSC_WRITE) {
5899 #ifdef DISABLE_KSTATS
5900 		rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5901 #else
5902 		switch (ks) {
5903 		case KS_MST:
5904 			II_NSC_WRITE(ip, master, rc, tmp, tmp->sb_pos,
5905 			    tmp->sb_len, 0);
5906 			break;
5907 		case KS_SHD:
5908 			II_NSC_WRITE(ip, shadow, rc, tmp, tmp->sb_pos,
5909 			    tmp->sb_len, 0);
5910 			break;
5911 		case KS_BMP:
5912 			II_NSC_WRITE(ip, bitmap, rc, tmp, tmp->sb_pos,
5913 			    tmp->sb_len, 0);
5914 			break;
5915 		case KS_OVR:
5916 			II_NSC_WRITE(ip, overflow, rc, tmp, tmp->sb_pos,
5917 			    tmp->sb_len, 0);
5918 			break;
5919 		case KS_NA:
5920 			rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5921 			break;
5922 		default:
5923 			cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5924 			rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5925 			break;
5926 		}
5927 #endif
5928 		if (!II_SUCCESS(rc)) {
5929 			(void) nsc_free_buf(tmp);
5930 			return (rc);
5931 		}
5932 	}
5933 
5934 	(void) nsc_free_buf(tmp);
5935 
5936 	fba_pos += fba_len;
5937 	fba_req -= fba_len;
5938 	if (fba_req > 0)
5939 		goto loop;
5940 
5941 	return (0);
5942 }
5943 
5944 
5945 /*
5946  * ii_overflow_attach
5947  */
5948 static int
5949 ii_overflow_attach(_ii_info_t *ip, char *name, int first)
5950 {
5951 	_ii_overflow_t *op;
5952 	int rc = 0;
5953 	int reserved = 0;
5954 	int mutex_set = 0;
5955 	int II_OLD_OMAGIC = 0x426c7565; /* "Blue" */
5956 
5957 	mutex_enter(&_ii_overflow_mutex);
5958 	/* search for name in list */
5959 	for (op = _ii_overflow_top; op; op = op->ii_next) {
5960 		if (strncmp(op->ii_volname, name, DSW_NAMELEN) == 0)
5961 			break;
5962 	}
5963 	if (op) {
5964 		ip->bi_overflow = op;
5965 		op->ii_crefcnt++;
5966 		op->ii_drefcnt++;
5967 		if ((op->ii_flags & IIO_CNTR_INVLD) && (op->ii_hversion >= 1)) {
5968 			if (!first)
5969 				mutex_enter(&ip->bi_mutex);
5970 			ip->bi_flags |= DSW_OVRHDRDRTY;
5971 			if (!first)
5972 				mutex_exit(&ip->bi_mutex);
5973 			op->ii_urefcnt++;
5974 		}
5975 #ifndef DISABLE_KSTATS
5976 		ip->bi_kstat_io.overflow = op->ii_overflow;
5977 		(void) strlcpy(ip->bi_kstat_io.ovrio, op->ii_ioname,
5978 		    KSTAT_DATA_CHAR_LEN);
5979 #endif
5980 		/* write header */
5981 		if (!(rc = nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI))) {
5982 			rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
5983 			    NSC_WRBUF, II_OHEADER_FBA,
5984 			    (unsigned char *)&op->ii_do, sizeof (op->ii_do));
5985 			(void) nsc_release(op->ii_dev->bi_fd);
5986 			++iigkstat.assoc_over.value.ul;
5987 		}
5988 		mutex_exit(&_ii_overflow_mutex);
5989 		return (rc);
5990 	}
5991 	if ((op = kmem_zalloc(sizeof (*op), KM_SLEEP)) == NULL) {
5992 		mutex_exit(&_ii_overflow_mutex);
5993 		return (ENOMEM);
5994 	}
5995 	if ((op->ii_dev = kmem_zalloc(sizeof (_ii_info_dev_t), KM_SLEEP))
5996 	    == NULL) {
5997 		kmem_free(op, sizeof (*op));
5998 		mutex_exit(&_ii_overflow_mutex);
5999 		return (ENOMEM);
6000 	}
6001 #ifndef DISABLE_KSTATS
6002 	if ((op->ii_overflow = _ii_overflow_kstat_create(ip, op))) {
6003 		ip->bi_kstat_io.overflow = op->ii_overflow;
6004 		(void) strlcpy(op->ii_ioname, ip->bi_kstat_io.ovrio,
6005 		    KSTAT_DATA_CHAR_LEN);
6006 	} else {
6007 		goto fail;
6008 	}
6009 #endif
6010 	/* open overflow volume */
6011 	op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL,
6012 	    (blind_t)&(op->ii_dev->bi_iodev), &rc);
6013 	if (!op->ii_dev->bi_fd)
6014 		op->ii_dev->bi_fd = nsc_open(name,
6015 		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
6016 		    (blind_t)&(op->ii_dev->bi_iodev), &rc);
6017 	if (op->ii_dev->bi_fd == NULL) {
6018 		goto fail;
6019 	}
6020 	if ((rc = nsc_reserve(op->ii_dev->bi_fd, 0)) != 0)
6021 		goto fail;
6022 	reserved = 1;
6023 	/* register path */
6024 	op->ii_dev->bi_tok = _ii_register_path(name, NSC_DEVICE,
6025 	    _ii_ior);
6026 	if (!op->ii_dev->bi_tok) {
6027 		goto fail;
6028 	}
6029 	/* read header */
6030 	rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF,
6031 	    II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
6032 	if (!II_SUCCESS(rc)) {
6033 		_ii_error(ip, DSW_OVROFFLINE);
6034 		goto fail;
6035 	}
6036 	/* On resume, check for old hmagic */
6037 	if (strncmp(op->ii_volname, name, DSW_NAMELEN) ||
6038 	    ((op->ii_hmagic != II_OLD_OMAGIC) &&
6039 	    (op->ii_hmagic != II_OMAGIC))) {
6040 		rc = DSW_EOMAGIC;
6041 		goto fail;
6042 	}
6043 	/* set up counts */
6044 	op->ii_crefcnt = 1;
6045 	op->ii_drefcnt = 0;
6046 	op->ii_urefcnt = 0;
6047 	op->ii_hmagic = II_OMAGIC;
6048 	if (!first) {
6049 		/* if header version > 0, check if header written */
6050 		if (((op->ii_flags & IIO_HDR_WRTN) == 0) &&
6051 		    (op->ii_hversion >= 1)) {
6052 			op->ii_flags |= IIO_CNTR_INVLD;
6053 			mutex_enter(&ip->bi_mutex);
6054 			ip->bi_flags |= DSW_OVRHDRDRTY;
6055 			mutex_exit(&ip->bi_mutex);
6056 			op->ii_urefcnt++;
6057 		}
6058 	}
6059 	op->ii_flags &= ~IIO_HDR_WRTN;
6060 	op->ii_drefcnt++;
6061 	/* write header */
6062 	rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
6063 	    II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
6064 	nsc_release(op->ii_dev->bi_fd);
6065 	reserved = 0;
6066 	if (!II_SUCCESS(rc)) {
6067 		_ii_error(ip, DSW_OVROFFLINE);
6068 		goto fail;
6069 	}
6070 
6071 	mutex_init(&op->ii_mutex, NULL, MUTEX_DRIVER, NULL);
6072 	mutex_set++;
6073 
6074 	/* link onto list */
6075 	op->ii_next = _ii_overflow_top;
6076 	_ii_overflow_top = op;
6077 	ip->bi_overflow = op;
6078 
6079 	++iigkstat.assoc_over.value.ul;
6080 	mutex_exit(&_ii_overflow_mutex);
6081 
6082 	DTRACE_PROBE(_ii_overflow_attach_end);
6083 	return (0);
6084 fail:
6085 #ifndef DISABLE_KSTATS
6086 	/* Clean-up kstat stuff */
6087 	if (op->ii_overflow) {
6088 		kstat_delete(op->ii_overflow);
6089 		mutex_destroy(&op->ii_kstat_mutex);
6090 	}
6091 #endif
6092 	/* clean up mutex if we made it that far */
6093 	if (mutex_set) {
6094 		mutex_destroy(&op->ii_mutex);
6095 	}
6096 
6097 	if (op->ii_dev) {
6098 		if (op->ii_dev->bi_tok) {
6099 			(void) _ii_unregister_path(op->ii_dev->bi_tok, 0,
6100 			    "overflow");
6101 		}
6102 		if (reserved)
6103 			(void) nsc_release(op->ii_dev->bi_fd);
6104 		if (op->ii_dev->bi_fd)
6105 			(void) nsc_close(op->ii_dev->bi_fd);
6106 		kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
6107 	}
6108 	kmem_free(op, sizeof (*op));
6109 	mutex_exit(&_ii_overflow_mutex);
6110 
6111 	return (rc);
6112 }
6113 
6114 /*
6115  * ii_overflow_free
6116  * Assumes that ip is locked for I/O
6117  */
6118 static void
6119 ii_overflow_free(_ii_info_t *ip, int reclaim)
6120 {
6121 	_ii_overflow_t *op, **xp;
6122 
6123 	if ((op = ip->bi_overflow) == NULL)
6124 		return;
6125 	ip->bi_kstat_io.overflow = NULL;
6126 	mutex_enter(&_ii_overflow_mutex);
6127 	switch (reclaim) {
6128 	case NO_RECLAIM:
6129 		if (--(op->ii_drefcnt) == 0) {
6130 			/* indicate header written */
6131 			op->ii_flags |= IIO_HDR_WRTN;
6132 			/* write out header */
6133 			ASSERT(op->ii_dev->bi_fd);
6134 			(void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
6135 			(void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
6136 			    NSC_WRBUF, II_OHEADER_FBA,
6137 			    (unsigned char *)&op->ii_do,
6138 			    sizeof (op->ii_do));
6139 			nsc_release(op->ii_dev->bi_fd);
6140 		}
6141 		break;
6142 	case RECLAIM:
6143 		ii_reclaim_overflow(ip);
6144 		/* FALLTHRU */
6145 	case INIT_OVR:
6146 		if (--(op->ii_drefcnt) == 0) {
6147 			/* reset to new condition, c.f. _ii_ocreate() */
6148 			op->ii_used = 1;
6149 			op->ii_unused = op->ii_nchunks - op->ii_used;
6150 			op->ii_freehead = II_NULLNODE;
6151 		}
6152 
6153 		/* write out header */
6154 		ASSERT(op->ii_dev->bi_fd);
6155 		(void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
6156 		(void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
6157 		    II_OHEADER_FBA, (unsigned char *)&op->ii_do,
6158 		    sizeof (op->ii_do));
6159 		nsc_release(op->ii_dev->bi_fd);
6160 	}
6161 
6162 	if (--(op->ii_crefcnt) == 0) {
6163 		/* Close fd and unlink from active chain; */
6164 
6165 		(void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow");
6166 		(void) nsc_close(op->ii_dev->bi_fd);
6167 
6168 		for (xp = &_ii_overflow_top; *xp && *xp != op;
6169 		    xp = &((*xp)->ii_next))
6170 			/* NULL statement */;
6171 		*xp = op->ii_next;
6172 
6173 		if (op->ii_overflow) {
6174 			kstat_delete(op->ii_overflow);
6175 		}
6176 
6177 		/* Clean up ii_overflow_t mutexs */
6178 		mutex_destroy(&op->ii_kstat_mutex);
6179 		mutex_destroy(&op->ii_mutex);
6180 
6181 		if (op->ii_dev)
6182 			kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
6183 		kmem_free(op, sizeof (*op));
6184 	}
6185 	ip->bi_overflow = NULL;
6186 	--iigkstat.assoc_over.value.ul;
6187 	mutex_exit(&_ii_overflow_mutex);
6188 
6189 }
6190 
6191 /*
6192  * ii_sibling_free
6193  *	Free resources and unlink the sibling chains etc.
6194  */
6195 
6196 static void
6197 ii_sibling_free(_ii_info_t *ip)
6198 {
6199 	_ii_info_t *hip, *yip;
6200 
6201 	if (!ip)
6202 		return;
6203 
6204 	if (ip->bi_shdr_tok)
6205 		(void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
6206 
6207 	if (ip->bi_shd_tok)
6208 		(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
6209 
6210 	rw_enter(&ip->bi_linkrw, RW_WRITER);
6211 
6212 	ip->bi_shd_tok = NULL;
6213 	ip->bi_shdr_tok = NULL;
6214 
6215 	if (NSHADOWS(ip)) {
6216 		mutex_enter(&_ii_info_mutex);
6217 		if (ip->bi_head == ip) {	/* removing head of list */
6218 			hip = ip->bi_sibling;
6219 			for (yip = hip; yip; yip = yip->bi_sibling)
6220 				yip->bi_head = hip;
6221 
6222 		} else {		/* removing member of list */
6223 			hip = ip->bi_head;
6224 			for (yip = ip->bi_head; yip; yip = yip->bi_sibling) {
6225 				if (yip->bi_sibling == ip) {
6226 					yip->bi_sibling = ip->bi_sibling;
6227 					break;
6228 				}
6229 			}
6230 		}
6231 		hip->bi_master->bi_head = hip;
6232 		if (ip->bi_master == ip) {    /* master I/O goes through this */
6233 			mutex_exit(&_ii_info_mutex);
6234 			_ii_info_freeshd(ip);
6235 			rw_exit(&ip->bi_linkrw);
6236 			return;
6237 		}
6238 		mutex_exit(&_ii_info_mutex);
6239 	} else {
6240 		if (ip->bi_master != ip)	/* last ref to master side ip */
6241 			_ii_info_free(ip->bi_master);	/* ==A== */
6242 	}
6243 
6244 	if (ip->bi_master != ip) {	/* info_free ==A== will close these */
6245 		/*
6246 		 * Null out any pointers to shared master side resources
6247 		 * that should only be freed once when the last reference
6248 		 * to this master is freed and calls _ii_info_free().
6249 		 */
6250 		ip->bi_mstdev = NULL;
6251 		ip->bi_mstrdev = NULL;
6252 		ip->bi_kstat_io.master = NULL;
6253 	}
6254 	rw_exit(&ip->bi_linkrw);
6255 	_ii_info_free(ip);
6256 
6257 }
6258 
6259 /*
6260  * _ii_info_freeshd
6261  *	Free shadow side resources
6262  *
6263  * Calling/Exit State:
6264  *	No mutexes should be held on entry to this function.
6265  *
6266  * Description:
6267  *	Frees the system resources associated with the shadow
6268  *	access, leaving the master side alone. This allows the
6269  *	original master side to continue in use while there are
6270  *	outstanding references to this _ii_info_t.
6271  */
6272 
6273 static void
6274 _ii_info_freeshd(_ii_info_t *ip)
6275 {
6276 	if (!ip)
6277 		return;
6278 	if ((ip->bi_flags&DSW_HANGING) == DSW_HANGING)
6279 		return;		/* this work has already been completed */
6280 
6281 	II_FLAG_SETX(DSW_HANGING, ip);
6282 
6283 	if (ip->bi_cluster)
6284 		(void) II_UNLINK_CLUSTER(ip);
6285 	if (ip->bi_group)
6286 		(void) II_UNLINK_GROUP(ip);
6287 
6288 	if (ip->bi_shdfd && ip->bi_shdrsrv)
6289 		nsc_release(ip->bi_shdfd);
6290 	if (ip->bi_shdrfd && ip->bi_shdrrsrv)
6291 		nsc_release(ip->bi_shdrfd);
6292 	if (ip->bi_bmpfd && ip->bi_bmprsrv)
6293 		nsc_release(ip->bi_bmpfd);
6294 
6295 	if (ip->bi_bmp_tok)
6296 		(void) _ii_unregister_path(ip->bi_bmp_tok, 0, "bitmap");
6297 
6298 	if (ip->bi_shdr_tok)
6299 		(void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
6300 
6301 	if (ip->bi_shd_tok)
6302 		(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
6303 	ip->bi_shd_tok = NULL;
6304 	ip->bi_shdr_tok = NULL;
6305 
6306 	if (ip->bi_shdfd)
6307 		(void) nsc_close(ip->bi_shdfd);
6308 
6309 	if (ip->bi_shdrfd)
6310 		(void) nsc_close(ip->bi_shdrfd);
6311 
6312 	if (ip->bi_bmpfd)
6313 		(void) nsc_close(ip->bi_bmpfd);
6314 
6315 	ip->bi_shdfd = NULL;
6316 	ip->bi_shdrfd = NULL;
6317 	ip->bi_bmpfd = NULL;
6318 
6319 	if (ip->bi_busy)
6320 		kmem_free(ip->bi_busy,
6321 		    1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)));
6322 	ip->bi_busy = NULL;
6323 
6324 	if (ip->bi_kstat_io.shadow) {
6325 		kstat_delete(ip->bi_kstat_io.shadow);
6326 		ip->bi_kstat_io.shadow = NULL;
6327 	}
6328 	if (ip->bi_kstat_io.bitmap) {
6329 		kstat_delete(ip->bi_kstat_io.bitmap);
6330 		ip->bi_kstat_io.bitmap = NULL;
6331 	}
6332 	if (ip->bi_kstat) {
6333 		kstat_delete(ip->bi_kstat);
6334 		ip->bi_kstat = NULL;
6335 	}
6336 
6337 }
6338 
6339 /*
6340  * _ii_info_free
6341  *	Free resources
6342  *
6343  * Calling/Exit State:
6344  *	No mutexes should be held on entry to this function.
6345  *
6346  * Description:
6347  *	Frees the system resources associated with the specified
6348  *	II information structure.
6349  */
6350 
6351 static void
6352 _ii_info_free(_ii_info_t *ip)
6353 {
6354 	_ii_info_t **xip;
6355 
6356 	if (!ip)
6357 		return;
6358 
6359 	mutex_enter(&_ii_info_mutex);
6360 	for (xip = &_ii_mst_top; *xip; xip = &((*xip)->bi_nextmst)) {
6361 		if (ip == *xip) {
6362 			*xip = ip->bi_nextmst;
6363 			break;
6364 		}
6365 	}
6366 	mutex_exit(&_ii_info_mutex);
6367 
6368 	/* this rw_enter forces us to wait until all nsc_buffers are freed */
6369 	rw_enter(&ip->bi_linkrw, RW_WRITER);
6370 	if (ip->bi_mstdev && ip->bi_mstfd && ip->bi_mstrsrv)
6371 		nsc_release(ip->bi_mstfd);
6372 	if (ip->bi_mstrdev && ip->bi_mstrfd && ip->bi_mstrrsrv)
6373 		nsc_release(ip->bi_mstrfd);
6374 
6375 	if (ip->bi_mstdev && ip->bi_mst_tok)
6376 		(void) _ii_unregister_path(ip->bi_mst_tok, 0, "master");
6377 	if (ip->bi_mstrdev && ip->bi_mstr_tok)
6378 		(void) _ii_unregister_path(ip->bi_mstr_tok, 0, "raw master");
6379 
6380 	if (ip->bi_mstdev && ip->bi_mstfd)
6381 		(void) nsc_close(ip->bi_mstfd);
6382 	if (ip->bi_mstrdev && ip->bi_mstrfd)
6383 		(void) nsc_close(ip->bi_mstrfd);
6384 	rw_exit(&ip->bi_linkrw);
6385 
6386 	if (ip->bi_mstdev) {
6387 		nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
6388 	}
6389 	if (ip->bi_mstrdev) {
6390 		nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
6391 	}
6392 
6393 	if (ip->bi_kstat_io.master) {
6394 		kstat_delete(ip->bi_kstat_io.master);
6395 	}
6396 	if (ip->bi_kstat_io.shadow) {
6397 		kstat_delete(ip->bi_kstat_io.shadow);
6398 		ip->bi_kstat_io.shadow = 0;
6399 	}
6400 	if (ip->bi_kstat_io.bitmap) {
6401 		kstat_delete(ip->bi_kstat_io.bitmap);
6402 		ip->bi_kstat_io.bitmap = 0;
6403 	}
6404 	if (ip->bi_kstat) {
6405 		kstat_delete(ip->bi_kstat);
6406 		ip->bi_kstat = NULL;
6407 	}
6408 
6409 	/* this rw_enter forces us to wait until all nsc_buffers are freed */
6410 	rw_enter(&ip->bi_linkrw, RW_WRITER);
6411 	rw_exit(&ip->bi_linkrw);
6412 
6413 	mutex_destroy(&ip->bi_mutex);
6414 	mutex_destroy(&ip->bi_rsrvmutex);
6415 	mutex_destroy(&ip->bi_rlsemutex);
6416 	mutex_destroy(&ip->bi_bmpmutex);
6417 	mutex_destroy(&ip->bi_chksmutex);
6418 	cv_destroy(&ip->bi_copydonecv);
6419 	cv_destroy(&ip->bi_reservecv);
6420 	cv_destroy(&ip->bi_releasecv);
6421 	cv_destroy(&ip->bi_ioctlcv);
6422 	cv_destroy(&ip->bi_closingcv);
6423 	cv_destroy(&ip->bi_busycv);
6424 	rw_destroy(&ip->bi_busyrw);
6425 	rw_destroy(&ip->bi_linkrw);
6426 
6427 	_ii_info_freeshd(ip);
6428 
6429 #ifdef DEBUG
6430 	ip->bi_head = (_ii_info_t *)0xdeadbeef;
6431 #endif
6432 
6433 	nsc_kmem_free(ip, sizeof (*ip));
6434 
6435 }
6436 
6437 /*
6438  * _ii_copy_chunks
6439  *	Perform a copy of some chunks
6440  *
6441  * Calling/Exit State:
6442  *	Returns 0 if the data was copied successfully, otherwise
6443  *	error code.
6444  *
6445  * Description:
6446  *	flag is set to CV_SHD2MST if the data is to be copied from the shadow
6447  *	to the master, 0 if it is to be copied from the master to the shadow.
6448  */
6449 
6450 static int
6451 _ii_copy_chunks(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
6452 {
6453 	int	mst_flag;
6454 	int	shd_flag;
6455 	int	ovr_flag;
6456 	nsc_off_t	pos;
6457 	nsc_size_t	len;
6458 	int	rc;
6459 	nsc_off_t	shd_pos;
6460 	chunkid_t	shd_chunk;
6461 	nsc_buf_t *mst_tmp = NULL;
6462 	nsc_buf_t *shd_tmp = NULL;
6463 
6464 	if (ip->bi_flags & DSW_MSTOFFLINE) {
6465 		DTRACE_PROBE(_ii_copy_chunks_end);
6466 		return (EIO);
6467 	}
6468 
6469 	if (ip->bi_flags & (DSW_SHDOFFLINE|DSW_SHDEXPORT|DSW_SHDIMPORT)) {
6470 		DTRACE_PROBE(_ii_copy_chunks_end);
6471 		return (EIO);
6472 	}
6473 
6474 	if (flag == CV_SHD2MST) {
6475 		mst_flag = NSC_WRBUF|NSC_WRTHRU;
6476 		shd_flag = NSC_RDBUF;
6477 	} else {
6478 		shd_flag = NSC_WRBUF|NSC_WRTHRU;
6479 		mst_flag = NSC_RDBUF;
6480 	}
6481 
6482 	pos = DSW_CHK2FBA(chunk_num);
6483 	len = DSW_SIZE * nchunks;
6484 	if (pos + len > ip->bi_size)
6485 		len = ip->bi_size - pos;
6486 	if (ip->bi_flags & DSW_TREEMAP) {
6487 		ASSERT(nchunks == 1);
6488 		shd_chunk = ii_tsearch(ip, chunk_num);
6489 		if (shd_chunk == II_NULLNODE) {
6490 			/* shadow is full */
6491 			mutex_enter(&ip->bi_mutex);
6492 			II_FLAG_SET(DSW_OVERFLOW, ip);
6493 			mutex_exit(&ip->bi_mutex);
6494 			DTRACE_PROBE(_ii_copy_chunks_end);
6495 			return (EIO);
6496 		}
6497 
6498 		ovr_flag = II_ISOVERFLOW(shd_chunk);
6499 		shd_pos = DSW_CHK2FBA((ovr_flag) ?
6500 		    II_2OVERFLOW(shd_chunk) : shd_chunk);
6501 	} else {
6502 		ovr_flag = FALSE;
6503 		shd_chunk = chunk_num;
6504 		shd_pos = pos;
6505 	}
6506 
6507 	/*
6508 	 * Always allocate the master side before the shadow to
6509 	 * avoid deadlocks on the same chunk.
6510 	 */
6511 
6512 	DTRACE_PROBE2(_ii_copy_chunks_alloc, nsc_off_t, pos, nsc_size_t, len);
6513 
6514 	II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, mst_flag, &mst_tmp);
6515 	if (!II_SUCCESS(rc)) {
6516 		if (mst_tmp)
6517 			(void) nsc_free_buf(mst_tmp);
6518 		_ii_error(ip, DSW_MSTOFFLINE);
6519 		DTRACE_PROBE(_ii_copy_chunks_end);
6520 		return (rc);
6521 	}
6522 
6523 	if (ovr_flag) {
6524 		/* use overflow volume */
6525 		(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
6526 		II_ALLOC_BUF(ip, overflow, rc, OVRFD(ip), shd_pos, len,
6527 		    shd_flag, &shd_tmp);
6528 	} else {
6529 		II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), shd_pos, len, shd_flag,
6530 		    &shd_tmp);
6531 	}
6532 	if (!II_SUCCESS(rc)) {
6533 		(void) nsc_free_buf(mst_tmp);
6534 		if (shd_tmp)
6535 			(void) nsc_free_buf(shd_tmp);
6536 		if (ovr_flag)
6537 			nsc_release(OVRFD(ip));
6538 		_ii_error(ip, DSW_SHDOFFLINE);
6539 		if (ovr_flag)
6540 			_ii_error(ip, DSW_OVROFFLINE);
6541 		DTRACE_PROBE(_ii_copy_chunks_end);
6542 		return (rc);
6543 	}
6544 
6545 	/*
6546 	 * The direction of copy is determined by the mst_flag.
6547 	 */
6548 	DTRACE_PROBE2(_ii_copy_chunks_copy, kstat_named_t, ii_copy_direct,
6549 	    int, mst_flag);
6550 
6551 	if (ii_copy_direct) {
6552 		if (mst_flag & NSC_WRBUF) {
6553 			if (ovr_flag) {
6554 				II_NSC_COPY_DIRECT(ip, overflow, master, rc,
6555 				    shd_tmp, mst_tmp, shd_pos, pos, len)
6556 			} else {
6557 				II_NSC_COPY_DIRECT(ip, shadow, master, rc,
6558 				    shd_tmp, mst_tmp, shd_pos, pos, len)
6559 			}
6560 			if (!II_SUCCESS(rc)) {
6561 				/* A copy has failed - something is wrong */
6562 				_ii_error(ip, DSW_MSTOFFLINE);
6563 				_ii_error(ip, DSW_SHDOFFLINE);
6564 				if (ovr_flag)
6565 					_ii_error(ip, DSW_OVROFFLINE);
6566 			}
6567 		} else {
6568 			if (ovr_flag) {
6569 				II_NSC_COPY_DIRECT(ip, master, overflow, rc,
6570 				    mst_tmp, shd_tmp, pos, shd_pos, len);
6571 			} else {
6572 				II_NSC_COPY_DIRECT(ip, master, shadow, rc,
6573 				    mst_tmp, shd_tmp, pos, shd_pos, len);
6574 			}
6575 			if (!II_SUCCESS(rc)) {
6576 				/*
6577 				 * A failure has occurred during the above copy.
6578 				 * The macro calls nsc_copy_direct, which will
6579 				 * never return a read failure, only a write
6580 				 * failure. With this assumption, we should
6581 				 * take only the target volume offline.
6582 				 */
6583 				_ii_error(ip, DSW_SHDOFFLINE);
6584 				if (ovr_flag)
6585 					_ii_error(ip, DSW_OVROFFLINE);
6586 			}
6587 		}
6588 	} else {
6589 		if (mst_flag & NSC_WRBUF) {
6590 			rc = nsc_copy(shd_tmp, mst_tmp, shd_pos, pos, len);
6591 			if (II_SUCCESS(rc)) {
6592 				II_NSC_WRITE(ip, master, rc, mst_tmp, pos, len,
6593 				    0);
6594 				if (!II_SUCCESS(rc))
6595 					_ii_error(ip, DSW_MSTOFFLINE);
6596 			} else {
6597 				/* A copy has failed - something is wrong */
6598 				_ii_error(ip, DSW_MSTOFFLINE);
6599 				_ii_error(ip, DSW_SHDOFFLINE);
6600 			}
6601 		} else {
6602 			rc = nsc_copy(mst_tmp, shd_tmp, pos, shd_pos, len);
6603 			if (II_SUCCESS(rc)) {
6604 				if (ovr_flag) {
6605 					II_NSC_WRITE(ip, overflow, rc, shd_tmp,
6606 					    shd_pos, len, 0);
6607 				} else {
6608 					II_NSC_WRITE(ip, shadow, rc, shd_tmp,
6609 					    shd_pos, len, 0);
6610 				}
6611 				if (!II_SUCCESS(rc)) {
6612 					_ii_error(ip, DSW_SHDOFFLINE);
6613 					if (ovr_flag)
6614 						_ii_error(ip, DSW_OVROFFLINE);
6615 				}
6616 			} else {
6617 				/* A copy has failed - something is wrong */
6618 				_ii_error(ip, DSW_MSTOFFLINE);
6619 				_ii_error(ip, DSW_SHDOFFLINE);
6620 			}
6621 		}
6622 	}
6623 
6624 	(void) nsc_free_buf(mst_tmp);
6625 	(void) nsc_free_buf(shd_tmp);
6626 	if (ovr_flag)
6627 		nsc_release(OVRFD(ip));
6628 
6629 	DTRACE_PROBE(_ii_copy_chunks);
6630 
6631 	if (II_SUCCESS(rc)) {
6632 		(void) II_CLR_COPY_BITS(ip, chunk_num, nchunks);
6633 		rc = 0;
6634 	}
6635 
6636 	return (rc);
6637 }
6638 
6639 
6640 /*
6641  * _ii_copy_on_write
6642  *
6643  * Calling/Exit State:
6644  *	Returns 0 on success, otherwise error code.
6645  *
6646  * Description:
6647  *	Determines if a copy on write is necessary, and performs it.
6648  *	A copy on write is necessary in the following cases:
6649  *		- No copy is in progress and the shadow bit is clear, which
6650  *		  means this is the first write to this track.
6651  *		- A copy is in progress and the copy bit is set, which means
6652  *		  that a track copy is required.
6653  *	If a copy to the master is to be done, make a recursive call to this
6654  *	function to do any necessary copy on write on other InstantImage groups
6655  * 	that share the same master volume.
6656  */
6657 
6658 static int
6659 _ii_copy_on_write(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
6660 {
6661 	int rc = 0;
6662 	int rtype;
6663 	int hanging =  (ip->bi_flags&DSW_HANGING);
6664 
6665 	if (hanging ||
6666 	    (flag & (CV_SIBLING|CV_SHD2MST)) == CV_SHD2MST && NSHADOWS(ip)) {
6667 		_ii_info_t *xip;
6668 		/*
6669 		 * Preserve copy of master for all other shadows of this master
6670 		 * before writing our data onto the master.
6671 		 */
6672 
6673 		/*
6674 		 * Avoid deadlock with COW on same chunk of sibling shadow
6675 		 * by unlocking this chunk before copying all other sibling
6676 		 * chunks.
6677 		 */
6678 
6679 		/*
6680 		 * Only using a single chunk when copying to master avoids
6681 		 * complex code here.
6682 		 */
6683 
6684 		ASSERT(nchunks == 1);
6685 		if (!hanging)
6686 			_ii_unlock_chunk(ip, chunk_num);
6687 		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
6688 			if (xip == ip)		/* don't copy ourselves again */
6689 				continue;
6690 
6691 			DTRACE_PROBE(_ii_copy_on_write);
6692 
6693 			rw_enter(&xip->bi_linkrw, RW_READER);
6694 			mutex_enter(&xip->bi_mutex);
6695 			if (xip->bi_disabled) {
6696 				mutex_exit(&xip->bi_mutex);
6697 				rw_exit(&xip->bi_linkrw);
6698 				continue;	/* this set is stopping */
6699 			}
6700 			xip->bi_shdref++;
6701 			mutex_exit(&xip->bi_mutex);
6702 			/* don't waste time asking for MST as ip shares it */
6703 			rtype = SHDR|BMP;
6704 			(void) _ii_rsrv_devs(xip, rtype, II_INTERNAL);
6705 			_ii_lock_chunk(xip, chunk_num);
6706 			rc = _ii_copy_on_write(xip, flag | CV_SIBLING,
6707 			    chunk_num, 1);
6708 
6709 			/*
6710 			 * See comments in _ii_shadow_write()
6711 			 */
6712 			if (rc == 0 ||
6713 			    (rc == EIO && (xip->bi_flags&DSW_OVERFLOW) != 0))
6714 				(void) II_SET_SHD_BIT(xip, chunk_num);
6715 
6716 			_ii_unlock_chunk(xip, chunk_num);
6717 			_ii_rlse_devs(xip, rtype);
6718 			mutex_enter(&xip->bi_mutex);
6719 			xip->bi_shdref--;
6720 			if (xip->bi_state & DSW_CLOSING) {
6721 				if (total_ref(xip) == 0) {
6722 					cv_signal(&xip->bi_closingcv);
6723 				}
6724 			}
6725 			mutex_exit(&xip->bi_mutex);
6726 			rw_exit(&xip->bi_linkrw);
6727 		}
6728 		if (hanging) {
6729 			DTRACE_PROBE(_ii_copy_on_write_end);
6730 			return (0);
6731 		}
6732 		/*
6733 		 * Reacquire chunk lock and check that a COW by a sibling
6734 		 * has not already copied this chunk.
6735 		 */
6736 		_ii_lock_chunk(ip, chunk_num);
6737 		rc = II_TST_SHD_BIT(ip, chunk_num);
6738 		if (rc < 0) {
6739 			DTRACE_PROBE(_ii_copy_on_write_end);
6740 			return (EIO);
6741 		}
6742 		if (rc != 0) {
6743 			DTRACE_PROBE(_ii_copy_on_write_end);
6744 			return (0);
6745 		}
6746 	}
6747 
6748 	if ((ip->bi_flags & DSW_COPYING) == 0) {
6749 		/* Not copying at all */
6750 
6751 		if ((ip->bi_flags & DSW_GOLDEN) == DSW_GOLDEN) {
6752 			/* No copy-on-write as it is independent */
6753 			DTRACE_PROBE(_ii_copy_on_write_end);
6754 			return (0);
6755 		}
6756 
6757 		/* Dependent, so depends on shadow bit */
6758 
6759 		if ((flag == CV_SHD2MST) &&
6760 		    ((ip->bi_flags & DSW_SHDOFFLINE) != 0)) {
6761 			/*
6762 			 * Writing master but shadow is offline, so
6763 			 * no need to copy on write or set shadow bit
6764 			 */
6765 			DTRACE_PROBE(_ii_copy_on_write_end);
6766 			return (0);
6767 		}
6768 		if (ip->bi_flags & DSW_BMPOFFLINE) {
6769 			DTRACE_PROBE(_ii_copy_on_write_end);
6770 			return (EIO);
6771 		}
6772 		rc = II_TST_SHD_BIT(ip, chunk_num);
6773 		if (rc < 0) {
6774 			DTRACE_PROBE(_ii_copy_on_write_end);
6775 			return (EIO);
6776 		}
6777 		if (rc == 0) {
6778 			/* Shadow bit clear, copy master to shadow */
6779 			rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
6780 		}
6781 	} else {
6782 		/* Copying one way or the other */
6783 		if (ip->bi_flags & DSW_BMPOFFLINE) {
6784 			DTRACE_PROBE(_ii_copy_on_write_end);
6785 			return (EIO);
6786 		}
6787 		rc = II_TST_COPY_BIT(ip, chunk_num);
6788 		if (rc < 0) {
6789 			DTRACE_PROBE(_ii_copy_on_write_end);
6790 			return (EIO);
6791 		}
6792 		if (rc) {
6793 			/* Copy bit set, do a copy */
6794 			if ((ip->bi_flags & DSW_COPYINGS) == 0) {
6795 				/* Copy master to shadow */
6796 				rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
6797 			} else {
6798 				/* Copy shadow to master */
6799 				rc = _ii_copy_chunks(ip, CV_SHD2MST, chunk_num,
6800 				    nchunks);
6801 			}
6802 		}
6803 	}
6804 	return (rc);
6805 }
6806 
6807 #ifdef	DEBUG
6808 int ii_maxchunks = 0;
6809 #endif
6810 
6811 /*
6812  * _ii_copyvolp()
6813  *	Copy volume process.
6814  *
6815  * Calling/Exit State:
6816  *	Passes 0 back to caller when the copy is complete or has been aborted,
6817  * 	otherwise error code.
6818  *
6819  * Description:
6820  *	According to the flag, copy the master to the shadow volume or the
6821  *	shadow to the master volume. Upon return wakeup all processes waiting
6822  *	for this copy.
6823  *
6824  */
6825 
6826 static void
6827 _ii_copyvolp(struct copy_args *ca)
6828 {
6829 	chunkid_t	chunk_num;
6830 	int	rc = 0;
6831 	chunkid_t	max_chunk;
6832 	nsc_size_t	nc_max;
6833 	int		nc_try, nc_got;
6834 	nsc_size_t	mst_max, shd_max;
6835 	_ii_info_t *ip;
6836 	int	flag;
6837 	nsc_size_t	bitmap_size;
6838 	nsc_size_t	shadow_set, copy_set;
6839 	int	chunkcount = 0;
6840 	int	rsrv = 1;
6841 	spcs_s_info_t kstatus;
6842 
6843 	ip = ca->ip;
6844 	flag = ca->flag;
6845 	kstatus = ca->kstatus;
6846 
6847 	if (ip->bi_disabled) {
6848 		rc = DSW_EABORTED;
6849 		goto skip;
6850 	}
6851 	max_chunk = ip->bi_size / DSW_SIZE;
6852 	if ((ip->bi_size % DSW_SIZE) != 0)
6853 		++max_chunk;
6854 	if ((ip->bi_flags&DSW_TREEMAP))
6855 		nc_max = 1;
6856 	else {
6857 		mst_max = shd_max = 0;
6858 		(void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
6859 		(void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
6860 		nc_max = (mst_max < shd_max) ? mst_max : shd_max;
6861 		nc_max /= DSW_SIZE;
6862 		ASSERT(nc_max > 0 && nc_max < 1000);
6863 	}
6864 #ifdef	DEBUG
6865 	if (ii_maxchunks > 0)
6866 		nc_max = ii_maxchunks;
6867 #endif
6868 	for (chunk_num = nc_got = 0; /* CSTYLED */; /* CSTYLED */) {
6869 		if ((flag & CV_SHD2MST) && NSHADOWS(ip))
6870 			nc_try = 1;
6871 		else
6872 			nc_try = (int)nc_max;
6873 		chunk_num = II_NEXT_COPY_BIT(ip, chunk_num + nc_got,
6874 		    max_chunk, nc_try, &nc_got);
6875 
6876 		if (chunk_num >= max_chunk)	/* loop complete */
6877 			break;
6878 		if (ip->bi_flags & DSW_COPYINGX) {
6879 			/* request to abort copy */
6880 			_ii_unlock_chunks(ip, chunk_num, nc_got);
6881 			rc = DSW_EABORTED;
6882 			break;
6883 		}
6884 
6885 		sema_p(&_ii_concopy_sema);
6886 		rc = _ii_copy_on_write(ip, (flag & CV_SHD2MST), chunk_num,
6887 		    nc_got);
6888 		sema_v(&_ii_concopy_sema);
6889 		if (ip->bi_flags & DSW_TREEMAP)
6890 			ii_tdelete(ip, chunk_num);
6891 		_ii_unlock_chunks(ip, chunk_num, nc_got);
6892 		if (!II_SUCCESS(rc)) {
6893 			if (ca->wait)
6894 				spcs_s_add(kstatus, rc);
6895 			rc = DSW_EIO;
6896 			break;
6897 		}
6898 		if (ip->bi_release ||
6899 		    (++chunkcount % ip->bi_throttle_unit) == 0) {
6900 			_ii_rlse_devs(ip, (ca->rtype&(~BMP)));
6901 			rsrv = 0;
6902 			delay(ip->bi_throttle_delay);
6903 			ca->rtype = MSTR|SHDR|(ca->rtype&BMP);
6904 			if ((rc = _ii_rsrv_devs(ip, (ca->rtype&(~BMP)),
6905 			    II_INTERNAL)) != 0) {
6906 				if (ca->wait)
6907 					spcs_s_add(kstatus, rc);
6908 				rc = DSW_EIO;
6909 				break;
6910 			}
6911 			rsrv = 1;
6912 			if (nc_max > 1) {
6913 				/*
6914 				 * maxfbas could have changed during the
6915 				 * release/reserve, so recalculate the size
6916 				 * of transfer we can do.
6917 				 */
6918 				(void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
6919 				(void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
6920 				nc_max = (mst_max < shd_max) ?
6921 				    mst_max : shd_max;
6922 				nc_max /= DSW_SIZE;
6923 			}
6924 		}
6925 	}
6926 skip:
6927 	mutex_enter(&ip->bi_mutex);
6928 	if (ip->bi_flags & DSW_COPYINGX)
6929 		II_FLAG_CLR(DSW_COPYINGP|DSW_COPYINGX, ip);
6930 	else
6931 		II_FLAG_CLR(DSW_COPY_FLAGS, ip);
6932 
6933 	if ((ip->bi_flags & DSW_TREEMAP) && (flag & CV_SHD2MST) &&
6934 	    (ip->bi_flags & DSW_VOVERFLOW)) {
6935 		int rs;
6936 		bitmap_size = ip->bi_size / DSW_SIZE;
6937 		if ((ip->bi_size % DSW_SIZE) != 0)
6938 			++bitmap_size;
6939 		bitmap_size += 7;
6940 		bitmap_size /= 8;
6941 
6942 		/* Count the number of copy bits set */
6943 		rs = II_CNT_BITS(ip, ip->bi_copyfba, &copy_set, bitmap_size);
6944 		if ((rs == 0) && (copy_set == 0)) {
6945 			/*
6946 			 * If we counted successfully and completed the copy
6947 			 * see if any writes have forced the set into the
6948 			 * overflow
6949 			 */
6950 			rs = II_CNT_BITS(ip, ip->bi_shdfba, &shadow_set,
6951 			    bitmap_size);
6952 			if ((rs == 0) && (shadow_set <
6953 			    (nsc_size_t)ip->bi_shdchks)) {
6954 				II_FLAG_CLR(DSW_VOVERFLOW, ip);
6955 				--iigkstat.spilled_over.value.ul;
6956 			}
6957 		}
6958 	}
6959 
6960 	ca->rc = rc;
6961 	cv_broadcast(&ip->bi_copydonecv);
6962 	mutex_exit(&ip->bi_mutex);
6963 	if (!ca->wait) {
6964 		if (rsrv)
6965 			_ii_rlse_devs(ip, ca->rtype);
6966 		kmem_free(ca, sizeof (*ca));
6967 	}
6968 
6969 }
6970 
6971 /*
6972  * _ii_copyvol()
6973  *	Copy a volume.
6974  *
6975  * Calling/Exit State:
6976  *	Returns 0 when the copy is complete or has been aborted,
6977  * 	otherwise error code.
6978  *
6979  * Description:
6980  *	According to the flag, copy the master to the shadow volume or the
6981  *	shadow to the master volume. Upon return wakeup all processes waiting
6982  *	for this copy. Uses a separate process (_ii_copyvolp) to allow the
6983  *	caller to be interrupted.
6984  */
6985 
6986 static int
6987 _ii_copyvol(_ii_info_t *ip, int flag, int rtype, spcs_s_info_t kstatus,
6988 				int wait)
6989 {
6990 	struct copy_args *ca;
6991 	int rc;
6992 
6993 	/*
6994 	 * start copy in separate process.
6995 	 */
6996 
6997 	ca = (struct copy_args *)kmem_alloc(sizeof (*ca), KM_SLEEP);
6998 	ca->ip = ip;
6999 	ca->flag = flag;
7000 	ca->rtype = rtype;
7001 	ca->kstatus = kstatus;
7002 	ca->wait = wait;
7003 	ca->rc = 0;
7004 
7005 	if (rc = nsc_create_process((void (*)(void *))_ii_copyvolp,
7006 	    (void *)ca, FALSE)) {
7007 		mutex_enter(&ip->bi_mutex);
7008 		_ii_ioctl_done(ip);
7009 		mutex_exit(&ip->bi_mutex);
7010 		cmn_err(CE_NOTE, "!Can't create II copy process");
7011 		kmem_free(ca, sizeof (*ca));
7012 		return (rc);
7013 	}
7014 	mutex_enter(&ip->bi_mutex);
7015 	if (wait == 0) {
7016 		_ii_ioctl_done(ip);
7017 		mutex_exit(&ip->bi_mutex);
7018 		return (0);
7019 	}
7020 	while (ip->bi_flags & DSW_COPYINGP) {
7021 		(void) cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex);
7022 	}
7023 	_ii_ioctl_done(ip);
7024 	mutex_exit(&ip->bi_mutex);
7025 	rc = ca->rc;
7026 	kmem_free(ca, sizeof (*ca));
7027 
7028 	return (rc);
7029 }
7030 
7031 /*
7032  * _ii_stopcopy
7033  *	Stops any copy process on ip.
7034  *
7035  * Calling/Exit State:
7036  *	Returns 0 if the copy was stopped, otherwise error code.
7037  *
7038  * Description:
7039  *	Stop an in-progress copy by setting the DSW_COPYINGX flag, then
7040  *	wait for the copy to complete.
7041  */
7042 
7043 static int
7044 _ii_stopcopy(_ii_info_t *ip)
7045 {
7046 	mutex_enter(&ip->bi_mutex);
7047 	DTRACE_PROBE1(_ii_stopcopy_flags,
7048 	    uint_t, ip->bi_flags);
7049 
7050 	while (ip->bi_flags & DSW_COPYINGP) {
7051 
7052 		DTRACE_PROBE(_ii_stopcopy);
7053 
7054 		II_FLAG_SET(DSW_COPYINGX, ip);
7055 
7056 		if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
7057 			/* Awoken by a signal */
7058 			mutex_exit(&ip->bi_mutex);
7059 			DTRACE_PROBE(_ii_stopcopy);
7060 			return (EINTR);
7061 		}
7062 	}
7063 
7064 	mutex_exit(&ip->bi_mutex);
7065 
7066 	return (0);
7067 }
7068 
7069 /*
7070  * _ii_error
7071  *	Given the error type that occurred, and the current state of the
7072  *	shadowing, set the appropriate error condition(s).
7073  *
7074  */
7075 
7076 void
7077 _ii_error(_ii_info_t *ip, int error_type)
7078 {
7079 	int copy_flags;
7080 	int golden;
7081 	int flags;
7082 	int recursive_call = (error_type & DSW_OVERFLOW) != 0;
7083 	int offline_bits = DSW_OFFLINE;
7084 	_ii_info_t *xip;
7085 	int rc;
7086 
7087 	error_type &= ~DSW_OVERFLOW;
7088 
7089 	mutex_enter(&ip->bi_mutex);
7090 	flags = (ip->bi_flags) & offline_bits;
7091 	if ((flags ^ error_type) == 0) {
7092 		/* nothing new offline */
7093 		mutex_exit(&ip->bi_mutex);
7094 		return;
7095 	}
7096 
7097 	if (error_type == DSW_BMPOFFLINE &&
7098 	    (ip->bi_flags & DSW_BMPOFFLINE) == 0) {
7099 		/* first, let nskerd know */
7100 		rc = _ii_report_bmp(ip);
7101 		if (rc) {
7102 			if (ii_debug > 0) {
7103 				cmn_err(CE_WARN, "!Unable to mark bitmap bad in"
7104 				    " config DB; rc = %d", rc);
7105 			}
7106 			ip->bi_flags |= DSW_CFGOFFLINE;
7107 		}
7108 	}
7109 
7110 	flags = ip->bi_flags;
7111 	golden = ((flags & DSW_GOLDEN) == DSW_GOLDEN);
7112 	copy_flags = flags & DSW_COPYING;
7113 
7114 	switch (error_type) {
7115 
7116 	case DSW_BMPOFFLINE:
7117 		/* prevent further use of bitmap */
7118 		flags |= DSW_BMPOFFLINE;
7119 		if (ii_debug > 0)
7120 			cmn_err(CE_NOTE, "!ii: Bitmap offline");
7121 
7122 		switch (copy_flags) {
7123 
7124 		case DSW_COPYINGM:
7125 			/* Bitmap offline, copying master to shadow */
7126 			flags |= DSW_SHDOFFLINE;
7127 			if (ii_debug > 0)
7128 				cmn_err(CE_NOTE, "!ii: Implied shadow offline");
7129 			break;
7130 
7131 		case DSW_COPYINGS:
7132 			/* Bitmap offline, copying shadow to master */
7133 			if (golden) {
7134 				/* Shadow is still usable */
7135 				if (ii_debug > 0)
7136 					cmn_err(CE_NOTE,
7137 					    "!ii: Implied master offline");
7138 				flags |= DSW_MSTOFFLINE;
7139 			} else {
7140 				/*
7141 				 * Snapshot restore from shadow to master
7142 				 * is a dumb thing to do anyway. Lose both.
7143 				 */
7144 				flags |= DSW_SHDOFFLINE | DSW_MSTOFFLINE;
7145 				if (ii_debug > 0)
7146 					cmn_err(CE_NOTE,
7147 					    "ii: Implied master and "
7148 					    "shadow offline");
7149 			}
7150 			break;
7151 
7152 		case 0:
7153 			/* Bitmap offline, no copying in progress */
7154 			if (!golden) {
7155 				if (ii_debug > 0)
7156 					cmn_err(CE_NOTE,
7157 					    "!ii: Implied shadow offline");
7158 				flags |= DSW_SHDOFFLINE;
7159 			}
7160 			break;
7161 		}
7162 		break;
7163 
7164 	case DSW_OVROFFLINE:
7165 		flags |= DSW_OVROFFLINE;
7166 		ASSERT(ip->bi_overflow);
7167 		if (ii_debug > 0)
7168 			cmn_err(CE_NOTE, "!ii: Overflow offline");
7169 		/* FALLTHRU */
7170 	case DSW_SHDOFFLINE:
7171 		flags |= DSW_SHDOFFLINE;
7172 		if (ii_debug > 0)
7173 			cmn_err(CE_NOTE, "!ii: Shadow offline");
7174 
7175 		if (copy_flags == DSW_COPYINGS) {
7176 			/* Shadow offline, copying shadow to master */
7177 			if (ii_debug > 0)
7178 				cmn_err(CE_NOTE, "!ii: Implied master offline");
7179 			flags |= DSW_MSTOFFLINE;
7180 		}
7181 		break;
7182 
7183 	case DSW_MSTOFFLINE:
7184 		flags |= DSW_MSTOFFLINE;
7185 		if (ii_debug > 0)
7186 			cmn_err(CE_NOTE, "!ii: Master offline");
7187 
7188 		switch (copy_flags) {
7189 
7190 		case DSW_COPYINGM:
7191 			/* Master offline, copying master to shadow */
7192 			flags |= DSW_SHDOFFLINE;
7193 			if (ii_debug > 0)
7194 				cmn_err(CE_NOTE, "!ii: Implied shadow offline");
7195 			break;
7196 
7197 		case DSW_COPYINGS:
7198 			/* Master offline, copying shadow to master */
7199 			if (!golden) {
7200 				flags |= DSW_SHDOFFLINE;
7201 				if (ii_debug > 0)
7202 					cmn_err(CE_NOTE,
7203 					    "!ii: Implied shadow offline");
7204 			}
7205 			break;
7206 
7207 		case 0:
7208 			/* Master offline, no copying in progress */
7209 			if (!golden) {
7210 				flags |= DSW_SHDOFFLINE;
7211 				if (ii_debug > 0)
7212 					cmn_err(CE_NOTE,
7213 					    "!ii: Implied shadow offline");
7214 			}
7215 			break;
7216 		}
7217 		break;
7218 
7219 	default:
7220 		break;
7221 	}
7222 
7223 	II_FLAG_SET(flags, ip);
7224 	mutex_exit(&ip->bi_mutex);
7225 
7226 	if (!recursive_call &&
7227 	    NSHADOWS(ip) && (flags&DSW_MSTOFFLINE) == DSW_MSTOFFLINE) {
7228 		/* take master offline for all other sibling shadows */
7229 		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
7230 			if (xip == ip)
7231 				continue;
7232 			if (_ii_rsrv_devs(xip, BMP, II_INTERNAL) != 0)
7233 				continue;
7234 					/* overload DSW_OVERFLOW */
7235 			_ii_error(xip, DSW_MSTOFFLINE|DSW_OVERFLOW);
7236 			_ii_rlse_devs(xip, BMP);
7237 		}
7238 	}
7239 
7240 }
7241 
7242 
7243 /*
7244  * _ii_lock_chunk
7245  *	Locks access to the specified chunk
7246  *
7247  */
7248 
7249 static void
7250 _ii_lock_chunk(_ii_info_t *ip, chunkid_t chunk)
7251 {
7252 	if (chunk == II_NULLCHUNK) {
7253 
7254 		DTRACE_PROBE(_ii_lock_chunk_type);
7255 
7256 		rw_enter(&ip->bi_busyrw, RW_WRITER);
7257 
7258 	} else {
7259 
7260 		DTRACE_PROBE(_ii_lock_chunk_type);
7261 
7262 		if (ip->bi_busy == NULL) {
7263 			DTRACE_PROBE(_ii_lock_chunk_end);
7264 			return;
7265 		}
7266 
7267 		rw_enter(&ip->bi_busyrw, RW_READER);
7268 		mutex_enter(&ip->bi_mutex);
7269 		while (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
7270 		    chunk % DSW_BITS))
7271 			cv_wait(&ip->bi_busycv, &ip->bi_mutex);
7272 		DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
7273 		mutex_exit(&ip->bi_mutex);
7274 	}
7275 
7276 }
7277 
7278 
7279 /*
7280  * _ii_trylock_chunk
7281  *	Tries to lock access to the specified chunk
7282  * Returns non-zero on success.
7283  *
7284  */
7285 
7286 static int
7287 _ii_trylock_chunk(_ii_info_t *ip, chunkid_t chunk)
7288 {
7289 	int rc;
7290 
7291 	ASSERT(chunk != II_NULLCHUNK);
7292 	if (rw_tryenter(&ip->bi_busyrw, RW_READER) == 0) {
7293 		DTRACE_PROBE(_ii_trylock_chunk);
7294 		return (0);
7295 	}
7296 
7297 	if (ip->bi_busy == NULL) {
7298 		DTRACE_PROBE(_ii_trylock_chunk_end);
7299 		return (0);
7300 	}
7301 
7302 	mutex_enter(&ip->bi_mutex);
7303 	if (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) {
7304 		rw_exit(&ip->bi_busyrw);	/* RW_READER */
7305 		rc = 0;
7306 	} else {
7307 		DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
7308 		rc = 1;
7309 	}
7310 	mutex_exit(&ip->bi_mutex);
7311 
7312 	return (rc);
7313 }
7314 
7315 /*
7316  * _ii_unlock_chunks
7317  *	Unlocks access to the specified chunks
7318  *
7319  */
7320 
7321 static void
7322 _ii_unlock_chunks(_ii_info_t *ip, chunkid_t  chunk, int n)
7323 {
7324 	if (chunk == II_NULLCHUNK) {
7325 
7326 		DTRACE_PROBE(_ii_unlock_chunks);
7327 
7328 		rw_exit(&ip->bi_busyrw);	/* RW_WRITER */
7329 
7330 	} else {
7331 
7332 		if (ip->bi_busy == NULL) {
7333 			DTRACE_PROBE(_ii_unlock_chunks_end);
7334 			return;
7335 		}
7336 		mutex_enter(&ip->bi_mutex);
7337 
7338 		DTRACE_PROBE(_ii_unlock_chunks);
7339 
7340 		for (; n-- > 0; chunk++) {
7341 			ASSERT(DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
7342 			    chunk % DSW_BITS));
7343 			DSW_BIT_CLR(ip->bi_busy[chunk / DSW_BITS],
7344 			    chunk % DSW_BITS);
7345 			rw_exit(&ip->bi_busyrw);	/* RW_READER */
7346 		}
7347 		cv_broadcast(&ip->bi_busycv);
7348 		mutex_exit(&ip->bi_mutex);
7349 
7350 	}
7351 }
7352 
7353 /*
7354  * Copyout the bit map.
7355  */
7356 static int
7357 _ii_ab_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
7358     int user_bm_size)
7359 {
7360 	nsc_off_t	last_fba;
7361 	nsc_buf_t *tmp;
7362 	nsc_vec_t *nsc_vecp;
7363 	nsc_off_t	fba_pos;
7364 	int	buf_fba_len;
7365 	int	buf_byte_len;
7366 	size_t	co_len;
7367 	int	rc;
7368 
7369 	DTRACE_PROBE2(_ii_ab_co_bmp_start, nsc_off_t, bm_offset,
7370 	    nsc_size_t, user_bm_size);
7371 
7372 	if (ip->bi_flags & DSW_BMPOFFLINE)
7373 		return (EIO);
7374 
7375 	/* First calculate the size of the shadow and copy bitmaps */
7376 	co_len = DSW_BM_FBA_LEN(ip->bi_size);
7377 	ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
7378 
7379 	/* Are we in the ranges of the various bitmaps/indexes? */
7380 	if (bm_offset < ip->bi_shdfba)
7381 		return (EIO);
7382 	else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
7383 		/*EMPTY*/;
7384 	else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
7385 		/*EMPTY*/;
7386 	else if ((ip->bi_flags & DSW_TREEMAP) &&
7387 	    (bm_offset < (last_fba = last_fba + (co_len * 32))))
7388 		/*EMPTY*/;
7389 	else return (EIO);
7390 
7391 	/* Are we within the size of the segment being copied? */
7392 	if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
7393 		return (EIO);
7394 
7395 	for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
7396 	    fba_pos += DSW_CBLK_FBA) {
7397 		tmp = NULL;
7398 		buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
7399 		    DSW_CBLK_FBA : last_fba - fba_pos;
7400 		II_READ_START(ip, bitmap);
7401 		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
7402 		    NSC_RDBUF, &tmp);
7403 		II_READ_END(ip, bitmap, rc, buf_fba_len);
7404 		if (!II_SUCCESS(rc)) {
7405 			if (tmp)
7406 				(void) nsc_free_buf(tmp);
7407 
7408 			_ii_error(ip, DSW_BMPOFFLINE);
7409 			return (EIO);
7410 		}
7411 
7412 		/* copyout each nsc_vec's worth of data */
7413 		buf_byte_len = FBA_SIZE(buf_fba_len);
7414 		for (nsc_vecp = tmp->sb_vec;
7415 		    buf_byte_len > 0 && user_bm_size > 0;
7416 		    nsc_vecp++) {
7417 			co_len = (user_bm_size > nsc_vecp->sv_len) ?
7418 			    nsc_vecp->sv_len : user_bm_size;
7419 			if (copyout(nsc_vecp->sv_addr, user_bm, co_len)) {
7420 				(void) nsc_free_buf(tmp);
7421 				return (EFAULT);
7422 			}
7423 			user_bm += co_len;
7424 			user_bm_size -= co_len;
7425 			buf_byte_len -= co_len;
7426 		}
7427 
7428 
7429 		(void) nsc_free_buf(tmp);
7430 	}
7431 
7432 	return (0);
7433 }
7434 
7435 /*
7436  * Copyin a bit map and or with differences bitmap.
7437  */
7438 static int
7439 _ii_ab_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
7440 int user_bm_size)
7441 {
7442 	nsc_off_t	last_fba;
7443 	nsc_buf_t *tmp;
7444 	nsc_vec_t *nsc_vecp;
7445 	nsc_off_t	fba_pos;
7446 	int	buf_fba_len;
7447 	int	buf_byte_len;
7448 	size_t	ci_len;
7449 	int	rc;
7450 	int	n;
7451 	unsigned char *tmp_buf, *tmpp, *tmpq;
7452 
7453 	DTRACE_PROBE2(_ii_ab_ci_bmp_start, nsc_off_t, bm_offset,
7454 	    nsc_size_t, user_bm_size);
7455 
7456 	if (ip->bi_flags & DSW_BMPOFFLINE)
7457 		return (EIO);
7458 
7459 	tmp_buf = NULL;
7460 	last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
7461 
7462 	for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
7463 	    fba_pos += DSW_CBLK_FBA) {
7464 		tmp = NULL;
7465 		buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
7466 		    DSW_CBLK_FBA : last_fba - fba_pos;
7467 		II_READ_START(ip, bitmap);
7468 		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
7469 		    NSC_RDWRBUF, &tmp);
7470 		II_READ_END(ip, bitmap, rc, buf_fba_len);
7471 		if (!II_SUCCESS(rc)) {
7472 			if (tmp)
7473 				(void) nsc_free_buf(tmp);
7474 
7475 			_ii_error(ip, DSW_BMPOFFLINE);
7476 			return (EIO);
7477 		}
7478 
7479 		/* copyin each nsc_vec's worth of data */
7480 		buf_byte_len = FBA_SIZE(buf_fba_len);
7481 		for (nsc_vecp = tmp->sb_vec;
7482 		    buf_byte_len > 0 && user_bm_size > 0;
7483 		    nsc_vecp++) {
7484 			ci_len = (user_bm_size > nsc_vecp->sv_len) ?
7485 			    nsc_vecp->sv_len : user_bm_size;
7486 			tmpp = tmp_buf = kmem_alloc(ci_len, KM_SLEEP);
7487 			tmpq = nsc_vecp->sv_addr;
7488 			if (copyin(user_bm, tmpp, ci_len)) {
7489 				(void) nsc_free_buf(tmp);
7490 				kmem_free(tmp_buf, ci_len);
7491 				return (EFAULT);
7492 			}
7493 			for (n = ci_len; n-- > 0; /* CSTYLED */)
7494 				*tmpq++ |= *tmpp++;
7495 			user_bm += ci_len;
7496 			user_bm_size -= ci_len;
7497 			buf_byte_len -= ci_len;
7498 			kmem_free(tmp_buf, ci_len);
7499 		}
7500 
7501 		II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, buf_fba_len, 0);
7502 		if (!II_SUCCESS(rc)) {
7503 			(void) nsc_free_buf(tmp);
7504 			_ii_error(ip, DSW_BMPOFFLINE);
7505 			return (EIO);
7506 		}
7507 
7508 		(void) nsc_free_buf(tmp);
7509 	}
7510 
7511 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7512 
7513 	return (0);
7514 }
7515 
7516 /*
7517  * Completely zero the bit map.
7518  *
7519  *	Returns 0 if no error
7520  *	Returns non-zero if there was an error
7521  */
7522 static int
7523 _ii_ab_zerobm(_ii_info_t *ip)
7524 {
7525 	nsc_off_t fba_pos;
7526 	int rc;
7527 	nsc_size_t len;
7528 	nsc_size_t size;
7529 	nsc_buf_t *tmp;
7530 
7531 	size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7532 	for (fba_pos = ip->bi_shdfba; fba_pos < size; fba_pos += DSW_CBLK_FBA) {
7533 		tmp = NULL;
7534 		len = fba_pos + DSW_CBLK_FBA < size ?
7535 		    DSW_CBLK_FBA : size - fba_pos;
7536 		II_READ_START(ip, bitmap);
7537 		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, len, NSC_RDWRBUF,
7538 		    &tmp);
7539 		II_READ_END(ip, bitmap, rc, len);
7540 		if (!II_SUCCESS(rc)) {
7541 			if (tmp)
7542 				(void) nsc_free_buf(tmp);
7543 
7544 			_ii_error(ip, DSW_BMPOFFLINE);
7545 			return (rc);
7546 		}
7547 
7548 		rc = nsc_zero(tmp, fba_pos, len, 0);
7549 		if (II_SUCCESS(rc)) {
7550 			II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, len, 0);
7551 		}
7552 
7553 		(void) nsc_free_buf(tmp);
7554 		if (!II_SUCCESS(rc)) {
7555 			_ii_error(ip, DSW_BMPOFFLINE);
7556 			return (rc);
7557 		}
7558 	}
7559 
7560 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7561 
7562 	return (0);
7563 }
7564 
7565 
7566 /*
7567  * Copy shadow bitmap to copy bitmap
7568  */
7569 static int
7570 _ii_ab_copybm(_ii_info_t *ip)
7571 {
7572 	nsc_off_t copy_fba_pos, shd_fba_pos;
7573 	int rc;
7574 	nsc_size_t len;
7575 	nsc_off_t size;
7576 	nsc_buf_t *copy_tmp, *shd_tmp;
7577 
7578 	size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7579 	copy_fba_pos = ip->bi_copyfba;
7580 	for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
7581 	    copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
7582 		shd_tmp = NULL;
7583 		len = shd_fba_pos + DSW_CBLK_FBA < size ?
7584 		    DSW_CBLK_FBA : size - shd_fba_pos;
7585 		II_READ_START(ip, bitmap);
7586 		rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF,
7587 		    &shd_tmp);
7588 		II_READ_END(ip, bitmap, rc, len);
7589 		if (!II_SUCCESS(rc)) {
7590 			if (shd_tmp)
7591 				(void) nsc_free_buf(shd_tmp);
7592 
7593 			_ii_error(ip, DSW_BMPOFFLINE);
7594 			if (ii_debug > 1)
7595 				cmn_err(CE_NOTE, "!ii: copybm failed 1 rc %d",
7596 				    rc);
7597 
7598 			return (rc);
7599 		}
7600 
7601 		copy_tmp = NULL;
7602 		rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_WRBUF,
7603 		    &copy_tmp);
7604 		if (!II_SUCCESS(rc)) {
7605 			(void) nsc_free_buf(shd_tmp);
7606 			if (copy_tmp)
7607 				(void) nsc_free_buf(copy_tmp);
7608 
7609 			_ii_error(ip, DSW_BMPOFFLINE);
7610 			if (ii_debug > 1)
7611 				cmn_err(CE_NOTE, "!ii: copybm failed 2 rc %d",
7612 				    rc);
7613 
7614 			return (rc);
7615 		}
7616 		rc = nsc_copy(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
7617 		    len);
7618 		if (II_SUCCESS(rc)) {
7619 			II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
7620 			    len, 0);
7621 		}
7622 
7623 		(void) nsc_free_buf(shd_tmp);
7624 		(void) nsc_free_buf(copy_tmp);
7625 		if (!II_SUCCESS(rc)) {
7626 			if (ii_debug > 1)
7627 				cmn_err(CE_NOTE, "!ii: copybm failed 4 rc %d",
7628 				    rc);
7629 			_ii_error(ip, DSW_BMPOFFLINE);
7630 			return (rc);
7631 		}
7632 	}
7633 
7634 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7635 
7636 	return (0);
7637 }
7638 
7639 
7640 /*
7641  * stolen from nsc_copy_h()
7642  */
7643 
7644 static int
7645 _ii_nsc_or(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, nsc_off_t pos2,
7646 	nsc_size_t len)
7647 {
7648 	unsigned char *a1, *a2;
7649 	unsigned char *b1, *b2;
7650 	nsc_vec_t *v1, *v2;
7651 	int i, sz, l1, l2;
7652 
7653 	if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len ||
7654 	    pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len)
7655 		return (EINVAL);
7656 
7657 	if (!len)
7658 		return (0);
7659 
7660 	/* find starting point in "from" vector */
7661 
7662 	v1 = h1->sb_vec;
7663 	pos1 -= h1->sb_pos;
7664 
7665 	for (; pos1 >= FBA_NUM(v1->sv_len); v1++)
7666 		pos1 -= FBA_NUM(v1->sv_len);
7667 
7668 	a1 = v1->sv_addr + FBA_SIZE(pos1);
7669 	l1 = v1->sv_len - FBA_SIZE(pos1);
7670 
7671 	/* find starting point in "to" vector */
7672 
7673 	v2 = h2->sb_vec;
7674 	pos2 -= h2->sb_pos;
7675 
7676 	for (; pos2 >= FBA_NUM(v2->sv_len); v2++)
7677 		pos2 -= FBA_NUM(v2->sv_len);
7678 
7679 	a2 = v2->sv_addr + FBA_SIZE(pos2);
7680 	l2 = v2->sv_len - FBA_SIZE(pos2);
7681 
7682 	/* copy required data */
7683 
7684 	len = FBA_SIZE(len);
7685 
7686 	while (len) {
7687 		sz = min(l1, l2);
7688 		sz = (int)min((nsc_size_t)sz, len);
7689 
7690 		b1 = a1;
7691 		b2 = a2;
7692 		for (i = sz; i-- > 0; /* CSTYLED */)
7693 			*b2++ |= *b1++;
7694 
7695 		l1 -= sz;
7696 		l2 -= sz;
7697 		a1 += sz;
7698 		a2 += sz;
7699 		len -= sz;
7700 
7701 		if (!l1) {
7702 			a1 = (++v1)->sv_addr;
7703 			l1 = v1->sv_len;
7704 		}
7705 		if (!l2) {
7706 			a2 = (++v2)->sv_addr;
7707 			l2 = v2->sv_len;
7708 		}
7709 	}
7710 
7711 	return (0);
7712 }
7713 
7714 
7715 /*
7716  * Or the shadow bitmap in to the copy bitmap, clear the
7717  * shadow bitmap.
7718  */
7719 static int
7720 _ii_ab_orbm(_ii_info_t *ip)
7721 {
7722 	nsc_off_t copy_fba_pos, shd_fba_pos;
7723 	int rc;
7724 	nsc_size_t len;
7725 	size_t size;
7726 	nsc_buf_t *copy_tmp, *shd_tmp;
7727 
7728 	if (ip->bi_flags & DSW_BMPOFFLINE)
7729 		return (EIO);
7730 
7731 	size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7732 	copy_fba_pos = ip->bi_copyfba;
7733 	for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
7734 	    copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
7735 		shd_tmp = NULL;
7736 		len = shd_fba_pos + DSW_CBLK_FBA < size ?
7737 		    DSW_CBLK_FBA : size - shd_fba_pos;
7738 		II_READ_START(ip, bitmap);
7739 		rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len,
7740 		    NSC_RDBUF|NSC_WRBUF, &shd_tmp);
7741 		II_READ_END(ip, bitmap, rc, len);
7742 		if (!II_SUCCESS(rc)) {
7743 			if (shd_tmp)
7744 				(void) nsc_free_buf(shd_tmp);
7745 
7746 			_ii_error(ip, DSW_BMPOFFLINE);
7747 			return (rc);
7748 		}
7749 
7750 		copy_tmp = NULL;
7751 		II_READ_START(ip, bitmap);
7752 		rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len,
7753 		    NSC_RDBUF|NSC_WRBUF, &copy_tmp);
7754 		II_READ_END(ip, bitmap, rc, len);
7755 		if (!II_SUCCESS(rc)) {
7756 			(void) nsc_free_buf(shd_tmp);
7757 			if (copy_tmp)
7758 				(void) nsc_free_buf(copy_tmp);
7759 
7760 			_ii_error(ip, DSW_BMPOFFLINE);
7761 			return (rc);
7762 		}
7763 		rc = _ii_nsc_or(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
7764 		    len);
7765 		if (II_SUCCESS(rc)) {
7766 			II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
7767 			    len, 0);
7768 		}
7769 		if (II_SUCCESS(rc))
7770 			rc = nsc_zero(shd_tmp, shd_fba_pos, len, 0);
7771 		if (II_SUCCESS(rc)) {
7772 			II_NSC_WRITE(ip, bitmap, rc, shd_tmp, shd_fba_pos, len,
7773 			    0);
7774 		}
7775 
7776 		(void) nsc_free_buf(shd_tmp);
7777 		(void) nsc_free_buf(copy_tmp);
7778 		if (!II_SUCCESS(rc)) {
7779 			_ii_error(ip, DSW_BMPOFFLINE);
7780 			return (rc);
7781 		}
7782 	}
7783 
7784 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7785 
7786 	return (0);
7787 }
7788 
7789 /*
7790  * _ii_ab_tst_shd_bit
7791  *	Determine if a chunk has been copied to the shadow device
7792  *	Relies on the alloc_buf/free_buf semantics for locking.
7793  *
7794  * Calling/Exit State:
7795  *	Returns 1 if the modified bit has been set for the shadow device,
7796  *	Returns 0 if the modified bit has not been set for the shadow device,
7797  *	Returns -1 if there was an error
7798  */
7799 
7800 static int
7801 _ii_ab_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
7802 {
7803 	int rc;
7804 	nsc_off_t fba;
7805 	nsc_buf_t *tmp = NULL;
7806 
7807 	if (ip->bi_flags & DSW_BMPOFFLINE)
7808 		return (EIO);
7809 
7810 	fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7811 	chunk %= FBA_SIZE(1) * DSW_BITS;
7812 	II_READ_START(ip, bitmap);
7813 	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
7814 	II_READ_END(ip, bitmap, rc, 1);
7815 	if (!II_SUCCESS(rc)) {
7816 		_ii_error(ip, DSW_BMPOFFLINE);
7817 		if (tmp)
7818 			(void) nsc_free_buf(tmp);
7819 		return (-1);
7820 	}
7821 	rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7822 	    chunk%DSW_BITS);
7823 	(void) nsc_free_buf(tmp);
7824 
7825 	return (rc);
7826 }
7827 
7828 
7829 /*
7830  * _ii_ab_set_shd_bit
7831  *	Records that a chunk has been copied to the shadow device
7832  *
7833  *	Returns non-zero if an error is encountered
7834  *	Returns 0 if no error
7835  */
7836 
7837 static int
7838 _ii_ab_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
7839 {
7840 	int rc;
7841 	nsc_off_t fba;
7842 	nsc_buf_t *tmp = NULL;
7843 
7844 	if (ip->bi_flags & DSW_BMPOFFLINE)
7845 		return (EIO);
7846 
7847 	fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7848 	chunk %= FBA_SIZE(1) * DSW_BITS;
7849 	II_READ_START(ip, bitmap);
7850 	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7851 	II_READ_END(ip, bitmap, rc, 1);
7852 	if (!II_SUCCESS(rc)) {
7853 		_ii_error(ip, DSW_BMPOFFLINE);
7854 		if (tmp)
7855 			(void) nsc_free_buf(tmp);
7856 		return (rc);
7857 	}
7858 	if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7859 	    chunk%DSW_BITS) == 0) {
7860 		DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7861 		    chunk%DSW_BITS);
7862 		II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7863 		if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
7864 			ip->bi_shdbits++;
7865 	}
7866 	(void) nsc_free_buf(tmp);
7867 	if (!II_SUCCESS(rc)) {
7868 		_ii_error(ip, DSW_BMPOFFLINE);
7869 		return (rc);
7870 	}
7871 
7872 	return (0);
7873 }
7874 
7875 
7876 /*
7877  * _ii_ab_tst_copy_bit
7878  *	Determine if a chunk needs to be copied during updates.
7879  *
7880  * Calling/Exit State:
7881  *	Returns 1 if the copy bit for the chunk is set
7882  *	Returns 0 if the copy bit for the chunk is not set
7883  *	Returns -1 if an error is encountered
7884  */
7885 
7886 static int
7887 _ii_ab_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
7888 {
7889 	int rc;
7890 	nsc_off_t fba;
7891 	nsc_buf_t *tmp = NULL;
7892 
7893 	if (ip->bi_flags & DSW_BMPOFFLINE)
7894 		return (-1);
7895 
7896 	fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7897 	chunk %= FBA_SIZE(1) * DSW_BITS;
7898 	II_READ_START(ip, bitmap);
7899 	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
7900 	II_READ_END(ip, bitmap, rc, 1);
7901 	if (!II_SUCCESS(rc)) {
7902 		if (tmp)
7903 			(void) nsc_free_buf(tmp);
7904 		_ii_error(ip, DSW_BMPOFFLINE);
7905 		return (-1);
7906 	}
7907 	rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7908 	    chunk%DSW_BITS);
7909 	(void) nsc_free_buf(tmp);
7910 
7911 	return (rc);
7912 }
7913 
7914 
7915 /*
7916  * _ii_ab_set_copy_bit
7917  *	Records that a chunk has been copied to the shadow device
7918  *
7919  *	Returns non-zero if an error is encountered
7920  *	Returns 0 if no error
7921  */
7922 
7923 static int
7924 _ii_ab_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
7925 {
7926 	int rc;
7927 	nsc_off_t fba;
7928 	nsc_buf_t *tmp = NULL;
7929 
7930 	if (ip->bi_flags & DSW_BMPOFFLINE)
7931 		return (EIO);
7932 
7933 	fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7934 	chunk %= FBA_SIZE(1) * DSW_BITS;
7935 	II_READ_START(ip, bitmap);
7936 	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7937 	II_READ_END(ip, bitmap, rc, 1);
7938 	if (!II_SUCCESS(rc)) {
7939 		if (tmp)
7940 			(void) nsc_free_buf(tmp);
7941 		_ii_error(ip, DSW_BMPOFFLINE);
7942 		return (rc);
7943 	}
7944 	if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7945 	    chunk%DSW_BITS) == 0) {
7946 		DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7947 		    chunk%DSW_BITS);
7948 		if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
7949 			ip->bi_copybits++;
7950 
7951 		II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7952 	}
7953 	(void) nsc_free_buf(tmp);
7954 	if (!II_SUCCESS(rc)) {
7955 		_ii_error(ip, DSW_BMPOFFLINE);
7956 		return (rc);
7957 	}
7958 
7959 	return (0);
7960 }
7961 
7962 
7963 /*
7964  * _ii_ab_clr_copy_bits
7965  *	Records that a chunk has been cleared on the shadow device, this
7966  *	function assumes that the bits to clear are all in the same fba,
7967  *	as is the case when they were generated by _ii_ab_next_copy_bit().
7968  *
7969  *	Returns non-zero if an error is encountered
7970  *	Returns 0 if no error
7971  */
7972 
7973 static int
7974 _ii_ab_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
7975 {
7976 	int rc;
7977 	nsc_off_t fba;
7978 	nsc_buf_t *tmp = NULL;
7979 
7980 	if (ip->bi_flags & DSW_BMPOFFLINE)
7981 		return (EIO);
7982 
7983 	fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7984 	chunk %= FBA_SIZE(1) * DSW_BITS;
7985 	II_READ_START(ip, bitmap);
7986 	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7987 	II_READ_END(ip, bitmap, rc, 1);
7988 	if (!II_SUCCESS(rc)) {
7989 		if (tmp)
7990 			(void) nsc_free_buf(tmp);
7991 		_ii_error(ip, DSW_BMPOFFLINE);
7992 		return (rc);
7993 	}
7994 	for (; nchunks-- > 0; chunk++) {
7995 		DSW_BIT_CLR(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7996 		    chunk%DSW_BITS);
7997 		if (ip->bi_copybits > 0)
7998 			ip->bi_copybits--;
7999 	}
8000 
8001 	II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
8002 	(void) nsc_free_buf(tmp);
8003 	if (!II_SUCCESS(rc)) {
8004 		_ii_error(ip, DSW_BMPOFFLINE);
8005 		return (rc);
8006 	}
8007 
8008 	return (0);
8009 }
8010 
8011 /*
8012  * _ii_ab_fill_copy_bmp
8013  *	Fills the copy bitmap with 1's.
8014  *
8015  *	Returns non-zero if an error is encountered
8016  *	Returns 0 if no error
8017  */
8018 
8019 static int
8020 _ii_ab_fill_copy_bmp(_ii_info_t *ip)
8021 {
8022 	int rc;
8023 	nsc_off_t fba;
8024 	nsc_buf_t *tmp;
8025 	unsigned char *p;
8026 	int i, j;
8027 
8028 	if (ip->bi_flags & DSW_BMPOFFLINE)
8029 		return (EIO);
8030 
8031 	fba = ip->bi_copyfba;
8032 	for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
8033 		tmp = NULL;
8034 		rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_WRBUF, &tmp);
8035 		if (!II_SUCCESS(rc)) {
8036 			if (tmp)
8037 				(void) nsc_free_buf(tmp);
8038 			_ii_error(ip, DSW_BMPOFFLINE);
8039 			return (rc);
8040 		}
8041 		p = (unsigned char *)tmp->sb_vec->sv_addr;
8042 		for (j = FBA_SIZE(1); j-- > 0; p++)
8043 			*p = (unsigned char)0xff;
8044 		II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
8045 		if (!II_SUCCESS(rc)) {
8046 			_ii_error(ip, DSW_BMPOFFLINE);
8047 			(void) nsc_free_buf(tmp);
8048 			return (rc);
8049 		}
8050 		(void) nsc_free_buf(tmp);
8051 	}
8052 
8053 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8054 
8055 	return (0);
8056 }
8057 
8058 /*
8059  * _ii_ab_load_bmp
8060  *	Load bitmap from persistent storage.
8061  */
8062 
8063 static int
8064 _ii_ab_load_bmp(_ii_info_t *ip, int flag)
8065 /* ARGSUSED */
8066 {
8067 	if (ip->bi_flags & DSW_BMPOFFLINE)
8068 		return (EIO);
8069 
8070 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8071 
8072 	return (0);
8073 }
8074 
/*
 * _ii_ab_next_copy_bit
 *	Find next set copy bit.
 *
 * Returns the next bits set in the copy bitmap, with the corresponding chunks
 * locked. Used to avoid having to reread the same bit map block as each bit
 * is tested.
 *
 * The search covers chunks [startchunk, maxchunk).  The return value is the
 * first chunk found with its copy bit set, and *got is the number of
 * consecutive chunks, starting there, that have been locked (at most
 * "wanted", and never crossing the end of the bitmap FBA that holds the
 * first chunk).
 *
 * Returns maxchunk + 1, with *got == 0, if no set bit is found, if the
 * bitmap is offline, or if reading the bitmap fails.
 */

static chunkid_t
_ii_ab_next_copy_bit(_ii_info_t *ip, chunkid_t startchunk, chunkid_t maxchunk,
	int wanted, int *got)
{
	chunkid_t rc;
	nsc_off_t fba;
	chunkid_t chunk;
	int bits_per_fba = FBA_SIZE(1) * DSW_BITS;
	int high;
	chunkid_t nextchunk;
	nsc_buf_t *tmp = NULL;

	*got = 0;
again:
	/* re-checked on every restart, not just on entry */
	if (ip->bi_flags & DSW_BMPOFFLINE)
		return (maxchunk + 1);

	while (startchunk < maxchunk) {
		tmp = NULL;
		/* read the one FBA of the copy bitmap covering startchunk */
		fba = ip->bi_copyfba + startchunk / bits_per_fba;
		chunk = startchunk % bits_per_fba;
		II_READ_START(ip, bitmap);
		rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
		II_READ_END(ip, bitmap, rc, 1);
		if (!II_SUCCESS(rc)) {
			if (tmp)
				(void) nsc_free_buf(tmp);
			_ii_error(ip, DSW_BMPOFFLINE);
			return (maxchunk + 1);
		}
		/*
		 * high is one past the last chunk whose bit lives in this
		 * FBA, capped at maxchunk.
		 */
		high = startchunk + bits_per_fba - startchunk%bits_per_fba;
		if (high > maxchunk)
			high = maxchunk;
		/* chunk indexes bits within the FBA; startchunk is absolute */
		for (; startchunk < high; chunk++, startchunk++) {
			if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
			    chunk%DSW_BITS)) {
				/*
				 * trylock won't sleep so can use while
				 * holding the buf.
				 */
				if (!_ii_trylock_chunk(ip, startchunk)) {
					/*
					 * Contended: release the buf before
					 * calling _ii_lock_chunk(), then
					 * re-test the bit once the lock is
					 * held.
					 */
					(void) nsc_free_buf(tmp);
					_ii_lock_chunk(ip, startchunk);
					if (_ii_ab_tst_copy_bit(ip, startchunk)
					    != 1) {
						/*
						 * another process copied this
						 * chunk while we were acquiring
						 * the chunk lock.
						 *
						 * (This path is also taken
						 * when the re-test returns
						 * -1 for an error.)
						 */
						_ii_unlock_chunk(ip,
						    startchunk);
						DTRACE_PROBE(
						    _ii_ab_next_copy_bit_again);
						/* startchunk is unchanged */
						goto again;
					}
					/* single chunk, locked the slow way */
					*got = 1;
					DTRACE_PROBE(_ii_ab_next_copy_bit_end);
					return (startchunk);
				}
				/*
				 * First chunk locked; extend the run with
				 * further set bits in the same FBA for as
				 * long as trylock succeeds.
				 */
				*got = 1;
				nextchunk = startchunk + 1;
				chunk++;
				for (; --wanted > 0 && nextchunk < high;
				    nextchunk++, chunk++) {
					if (!DSW_BIT_ISSET(tmp->sb_vec->sv_addr
					    [chunk/DSW_BITS], chunk%DSW_BITS)) {
						break;	/* end of bit run */
					}
					if (_ii_trylock_chunk(ip, nextchunk))
						(*got)++;
					else
						break;
				}
				(void) nsc_free_buf(tmp);
				DTRACE_PROBE(_ii_ab_next_copy_bit);
				return (startchunk);
			}
		}
		/* no set bit in this FBA; move on to the next one */
		(void) nsc_free_buf(tmp);
	}

	return (maxchunk + 1);
}
8168 
8169 /*
8170  * _ii_ab_save_bmp
8171  *	Save bitmap to persistent storage.
8172  */
8173 
8174 static int
8175 _ii_ab_save_bmp(_ii_info_t *ip, int flag)
8176 /* ARGSUSED */
8177 {
8178 	if (ip->bi_flags & DSW_BMPOFFLINE)
8179 		return (EIO);
8180 
8181 	return (0);
8182 }
8183 
8184 /*
8185  * _ii_ab_change_bmp
8186  *	copy change bitmap to memory
8187  */
8188 
8189 static int
8190 _ii_ab_change_bmp(_ii_info_t *ip, unsigned char *ptr)
8191 /* ARGSUSED */
8192 {
8193 	int	bm_size;
8194 	int	i, j, fba;
8195 	int	rc;
8196 	unsigned char *p;
8197 	nsc_buf_t *tmp = NULL;
8198 
8199 	if (ip->bi_flags & DSW_BMPOFFLINE)
8200 		return (EIO);
8201 	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8202 
8203 	rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
8204 	    ptr, bm_size);
8205 	if (!II_SUCCESS(rc)) {
8206 		_ii_error(ip, DSW_BMPOFFLINE);
8207 		return (rc);
8208 	}
8209 
8210 	fba = ip->bi_copyfba;
8211 	for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
8212 		tmp = NULL;
8213 		II_READ_START(ip, bitmap);
8214 		rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
8215 		II_READ_END(ip, bitmap, rc, 1);
8216 		if (!II_SUCCESS(rc)) {
8217 			if (tmp)
8218 				(void) nsc_free_buf(tmp);
8219 			_ii_error(ip, DSW_BMPOFFLINE);
8220 			return (rc);
8221 		}
8222 		p = (unsigned char *)tmp->sb_vec->sv_addr;
8223 		for (j = FBA_SIZE(1); j-- > 0; p++)
8224 			*ptr |= *p;
8225 		(void) nsc_free_buf(tmp);
8226 	}
8227 
8228 	return (0);
8229 }
8230 
8231 /*
8232  * Count bits set in the bit map.
8233  */
8234 static int
8235 _ii_ab_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
8236 int bm_size)
8237 {
8238 	nsc_size_t	last_fba;
8239 	nsc_buf_t *tmp;
8240 	nsc_vec_t *sd_vecp;
8241 	nsc_off_t	fba_pos;
8242 	int	buf_fba_len;
8243 	int	buf_byte_len;
8244 	int	co_len;
8245 	int	i;
8246 	unsigned int j, k;
8247 	unsigned char *cp;
8248 	int	rc;
8249 
8250 	*counter = 0;
8251 	if (ip->bi_flags & DSW_BMPOFFLINE)
8252 		return (EIO);
8253 
8254 	last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
8255 
8256 	for (fba_pos = bm_offset; fba_pos < last_fba && bm_size > 0;
8257 	    fba_pos += DSW_CBLK_FBA) {
8258 		tmp = NULL;
8259 		buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
8260 		    DSW_CBLK_FBA : last_fba - fba_pos;
8261 		II_READ_START(ip, bitmap);
8262 		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
8263 		    NSC_RDBUF, &tmp);
8264 		II_READ_END(ip, bitmap, rc, 1);
8265 		if (!II_SUCCESS(rc)) {
8266 			if (tmp)
8267 				(void) nsc_free_buf(tmp);
8268 
8269 			_ii_error(ip, DSW_BMPOFFLINE);
8270 			return (EIO);
8271 		}
8272 
8273 		/* count each sd_vec's worth of data */
8274 		buf_byte_len = FBA_SIZE(buf_fba_len);
8275 		for (sd_vecp = tmp->sb_vec;
8276 		    buf_byte_len > 0 && bm_size > 0;
8277 		    sd_vecp++) {
8278 			co_len = (bm_size > sd_vecp->sv_len) ?
8279 			    sd_vecp->sv_len : bm_size;
8280 			cp = sd_vecp->sv_addr;
8281 			for (i = k = 0; i < co_len; i++)
8282 				for (j = (unsigned)*cp++; j; j &= j - 1)
8283 					k++;
8284 			*counter += k;
8285 			bm_size -= co_len;
8286 			buf_byte_len -= co_len;
8287 		}
8288 
8289 
8290 		(void) nsc_free_buf(tmp);
8291 	}
8292 
8293 	return (0);
8294 }
8295 
8296 /*
8297  * OR the bitmaps as part of a join operation
8298  */
8299 static int
8300 _ii_ab_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
8301 {
8302 	int rc;
8303 	nsc_size_t len;
8304 	nsc_size_t size;
8305 	nsc_buf_t *dest_tmp, *src_tmp;
8306 	nsc_off_t src_fba_pos;
8307 
8308 	if ((src_ip->bi_flags & DSW_BMPOFFLINE) ||
8309 	    (dest_ip->bi_flags & DSW_BMPOFFLINE))
8310 		return (EIO);
8311 
8312 	size = DSW_BM_FBA_LEN(src_ip->bi_size) + src_ip->bi_shdfba;
8313 	for (src_fba_pos = src_ip->bi_shdfba; src_fba_pos < size;
8314 	    src_fba_pos += DSW_CBLK_FBA) {
8315 		src_tmp = NULL;
8316 		len = src_fba_pos + DSW_CBLK_FBA < size ?
8317 		    DSW_CBLK_FBA : size - src_fba_pos;
8318 		II_READ_START(src_ip, bitmap);
8319 		rc = nsc_alloc_buf(src_ip->bi_bmpfd, src_fba_pos, len,
8320 		    NSC_RDWRBUF, &src_tmp);
8321 		II_READ_END(src_ip, bitmap, rc, len);
8322 		if (!II_SUCCESS(rc)) {
8323 			if (src_tmp)
8324 				(void) nsc_free_buf(src_tmp);
8325 
8326 			_ii_error(src_ip, DSW_BMPOFFLINE);
8327 			return (rc);
8328 		}
8329 
8330 		dest_tmp = NULL;
8331 		II_READ_START(dest_ip, bitmap);
8332 		rc = nsc_alloc_buf(dest_ip->bi_bmpfd, src_fba_pos, len,
8333 		    NSC_RDWRBUF, &dest_tmp);
8334 		II_READ_END(dest_ip, bitmap, rc, len);
8335 		if (!II_SUCCESS(rc)) {
8336 			(void) nsc_free_buf(src_tmp);
8337 			if (dest_tmp)
8338 				(void) nsc_free_buf(dest_tmp);
8339 
8340 			_ii_error(dest_ip, DSW_BMPOFFLINE);
8341 			return (rc);
8342 		}
8343 		rc = _ii_nsc_or(src_tmp, dest_tmp, src_fba_pos, src_fba_pos,
8344 		    len);
8345 		if (II_SUCCESS(rc)) {
8346 			II_NSC_WRITE(dest_ip, bitmap, rc, dest_tmp,
8347 			    src_fba_pos, len, 0);
8348 		}
8349 
8350 		(void) nsc_free_buf(src_tmp);
8351 		(void) nsc_free_buf(dest_tmp);
8352 		if (!II_SUCCESS(rc)) {
8353 			_ii_error(dest_ip, DSW_BMPOFFLINE);
8354 			return (rc);
8355 		}
8356 	}
8357 
8358 	dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8359 
8360 	return (0);
8361 
8362 }
8363 
/*
 * Bitmap operations vector built from the _ii_ab_* routines, which reach
 * the bitmap through nsc_alloc_buf() on ip->bi_bmpfd for each operation.
 *
 * The initializers are positional, so their order has to match the member
 * order of _ii_bmp_ops_t (declared outside this part of the file).
 */
static _ii_bmp_ops_t alloc_buf_bmp = {
	_ii_ab_co_bmp,
	_ii_ab_ci_bmp,
	_ii_ab_zerobm,
	_ii_ab_copybm,
	_ii_ab_orbm,
	_ii_ab_tst_shd_bit,
	_ii_ab_set_shd_bit,
	_ii_ab_tst_copy_bit,
	_ii_ab_set_copy_bit,
	_ii_ab_clr_copy_bits,
	_ii_ab_next_copy_bit,
	_ii_ab_fill_copy_bmp,
	_ii_ab_load_bmp,
	_ii_ab_save_bmp,
	_ii_ab_change_bmp,
	_ii_ab_cnt_bits,
	_ii_ab_join_bmp
};
8383 
8384 
8385 /*
8386  * Copyout the bit map.
8387  */
8388 static int
8389 _ii_km_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
8390     int user_bm_size)
8391 {
8392 	int	start_offset;
8393 	int	bm_size;
8394 	size_t	co_len;
8395 	nsc_off_t	last_fba;
8396 
8397 	/* First calculate the size of the shadow and copy bitmaps */
8398 	co_len = DSW_BM_FBA_LEN(ip->bi_size);
8399 	ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
8400 
8401 	/* Are we in the ranges of the various bitmaps/indexes? */
8402 	if (bm_offset < ip->bi_shdfba)
8403 		return (EIO);
8404 	else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
8405 		/*EMPTY*/;
8406 	else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
8407 		/*EMPTY*/;
8408 	else if ((ip->bi_flags & DSW_TREEMAP) &&
8409 	    (bm_offset < (last_fba = last_fba + (co_len * 32))))
8410 		/*EMPTY*/;
8411 	else return (EIO);
8412 
8413 	if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
8414 		return (EIO);
8415 
8416 	start_offset = FBA_SIZE(bm_offset);
8417 	bm_size = FBA_SIZE(last_fba);
8418 
8419 	co_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
8420 	if (copyout(ip->bi_bitmap + start_offset, user_bm, co_len))
8421 		return (EFAULT);
8422 
8423 	return (0);
8424 }
8425 
8426 /*
8427  * Copyin a bit map and or with differences bitmap.
8428  */
8429 static int
8430 _ii_km_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
8431     int user_bm_size)
8432 {
8433 	unsigned char *tmp_buf;
8434 	unsigned char *dest;
8435 	unsigned char *p;
8436 	size_t	tmp_size;
8437 	int	n;
8438 	int	start_offset;
8439 	int	bm_size;
8440 	size_t	ci_len;
8441 	int	rc = 0;
8442 
8443 	start_offset = FBA_SIZE(bm_offset);
8444 	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8445 
8446 	tmp_buf = NULL;
8447 	tmp_size = FBA_SIZE(1);
8448 
8449 	tmp_buf = kmem_alloc(tmp_size, KM_SLEEP);
8450 	start_offset = FBA_SIZE(bm_offset);
8451 	dest = ip->bi_bitmap + start_offset;
8452 	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8453 
8454 	ci_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
8455 	while (ci_len > 0) {
8456 		n = (tmp_size > ci_len) ? ci_len : tmp_size;
8457 		if (copyin(user_bm, tmp_buf, n)) {
8458 			rc = EFAULT;
8459 			break;
8460 		}
8461 		user_bm += n;
8462 		for (p = tmp_buf; n--> 0; ci_len--)
8463 			*dest++ |= *p++;
8464 	}
8465 	if (tmp_buf)
8466 		kmem_free(tmp_buf, tmp_size);
8467 
8468 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8469 
8470 	return (rc);
8471 }
8472 
8473 /*
8474  * Completely zero the bit map.
8475  */
8476 static int
8477 _ii_km_zerobm(_ii_info_t *ip)
8478 {
8479 	int start_offset = FBA_SIZE(ip->bi_shdfba);
8480 	int len;
8481 
8482 	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8483 	mutex_enter(&ip->bi_bmpmutex);
8484 	bzero(ip->bi_bitmap+start_offset, len);
8485 	mutex_exit(&ip->bi_bmpmutex);
8486 
8487 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8488 
8489 	return (0);
8490 }
8491 
8492 
8493 /*
8494  * Copy shadow bitmap to copy bitmap
8495  */
8496 static int
8497 _ii_km_copybm(_ii_info_t *ip)
8498 {
8499 	int copy_offset, shd_offset;
8500 	int len;
8501 
8502 	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8503 	shd_offset = FBA_SIZE(ip->bi_shdfba);
8504 	copy_offset = FBA_SIZE(ip->bi_copyfba);
8505 	mutex_enter(&ip->bi_bmpmutex);
8506 	bcopy(ip->bi_bitmap+shd_offset, ip->bi_bitmap+copy_offset, len);
8507 	mutex_exit(&ip->bi_bmpmutex);
8508 
8509 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8510 
8511 	return (0);
8512 }
8513 
8514 
8515 /*
8516  * Or the shadow bitmap in to the copy bitmap, clear the
8517  * shadow bitmap.
8518  */
8519 static int
8520 _ii_km_orbm(_ii_info_t *ip)
8521 {
8522 	unsigned char *copy, *shd;
8523 	int copy_offset, shd_offset;
8524 	int len;
8525 
8526 	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8527 	shd_offset = FBA_SIZE(ip->bi_shdfba);
8528 	copy_offset = FBA_SIZE(ip->bi_copyfba);
8529 	shd = ip->bi_bitmap + shd_offset;
8530 	copy = ip->bi_bitmap + copy_offset;
8531 
8532 	mutex_enter(&ip->bi_bmpmutex);
8533 	while (len-- > 0)
8534 		*copy++ |= *shd++;
8535 	mutex_exit(&ip->bi_bmpmutex);
8536 
8537 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8538 
8539 	return (0);
8540 }
8541 
8542 /*
8543  * _ii_km_tst_shd_bit
8544  *	Determine if a chunk has been copied to the shadow device
8545  *
8546  * Calling/Exit State:
8547  *	Returns 1 if the modified bit has been set for the shadow device,
8548  *	otherwise returns 0.
8549  */
8550 
8551 static int
8552 _ii_km_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
8553 {
8554 	unsigned char *bmp;
8555 	int bmp_offset;
8556 	int rc;
8557 
8558 	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8559 	bmp = ip->bi_bitmap + bmp_offset;
8560 
8561 	mutex_enter(&ip->bi_bmpmutex);
8562 	rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8563 	mutex_exit(&ip->bi_bmpmutex);
8564 
8565 	return (rc);
8566 }
8567 
8568 
8569 /*
8570  * _ii_km_set_shd_bit
8571  *	Records that a chunk has been copied to the shadow device
8572  */
8573 
8574 static int
8575 _ii_km_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
8576 {
8577 	unsigned char *bmp;
8578 	int bmp_offset;
8579 
8580 	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8581 	bmp = ip->bi_bitmap + bmp_offset;
8582 
8583 	mutex_enter(&ip->bi_bmpmutex);
8584 	if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
8585 		DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8586 		if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
8587 			ip->bi_shdbits++;
8588 	}
8589 	mutex_exit(&ip->bi_bmpmutex);
8590 
8591 	return (0);
8592 }
8593 
8594 /*
8595  * _ii_km_tst_copy_bit
8596  *	Determine if a chunk needs to be copied during updates.
8597  *
8598  * Calling/Exit State:
8599  *	Returns 1 if the copy bit for the chunk is set,
8600  *	otherwise returns 0
8601  */
8602 
8603 static int
8604 _ii_km_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
8605 {
8606 	unsigned char *bmp;
8607 	int bmp_offset;
8608 	int rc;
8609 
8610 	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8611 	bmp = ip->bi_bitmap + bmp_offset;
8612 
8613 	mutex_enter(&ip->bi_bmpmutex);
8614 	rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8615 	mutex_exit(&ip->bi_bmpmutex);
8616 
8617 	return (rc);
8618 }
8619 
8620 
8621 /*
8622  * _ii_km_set_copy_bit
8623  *	Records that a chunk has been copied to the shadow device
8624  */
8625 
8626 static int
8627 _ii_km_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
8628 {
8629 	unsigned char *bmp;
8630 	int bmp_offset;
8631 
8632 	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8633 	bmp = ip->bi_bitmap + bmp_offset;
8634 
8635 	mutex_enter(&ip->bi_bmpmutex);
8636 	if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
8637 		DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8638 		if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
8639 			ip->bi_copybits++;
8640 	}
8641 	mutex_exit(&ip->bi_bmpmutex);
8642 
8643 	return (0);
8644 }
8645 
8646 
8647 /*
8648  * _ii_km_clr_copy_bits
8649  *	Records that a chunk has been cleared on the shadow device
8650  */
8651 
8652 static int
8653 _ii_km_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
8654 {
8655 	unsigned char *bmp;
8656 	int bmp_offset;
8657 
8658 	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8659 	bmp = ip->bi_bitmap + bmp_offset;
8660 
8661 	mutex_enter(&ip->bi_bmpmutex);
8662 	for (; nchunks-- > 0; chunk++) {
8663 		DSW_BIT_CLR(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8664 		if (ip->bi_copybits > 0)
8665 			ip->bi_copybits--;
8666 	}
8667 	mutex_exit(&ip->bi_bmpmutex);
8668 
8669 	return (0);
8670 }
8671 
8672 /*
8673  * _ii_km_fill_copy_bmp
8674  *	Fills the copy bitmap with 1's.
8675  */
8676 
8677 static int
8678 _ii_km_fill_copy_bmp(_ii_info_t *ip)
8679 {
8680 	int len;
8681 	unsigned char *bmp;
8682 	int bmp_offset;
8683 
8684 	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8685 	bmp = ip->bi_bitmap + bmp_offset;
8686 
8687 	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8688 
8689 	mutex_enter(&ip->bi_bmpmutex);
8690 	while (len-- > 0)
8691 		*bmp++ = (unsigned char)0xff;
8692 	mutex_exit(&ip->bi_bmpmutex);
8693 
8694 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8695 
8696 	return (0);
8697 }
8698 
8699 /*
8700  * _ii_km_load_bmp
8701  *	Load bitmap from persistent storage.
8702  */
8703 
8704 static int
8705 _ii_km_load_bmp(_ii_info_t *ip, int flag)
8706 {
8707 	nsc_off_t bmp_offset;
8708 	nsc_size_t bitmap_size;
8709 	int rc;
8710 
8711 	if (ip->bi_flags & DSW_BMPOFFLINE)
8712 		return (EIO);
8713 
8714 	if (ip->bi_bitmap == NULL) {
8715 		bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
8716 		    ip->bi_shdfba);
8717 		ip->bi_bitmap = nsc_kmem_zalloc(bitmap_size, KM_SLEEP,
8718 		    _ii_local_mem);
8719 	}
8720 	if (flag)
8721 		return (0);		/* just create an empty bitmap */
8722 	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8723 	rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
8724 	    ip->bi_bitmap + bmp_offset,
8725 	    2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
8726 	if (!II_SUCCESS(rc))
8727 		_ii_error(ip, DSW_BMPOFFLINE);
8728 
8729 	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8730 
8731 	return (rc);
8732 }
8733 
8734 /*
8735  * _ii_km_save_bmp
8736  *	Save bitmap to persistent storage.
8737  */
8738 
8739 static int
8740 _ii_km_save_bmp(_ii_info_t *ip, int flag)
8741 {
8742 	int bmp_offset;
8743 	int bitmap_size;
8744 	int rc;
8745 
8746 	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8747 	if (ip->bi_flags & DSW_BMPOFFLINE)
8748 		rc = EIO;
8749 	else {
8750 		rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_WRBUF,
8751 		    ip->bi_shdfba, ip->bi_bitmap + bmp_offset,
8752 		    2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
8753 		if (!II_SUCCESS(rc))
8754 			_ii_error(ip, DSW_BMPOFFLINE);
8755 	}
8756 
8757 	if (flag && ip->bi_bitmap) {		/* dispose of bitmap memory */
8758 		bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
8759 		    ip->bi_shdfba);
8760 		nsc_kmem_free(ip->bi_bitmap, bitmap_size);
8761 		ip->bi_bitmap = NULL;
8762 	}
8763 
8764 	return (rc);
8765 }
8766 
/*
 * _ii_km_next_copy_bit
 *	Find next set copy bit.
 *
 * Returns the next bits set in the copy bitmap, with the corresponding chunks
 * locked. Used to cut down on the number of times the bmpmutex is acquired.
 *
 * The search covers chunks [chunk, maxchunk) of the in-memory copy bitmap.
 * The return value is the first chunk found with its copy bit set, and *got
 * is the number of consecutive chunks, starting there, that have been locked
 * (at most "want").
 *
 * Returns maxchunk + 1, with *got == 0, if no set bit is found.
 */

static chunkid_t
_ii_km_next_copy_bit(_ii_info_t *ip, chunkid_t chunk, chunkid_t maxchunk,
	int want, int *got)
{
	unsigned char *bmp;
	int bmp_offset;
	int nextchunk;

	*got = 0;
	bmp_offset = FBA_SIZE(ip->bi_copyfba);
	bmp = ip->bi_bitmap + bmp_offset;

	mutex_enter(&ip->bi_bmpmutex);
	for (; chunk < maxchunk; chunk++) {
		if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS)) {
			/*
			 * trylock won't sleep so can use while
			 * holding bi_bmpmutex.
			 */
			if (!_ii_trylock_chunk(ip, chunk)) {
				/*
				 * Contended: drop bi_bmpmutex before calling
				 * _ii_lock_chunk(), and hand back just this
				 * one chunk.
				 *
				 * NOTE(review): unlike _ii_ab_next_copy_bit(),
				 * the copy bit is not re-tested once the lock
				 * is held - confirm this is intended.
				 */
				mutex_exit(&ip->bi_bmpmutex);
				_ii_lock_chunk(ip, chunk);
				*got = 1;

				DTRACE_PROBE(_ii_km_next_copy_bit);

				return (chunk);
			}
			/*
			 * First chunk locked; extend the run with the
			 * following set bits for as long as trylock succeeds
			 * and fewer than "want" chunks are held.
			 */
			*got = 1;
			for (nextchunk = chunk + 1;
			    *got < want && nextchunk < maxchunk; nextchunk++) {
				if (!DSW_BIT_ISSET(bmp[nextchunk/DSW_BITS],
				    nextchunk%DSW_BITS))
					break;
				if (_ii_trylock_chunk(ip, nextchunk))
					(*got)++;
				else
					break;
			}
			mutex_exit(&ip->bi_bmpmutex);

			DTRACE_PROBE(_ii_km_next_copy_bit);
			return (chunk);
		}
	}
	mutex_exit(&ip->bi_bmpmutex);

	return (maxchunk + 1);
}
8824 
8825 /*
8826  * _ii_km_change_bmp
8827  *	copy change bitmap to memory
8828  */
8829 
8830 static int
8831 _ii_km_change_bmp(_ii_info_t *ip, unsigned char *ptr)
8832 /* ARGSUSED */
8833 {
8834 	int	start_offset;
8835 	int	bm_size;
8836 	unsigned char *q;
8837 
8838 	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8839 
8840 	start_offset = FBA_SIZE(ip->bi_shdfba);
8841 	bcopy(ip->bi_bitmap + start_offset, ptr, bm_size);
8842 
8843 	start_offset = FBA_SIZE(ip->bi_copyfba);
8844 	q = ip->bi_bitmap + start_offset;
8845 	while (bm_size-- > 0)
8846 		*ptr |= *q;
8847 
8848 	return (0);
8849 }
8850 
8851 /*
8852  * Count bits set in the bit map.
8853  */
8854 static int
8855 _ii_km_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
8856     int bm_size)
8857 {
8858 	int	start_offset;
8859 	int	i;
8860 	nsc_size_t j, k;
8861 	unsigned char *cp;
8862 
8863 	start_offset = FBA_SIZE(bm_offset);
8864 
8865 	cp = ip->bi_bitmap + start_offset;
8866 	for (i = k = 0; i < bm_size; i++)
8867 		for (j = (unsigned)*cp++; j; j &= j - 1)
8868 			k++;
8869 	*counter = k;
8870 
8871 	return (0);
8872 }
8873 
8874 /*
8875  * Or the shadow bitmap in to the copy bitmap, clear the
8876  * shadow bitmap.
8877  */
8878 static int
8879 _ii_km_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
8880 {
8881 	uchar_t *dest, *src;
8882 	nsc_size_t bm_size;
8883 
8884 	dest = dest_ip->bi_bitmap + FBA_SIZE(dest_ip->bi_shdfba);
8885 	src = src_ip->bi_bitmap + FBA_SIZE(src_ip->bi_shdfba);
8886 	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(dest_ip->bi_size));
8887 
8888 	while (bm_size-- > 0)
8889 		*dest++ |= *src++;
8890 
8891 	dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8892 
8893 	return (0);
8894 }
8895 
/*
 * Bitmap operations vector made up of the _ii_km_* routines, i.e. the
 * implementation that works on a bitmap held in kernel memory
 * (ip->bi_bitmap).
 *
 * The initializer is positional: entries must stay in the same order as
 * the members of _ii_bmp_ops_t.
 */
static _ii_bmp_ops_t kmem_buf_bmp = {
	_ii_km_co_bmp,
	_ii_km_ci_bmp,
	_ii_km_zerobm,
	_ii_km_copybm,
	_ii_km_orbm,
	_ii_km_tst_shd_bit,
	_ii_km_set_shd_bit,
	_ii_km_tst_copy_bit,
	_ii_km_set_copy_bit,
	_ii_km_clr_copy_bits,
	_ii_km_next_copy_bit,
	_ii_km_fill_copy_bmp,
	_ii_km_load_bmp,
	_ii_km_save_bmp,
	_ii_km_change_bmp,
	_ii_km_cnt_bits,
	_ii_km_join_bmp
};
8915 
8916 
8917 static int
8918 ii_read_volume(_ii_info_t *ip, int mst_src, nsc_buf_t *srcbuf,
8919 	nsc_buf_t *dstbuf, chunkid_t chunk_num, nsc_off_t fba, nsc_size_t len)
8920 {
8921 	int rc;
8922 	nsc_buf_t *tmp;
8923 	nsc_off_t mapped_fba;
8924 	chunkid_t mapped_chunk;
8925 	int overflow;
8926 
8927 	if (mst_src || (ip->bi_flags&DSW_TREEMAP) == 0) {
8928 		/* simple read with optional copy */
8929 		if (mst_src) {
8930 			II_NSC_READ(ip, master, rc, srcbuf, fba, len, 0);
8931 		} else {
8932 			II_NSC_READ(ip, shadow, rc, srcbuf, fba, len, 0);
8933 		}
8934 		if (dstbuf && II_SUCCESS(rc)) {
8935 			rc = nsc_copy(srcbuf, dstbuf, fba, fba, len);
8936 		}
8937 
8938 		return (rc);
8939 	}
8940 	/* read from mapped shadow into final buffer */
8941 	mapped_chunk = ii_tsearch(ip, chunk_num);
8942 	if (mapped_chunk == II_NULLNODE)
8943 		return (EIO);
8944 	overflow = II_ISOVERFLOW(mapped_chunk);
8945 	if (overflow)
8946 		mapped_chunk = II_2OVERFLOW(mapped_chunk);
8947 	/* convert chunk number from tsearch into final fba */
8948 	mapped_fba = DSW_CHK2FBA(mapped_chunk) + (fba % DSW_SIZE);
8949 	tmp = NULL;
8950 	if (overflow) {
8951 		(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
8952 		II_READ_START(ip, overflow);
8953 		rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
8954 		II_READ_END(ip, overflow, rc, len);
8955 	} else {
8956 		II_READ_START(ip, shadow);
8957 		rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
8958 		II_READ_END(ip, shadow, rc, len);
8959 	}
8960 	if (II_SUCCESS(rc)) {
8961 		if (dstbuf == NULL)
8962 			dstbuf = srcbuf;
8963 		rc = nsc_copy(tmp, dstbuf, mapped_fba, fba, len);
8964 		(void) nsc_free_buf(tmp);
8965 	}
8966 	if (overflow)
8967 		nsc_release(OVRFD(ip));
8968 
8969 	return (rc);
8970 }
8971 
/*
 * _ii_fill_buf
 *	Read data from the required device
 *
 * Calling/Exit State:
 *	Returns 0 if the data was read successfully, otherwise
 *	error code.
 *	A read-ahead request (NSC_RDAHEAD in flag) is not performed at all
 *	and returns NSC_DONE.
 *
 * Description:
 *	Reads the data from fba_pos for length fba_len from the
 *	required device. This data may be a mix of data from the master
 *	device and the shadow device, depending on the state of the
 *	bitmaps.
 *
 *	The request is processed one chunk at a time.  For each chunk the
 *	copy/shadow bits decide whether the data comes from the volume the
 *	caller opened (read straight into *handle) or from the paired
 *	volume (read via *handle2 and copied into *handle).
 *
 *	bfd	- fd the read was issued on; bfd->ii_shd is non-zero for a
 *		  shadow fd.
 *	handle	- buffer on the caller's own volume; receives the data.
 *	handle2	- buffer on the paired volume; *handle2 may be NULL, in
 *		  which case a chunk that must come from the pair fails
 *		  with EIO (unless the set is DSW_TREEMAP).
 */

static int
_ii_fill_buf(ii_fd_t *bfd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
    nsc_buf_t **handle, nsc_buf_t **handle2)
{
	_ii_info_t *ip = bfd->ii_info;
	_ii_info_t *xip;
	int second_shd = 0;	/* set once ip has been switched to a sibling */
	nsc_off_t temp_fba;
	nsc_size_t temp_len;
	nsc_size_t bmp_len;	/* length of the piece inside current chunk */
	chunkid_t chunk_num;
	int rc;
	int fill_from_pair;
	int rtype = SHDR|BMP;
	nsc_buf_t *second_buf = NULL;

	if (flag&NSC_RDAHEAD)
		return (NSC_DONE);

	chunk_num = fba_pos / DSW_SIZE;
	temp_fba = fba_pos;
	temp_len = fba_len;

	/*
	 * If the master is being updated from a shadow we need to fill from
	 * the correct shadow volume.
	 *
	 * From here on 'ip' refers to that sibling set, and handle2 is
	 * redirected to a buffer allocated on the sibling's shadow volume.
	 * Both the buffer and the device reserve are dropped at 'out'.
	 */
	if (NSHADOWS(ip) && bfd->ii_shd == 0) {
		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
			if (xip == ip)
				continue;
			if (xip->bi_flags &DSW_COPYINGS) {
				second_shd = 1;
				ip = xip;
				if ((rc = _ii_rsrv_devs(ip, rtype,
				    II_INTERNAL)) != 0)
					return (EIO);
				rc = nsc_alloc_buf(SHDFD(ip), fba_pos, fba_len,
				    (flag&NSC_RDAHEAD)|NSC_MIXED, &second_buf);
				if (!II_SUCCESS(rc)) {
					rc = EIO;
					goto out;
				}
				handle2 = &second_buf;
				break;
			}
		}
	}

	while (temp_len > 0) {
		/* clip this pass to the end of the current chunk */
		if ((temp_fba + temp_len) > DSW_CHK2FBA(chunk_num + 1)) {
			bmp_len = DSW_CHK2FBA(chunk_num + 1) - temp_fba;
			temp_len -= bmp_len;
		} else {
			bmp_len = temp_len;
			temp_len = 0;
		}

		fill_from_pair = 0;

		/*
		 * Copy towards the master in progress: a shadow read of a
		 * chunk whose copy bit is set comes from the pair.
		 */
		if ((ip->bi_flags & DSW_COPYINGM) == DSW_COPYINGM) {
			rc = II_TST_COPY_BIT(ip, chunk_num);
			/* Treat a failed bitmap volume as a clear bit */
			if (rc > 0) {
				/* Copy bit set */
				if (bfd->ii_shd) {
					if (*handle2)
						fill_from_pair = 1;
					else {
						rc = EIO;
						goto out;
					}
				}
			}
		}
		/*
		 * Copy towards the shadow in progress: a master read of a
		 * chunk whose copy bit is set comes from the pair.
		 */
		if ((ip->bi_flags & DSW_COPYINGS) == DSW_COPYINGS) {
			rc = II_TST_COPY_BIT(ip, chunk_num);
			/* Treat a failed bitmap volume as a clear bit */
			if (rc > 0) {
				/* Copy bit set */
				if (bfd->ii_shd == 0) {
					if (*handle2 ||
					    (ip->bi_flags&DSW_TREEMAP))
						fill_from_pair = 1;
					else {
						rc = EIO;
						goto out;
					}
				}
			}
		}
		if (((ip->bi_flags & DSW_GOLDEN) == 0) && bfd->ii_shd) {
			/* Dependent shadow read */

			rc = II_TST_SHD_BIT(ip, chunk_num);
			if (rc < 0) {
				rc = EIO;
				goto out;
			}
			if (rc == 0) {
				/* Shadow bit clear */
				if (*handle2)
					fill_from_pair = 1;
				else {
					rc = EIO;
					goto out;
				}
			}
		}

		if (fill_from_pair) {
			/* it matters now */
			if (ip->bi_flags & (DSW_MSTOFFLINE | DSW_SHDOFFLINE)) {
				rc = EIO;
				goto out;
			}
			if (*handle2 == NULL &&
			    (ip->bi_flags&DSW_TREEMAP) == 0) {
				rc = EIO;
				goto out;
			}
			/* read the pair via *handle2, deliver into *handle */
			rc = ii_read_volume(ip, bfd->ii_shd,
			    *handle2, *handle, chunk_num, temp_fba, bmp_len);
			if (!II_SUCCESS(rc)) {
				/* both volumes are flagged on a failure here */
				_ii_error(ip, DSW_MSTOFFLINE);
				_ii_error(ip, DSW_SHDOFFLINE);
				goto out;
			}
		} else {
			if (bfd->ii_shd && (ip->bi_flags & DSW_SHDOFFLINE)) {
				rc = EIO;
				goto out;
			}
			if ((bfd->ii_shd == 0) &&
			    (ip->bi_flags & DSW_MSTOFFLINE)) {
				rc = EIO;
				goto out;
			}
			/* read the caller's own volume straight into *handle */
			rc = ii_read_volume(ip, !(bfd->ii_shd), *handle, NULL,
			    chunk_num, temp_fba, bmp_len);
			if (!II_SUCCESS(rc)) {
				if (bfd->ii_shd)
					_ii_error(ip, DSW_SHDOFFLINE);
				else
					_ii_error(ip, DSW_MSTOFFLINE);
				goto out;
			}
		}

		temp_fba += bmp_len;
		chunk_num++;
	}

	rc = 0;
out:
	/* undo the sibling-shadow setup done before the loop, if any */
	if (second_buf)
		(void) nsc_free_buf(second_buf);
	if (second_shd)
		_ii_rlse_devs(ip, rtype);

	return (rc);
}
9149 
9150 
9151 /*
9152  * _ii_shadow_write
9153  *	Perform any copy on write required by a write buffer request
9154  *
9155  * Calling/Exit State:
9156  *	Returns 0 on success, otherwise error code.
9157  *
9158  */
9159 
9160 static int
9161 _ii_shadow_write(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
9162 {
9163 	_ii_info_t *ip = bfd->ii_info;
9164 	chunkid_t	chunk_num;
9165 	int	rc;
9166 	int	flag;
9167 	int hanging;
9168 
9169 	DTRACE_PROBE2(_ii_shadow_write_start, nsc_off_t, pos, nsc_size_t, len);
9170 
9171 	/* fail immediately if config DB is unavailable */
9172 	if ((ip->bi_flags & DSW_CFGOFFLINE) == DSW_CFGOFFLINE) {
9173 		return (EIO);
9174 	}
9175 
9176 	chunk_num = pos / DSW_SIZE;
9177 
9178 	if (bfd->ii_shd)
9179 		flag = 0;		/* To shadow */
9180 	else
9181 		flag = CV_SHD2MST;	/* To master */
9182 
9183 	mutex_enter(&ip->bi_mutex);
9184 	ip->bi_shdref++;
9185 	mutex_exit(&ip->bi_mutex);
9186 	hanging = (ip->bi_flags&DSW_HANGING) != 0;
9187 
9188 	for (; (chunk_num >= 0) &&
9189 	    DSW_CHK2FBA(chunk_num) < (pos + len); chunk_num++) {
9190 
9191 		if (!hanging)
9192 			_ii_lock_chunk(ip, chunk_num);
9193 		rc = _ii_copy_on_write(ip, flag, chunk_num, 1);
9194 
9195 		/*
9196 		 * Set the shadow bit when a small shadow has overflowed so
9197 		 * that ii_read_volume can return an error if an attempt is
9198 		 * made to read that chunk.
9199 		 */
9200 		if (!hanging) {
9201 			if (rc == 0 ||
9202 			    (rc == EIO && (ip->bi_flags&DSW_OVERFLOW) != 0))
9203 				(void) II_SET_SHD_BIT(ip, chunk_num);
9204 			_ii_unlock_chunk(ip, chunk_num);
9205 		}
9206 	}
9207 
9208 	mutex_enter(&ip->bi_mutex);
9209 	ip->bi_shdref--;
9210 	if (ip->bi_state & DSW_CLOSING) {
9211 		if (total_ref(ip) == 0) {
9212 			cv_signal(&ip->bi_closingcv);
9213 		}
9214 	}
9215 	mutex_exit(&ip->bi_mutex);
9216 
9217 	/* did the bitmap fail during this process? */
9218 	return (ip->bi_flags & DSW_CFGOFFLINE? EIO : 0);
9219 }
9220 
/*
 * _ii_alloc_buf
 *	Allocate a buffer of data
 *
 * Calling/Exit State:
 *	Returns 0 for success, < 0 for async I/O, > 0 is an error code.
 *
 *	On success the devices named in h->ii_rsrv stay reserved and
 *	bi_linkrw stays held as reader; both are dropped by _ii_free_buf.
 *	On failure everything acquired here is released again and the
 *	error is also stored in the handle's sb_error.
 *
 * Description:
 *	For a write buffer, calls _ii_shadow_write to perform any necessary
 *	copy on write operations, then allocates the real buffers from the
 *	underlying devices.
 *	For a read buffer, allocates the real buffers from the underlying
 *	devices, then calls _ii_fill_buf to fill the required buffer.
 *	For a buffer that is neither read nor write, just allocate the
 *	buffers so that a _ii_fill_buf can be done later by _ii_read.
 *
 *	NOTE(review): when *ptr is NULL a handle is allocated here, but
 *	*ptr is never updated with it in this function - confirm how the
 *	caller gets hold of (and frees) that handle.
 */

static int
_ii_alloc_buf(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len, int flag,
    ii_buf_t **ptr)
{
	_ii_info_t *ip = bfd->ii_info;
	ii_buf_t *h;
	int	raw = II_RAW(bfd);
	int rc = 0;
	int ioflag;
	/* set once this call has attempted the matching buffer allocation */
	int fbuf = 0, fbuf2 = 0, abuf = 0;
	int rw_ent = 0;		/* bi_linkrw is held */

	if (bfd->ii_bmp) {
		DTRACE_PROBE(_ii_alloc_buf_end);
		/* any I/O to the bitmap device is barred */
		return (EIO);
	}

	if (len == 0) {
		DTRACE_PROBE(_ii_alloc_buf_end);
		return (EINVAL);
	}

	/* Bounds checking */
	if (pos + len > ip->bi_size) {
		if (ii_debug > 1)
			cmn_err(CE_NOTE,
			    "!ii: Attempt to access beyond end of ii volume");
		DTRACE_PROBE(_ii_alloc_buf_end);
		return (EIO);
	}

	h = *ptr;
	if (h == NULL) {
		h = (ii_buf_t *)_ii_alloc_handle(NULL, NULL, NULL, bfd);
		if (h == NULL) {
			DTRACE_PROBE(_ii_alloc_buf_end);
			return (ENOMEM);
		}
	}

	/*
	 * Temporary nsc_reserve of bitmap and other device.
	 * This device has already been reserved by the preceding _ii_attach.
	 * Corresponding nsc_release is in _ii_free_buf.
	 */

	h->ii_rsrv = BMP | (raw ? (bfd->ii_shd ? MSTR : SHDR)
	    : (bfd->ii_shd ? MST : SHD));

	if (!bfd->ii_shd)
		ip = ip->bi_master;

	rw_enter(&ip->bi_linkrw, RW_READER);
	rw_ent = 1;
	/* no shadow device to reserve if it is absent or exported */
	if (ip->bi_shdfd == NULL || (ip->bi_flags & DSW_SHDEXPORT) ==
	    DSW_SHDEXPORT)
		h->ii_rsrv &= ~(SHD|SHDR);
	if ((rc = _ii_rsrv_devs(ip, h->ii_rsrv, II_EXTERNAL)) != 0) {
		/* nothing was reserved: make sure 'error' releases nothing */
		rw_exit(&ip->bi_linkrw);
		rw_ent = 0;
		h->ii_rsrv = NULL;
		goto error;
	}

	if (flag & NSC_WRBUF) {
		rc = _ii_shadow_write(bfd, pos, len);
		if (!II_SUCCESS(rc))
			goto error;
	}

	/*
	 * Unless this is a read-ahead, the underlying buffer is allocated
	 * without NSC_RDBUF; the data is read by _ii_fill_buf below.
	 */
	if (!(flag & NSC_RDAHEAD))
		ioflag = flag & ~(NSC_RDBUF);
	else
		ioflag = flag;

	if (bfd->ii_shd) {
		/*
		 * SHADOW
		 */

		if (ip->bi_flags & DSW_SHDEXPORT) {
			rc = EIO;
			goto error;
		}
		/*
		 * The master device buffer has to be allocated first
		 * so that deadlocks are avoided.
		 */
		DTRACE_PROBE(AllocBufFor_SHADOW);

		if ((ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) == 0) {
			rc = nsc_alloc_buf(MSTFD(ip), pos, len,
			    (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
			if (!II_SUCCESS(rc)) {
				if (ii_debug > 2)
					cmn_err(CE_WARN, "!ii: "
					    "Join/write-S race detected\n");
				if (h->ii_bufp2)
					(void) nsc_free_buf(h->ii_bufp2);
				h->ii_bufp2 = NULL;
				/*
				 * Carry on as this will not matter if
				 * _ii_fill_buf is not called, or if
				 * it is called but doesn't need to read this
				 * volume.
				 */
				rc = 0;
			}
			fbuf2 = 1;
		}

		if (ip->bi_flags & DSW_SHDOFFLINE) {
			rc = EIO;
			goto error;
		}
		if ((ip->bi_flags)&DSW_TREEMAP) {
			/*
			 * DSW_TREEMAP shadow: allocated with nsc_alloc_abuf
			 * (no fd is passed) rather than on SHDFD directly.
			 */
			rc = nsc_alloc_abuf(pos, len, 0, &h->ii_abufp);
			if (!II_SUCCESS(rc)) {
				_ii_error(ip, DSW_SHDOFFLINE);
				goto error;
			}
			abuf = 1;
		} else {
			II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), pos, len,
			    ioflag, &h->ii_bufp);	/* do not read yet */
			if (!II_SUCCESS(rc)) {
				_ii_error(ip, DSW_SHDOFFLINE);
				goto error;
			}
			fbuf = 1;
		}
	} else {
		/*
		 * MASTER
		 */

		/*
		 * The master device buffer has to be allocated first
		 * so that deadlocks are avoided.
		 */

		if (ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) {
			rc = EIO;
			goto error;
		}

		DTRACE_PROBE(AllocBufFor_MASTER);

		II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, ioflag,
		    &h->ii_bufp);		/* do not read yet */
		if (!II_SUCCESS(rc)) {
			_ii_error(ip, DSW_MSTOFFLINE);
			goto error;
		}
		fbuf = 1;

		/*
		 * If shadow FD and (dependent set OR copying) and
		 * not (compact dependent && shadow offline && shadow exported)
		 */
		if ((ip->bi_shdfd) &&
		    ((ip->bi_flags & DSW_COPYINGP) ||
		    (!(ip->bi_flags & DSW_GOLDEN))) &&
		    (!(ip->bi_flags &
		    (DSW_TREEMAP|DSW_SHDOFFLINE|DSW_SHDEXPORT)))) {
			rc = nsc_alloc_buf(SHDFD(ip), pos, len,
			    (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
			if (!II_SUCCESS(rc)) {
				if (ii_debug > 2)
					cmn_err(CE_WARN, "!ii: "
					    "Join/write-M race detected\n");
				if (h->ii_bufp2)
					(void) nsc_free_buf(h->ii_bufp2);
				h->ii_bufp2 = NULL;
				/*
				 * Carry on as this will not matter if
				 * _ii_fill_buf is not called, or if
				 * it is called but doesn't need to read this
				 * volume.
				 */
				rc = 0;
			}
			fbuf2 = 1;
		}
	}

	if (flag & NSC_RDBUF)
		rc = _ii_fill_buf(bfd, pos, len, flag,
		    h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);

	/* reached on success as well as on failure */
error:
	if (II_SUCCESS(rc)) {
		/* expose the underlying buffer's data through our handle */
		h->ii_bufh.sb_vec = h->ii_abufp ? h->ii_abufp->sb_vec :
		    h->ii_bufp->sb_vec;
		h->ii_bufh.sb_error = 0;
		h->ii_bufh.sb_flag |= flag;
		h->ii_bufh.sb_pos = pos;
		h->ii_bufh.sb_len = len;
	} else {
		h->ii_bufh.sb_error = rc;
		/* free only what this call attempted to allocate */
		if (h->ii_bufp2 && fbuf2) {
			(void) nsc_free_buf(h->ii_bufp2);
			h->ii_bufp2 = NULL;
		}
		if (h->ii_bufp && fbuf) {
			(void) nsc_free_buf(h->ii_bufp);
			h->ii_bufp = NULL;
		}
		if (h->ii_abufp && abuf) {
			(void) nsc_free_buf(h->ii_abufp);
			h->ii_abufp = NULL;
		}

		if (h->ii_rsrv) {
			/*
			 * Release temporary reserve - reserved above.
			 */
			_ii_rlse_devs(ip, h->ii_rsrv);
			h->ii_rsrv = NULL;
		}
		if (rw_ent)
			rw_exit(&ip->bi_linkrw);
	}

	return (rc);
}
9465 
9466 
9467 /*
9468  * _ii_free_buf
9469  */
9470 
9471 static int
9472 _ii_free_buf(ii_buf_t *h)
9473 {
9474 	ii_fd_t *bfd;
9475 	int rsrv;
9476 	int rc;
9477 
9478 	if (h->ii_abufp == NULL) {
9479 		rc = nsc_free_buf(h->ii_bufp);
9480 	} else {
9481 		rc = nsc_free_buf(h->ii_abufp);
9482 		h->ii_abufp = NULL;
9483 	}
9484 	if (!II_SUCCESS(rc))
9485 		return (rc);
9486 	if (h->ii_bufp2) {
9487 		rc = nsc_free_buf(h->ii_bufp2);
9488 		h->ii_bufp2 = NULL;
9489 		if (!II_SUCCESS(rc))
9490 			return (rc);
9491 	}
9492 
9493 	bfd = h->ii_fd;
9494 	rsrv = h->ii_rsrv;
9495 
9496 	if ((h->ii_bufh.sb_flag & NSC_HALLOCATED) == 0) {
9497 		rc = _ii_free_handle(h, h->ii_fd);
9498 		if (!II_SUCCESS(rc))
9499 			return (rc);
9500 	} else {
9501 		h->ii_bufh.sb_flag = NSC_HALLOCATED;
9502 		h->ii_bufh.sb_vec = NULL;
9503 		h->ii_bufh.sb_error = 0;
9504 		h->ii_bufh.sb_pos = 0;
9505 		h->ii_bufh.sb_len = 0;
9506 		h->ii_rsrv = NULL;
9507 	}
9508 
9509 	/*
9510 	 * Release temporary reserve - reserved in _ii_alloc_buf.
9511 	 */
9512 
9513 	if (rsrv)
9514 		_ii_rlse_devs(bfd->ii_info, rsrv);
9515 	rw_exit(&bfd->ii_info->bi_linkrw);
9516 
9517 	return (0);
9518 }
9519 
9520 
9521 /*
9522  * _ii_open
9523  *	Open a device
9524  *
9525  * Calling/Exit State:
9526  *	Returns a token to identify the shadow device.
9527  *
9528  * Description:
9529  *	Performs the housekeeping operations associated with an upper layer
9530  *	of the nsc stack opening a shadowed device.
9531  */
9532 
9533 /* ARGSUSED */
9534 
9535 static int
9536 _ii_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9537 {
9538 	_ii_info_t *ip;
9539 	_ii_overflow_t *op;
9540 	ii_fd_t *bfd;
9541 	int is_mst = 0;
9542 	int is_shd = 0;
9543 	int raw = (flag & NSC_CACHE) == 0;
9544 
9545 	bfd = nsc_kmem_zalloc(sizeof (*bfd), KM_SLEEP, _ii_local_mem);
9546 	if (!bfd)
9547 		return (ENOMEM);
9548 
9549 	DTRACE_PROBE1(_ii_open_mutex,
9550 	    ii_fd_t *, bfd);
9551 
9552 	mutex_enter(&_ii_info_mutex);
9553 
9554 	for (ip = _ii_info_top; ip; ip = ip->bi_next) {
9555 		if (strcmp(path, ii_pathname(ip->bi_mstfd)) == 0) {
9556 			is_mst = 1;
9557 			break;
9558 		} else if (strcmp(path, ip->bi_keyname) == 0) {
9559 			is_shd = 1;
9560 			break;
9561 		} else if (strcmp(path, ii_pathname(ip->bi_bmpfd)) == 0)
9562 			break;
9563 	}
9564 
9565 	if (is_mst)
9566 		ip = ip->bi_master;
9567 
9568 	if (ip && ip->bi_disabled && !(ip->bi_state & DSW_MULTIMST)) {
9569 		DTRACE_PROBE(_ii_open_Disabled);
9570 		mutex_exit(&_ii_info_mutex);
9571 		return (EINTR);
9572 	}
9573 
9574 	if (!ip) {
9575 		/* maybe it's an overflow */
9576 		mutex_exit(&_ii_info_mutex);
9577 		mutex_enter(&_ii_overflow_mutex);
9578 		for (op = _ii_overflow_top; op; op = op->ii_next) {
9579 			if (strcmp(path, op->ii_volname) == 0)
9580 				break;
9581 		}
9582 		mutex_exit(&_ii_overflow_mutex);
9583 
9584 		if (!op) {
9585 			nsc_kmem_free(bfd, sizeof (*bfd));
9586 			DTRACE_PROBE(_ii_open_end_EINVAL);
9587 			return (EINVAL);
9588 		}
9589 		bfd->ii_ovr = 1;
9590 		bfd->ii_oflags = flag;
9591 		bfd->ii_optr = op;
9592 		*cdp = (blind_t)bfd;
9593 
9594 		DTRACE_PROBE(_ii_open_end_overflow);
9595 		return (0);
9596 	}
9597 	mutex_enter(&ip->bi_mutex);
9598 	ip->bi_ioctl++;
9599 	mutex_exit(&_ii_info_mutex);
9600 
9601 	if (is_mst) {
9602 		if (raw) {
9603 			ip->bi_mstr_iodev = NULL;	/* set in attach */
9604 			ip->bi_mstrref++;
9605 		} else {
9606 			ip->bi_mst_iodev = NULL;	/* set in attach */
9607 			ip->bi_mstref++;
9608 		}
9609 		ip->bi_master->bi_iifd = bfd;
9610 	} else if (is_shd) {
9611 		if (raw) {
9612 			ip->bi_shdr_iodev = NULL;	/* set in attach */
9613 			ip->bi_shdrref++;
9614 		} else {
9615 			ip->bi_shd_iodev = NULL;	/* set in attach */
9616 			ip->bi_shdref++;
9617 		}
9618 		bfd->ii_shd = 1;
9619 	} else {
9620 		ip->bi_bmpref++;
9621 		ip->bi_bmp_iodev = NULL;	/* set in attach */
9622 		bfd->ii_bmp = 1;
9623 	}
9624 
9625 	_ii_ioctl_done(ip);
9626 	mutex_exit(&ip->bi_mutex);
9627 
9628 	bfd->ii_info = ip;
9629 	bfd->ii_oflags = flag;
9630 
9631 	*cdp = (blind_t)bfd;
9632 
9633 	return (0);
9634 }
9635 
9636 static int
9637 _ii_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9638 {
9639 	return (_ii_open(path, NSC_CACHE|flag, cdp, iodev));
9640 }
9641 
9642 static int
9643 _ii_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9644 {
9645 	return (_ii_open(path, NSC_DEVICE|flag, cdp, iodev));
9646 }
9647 
9648 
9649 /*
9650  * _ii_close
9651  *	Close a device
9652  *
9653  * Calling/Exit State:
9654  *	Always succeeds - returns 0
9655  *
9656  * Description:
9657  *	Performs the housekeeping operations associated with an upper layer
9658  *	of the nsc stack closing a shadowed device.
9659  */
9660 
9661 static int
9662 _ii_close(bfd)
9663 ii_fd_t *bfd;
9664 {
9665 	_ii_info_t *ip = bfd->ii_info;
9666 	_ii_info_dev_t *dip;
9667 	int raw;
9668 
9669 	if (!ip) {
9670 		ASSERT(bfd->ii_ovr);
9671 		return (0);
9672 	}
9673 
9674 	raw = II_RAW(bfd);
9675 
9676 	mutex_enter(&ip->bi_mutex);
9677 
9678 	if (bfd->ii_shd && raw) {
9679 		dip = &ip->bi_shdrdev;
9680 	} else if (bfd->ii_shd) {
9681 		dip = &ip->bi_shddev;
9682 	} else if (bfd->ii_bmp) {
9683 		dip = &ip->bi_bmpdev;
9684 	} else if (raw) {
9685 		dip = ip->bi_mstrdev;
9686 	} else {
9687 		dip = ip->bi_mstdev;
9688 	}
9689 
9690 	if (dip) {
9691 		dip->bi_ref--;
9692 		if (dip->bi_ref == 0)
9693 			dip->bi_iodev = NULL;
9694 	}
9695 
9696 	if (ip->bi_state & DSW_CLOSING) {
9697 		if (total_ref(ip) == 0) {
9698 			cv_signal(&ip->bi_closingcv);
9699 		}
9700 	} else if ((ip->bi_flags & DSW_HANGING) &&
9701 	    (ip->bi_head->bi_state & DSW_CLOSING))
9702 		cv_signal(&ip->bi_head->bi_closingcv);
9703 
9704 	if (!(bfd->ii_shd || bfd->ii_bmp))	/* is master device */
9705 		ip->bi_master->bi_iifd = NULL;
9706 	mutex_exit(&ip->bi_mutex);
9707 
9708 	nsc_kmem_free(bfd, sizeof (*bfd));
9709 
9710 	return (0);
9711 }
9712 
9713 /*
9714  * _ii_alloc_handle
9715  *	Allocate a handle
9716  *
9717  */
9718 
9719 static nsc_buf_t *
9720 _ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd)
9721 {
9722 	ii_buf_t *h;
9723 
9724 	if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9725 		return (NULL);
9726 
9727 	h = kmem_alloc(sizeof (*h), KM_SLEEP);
9728 	if (!h)
9729 		return (NULL);
9730 
9731 	h->ii_abufp = NULL;
9732 	h->ii_bufp = nsc_alloc_handle(II_FD(bfd), d_cb, r_cb, w_cb);
9733 	if (!h->ii_bufp) {
9734 		kmem_free(h, sizeof (*h));
9735 		return (NULL);
9736 	}
9737 	h->ii_bufp2 = NULL;
9738 	h->ii_bufh.sb_flag = NSC_HALLOCATED;
9739 	h->ii_fd = bfd;
9740 	h->ii_rsrv = NULL;
9741 
9742 	return ((nsc_buf_t *)h);
9743 }
9744 
9745 
9746 /*
9747  * _ii_free_handle
9748  *	Free a handle
9749  *
9750  */
9751 
9752 static int	 /*ARGSUSED*/
9753 _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd)
9754 {
9755 	int rc;
9756 
9757 	if (h->ii_abufp)
9758 		(void) nsc_free_buf(h->ii_abufp);
9759 	rc = nsc_free_handle(h->ii_bufp);
9760 	if (!II_SUCCESS(rc)) {
9761 		return (rc);
9762 	}
9763 
9764 	kmem_free(h, sizeof (ii_buf_t));
9765 
9766 	return (0);
9767 }
9768 
9769 
9770 /*
9771  * _ii_attach
9772  *	Attach
9773  *
9774  * Calling/Exit State:
9775  *	Returns 0 for success, errno on failure.
9776  *
9777  * Description:
9778  */
9779 
9780 static int
9781 _ii_attach(ii_fd_t *bfd, nsc_iodev_t *iodev)
9782 {
9783 	_ii_info_t *ip;
9784 	int dev;
9785 	int raw;
9786 	int rc;
9787 	_ii_info_dev_t *infop;
9788 
9789 	raw  = II_RAW(bfd);
9790 
9791 	DTRACE_PROBE2(_ii_attach_info,
9792 	    char *, bfd->ii_shd? "shadow" : "master",
9793 	    int, raw);
9794 
9795 	if (bfd->ii_ovr)
9796 		return (EINVAL);
9797 
9798 	ip = bfd->ii_info;
9799 	if (ip == NULL)
9800 		return (EINVAL);
9801 
9802 	mutex_enter(&ip->bi_mutex);
9803 	if (bfd->ii_bmp) {
9804 		infop = &ip->bi_bmpdev;
9805 	} else if (bfd->ii_shd) {
9806 		if (raw) {
9807 			infop = &ip->bi_shdrdev;
9808 		} else {
9809 			infop = &ip->bi_shddev;
9810 		}
9811 	} else if (!bfd->ii_ovr) {
9812 		if (raw) {
9813 			infop = ip->bi_mstrdev;
9814 		} else {
9815 			infop = ip->bi_mstdev;
9816 		}
9817 	}
9818 
9819 	if (iodev) {
9820 		infop->bi_iodev = iodev;
9821 		nsc_set_owner(infop->bi_fd, infop->bi_iodev);
9822 	}
9823 	mutex_exit(&ip->bi_mutex);
9824 
9825 	if (bfd->ii_bmp)
9826 		return (EINVAL);
9827 
9828 	if (raw)
9829 		dev = bfd->ii_shd ? SHDR : MSTR;
9830 	else
9831 		dev = bfd->ii_shd ? SHD : MST;
9832 
9833 	rc = _ii_rsrv_devs(ip, dev, II_EXTERNAL);
9834 
9835 	return (rc);
9836 }
9837 
9838 
9839 /*
9840  * _ii_detach
9841  *	Detach
9842  *
9843  * Calling/Exit State:
9844  *	Returns 0 for success, always succeeds
9845  *
9846  * Description:
9847  */
9848 
9849 static int
9850 _ii_detach(bfd)
9851 ii_fd_t *bfd;
9852 {
9853 	int dev;
9854 	int raw;
9855 
9856 	raw = II_RAW(bfd);
9857 
9858 	DTRACE_PROBE2(_ii_detach_info,
9859 	    char *, bfd->ii_shd? "shadow" : "master",
9860 	    int, raw);
9861 
9862 	if (bfd->ii_bmp)
9863 		return (0);
9864 
9865 	ASSERT(bfd->ii_info);
9866 	dev = bfd->ii_shd ? (raw ? SHDR : SHD) : (raw ? MSTR : MST);
9867 	_ii_rlse_devs(bfd->ii_info, dev);
9868 
9869 	return (0);
9870 }
9871 
9872 /*
9873  * _ii_get_pinned
9874  *
9875  */
9876 
9877 static int
9878 _ii_get_pinned(ii_fd_t *bfd)
9879 {
9880 	int rc;
9881 
9882 	if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9883 		return (EIO);
9884 
9885 	rc = nsc_get_pinned(II_FD(bfd));
9886 
9887 	return (rc);
9888 }
9889 
9890 /*
9891  * _ii_discard_pinned
9892  *
9893  */
9894 
9895 static int
9896 _ii_discard_pinned(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
9897 {
9898 	int rc;
9899 
9900 	if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9901 		return (EIO);
9902 	rc = nsc_discard_pinned(II_FD(bfd), pos, len);
9903 
9904 	return (rc);
9905 }
9906 
9907 /*
9908  * _ii_partsize
9909  *
9910  */
9911 
9912 static int
9913 _ii_partsize(ii_fd_t *bfd, nsc_size_t *ptr)
9914 {
9915 	/* Always return saved size */
9916 	*ptr = bfd->ii_info->bi_size;
9917 	return (0);
9918 }
9919 
9920 /*
9921  * _ii_maxfbas
9922  *
9923  */
9924 
9925 static int
9926 _ii_maxfbas(ii_fd_t *bfd, int flag, nsc_size_t *ptr)
9927 {
9928 	int rc;
9929 	int rs;
9930 	int dev;
9931 	_ii_info_t *ip;
9932 
9933 	ip = bfd->ii_info;
9934 	if (REMOTE_VOL(bfd->ii_shd, ip))
9935 		return (EIO);
9936 
9937 	dev =  ((ip->bi_flags)&DSW_SHDIMPORT) ? SHDR : MSTR;
9938 
9939 	DTRACE_PROBE1(_ii_maxfbas_info,
9940 	    char *, dev == SHDR? "shadow" : "master");
9941 
9942 	rs = _ii_rsrv_devs(ip, dev, II_INTERNAL);
9943 	rc = nsc_maxfbas((dev == MSTR) ? MSTFD(ip) : SHDFD(ip), flag, ptr);
9944 
9945 	if (rs == 0)
9946 		_ii_rlse_devs(ip, dev);
9947 
9948 	return (rc);
9949 }
9950 
9951 /*
9952  * ii_get_group_list
9953  */
9954 _ii_info_t **
9955 ii_get_group_list(char *group, int *count)
9956 {
9957 	int i;
9958 	int nip;
9959 	uint64_t   hash;
9960 	_ii_info_t **ipa;
9961 	_ii_lsthead_t *head;
9962 	_ii_lstinfo_t *np;
9963 
9964 	hash = nsc_strhash(group);
9965 
9966 	for (head = _ii_group_top; head; head = head->lst_next) {
9967 		if (hash == head->lst_hash && strncmp(head->lst_name,
9968 		    group, DSW_NAMELEN) == 0)
9969 			break;
9970 	}
9971 
9972 	if (!head) {
9973 		return (NULL);
9974 	}
9975 
9976 	/* Count entries */
9977 	for (nip = 0, np = head->lst_start; np; np = np->lst_next)
9978 		++nip;
9979 
9980 	ASSERT(nip > 0);
9981 
9982 	ipa = kmem_zalloc(sizeof (_ii_info_t *) * nip, KM_SLEEP);
9983 
9984 	np = head->lst_start;
9985 
9986 	for (i = 0; i < nip; i++) {
9987 		ASSERT(np != 0);
9988 
9989 		ipa[i] = np->lst_ip;
9990 		np = np->lst_next;
9991 	}
9992 
9993 	*count = nip;
9994 	return (ipa);
9995 }
9996 
9997 /*
9998  * _ii_pinned
9999  *
10000  */
10001 
10002 static void
10003 _ii_pinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
10004 {
10005 	DTRACE_PROBE3(_ii_pinned_start, nsc_iodev_t, dip->bi_iodev,
10006 	    nsc_off_t, pos, nsc_size_t, len);
10007 
10008 	nsc_pinned_data(dip->bi_iodev, pos, len);
10009 
10010 }
10011 
10012 /*
10013  * _ii_unpinned
10014  *
10015  */
10016 
10017 static void
10018 _ii_unpinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
10019 {
10020 	nsc_unpinned_data(dip->bi_iodev, pos, len);
10021 
10022 }
10023 
10024 
10025 /*
10026  * _ii_read
10027  */
10028 
10029 static int
10030 _ii_read(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10031 {
10032 	int rc;
10033 	void *sb_vec;
10034 	nsc_vec_t **src;
10035 
10036 	if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
10037 		rc = EIO;
10038 	else {
10039 		src =  h->ii_abufp? &h->ii_abufp->sb_vec : &h->ii_bufp->sb_vec;
10040 		sb_vec = *src;
10041 		*src = h->ii_bufh.sb_vec;
10042 		rc = _ii_fill_buf(h->ii_fd, pos, len, flag,
10043 		    h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
10044 		*src = sb_vec;
10045 	}
10046 	if (!II_SUCCESS(rc))
10047 		h->ii_bufh.sb_error = rc;
10048 
10049 	return (rc);
10050 }
10051 
10052 
10053 /*
10054  * _ii_write
10055  */
10056 
10057 static int
10058 _ii_write(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10059 {
10060 	int rc;
10061 	ii_fd_t *bfd = h->ii_fd;
10062 	_ii_info_t *ip = bfd->ii_info;
10063 	chunkid_t	chunk_num;
10064 	nsc_size_t	copy_len;
10065 	nsc_off_t	mapped_fba;
10066 	chunkid_t	mapped_chunk;
10067 	int	overflow;
10068 	nsc_buf_t *tmp;
10069 	void	*sb_vec;
10070 
10071 	if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
10072 		rc = EIO;
10073 	else if ((ip->bi_flags&DSW_TREEMAP) == 0 || !bfd->ii_shd) {
10074 		sb_vec = h->ii_bufp->sb_vec;
10075 		h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10076 		if (bfd->ii_shd) {
10077 			II_NSC_WRITE(ip, shadow, rc, h->ii_bufp, pos, len,
10078 			    flag);
10079 		} else {
10080 			II_NSC_WRITE(ip, master, rc, h->ii_bufp, pos, len,
10081 			    flag);
10082 		}
10083 		h->ii_bufp->sb_vec = sb_vec;
10084 	} else {
10085 		/* write of mapped shadow buffer */
10086 		rc = 0;
10087 		chunk_num = pos / DSW_SIZE;
10088 		while (len > 0 && II_SUCCESS(rc)) {
10089 			/*
10090 			 * don't need to test bitmaps as allocating the
10091 			 * write buffer will c-o-write the chunk.
10092 			 */
10093 			mapped_chunk = ii_tsearch(ip, chunk_num);
10094 			if (mapped_chunk == II_NULLNODE) {
10095 				rc = EIO;
10096 				break;
10097 			}
10098 			overflow = II_ISOVERFLOW(mapped_chunk);
10099 			if (overflow)
10100 				mapped_chunk = II_2OVERFLOW(mapped_chunk);
10101 			mapped_fba = DSW_CHK2FBA(mapped_chunk) +
10102 			    (pos % DSW_SIZE);
10103 			copy_len = DSW_SIZE - (pos % DSW_SIZE);
10104 			if (copy_len > len)
10105 				copy_len = len;
10106 			tmp = NULL;
10107 			if (overflow) {
10108 				(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
10109 				rc = nsc_alloc_buf(OVRFD(ip), mapped_fba,
10110 				    copy_len, NSC_WRBUF, &tmp);
10111 			} else
10112 				rc = nsc_alloc_buf(SHDFD(ip), mapped_fba,
10113 				    copy_len, NSC_WRBUF, &tmp);
10114 			sb_vec = h->ii_abufp->sb_vec;
10115 			h->ii_abufp->sb_vec = h->ii_bufh.sb_vec;
10116 			if (II_SUCCESS(rc)) {
10117 				rc = nsc_copy(h->ii_abufp, tmp, pos,
10118 				    mapped_fba, copy_len);
10119 			}
10120 			if (overflow) {
10121 				II_NSC_WRITE(ip, overflow, rc, tmp, mapped_fba,
10122 				    copy_len, flag);
10123 			} else {
10124 				II_NSC_WRITE(ip, shadow, rc, tmp, mapped_fba,
10125 				    copy_len, flag);
10126 			}
10127 			h->ii_abufp->sb_vec = sb_vec;
10128 			(void) nsc_free_buf(tmp);
10129 			if (overflow)
10130 				nsc_release(OVRFD(ip));
10131 			/* move on to next chunk */
10132 			pos += copy_len;
10133 			len -= copy_len;
10134 			chunk_num++;
10135 		}
10136 	}
10137 	if (!II_SUCCESS(rc))
10138 		h->ii_bufh.sb_error = rc;
10139 
10140 	return (rc);
10141 }
10142 
10143 
10144 /*
10145  * _ii_zero
10146  */
10147 
10148 static int
10149 _ii_zero(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10150 {
10151 	int rc;
10152 	void *sb_vec;
10153 
10154 	sb_vec = h->ii_bufp->sb_vec;
10155 	h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10156 	rc = nsc_zero(h->ii_bufp, pos, len, flag);
10157 	h->ii_bufp->sb_vec = sb_vec;
10158 	if (!II_SUCCESS(rc))
10159 		h->ii_bufh.sb_error = rc;
10160 
10161 	return (rc);
10162 }
10163 
10164 
10165 /*
10166  * _ii_uncommit
10167  */
10168 
10169 static int
10170 _ii_uncommit(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10171 {
10172 	int rc;
10173 	void *sb_vec;
10174 
10175 	sb_vec = h->ii_bufp->sb_vec;
10176 	h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10177 	rc = nsc_uncommit(h->ii_bufp, pos, len, flag);
10178 	h->ii_bufp->sb_vec = sb_vec;
10179 	if (!II_SUCCESS(rc))
10180 		h->ii_bufh.sb_error = rc;
10181 
10182 	return (rc);
10183 }
10184 
10185 
10186 /*
10187  * _ii_trksize
10188  */
10189 
10190 static int
10191 _ii_trksize(ii_fd_t *bfd, int trksize)
10192 {
10193 	int rc;
10194 
10195 	rc = nsc_set_trksize(II_FD(bfd), trksize);
10196 
10197 	return (rc);
10198 }
10199 
10200 /*
10201  * _ii_register_path
10202  */
10203 
10204 static nsc_path_t *
10205 _ii_register_path(char *path, int type, nsc_io_t *io)
10206 {
10207 	nsc_path_t *tok;
10208 
10209 	tok = nsc_register_path(path, type, io);
10210 
10211 	return (tok);
10212 }
10213 
10214 /*
10215  * _ii_unregister_path
10216  */
10217 /*ARGSUSED*/
10218 static int
10219 _ii_unregister_path(nsc_path_t *sp, int flag, char *type)
10220 {
10221 	int rc;
10222 
10223 	rc = nsc_unregister_path(sp, flag);
10224 
10225 	return (rc);
10226 }
10227 
10228 int
10229 _ii_ll_add(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char *name,
10230     char **key)
10231 {
10232 	_ii_lsthead_t **head;
10233 	_ii_lstinfo_t *node;
10234 	uint64_t hash;
10235 
10236 	ASSERT(key && !*key);
10237 	ASSERT(ip && mutex && lst && name);
10238 
10239 	node = kmem_zalloc(sizeof (_ii_lstinfo_t), KM_SLEEP);
10240 	if (!node) {
10241 		cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
10242 		DTRACE_PROBE(_ii_ll_add_end_ENOMEM);
10243 		return (ENOMEM);
10244 	}
10245 	node->lst_ip = ip;
10246 
10247 	/* find out where we should insert it */
10248 	hash = nsc_strhash(name);
10249 
10250 	mutex_enter(mutex);
10251 	for (head = lst; *head; head = &((*head)->lst_next)) {
10252 		if (((*head)->lst_hash == hash) &&
10253 		    strncmp(name, (*head)->lst_name, DSW_NAMELEN) == 0) {
10254 			node->lst_next = (*head)->lst_start;
10255 			(*head)->lst_start = node;
10256 			break;
10257 		}
10258 	}
10259 
10260 	if (!*head) {
10261 		/* create a new entry */
10262 		*head = kmem_zalloc(sizeof (_ii_lsthead_t), KM_SLEEP);
10263 		if (!*head) {
10264 			/* bother */
10265 			cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
10266 			kmem_free(node, sizeof (_ii_lstinfo_t));
10267 			DTRACE_PROBE(_ii_ll_add_end_2);
10268 			return (ENOMEM);
10269 		}
10270 		(*head)->lst_hash = hash;
10271 		(void) strncpy((*head)->lst_name, name, DSW_NAMELEN);
10272 		(*head)->lst_start = node;
10273 	}
10274 	mutex_exit(mutex);
10275 
10276 	*key = (*head)->lst_name;
10277 
10278 	return (0);
10279 }
10280 
10281 int
10282 _ii_ll_remove(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char **key)
10283 {
10284 	_ii_lsthead_t **head, *oldhead = 0;
10285 	_ii_lstinfo_t **node, *oldnode = 0;
10286 	uint64_t hash;
10287 	int found;
10288 
10289 	ASSERT(key && *key);
10290 	ASSERT(ip && lst);
10291 
10292 	hash = nsc_strhash(*key);
10293 
10294 	mutex_enter(mutex);
10295 	for (head = lst; *head; head = &((*head)->lst_next)) {
10296 		if (((*head)->lst_hash == hash) &&
10297 		    strncmp(*key, (*head)->lst_name, DSW_NAMELEN) == 0)
10298 			break;
10299 	}
10300 	if (!*head) {
10301 		/* no such link (!) */
10302 		mutex_exit(mutex);
10303 		return (0);
10304 	}
10305 
10306 	found = 0;
10307 	for (node = &(*head)->lst_start; *node; node = &((*node)->lst_next)) {
10308 		if (ip == (*node)->lst_ip) {
10309 			oldnode = *node;
10310 			*node = (*node)->lst_next;
10311 			kmem_free(oldnode, sizeof (_ii_lstinfo_t));
10312 			found = 1;
10313 			break;
10314 		}
10315 	}
10316 
10317 	ASSERT(found);
10318 
10319 	if (!found) {
10320 		mutex_exit(mutex);
10321 		return (0);
10322 	}
10323 
10324 	/* did we just delete the last set in this resource group? */
10325 	if (!(*head)->lst_start) {
10326 		oldhead = *head;
10327 		*head = (*head)->lst_next;
10328 		kmem_free(oldhead, sizeof (_ii_lsthead_t));
10329 	}
10330 	mutex_exit(mutex);
10331 
10332 	*key = NULL;
10333 
10334 	return (0);
10335 }
10336 
10337 static nsc_def_t _ii_fd_def[] = {
10338 	"Pinned",	(uintptr_t)_ii_pinned,		0,
10339 	"Unpinned",	(uintptr_t)_ii_unpinned,	0,
10340 	0,		0,				0
10341 };
10342 
10343 
10344 static nsc_def_t _ii_io_def[] = {
10345 	"Open",		(uintptr_t)_ii_openc,		0,
10346 	"Close",	(uintptr_t)_ii_close,		0,
10347 	"Attach",	(uintptr_t)_ii_attach,		0,
10348 	"Detach",	(uintptr_t)_ii_detach,		0,
10349 	"AllocHandle",	(uintptr_t)_ii_alloc_handle,	0,
10350 	"FreeHandle",	(uintptr_t)_ii_free_handle,	0,
10351 	"AllocBuf",	(uintptr_t)_ii_alloc_buf,	0,
10352 	"FreeBuf",	(uintptr_t)_ii_free_buf,	0,
10353 	"GetPinned",	(uintptr_t)_ii_get_pinned,	0,
10354 	"Discard",	(uintptr_t)_ii_discard_pinned,	0,
10355 	"PartSize",	(uintptr_t)_ii_partsize,	0,
10356 	"MaxFbas",	(uintptr_t)_ii_maxfbas,	0,
10357 	"Read",		(uintptr_t)_ii_read,		0,
10358 	"Write",	(uintptr_t)_ii_write,		0,
10359 	"Zero",		(uintptr_t)_ii_zero,		0,
10360 	"Uncommit",	(uintptr_t)_ii_uncommit,	0,
10361 	"TrackSize",	(uintptr_t)_ii_trksize,	0,
10362 	"Provide",	0,				0,
10363 	0,		0,				0
10364 };
10365 
10366 static nsc_def_t _ii_ior_def[] = {
10367 	"Open",		(uintptr_t)_ii_openr,		0,
10368 	"Close",	(uintptr_t)_ii_close,		0,
10369 	"Attach",	(uintptr_t)_ii_attach,		0,
10370 	"Detach",	(uintptr_t)_ii_detach,		0,
10371 	"AllocHandle",	(uintptr_t)_ii_alloc_handle,	0,
10372 	"FreeHandle",	(uintptr_t)_ii_free_handle,	0,
10373 	"AllocBuf",	(uintptr_t)_ii_alloc_buf,	0,
10374 	"FreeBuf",	(uintptr_t)_ii_free_buf,	0,
10375 	"GetPinned",	(uintptr_t)_ii_get_pinned,	0,
10376 	"Discard",	(uintptr_t)_ii_discard_pinned,	0,
10377 	"PartSize",	(uintptr_t)_ii_partsize,	0,
10378 	"MaxFbas",	(uintptr_t)_ii_maxfbas,	0,
10379 	"Read",		(uintptr_t)_ii_read,		0,
10380 	"Write",	(uintptr_t)_ii_write,		0,
10381 	"Zero",		(uintptr_t)_ii_zero,		0,
10382 	"Uncommit",	(uintptr_t)_ii_uncommit,	0,
10383 	"TrackSize",	(uintptr_t)_ii_trksize,	0,
10384 	"Provide",	0,				0,
10385 	0,		0,				0
10386 };
10387