xref: /titanic_41/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c (revision b97d6ca7333c353b6ca20c20c99fb1be8d32a8de)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2012 Milan Jurik. All rights reserved.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/conf.h>
31 #include <sys/file.h>
32 #include <sys/user.h>
33 #include <sys/uio.h>
34 #include <sys/t_lock.h>
35 #include <sys/buf.h>
36 #include <sys/dkio.h>
37 #include <sys/vtoc.h>
38 #include <sys/kmem.h>
39 #include <vm/page.h>
40 #include <sys/sysmacros.h>
41 #include <sys/types.h>
42 #include <sys/mkdev.h>
43 #include <sys/stat.h>
44 #include <sys/open.h>
45 #include <sys/modctl.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 
49 #include <sys/lvm/mdvar.h>
50 #include <sys/lvm/md_names.h>
51 #include <sys/lvm/md_mddb.h>
52 #include <sys/lvm/md_stripe.h>
53 #include <sys/lvm/md_mirror.h>
54 
55 #include <sys/model.h>
56 
57 #include <sys/sysevent/eventdefs.h>
58 #include <sys/sysevent/svm.h>
59 #include <sys/lvm/mdmn_commd.h>
60 
61 extern int		md_status;
62 extern kmutex_t		md_mx;
63 extern kcondvar_t	md_cv;
64 
65 extern unit_t		md_nunits;
66 extern set_t		md_nsets;
67 extern md_set_t		md_set[];
68 
69 extern md_ops_t		mirror_md_ops;
70 extern int		md_ioctl_cnt;
71 extern md_krwlock_t	md_unit_array_rw;
72 extern major_t		md_major;
73 extern mdq_anchor_t	md_ff_daemonq;
74 extern void		md_probe_one(probe_req_t *);
75 extern void		mirror_openfail_console_info(mm_unit_t *, int, int);
76 
77 #ifdef DEBUG
78 extern int		mirror_debug_flag;
79 #endif
80 
static void
mirror_resume_writes(mm_unit_t *un)
{
	/*
	 * Release the block on writes to the mirror and resume any blocked
	 * resync thread.
	 * This is only required for MN sets
	 */
	if (MD_MNSET_SETNO(MD_UN2SET(un))) {
#ifdef DEBUG
		if (mirror_debug_flag)
			printf("mirror_resume_writes: mnum %x\n", MD_SID(un));
#endif
		/* Clear the write-suspend flag and wake all blocked writers */
		mutex_enter(&un->un_suspend_wr_mx);
		un->un_suspend_wr_flag = 0;
		cv_broadcast(&un->un_suspend_wr_cv);
		mutex_exit(&un->un_suspend_wr_mx);
		/* Unblock the resync thread if it is waiting on MD_RI_BLOCK */
		mutex_enter(&un->un_rs_thread_mx);
		un->un_rs_thread_flags &= ~MD_RI_BLOCK;
		cv_signal(&un->un_rs_thread_cv);
		mutex_exit(&un->un_rs_thread_mx);
	}
}
104 
/*
 * mirror_getun:
 * ----------------
 * Look up and validate the mirror unit for minor number 'mnum', taking
 * the locks requested by 'flags'.
 *
 * flags:
 *	STALE_OK	- permit the lookup even if the set's metadb is stale
 *	NO_OLD		- the unit must NOT already exist; on success the
 *			  sentinel (mm_unit_t *)1 is returned (there is no
 *			  unit yet to hand back)
 *	ARRAY_WRITER/
 *	ARRAY_READER	- take the unit-array lock in the given mode via
 *			  'lock'
 *	NO_LOCK/WR_LOCK/RD_LOCK - per-unit ioctl lock mode
 *
 * Returns the unit (or the NO_OLD sentinel) on success; NULL with 'mde'
 * set on failure.  NOTE(review): on the MDE_NOT_MM failure path any locks
 * already taken through 'lock' remain held — presumably released by the
 * caller's IOLOCK teardown; confirm against the ioctl framework.
 */
mm_unit_t *
mirror_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
{
	mm_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	/* Reject out-of-range set or unit numbers */
	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
		return (NULL);
	}

	/* Unless the caller tolerates it, fail on a stale metadb */
	if (!(flags & STALE_OK)) {
		if (md_get_setstatus(setno) & MD_SET_STALE) {
			(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
			return (NULL);
		}
	}

	ui = MDI_UNIT(mnum);
	if (flags & NO_OLD) {
		/* Creation path: the unit must not already be set up */
		if (ui != NULL) {
			(void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
			return (NULL);
		}
		return ((mm_unit_t *)1);
	}

	if (ui == NULL) {
		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
		return (NULL);
	}

	/* Array lock first, then the per-unit ioctl lock */
	if (flags & ARRAY_WRITER)
		md_array_writer(lock);
	else if (flags & ARRAY_READER)
		md_array_reader(lock);

	if (!(flags & NO_LOCK)) {
		if (flags & WR_LOCK)
			(void) md_ioctl_writerlock(lock, ui);
		else /* RD_LOCK */
			(void) md_ioctl_readerlock(lock, ui);
	}
	un = (mm_unit_t *)MD_UNIT(mnum);

	/* The unit must actually be a mirror */
	if (un->c.un_type != MD_METAMIRROR) {
		(void) mdmderror(mde, MDE_NOT_MM, mnum);
		return (NULL);
	}

	return (un);
}
158 
159 static int
mirror_set(void * d,int mode)160 mirror_set(
161 	void		*d,
162 	int		mode
163 )
164 {
165 	minor_t		mnum;
166 	mm_unit_t	*un;
167 	mddb_recid_t	recid;
168 	mddb_type_t	typ1;
169 	int		err;
170 	int		i;
171 	set_t		setno;
172 	md_set_params_t	*msp = d;
173 
174 
175 	mnum = msp->mnum;
176 
177 	mdclrerror(&msp->mde);
178 
179 	if (mirror_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL)
180 		return (0);
181 
182 	setno = MD_MIN2SET(mnum);
183 
184 	typ1 = (mddb_type_t)md_getshared_key(setno,
185 	    mirror_md_ops.md_driver.md_drivername);
186 
187 	/*
188 	 * Create the db record for this mdstruct
189 	 * We don't store incore elements ondisk
190 	 */
191 
192 	if (msp->options & MD_CRO_64BIT) {
193 #if defined(_ILP32)
194 		return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum));
195 #else
196 		recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
197 		    MD_CRO_64BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
198 #endif
199 	} else {
200 		/*
201 		 * It's important to use the correct size here
202 		 */
203 		msp->size = sizeof (mm_unit32_od_t);
204 		recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
205 		    MD_CRO_32BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
206 	}
207 	if (recid < 0)
208 		return (mddbstatus2error(&msp->mde, (int)recid,
209 		    mnum, setno));
210 
211 	/* Resize to include incore fields */
212 	un = (mm_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*un), 0);
213 	/*
214 	 * It is okay that we muck with the mdstruct here,
215 	 * since no one else will know about the mdstruct
216 	 * until we commit it. If we crash, the record will
217 	 * be automatically purged, since we haven't
218 	 * committed it yet.
219 	 */
220 
221 	/* copy in the user's mdstruct */
222 	if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un,
223 	    (uint_t)msp->size, mode)) {
224 		mddb_deleterec_wrapper(recid);
225 		return (EFAULT);
226 	}
227 	/* All 64 bit metadevices only support EFI labels. */
228 	if (msp->options & MD_CRO_64BIT) {
229 		un->c.un_flag |= MD_EFILABEL;
230 	}
231 
232 	un->c.un_revision |= MD_FN_META_DEV;
233 	MD_RECID(un)	= recid;
234 	MD_CAPAB(un)	= MD_CAN_PARENT | MD_CAN_META_CHILD | MD_CAN_SP;
235 	MD_PARENT(un)	= MD_NO_PARENT;
236 
237 	for (i = 0; i < NMIRROR; i++) {
238 		struct mm_submirror	*sm;
239 
240 		sm = &un->un_sm[i];
241 		if (!SMS_IS(sm, SMS_INUSE))
242 			continue;
243 
244 		/* ensure that the submirror is a metadevice */
245 		if (md_getmajor(sm->sm_dev) != md_major)
246 			return (mdmderror(&msp->mde, MDE_INVAL_UNIT,
247 			    md_getminor(sm->sm_dev)));
248 
249 		if (md_get_parent(sm->sm_dev) == MD_NO_PARENT)
250 			continue;
251 
252 		/* mirror creation should fail here */
253 		md_nblocks_set(mnum, -1ULL);
254 		MD_UNIT(mnum) = NULL;
255 
256 		mddb_deleterec_wrapper(recid);
257 		return (mdmderror(&msp->mde, MDE_IN_USE,
258 		    md_getminor(sm->sm_dev)));
259 	}
260 
261 	if (err = mirror_build_incore(un, 0)) {
262 		md_nblocks_set(mnum, -1ULL);
263 		MD_UNIT(mnum) = NULL;
264 
265 		mddb_deleterec_wrapper(recid);
266 		return (err);
267 	}
268 
269 	/*
270 	 * Update unit availability
271 	 */
272 	md_set[setno].s_un_avail--;
273 
274 	mirror_commit(un, ALL_SUBMIRRORS, 0);
275 	md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0);
276 	mirror_check_failfast(mnum);
277 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno,
278 	    MD_SID(un));
279 
280 	resync_start_timeout(setno);
281 	return (0);
282 }
283 
284 static int
mirror_get(void * migp,int mode,IOLOCK * lock)285 mirror_get(
286 	void		*migp,
287 	int		mode,
288 	IOLOCK		*lock
289 )
290 {
291 	mm_unit_t	*un;
292 	md_i_get_t	*migph = migp;
293 
294 	mdclrerror(&migph->mde);
295 
296 	if ((un = mirror_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL)
297 		return (0);
298 
299 	if (migph->size == 0) {
300 		migph->size = un->c.un_size;
301 		return (0);
302 	}
303 
304 	if (migph->size < un->c.un_size) {
305 		return (EFAULT);
306 	}
307 	if (ddi_copyout(un, (caddr_t)(uintptr_t)migph->mdp,
308 	    un->c.un_size, mode))
309 		return (EFAULT);
310 	return (0);
311 }
312 
313 static int
mirror_getdevs(void * mgdp,int mode,IOLOCK * lock)314 mirror_getdevs(
315 	void			*mgdp,
316 	int			mode,
317 	IOLOCK			*lock
318 )
319 {
320 	mm_unit_t		*un;
321 	md_dev64_t		*udevs;
322 	int			cnt;
323 	int			i;
324 	md_dev64_t		unit_dev;
325 	md_getdevs_params_t	*mgdph = mgdp;
326 
327 
328 	mdclrerror(&mgdph->mde);
329 
330 	if ((un = mirror_getun(mgdph->mnum,
331 	    &mgdph->mde, RD_LOCK, lock)) == NULL)
332 		return (0);
333 
334 	udevs = (md_dev64_t *)(uintptr_t)mgdph->devs;
335 
336 	for (cnt = 0, i = 0; i < NMIRROR; i++) {
337 		if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE))
338 			continue;
339 		if (cnt < mgdph->cnt) {
340 			unit_dev = un->un_sm[i].sm_dev;
341 			if (md_getmajor(unit_dev) != md_major) {
342 				unit_dev = md_xlate_mini_2_targ(unit_dev);
343 				if (unit_dev == NODEV64)
344 					return (ENODEV);
345 			}
346 
347 			if (ddi_copyout((caddr_t)&unit_dev, (caddr_t)udevs,
348 			    sizeof (*udevs), mode) != 0)
349 				return (EFAULT);
350 			++udevs;
351 		}
352 		++cnt;
353 	}
354 
355 	mgdph->cnt = cnt;
356 	return (0);
357 }
358 
/*
 * mirror_reset:
 * ----------------
 * Delete (metaclear) a mirror metadevice.
 *
 * The mirror must have no parent and must not be open.  Unless 'force'
 * is set, every in-use submirror must be in the SMS_RUNNING state.
 *
 * Returns 0 on success (user-visible errors are reported via mirp->mde).
 */
static int
mirror_reset(
	md_i_reset_t	*mirp
)
{
	minor_t		mnum = mirp->mnum;
	mm_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	mdclrerror(&mirp->mde);

	if ((un = mirror_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == NULL)
		return (0);

	/* Cannot clear a mirror that is still in use by a parent device */
	if (MD_HAS_PARENT(un->c.un_parent)) {
		return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
	}

	rw_enter(&md_unit_array_rw.lock, RW_WRITER);

	/* single thread */
	ui = MDI_UNIT(mnum);
	(void) md_unit_openclose_enter(ui);

	/* An open mirror cannot be cleared */
	if (md_unit_isopen(ui)) {
		md_unit_openclose_exit(ui);
		rw_exit(&md_unit_array_rw.lock);
		return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
	}

	md_unit_openclose_exit(ui);

	/*
	 * Without force, refuse to clear if any in-use submirror is not
	 * fully up (SMS_RUNNING).
	 */
	if (!mirp->force) {
		int	smi;
		for (smi = 0; smi < NMIRROR; smi++) {
			if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
				continue;

			if (!SMS_BY_INDEX_IS(un, smi, SMS_RUNNING)) {
				rw_exit(&md_unit_array_rw.lock);
				return (mdmderror(&mirp->mde,
				    MDE_C_WITH_INVAL_SM, mnum));
			}
		}
	}

	reset_mirror(un, mnum, 1);

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail++;

	/*
	 * If MN set, reset s_un_next so all nodes can have
	 * the same view of the next available slot when
	 * nodes are -w and -j
	 */
	if (MD_MNSET_SETNO(setno)) {
		(void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
	}

	rw_exit(&md_unit_array_rw.lock);
	return (0);
}
425 
426 static int
mirror_get_geom(mm_unit_t * un,struct dk_geom * geomp)427 mirror_get_geom(
428 	mm_unit_t	*un,
429 	struct dk_geom	*geomp
430 )
431 {
432 	md_get_geom((md_unit_t *)un, geomp);
433 
434 	return (0);
435 }
436 
437 static int
mirror_get_vtoc(mm_unit_t * un,struct vtoc * vtocp)438 mirror_get_vtoc(
439 	mm_unit_t	*un,
440 	struct vtoc	*vtocp
441 )
442 {
443 	md_get_vtoc((md_unit_t *)un, vtocp);
444 
445 	return (0);
446 }
447 
448 static int
mirror_set_vtoc(mm_unit_t * un,struct vtoc * vtocp)449 mirror_set_vtoc(
450 	mm_unit_t	*un,
451 	struct vtoc	*vtocp
452 )
453 {
454 	return (md_set_vtoc((md_unit_t *)un, vtocp));
455 }
456 
457 static int
mirror_get_extvtoc(mm_unit_t * un,struct extvtoc * vtocp)458 mirror_get_extvtoc(
459 	mm_unit_t	*un,
460 	struct extvtoc	*vtocp
461 )
462 {
463 	md_get_extvtoc((md_unit_t *)un, vtocp);
464 
465 	return (0);
466 }
467 
468 static int
mirror_set_extvtoc(mm_unit_t * un,struct extvtoc * vtocp)469 mirror_set_extvtoc(
470 	mm_unit_t	*un,
471 	struct extvtoc	*vtocp
472 )
473 {
474 	return (md_set_extvtoc((md_unit_t *)un, vtocp));
475 }
476 
477 static int
mirror_get_cgapart(mm_unit_t * un,struct dk_map * dkmapp)478 mirror_get_cgapart(
479 	mm_unit_t	*un,
480 	struct dk_map	*dkmapp
481 )
482 {
483 	md_get_cgapart((md_unit_t *)un, dkmapp);
484 	return (0);
485 }
486 
/*
 * mirror_getcomp_by_dev:
 * ----------------
 * Locate the submirror index (*smi) and component index (*cip) of the
 * component whose device matches params->old_dev.
 *
 * Two passes are made over each in-use submirror:
 *   1. match on the component's current or original dev_t;
 *   2. if that fails, re-scan components whose dev is NODEV and match on
 *	the namespace key looked up from the dev_t.  If the dev_t has
 *	multiple keys in the namespace the match is ambiguous and the
 *	lookup fails with MDE_MULTNM.
 *
 * Returns 1 on success; 0 on failure with params->mde set.
 */
static int
mirror_getcomp_by_dev(mm_unit_t *un, replace_params_t *params,
    int *smi, int *cip)
{
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	ms_comp_t		*comp;
	ms_unit_t		*mous;
	int			ci;
	int			i;
	int			compcnt;
	ms_cd_info_t		cd;
	void			(*get_dev)();
	md_dev64_t		dev = md_expldev(params->old_dev);
	md_error_t		*ep = &params->mde;
	minor_t			mnum = params->mnum;
	mdkey_t			devkey;
	int			nkeys;
	set_t			setno;
	side_t			side;

	setno = MD_MIN2SET(MD_SID(un));
	side = mddb_getsidenum(setno);

	/* Look up the namespace key(s) for the old device */
	if (md_getkeyfromdev(setno, side, dev, &devkey, &nkeys) != 0)
		return (mddeverror(ep, MDE_NAME_SPACE, dev));

	for (i = 0; i < NMIRROR; i++) {
		sm = &un->un_sm[i];
		smic = &un->un_smic[i];

		if (!SMS_IS(sm, SMS_INUSE))
			continue;

		get_dev =
		    (void (*)())md_get_named_service(sm->sm_dev, 0,
		    "get device", 0);
		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);

		/*
		 * For each of the underlying stripe components get
		 * the info.
		 */
		for (ci = 0; ci < compcnt; ci++) {
			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
			if ((cd.cd_dev == dev) || (cd.cd_orig_dev == dev)) {
				*cip = ci;
				*smi = i;
				return (1);
			}
		}

		/*
		 * now we rescan looking only for NODEV. If we find
		 * NODEV then we will check the keys to see if its a match.
		 *
		 * If no key was found to match dev, then there is
		 * no way to compare keys - so continue.
		 */
		if (nkeys == 0) {
			continue;
		}
		mous = MD_UNIT(md_getminor(sm->sm_dev));

		for (ci = 0; ci < compcnt; ci++) {

			/* un_ocomp is the byte offset of the component array */
			comp = (struct ms_comp *)
			    ((void *)&((char *)mous)[mous->un_ocomp]);

			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

			if (cd.cd_dev == NODEV64 || cd.cd_orig_dev == NODEV64) {
				comp += ci;
				if (comp->un_key == devkey) {
					/* Ambiguous: dev has several keys */
					if (nkeys > 1) {
						return (mddeverror(
						    ep, MDE_MULTNM, dev));
					}
					*cip = ci;
					*smi = i;
					return (1);
				}
			}
		}
	}
	return (mdcomperror(ep, MDE_CANT_FIND_COMP, mnum, dev));
}
574 
/*
 * comp_replace:
 * ----------------
 * Called to implement the component replace function
 * (metareplace, and metareplace -e for the ENABLE_COMP case).
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the functions fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
comp_replace(
	replace_params_t	*params,
	IOLOCK			*lock
)
{
	minor_t			mnum = params->mnum;
	set_t			setno;
	side_t			side;
	mm_unit_t		*un;
	mdi_unit_t		*ui;
	ms_unit_t		*ms_un;
	mdi_unit_t		*ms_ui;
	ms_comp_t		*comp;
	mm_submirror_t		*sm;
	md_dev64_t		smdev;
	mddb_recid_t		recids[6]; /* recids for stripe on SP */
	int			smi, ci;
	ms_new_dev_t		nd;
	int			(*repl_dev)();
	void			(*repl_done)();
	void			*repl_data;
	int			err = 0;
	ms_cd_info_t		cd;
	void			(*get_dev)();

	mdclrerror(&params->mde);

	if ((un = mirror_getun(mnum, &params->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	ui = MDI_UNIT(mnum);
	if (ui->ui_tstate & MD_INACCESSIBLE) {
		(void) mdmderror(&params->mde, MDE_IN_UNAVAIL_STATE, mnum);
		goto errexit;
	}

	/*
	 * replace cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		(void) mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum);
		goto errexit;
	}

	/* Find the submirror/component being replaced */
	if (mirror_getcomp_by_dev(un, params, &smi, &ci) == 0) {
		goto errexit;
	}

	/* Refuse to operate on the only submirror */
	if (un->un_nsm == 1) {
		(void) mdmderror(&params->mde, MDE_LAST_SM_RE, mnum);
		goto errexit;
	}

	/* There must be another readable source for the data */
	if (mirror_other_sources(un, smi, ci, 0) != 0) {
		(void) mdcomperror(&params->mde, MDE_REPL_INVAL_STATE,
		    mnum, md_expldev(params->old_dev));
		goto errexit;
	}

	sm = &un->un_sm[smi];
	if (sm->sm_state & (SMS_OFFLINE | SMS_OFFLINE_RESYNC)) {
		(void) mdmderror(&params->mde, MDE_ILLEGAL_SM_STATE, mnum);
		goto errexit;
	}

	get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
	    "get device", 0);
	(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

	repl_dev = (int (*)())md_get_named_service(sm->sm_dev, 0,
	    "replace device", 0);

	smdev = sm->sm_dev;
	ms_un = MD_UNIT(md_getminor(smdev));

	if (params->cmd == ENABLE_COMP) {
		md_dev64_t	this_dev;
		int		numkeys;
		mdkey_t		this_key;

		this_dev = ((cd.cd_orig_dev == 0) ? cd.cd_dev :
		    cd.cd_orig_dev);
		setno = MD_MIN2SET(md_getminor(smdev));
		side = mddb_getsidenum(setno);
		comp = (struct ms_comp *)
		    ((void *)&((char *)ms_un)[ms_un->un_ocomp]);
		comp += ci;
		/*
		 * We trust the dev_t because we cannot determine the
		 * dev_t from the device id since a new disk is in the
		 * same location. Since this is a call from metareplace -e dx
		 * AND it is SCSI a new dev_t is not generated.  So the
		 * dev_t from the mddb is used. Before enabling the device
		 * we check to make sure that multiple entries for the same
		 * device does not exist in the namespace. If they do we
		 * fail the ioctl.
		 * One of the many ways multiple entries in the name space
		 * can occur is if one removed the failed component in the
		 * stripe of a mirror and put another disk that was part of
		 * another metadevice. After reboot metadevadm would correctly
		 * update the device name for the metadevice whose component
		 * has moved. However now in the metadb there are two entries
		 * for the same name (ctds) that belong to different
		 * metadevices. One is valid, the other is a ghost or "last
		 * know as" ctds.
		 */
		this_dev =  md_getdevnum(setno, side,
		    comp->un_key, MD_TRUST_DEVT);

		/*
		 * Verify that multiple keys for the same
		 * dev_t don't exist
		 */

		if (md_getkeyfromdev(setno, side, this_dev,
		    &this_key, &numkeys) != 0) {
			(void) mddeverror(&params->mde, MDE_NAME_SPACE,
			    md_expldev(params->old_dev));
			goto errexit;
		}
		/*
		 * Namespace has multiple entries
		 * for the same devt
		 */
		if (numkeys > 1) {
			(void) mddeverror(&params->mde, MDE_MULTNM,
			    md_expldev(params->old_dev));
			goto errexit;
		}
		if ((numkeys == 0) || (comp->un_key != this_key)) {
			(void) mdcomperror(&params->mde, MDE_CANT_FIND_COMP,
			    mnum, this_dev);
			goto errexit;
		}

		/* Refresh the devid-based namespace entry if one exists */
		if ((md_getmajor(this_dev) != md_major) &&
		    (md_devid_found(setno, side, this_key) == 1)) {
			if (md_update_namespace_did(setno, side,
			    this_key, &params->mde) != 0) {
				(void) mddeverror(&params->mde, MDE_NAME_SPACE,
				    this_dev);
				goto errexit;
			}
		}

		/* The enable must target the device recorded in the mddb */
		if (md_expldev(params->new_dev) != this_dev) {
			(void) mddeverror(&params->mde, MDE_FIX_INVAL_STATE,
			    md_expldev(params->new_dev));
			goto errexit;
		}

		/* in case of dryrun, don't actually do anything */
		if ((params->options & MDIOCTL_DRYRUN) == 0) {
			err = (*repl_dev)(sm->sm_dev, 0, ci, NULL, recids, 6,
			    &repl_done, &repl_data);
		}
	} else if ((params->options & MDIOCTL_DRYRUN) == 0) {
		/* Replace with a new device described by the caller */
		nd.nd_dev = md_expldev(params->new_dev);
		nd.nd_key = params->new_key;
		nd.nd_start_blk = params->start_blk;
		nd.nd_nblks = params->number_blks;
		nd.nd_labeled = params->has_label;
		nd.nd_hs_id = 0;

		err = (*repl_dev)(sm->sm_dev, 0, ci, &nd, recids, 6,
		    &repl_done, &repl_data);

	}

	if (err != 0) {
		(void) mdcomperror(&params->mde, err, mnum,
		    md_expldev(params->new_dev));
		goto errexit;
	}
	/* In case of a dryun we're done. */
	if (params->options & MDIOCTL_DRYRUN) {
		mdclrerror(&params->mde);
		return (0);
	}

	/* set_sm_comp_state() commits the modified records */
	set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, MD_STATE_NO_XMIT,
	    lock);

	(*repl_done)(sm->sm_dev, repl_data);

	/*
	 * If the mirror is open then need to make sure that the submirror,
	 * on which the replace ran, is also open and if not then open it.
	 * This is only a concern for a single component sub-mirror stripe
	 * as it may not be open due to the failure of the single component.
	 *
	 * This check has to be done after the call to (*repl_done)
	 * as that function releases the writer lock on the submirror.
	 */
	if (md_unit_isopen(ui)) {
		minor_t ms_mnum = md_getminor(sm->sm_dev);

		ms_ui = MDI_UNIT(ms_mnum);

		if (!md_unit_isopen(ms_ui)) {
			/*
			 * Underlying submirror is not open so open it.
			 */
			if (md_layered_open(ms_mnum, &smdev, MD_OFLG_NULL)) {
				mirror_openfail_console_info(un, smi, ci);
				goto errexit;
			}
		}
	}

	mirror_check_failfast(mnum);

	if (params->cmd == ENABLE_COMP) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	md_ioctl_writerexit(lock);
	/*
	 * Reset any saved resync location flags as we've now replaced the
	 * component. This means we have to resync the _whole_ component.
	 */
	un->un_rs_resync_done = un->un_rs_resync_2_do = 0;
	un->un_rs_type = MD_RS_NONE;
	mirror_resume_writes(un);
	if (!MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mirror_resync_unit(mnum, NULL, &params->mde, lock);
	mdclrerror(&params->mde);
	return (0);
errexit:
	/* We need to resume writes unless this is a dryrun */
	if (!(params->options & MDIOCTL_DRYRUN))
		mirror_resume_writes(un);
	return (0);
}
831 
/*
 * mirror_attach:
 * ----------------
 * Called to implement the submirror attach function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the functions fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
mirror_attach(
	md_att_struct_t	*att,
	IOLOCK		*lock
)
{
	minor_t			mnum = att->mnum;
	mm_unit_t		*un;
	md_unit_t		*su;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	md_dev64_t		sm_dev;
	minor_t			sm_mnum;
	mdkey_t			indx;
	set_t			setno;
	uint_t			options;

	/*
	 * This routine should not be called during upgrade.
	 */
	if (MD_UPGRADE)  {
		return (0);
	}

	mdclrerror(&att->mde);
	options = att->options;

	if ((un = mirror_getun(mnum, &att->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	setno = MD_UN2SET(un);

	/* Find the first free submirror slot */
	for (smi = 0; smi < NMIRROR; smi++)
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			break;

	if (smi == NMIRROR) {
		(void) mdmderror(&att->mde, MDE_MIRROR_FULL, mnum);
		goto errexit;
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	sm_dev = att->submirror;
	sm_mnum = md_getminor(sm_dev);

	/* The submirror must not already belong to another metadevice */
	if (md_get_parent(sm_dev) != MD_NO_PARENT) {
		(void) mdmderror(&att->mde, MDE_IN_USE, sm_mnum);
		goto errexit;
	}

	if (md_unit_isopen(MDI_UNIT(sm_mnum))) {
		(void) mdmderror(&att->mde, MDE_IS_OPEN, sm_mnum);
		goto errexit;
	}

	/* Check the size */
	su = (md_unit_t *)MD_UNIT(sm_mnum);
	if (un->c.un_total_blocks > su->c.un_total_blocks) {
		(void) mdmderror(&att->mde, MDE_SM_TOO_SMALL, sm_mnum);
		goto errexit;
	}

	/* Don't attach labeled sm to unlabeled mirrors */
	if ((su->c.un_flag & MD_LABELED) && !(un->c.un_flag & MD_LABELED)) {
		(void) mdmderror(&att->mde, MDE_NO_LABELED_SM, sm_mnum);
		goto errexit;
	}

	indx = md_setshared_name(setno,
	    ddi_major_to_name(md_getmajor(sm_dev)), 0L);

	/* Open the sm, only if the mirror is open */
	if (md_unit_isopen(MDI_UNIT(mnum))) {
		if (md_layered_open(mnum, &sm_dev, MD_OFLG_NULL)) {
			(void) md_remshared_name(setno, indx);
			(void) mdmderror(&att->mde, MDE_SM_OPEN_ERR,
			    md_getminor(att->submirror));
			goto errexit;
		}
		/* in dryrun mode, don't leave the device open */
		if (options & MDIOCTL_DRYRUN) {
			md_layered_close(sm_dev, MD_OFLG_NULL);
		}
	}

	/*
	 * After this point the checks are done and action is taken.
	 * So, clean up and return in case of dryrun.
	 */

	if (options & MDIOCTL_DRYRUN) {
		md_ioctl_writerexit(lock);
		mdclrerror(&att->mde);
		return (0);
	}

	/* Wire the submirror into the mirror and commit the new config */
	sm->sm_key = att->key;
	sm->sm_dev = sm_dev;
	md_set_parent(sm_dev, MD_SID(un));
	mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1);
	build_submirror(un, smi, 0);
	un->un_nsm++;
	mirror_commit(un, SMI2BIT(smi), 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ATTACH, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	/* Resume writes, then kick off the resync for non-MN sets */
	mirror_resume_writes(un);
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		(void) mirror_resync_unit(mnum, NULL, &att->mde, lock);
	mdclrerror(&att->mde);
	return (0);
errexit:
	/* We need to resume writes unless this is a dryrun */
	if (!(options & MDIOCTL_DRYRUN))
		mirror_resume_writes(un);
	return (0);
}
968 
969 
970 void
reset_comp_states(mm_submirror_t * sm,mm_submirror_ic_t * smic)971 reset_comp_states(mm_submirror_t *sm, mm_submirror_ic_t *smic)
972 {
973 	int		compcnt;
974 	int		i;
975 	md_m_shared_t	*shared;
976 
977 	compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm);
978 	for (i = 0; i < compcnt; i++) {
979 		shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
980 		    (sm->sm_dev, sm, i);
981 
982 		shared->ms_state = CS_OKAY;
983 		shared->ms_flags &= ~MDM_S_NOWRITE;
984 		shared->ms_lasterrcnt = 0;
985 	}
986 }
987 
988 
/*
 * mirror_detach:
 * ----------------
 * Called to implement the submirror detach function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the functions fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_detach(
	md_detach_params_t	*det,
	IOLOCK			*lock
)
{
	minor_t			mnum = det->mnum;
	mm_unit_t		*un;
	mdi_unit_t		*ui;
	mm_submirror_t		*sm;
	mm_submirror_t		*old_sm;
	mm_submirror_t		*new_sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	md_dev64_t		sm_dev;
	md_unit_t		*su;
	sv_dev_t		sv;
	mddb_recid_t		recids[2];
	int			nsv = 0;
	int			smi_remove;
	mm_submirror_ic_t	*old_smic;
	mm_submirror_ic_t	*new_smic;

	mdclrerror(&det->mde);

	if ((un = mirror_getun(mnum, &det->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	ui = MDI_UNIT(mnum);
	if (ui->ui_tstate & MD_INACCESSIBLE) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_IN_UNAVAIL_STATE, mnum));
	}
	/*
	 * detach cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	/* Find the submirror slot that holds det->submirror */
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		if (un->un_sm[smi].sm_dev == det->submirror) {
			smi_remove = smi;
			break;
		}
	}

	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_CANT_FIND_SM, mnum));
	}

	/* Refuse to detach the only remaining submirror */
	if (un->un_nsm == 1) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_LAST_SM, mnum));
	}

	/* The data must remain readable from another submirror */
	if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_NO_READABLE_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	sm_dev = sm->sm_dev;
	su = (md_unit_t *)MD_UNIT(md_getminor(sm_dev));

	/*
	 * Need to pass in the extra record id,
	 * cause mirror_commit() will not commit
	 * a sm (from the smmask) if the slot is unused.
	 * Which it is, since we are detaching.
	 */
	recids[0] = ((md_unit_t *)MD_UNIT(md_getminor(sm_dev)))->c.un_record_id;
	recids[1] = 0;

	mirror_set_sm_state(sm, smic, SMS_UNUSED, det->force_detach);
	/*
	 * If there are any erred components
	 * then make the detach fail and do not unparent the
	 * submirror.
	 */
	if (sm->sm_state == SMS_UNUSED) {
		/* reallow soft partitioning of submirror */
		MD_CAPAB(su) |= MD_CAN_SP;
		md_reset_parent(sm_dev);
		reset_comp_states(sm, smic);
		un->un_nsm--;
		/* Close the sm, only if the mirror is open */
		if (md_unit_isopen(MDI_UNIT(mnum)))
			md_layered_close(sm_dev, MD_OFLG_NULL);
		sv.setno = MD_UN2SET(un);
		sv.key = sm->sm_key;
		nsv = 1;
	} else
		(void) mdmderror(&det->mde, MDE_SM_FAILED_COMPS, mnum);

	/*
	 * Perhaps the mirror changed it's size due to this detach.
	 * (void) mirror_grow_unit(un, &mde);
	 */

	/*
	 * NOTE: We are passing the detached sm recid
	 * and not the smmask field. This is correct.
	 */
	mirror_commit(un, 0, recids);
	md_rem_names(&sv, nsv);
	if (sm->sm_state == SMS_UNUSED) {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	/*
	 * Reshuffle the submirror devices in the array as we potentially
	 * have a dead record in the middle of it.
	 */
	for (smi = 0; nsv && (smi < NMIRROR); smi++) {
		if (smi < smi_remove) {
			continue;
		}
		if (smi > smi_remove) {
			/* Slide each later submirror down one slot */
			old_sm = &un->un_sm[smi];
			new_sm = &un->un_sm[smi - 1];
			new_sm->sm_key = old_sm->sm_key;
			new_sm->sm_dev = old_sm->sm_dev;
			new_sm->sm_state = old_sm->sm_state;
			new_sm->sm_flags = old_sm->sm_flags;
			new_sm->sm_shared = old_sm->sm_shared;
			new_sm->sm_hsp_id = old_sm->sm_hsp_id;
			new_sm->sm_timestamp = old_sm->sm_timestamp;
			bzero(old_sm, sizeof (mm_submirror_t));
			old_smic = &un->un_smic[smi];
			new_smic = &un->un_smic[smi - 1];
			bcopy(old_smic, new_smic, sizeof (mm_submirror_ic_t));
			bzero(old_smic, sizeof (mm_submirror_ic_t));
		}
	}
	mirror_commit(un, 0, NULL);
	mirror_resume_writes(un);
	return (0);
}
1151 
1152 /*
1153  * mirror_offline:
1154  * ----------------
1155  * Called to implement the submirror offline function
1156  *
1157  * Owner is returned in the parameter block passed in by the caller.
1158  *
1159  * Returns:
1160  *	0	success
1161  *	error code if the functions fails
1162  *
1163  * For a MN set, on entry all writes to the mirror are suspended, on exit
1164  * from this function, writes must be resumed.
1165  */
static int
mirror_offline(
	md_i_off_on_t	*miop,	/* ioctl parameter block: mnum, submirror */
	IOLOCK		*lock	/* ioctl lock tracking structure */
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	mdi_unit_t		*ui = MDI_UNIT(mnum);

	mdclrerror(&miop->mde);

	/*
	 * Lookup failure leaves the details in miop->mde; return 0 so the
	 * error is delivered to the caller through the parameter block.
	 */
	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	/*
	 * offline cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		/*
		 * Writes were suspended on entry (see block comment above);
		 * every error return must resume them.
		 */
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	/*
	 * Reject mirror_offline if ABR is set
	 */
	if ((ui->ui_tstate & MD_ABR_CAP) || un->un_abr_count) {
		mirror_resume_writes(un);
		return (mderror(&miop->mde, MDE_ABR_SET));
	}

	/* Locate the in-use submirror matching the requested device */
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}

	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	/* Only a RUNNING submirror may be offlined, unless forced */
	if (!SMS_IS(sm, SMS_RUNNING) && !miop->force_offline) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	/*
	 * Refuse to offline the submirror if no other readable source
	 * of the data would remain.
	 */
	if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_NO_READABLE_SM, mnum));
	}
	mirror_set_sm_state(sm, smic, SMS_OFFLINE, 1);
	mirror_resume_writes(un);

	MD_STATUS(un) |= MD_UN_OFFLINE_SM;
	/* Persist the new submirror state and notify listeners */
	mirror_commit(un, NO_SUBMIRRORS, 0);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OFFLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}
1234 
1235 /*
1236  * mirror_online:
1237  * ----------------
1238  * Called to implement the submirror online function
1239  *
1240  * Owner is returned in the parameter block passed in by the caller.
1241  *
1242  * Returns:
1243  *	0	success
1244  *	error code if the functions fails
1245  *
1246  * For a MN set, on entry all writes to the mirror are suspended, on exit
1247  * from this function, writes must be resumed.
1248  */
static int
mirror_online(
	md_i_off_on_t	*miop,	/* ioctl parameter block: mnum, submirror */
	IOLOCK		*lock	/* ioctl lock tracking structure */
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	set_t			setno = MD_MIN2SET(mnum);

	mdclrerror(&miop->mde);

	/*
	 * Lookup failure leaves the details in miop->mde; return 0 so the
	 * error is delivered to the caller through the parameter block.
	 */
	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	/* Locate the in-use submirror matching the requested device */
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}
	if (smi == NMIRROR) {
		/* Writes were suspended on entry; resume on every error */
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	/* Only an OFFLINE submirror can be brought back online */
	if (!SMS_IS(sm, SMS_OFFLINE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	/*
	 * online cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	mirror_set_sm_state(sm, smic, SMS_OFFLINE_RESYNC, 1);
	mirror_commit(un, NO_SUBMIRRORS, 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ONLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));


	/* for MN sets, re-read the resync record from disk */
	if (MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mddb_reread_rr(setno, un->un_rr_dirty_recid);

	/*
	 * Seed the resync bitmap from the dirty bitmap and mark this
	 * submirror as the target of the optimized resync.
	 */
	bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm,
	    howmany(un->un_rrd_num, NBBY));
	MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
	sm->sm_flags |= MD_SM_RESYNC_TARGET;
	mirror_resume_writes(un);
	/* Drop the writer lock before kicking off the resync thread */
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		return (mirror_resync_unit(mnum, NULL, &miop->mde, lock));
	else return (0);
}
1316 
/*
 * mirror_grow_unit:
 * ----------------
 * Grow the mirror to the size of its smallest submirror. Handles the
 * 32-bit to 64-bit metadevice transition (which requires replacing the
 * on-disk unit record) and resizing/adding resync regions afterwards.
 *
 * Returns:
 *	0	success, or growth not possible/needed
 *	error code (via ep) if the grow is delayed or an allocation fails
 */
int
mirror_grow_unit(
	mm_unit_t		*un,	/* mirror unit to grow */
	md_error_t		*ep	/* error details returned here */
)
{
	md_unit_t		*su;
	mm_submirror_t		*sm;
	int			smi;
	diskaddr_t		total_blocks;
	diskaddr_t		current_tb;
	int			spc;		/* sectors per cylinder (nhead * nsect) */
	minor_t			mnum = MD_SID(un);

	/*
	 * grow_unit cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started. Set
	 * flag to indicate GROW_PENDING and once the resync is complete
	 * the grow_unit function will be executed.
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		MD_STATUS(un) |= MD_UN_GROW_PENDING;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		return (mdmderror(ep, MDE_GROW_DELAYED, MD_SID(un)));
	}

	/*
	 * Find the smallest submirror
	 */
	total_blocks = 0;
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		sm = &un->un_sm[smi];
		/*
		 * Growth is not possible if there is one or more
		 * submirrors made up of non-Metadevices.
		 */
		if (md_getmajor(sm->sm_dev) != md_major)
			return (0);

		su = MD_UNIT(md_getminor(sm->sm_dev));
		if ((total_blocks == 0) ||
		    (su->c.un_total_blocks < total_blocks))
			total_blocks = su->c.un_total_blocks;
	}

	/*
	 * If the smallest submirror is not larger
	 * than the mirror, we are all done.
	 */
	if (total_blocks <= un->c.un_total_blocks)
		return (0);

	/*
	 * Growing the mirror now.
	 * First: Round down the actual_tb to be a multiple
	 * 	of nheads * nsects.
	 */
	spc = un->c.un_nhead * un->c.un_nsect;
	current_tb = (total_blocks/spc) * spc;

	un->c.un_total_blocks = current_tb;
	md_nblocks_set(mnum, un->c.un_total_blocks);
	un->c.un_actual_tb = total_blocks;

	/* Is the mirror growing from 32 bit device to 64 bit device? */
	if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
	    (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)) {
#if defined(_ILP32)
		/* A 32-bit kernel cannot address a 64-bit metadevice */
		return (mdmderror(ep, MDE_UNIT_TOO_LARGE, mnum));
#else
		mddb_type_t	typ1;
		mddb_recid_t	recid;
		set_t		setno;
		mddb_recid_t	old_recid = un->c.un_record_id;
		mddb_recid_t	old_vtoc;
		mddb_de_ic_t    *dep, *old_dep;
		md_create_rec_option_t	options;

		/* yup, new device size. So we need to replace the record */
		typ1 = (mddb_type_t)md_getshared_key(MD_UN2SET(un),
		    mirror_md_ops.md_driver.md_drivername);
		setno = MD_MIN2SET(mnum);

		/* Preserve the friendly name properties of growing unit */
		options = MD_CRO_64BIT | MD_CRO_MIRROR;
		if (un->c.un_revision & MD_FN_META_DEV)
			options |= MD_CRO_FN;
		recid = mddb_createrec(offsetof(mm_unit_t, un_smic), typ1,
		    MIRROR_REC, options, setno);
		/* Resize to include incore fields */
		un->c.un_revision |= MD_64BIT_META_DEV;
		/* All 64 bit metadevices only support EFI labels. */
		un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device had a vtoc record attached to it, we remove
		 * the vtoc record, because the layout has changed completely.
		 */
		old_vtoc = un->c.un_vtoc_id;
		if (old_vtoc != 0) {
			un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
		/*
		 * Transfer the in-core user data from the old record to
		 * the new one, commit, then delete the old record.
		 */
		MD_RECID(un) = recid;
		dep = mddb_getrecdep(recid);
		old_dep = mddb_getrecdep(old_recid);
		kmem_free(dep->de_rb_userdata, dep->de_reqsize);
		dep->de_rb_userdata = old_dep->de_rb_userdata;
		dep->de_reqsize = old_dep->de_reqsize;
		dep->de_rb_userdata_ic = old_dep->de_rb_userdata_ic;
		dep->de_icreqsize = old_dep->de_icreqsize;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		/* The userdata now belongs to the new record */
		old_dep->de_rb_userdata = NULL;
		old_dep->de_rb_userdata_ic = NULL;
		mddb_deleterec_wrapper(old_recid);
		/*
		 * If there was a vtoc record, it is no longer needed, because
		 * a new efi record has been created for this un.
		 */
		if (old_vtoc != 0) {
			mddb_deleterec_wrapper(old_vtoc);
		}
#endif
	}

	/*
	 * If the new size would exceed the maximum resync region count,
	 * the regions must be resized rather than extended.
	 */
	if ((current_tb/un->un_rrd_blksize) > MD_MAX_NUM_RR) {
		if (mirror_resize_resync_regions(un, current_tb)) {
			return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
		}
		mirror_check_failfast(mnum);
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (0);
	}

	if (mirror_add_resync_regions(un, current_tb)) {
		return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
	}

	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}
1463 
1464 static int
mirror_grow(void * mgp,IOLOCK * lock)1465 mirror_grow(
1466 	void			*mgp,
1467 	IOLOCK			*lock
1468 )
1469 {
1470 	mm_unit_t		*un;
1471 	md_grow_params_t	*mgph = mgp;
1472 
1473 	mdclrerror(&mgph->mde);
1474 
1475 	if ((un = mirror_getun(mgph->mnum,
1476 	    &mgph->mde, WR_LOCK, lock)) == NULL)
1477 		return (0);
1478 
1479 	if (MD_STATUS(un) & MD_UN_GROW_PENDING)
1480 		return (0);
1481 
1482 	return (mirror_grow_unit(un, &mgph->mde));
1483 }
1484 
1485 static int
mirror_change(md_mirror_params_t * mmp,IOLOCK * lock)1486 mirror_change(
1487 	md_mirror_params_t	*mmp,
1488 	IOLOCK			*lock
1489 )
1490 {
1491 	mm_params_t		*pp = &mmp->params;
1492 	mm_unit_t		*un;
1493 
1494 	mdclrerror(&mmp->mde);
1495 
1496 	if ((un = mirror_getun(mmp->mnum, &mmp->mde, WR_LOCK, lock)) == NULL)
1497 		return (0);
1498 
1499 	if (pp->change_read_option)
1500 		un->un_read_option = pp->read_option;
1501 
1502 	if (pp->change_write_option)
1503 		un->un_write_option = pp->write_option;
1504 
1505 	if (pp->change_pass_num)
1506 		un->un_pass_num = pp->pass_num;
1507 
1508 	mirror_commit(un, NO_SUBMIRRORS, 0);
1509 
1510 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
1511 	    MD_UN2SET(un), MD_SID(un));
1512 	return (0);
1513 }
1514 
1515 static int
mirror_get_resync(md_resync_ioctl_t * ri)1516 mirror_get_resync(
1517 	md_resync_ioctl_t	*ri
1518 )
1519 {
1520 	minor_t			mnum = ri->ri_mnum;
1521 	mm_unit_t		*un;
1522 	u_longlong_t		percent;
1523 	uint_t			cnt;
1524 	uint_t			rr;
1525 	diskaddr_t		d;
1526 
1527 	mdclrerror(&ri->mde);
1528 
1529 	if ((un = mirror_getun(mnum, &ri->mde, STALE_OK|NO_LOCK, NULL)) == NULL)
1530 		return (0);
1531 
1532 	ri->ri_flags = 0;
1533 	if (md_get_setstatus(MD_MIN2SET(mnum)) & MD_SET_STALE) {
1534 		ri->ri_percent_done = 0;
1535 		ri->ri_percent_dirty = 0;
1536 		return (0);
1537 	}
1538 
1539 	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE|MD_UN_RESYNC_CANCEL)) {
1540 		if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
1541 			ri->ri_flags |= MD_RI_INPROGRESS;
1542 		/* Return state of resync thread */
1543 		ri->ri_flags |= (un->un_rs_thread_flags & MD_RI_BLOCK);
1544 		d = un->un_rs_resync_2_do;
1545 		if (d) {
1546 			percent = un->un_rs_resync_done;
1547 			if (un->c.un_total_blocks >
1548 			    MD_MAX_BLKS_FOR_SMALL_DEVS) {
1549 				percent *= 1000;
1550 				percent /= d;
1551 				if (percent > 1000)
1552 					percent = 1000;
1553 			} else {
1554 				percent *= 100;
1555 				percent /= d;
1556 			}
1557 			ri->ri_percent_done = (int)percent;
1558 		} else {
1559 			ri->ri_percent_done = 0;
1560 		}
1561 	}
1562 	if (un->un_nsm < 2) {
1563 		ri->ri_percent_dirty = 0;
1564 		return (0);
1565 	}
1566 	cnt = 0;
1567 	for (rr = 0; rr < un->un_rrd_num; rr++)
1568 		if (IS_REGION_DIRTY(rr, un))
1569 			cnt++;
1570 	d = un->un_rrd_num;
1571 	if (d) {
1572 		percent = cnt;
1573 		percent *= 100;
1574 		percent += d - 1;		/* round up */
1575 		percent /= d;
1576 	} else
1577 		percent = 0;
1578 	ri->ri_percent_dirty = (int)percent;
1579 	return (0);
1580 }
1581 
1582 /*
1583  * mirror_get_owner:
1584  * ----------------
1585  * Called to obtain the current owner of a mirror.
1586  *
1587  * Owner is returned in the parameter block passed in by the caller.
1588  *
1589  * Returns:
1590  *	0	success
1591  *	EINVAL	metadevice does not exist or is not a member of a multi-owned
1592  *		set.
1593  */
1594 static int
mirror_get_owner(md_set_mmown_params_t * p,IOLOCK * lock)1595 mirror_get_owner(md_set_mmown_params_t *p, IOLOCK *lock)
1596 {
1597 	mm_unit_t	*un;
1598 	set_t		setno;
1599 
1600 	if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
1601 		return (EINVAL);
1602 
1603 	setno = MD_UN2SET(un);
1604 	if (!MD_MNSET_SETNO(setno)) {
1605 		return (EINVAL);
1606 	}
1607 	p->d.owner = un->un_mirror_owner;
1608 	return (0);
1609 }
1610 
1611 /*
1612  * mirror_choose_owner_thread:
1613  * --------------------------
1614  * Called to send a CHOOSE_OWNER message to the commd running on the master
1615  * node. This needs to run in a separate context so that mutex livelock is
1616  * avoided. This can occur because the original request is issued from a call
1617  * to metaioctl() which acquires the global ioctl lock, calls down into the
1618  * mirror_ioctl code and then attempts to mdmn_ksend_message() to the master
1619  * node. As the handler for the choose_owner message needs to send another
1620  * ioctl through the metaioctl() entry point, any other use (by rpc.metad or
1621  * mdcommd checking on set ownership) will deadlock the system leading to
1622  * cluster reconfiguration timeouts and eventually a node or (at worst) a
1623  * cluster-wide panic
1624  */
1625 static void
mirror_choose_owner_thread(md_mn_msg_chooseid_t * msg)1626 mirror_choose_owner_thread(md_mn_msg_chooseid_t	*msg)
1627 {
1628 	int		rval;
1629 	md_mn_kresult_t	*kres;
1630 	set_t		setno = MD_MIN2SET(msg->msg_chooseid_mnum);
1631 
1632 	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
1633 	rval = mdmn_ksend_message(setno, MD_MN_MSG_CHOOSE_OWNER,
1634 	    MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)msg,
1635 	    sizeof (md_mn_msg_chooseid_t), kres);
1636 	if (!MDMN_KSEND_MSG_OK(rval, kres)) {
1637 		mdmn_ksend_show_error(rval, kres, "CHOOSE OWNER");
1638 		cmn_err(CE_WARN, "ksend_message failure: CHOOSE_OWNER");
1639 	}
1640 
1641 	kmem_free(kres, sizeof (md_mn_kresult_t));
1642 	kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
1643 	thread_exit();
1644 }
1645 
1646 /*
1647  * mirror_owner_thread:
1648  * -------------------
1649  * Called to request an ownership change from a thread context. This issues
1650  * a mdmn_ksend_message() and then completes the appropriate ownership change
1651  * on successful completion of the message transport.
1652  * The originating application must poll for completion on the 'flags' member
1653  * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block.
1654  * Success is marked by a return value of MD_MN_MM_RES_OK, Failure by
1655  * MD_MN_MM_RES_FAIL
1656  */
static void
mirror_owner_thread(md_mn_req_owner_t *ownp)
{
	int		rval;
	set_t		setno = MD_MIN2SET(ownp->mnum);
	mm_unit_t	*un = MD_UNIT(ownp->mnum);
	md_mn_kresult_t	*kresult;
	md_mps_t	*ps1;

	/* Clear the result field that the originator polls on */
	un->un_mirror_owner_status = 0;

	/* Flag that an ownership-change request is in flight */
	mutex_enter(&un->un_owner_mx);
	un->un_owner_state |= MM_MN_OWNER_SENT;
	mutex_exit(&un->un_owner_mx);

	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_REQUIRE_OWNER,
	    MD_MSGF_NO_LOG, 0, (char *)ownp, sizeof (md_mn_req_owner_t),
	    kresult);

	if (!MDMN_KSEND_MSG_OK(rval, kresult)) {
		/*
		 * Message transport layer failed. Return the failure code to
		 * the application.
		 */
		mdmn_ksend_show_error(rval, kresult, "CHANGE OWNER");
		mutex_enter(&un->un_owner_mx);
		un->un_owner_state &= ~(MM_MN_BECOME_OWNER|MM_MN_OWNER_SENT);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_FAIL;
	} else {
		/*
		 * Ownership change succeeded. Update in-core version of
		 * mirror owner.
		 */
		mutex_enter(&un->un_owner_mx);
		if (un->un_owner_state & MM_MN_BECOME_OWNER) {
			un->un_mirror_owner = md_mn_mynode_id;
			/* Sets node owner of un_rr_dirty record */
			if (un->un_rr_dirty_recid)
				(void) mddb_setowner(un->un_rr_dirty_recid,
				    md_mn_mynode_id);
			/*
			 * Release the block on the current resync region if it
			 * is blocked
			 */
			ps1 = un->un_rs_prev_overlap;
			if ((ps1 != NULL) &&
			    (ps1->ps_flags & MD_MPS_ON_OVERLAP))
				mirror_overlap_tree_remove(ps1);
		}

		un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER);
		mutex_exit(&un->un_owner_mx);
		/* Signal success to the polling originator */
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_OK;

		/* Restart the resync thread if it was previously blocked */
		if (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) {
			mutex_enter(&un->un_rs_thread_mx);
			un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER;
			cv_signal(&un->un_rs_thread_cv);
			mutex_exit(&un->un_rs_thread_mx);
		}
	}
	kmem_free(kresult, sizeof (md_mn_kresult_t));
	kmem_free(ownp, sizeof (md_mn_req_owner_t));
	thread_exit();
}
1727 
1728 /*
1729  * mirror_set_owner:
1730  * ----------------
1731  * Called to change the owner of a mirror to the specified node. If we
1732  * are not the owner of the mirror, we do nothing apart from update the in-core
1733  * ownership. It can also be used to choose a new owner for the resync of a
1734  * mirror, this case is specified by the flag MD_MN_MM_CHOOSE_OWNER, see below.
1735  *
1736  * The p->d.flags bitfield controls how subsequent ownership changes will be
1737  * handled:
1738  *	MD_MN_MM_SPAWN_THREAD
1739  *		a separate thread is created which emulates the behaviour of
1740  *		become_owner() [mirror.c]. This is needed when changing the
1741  *		ownership from user context as there needs to be a controlling
1742  *		kernel thread which updates the owner info on the originating
1743  *		node. Successful completion of the mdmn_ksend_message() means
1744  *		that the owner field can be changed.
1745  *
1746  *	MD_MN_MM_PREVENT_CHANGE
1747  *		Disallow any change of ownership once this ownership change has
1748  *		been processed. The only way of changing the owner away from
1749  *		the p->d.owner node specified in the call is to issue a request
1750  *		with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to
1751  *		become owner from a different node while the PREVENT_CHANGE
1752  *		is in operation will result in an EAGAIN return value.
1753  *		un->un_owner_state has MM_MN_PREVENT_CHANGE set.
1754  *
1755  *	MD_MN_MM_ALLOW_CHANGE
1756  *		Allow the owner to be changed by a subsequent request.
1757  *		un->un_owner_state has MM_MN_PREVENT_CHANGE cleared.
1758  *
1759  *	MD_MN_MM_CHOOSE_OWNER
1760  *		Choose a new owner for a mirror resync. In this case, the new
1761  *		owner argument is not used. The selection of a new owner
1762  *		is a round robin allocation using a resync owner count. This
1763  *		ioctl passes this value in a message to the master node
1764  *		which uses it to select a node from the node list and then
1765  *		sends it a message to become the owner.
1766  *
1767  * If we are the current owner, we must stop further i/o from being scheduled
1768  * and wait for any pending i/o to drain. We wait for any in-progress resync
1769  * bitmap updates to complete and we can then set the owner. If an update to
1770  * the resync bitmap is attempted after this we simply don't write this out to
1771  * disk until the ownership is restored.
1772  *
1773  * If we are the node that wants to become the owner we update the in-core
1774  * owner and return. The i/o that initiated the ownership change will complete
1775  * on successful return from this ioctl.
1776  *
1777  * Return Value:
1778  *	0		Success
1779  * 	EINVAL		Invalid unit referenced
1780  *	EAGAIN		Ownership couldn't be transferred away or change of
1781  *			ownership is prevented. Caller should retry later on.
1782  */
1783 static int
mirror_set_owner(md_set_mmown_params_t * p,IOLOCK * lock)1784 mirror_set_owner(md_set_mmown_params_t *p, IOLOCK *lock)
1785 {
1786 	mdi_unit_t	*ui;
1787 	mm_unit_t	*un;
1788 	set_t		setno;
1789 
1790 	if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
1791 		return (EINVAL);
1792 	ui = MDI_UNIT(p->d.mnum);
1793 	setno = MD_MIN2SET(p->d.mnum);
1794 	if (!MD_MNSET_SETNO(setno)) {
1795 		return (EINVAL);
1796 	}
1797 
1798 	/*
1799 	 * If we are choosing a new resync owner, send a message to the master
1800 	 * to make the choice.
1801 	 */
1802 	if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) {
1803 		/* Release ioctl lock before we call ksend_message() */
1804 		md_ioctl_readerexit(lock);
1805 		/* If we're resetting the owner pass the node id in */
1806 		if (p->d.owner != MD_MN_MIRROR_UNOWNED) {
1807 			return (mirror_choose_owner(un, &p->d));
1808 		} else {
1809 			return (mirror_choose_owner(un, NULL));
1810 		}
1811 	}
1812 
1813 	/*
1814 	 * Check for whether we have to spawn a thread to issue this request.
1815 	 * If set we issue a mdmn_ksend_message() to cause the appropriate
1816 	 * ownership change. On completion of this request the calling
1817 	 * application _must_ poll the structure 'flags' field to determine the
1818 	 * result of the request. All this is necessary until we have true
1819 	 * multi-entrant ioctl support.
1820 	 * If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can
1821 	 * be ignored.
1822 	 */
1823 	if ((p->d.flags & MD_MN_MM_SPAWN_THREAD) && (p->d.owner != 0)) {
1824 		md_mn_req_owner_t	*ownp;
1825 		ownp = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP);
1826 		p->d.flags &= ~MD_MN_MM_SPAWN_THREAD;
1827 		bcopy(&p->d, ownp, sizeof (md_mn_req_owner_t));
1828 		if (thread_create(NULL, 0, mirror_owner_thread, (caddr_t)ownp,
1829 		    0, &p0, TS_RUN, 60) == NULL) {
1830 			kmem_free(ownp, sizeof (md_mn_req_owner_t));
1831 			return (EFAULT);
1832 		} else {
1833 			return (0);
1834 		}
1835 	}
1836 
1837 	/*
1838 	 * If setting owner to NULL, this is being done because the owner has
1839 	 * died and therefore we set OPT_NOT_DONE to ensure that the
1840 	 * mirror is marked as "Needs Maintenance" and that an optimized
1841 	 * resync will be done when we resync the mirror, Also clear the
1842 	 * PREVENT_CHANGE flag and remove the last resync region from the
1843 	 * overlap tree.
1844 	 */
1845 	if (p->d.owner == 0) {
1846 		md_mps_t	*ps;
1847 		int		i;
1848 
1849 		md_ioctl_readerexit(lock);
1850 		un = md_ioctl_writerlock(lock, ui);
1851 		/*
1852 		 * If the ABR capability is not set and the pass_num is non-zero
1853 		 * there is need to perform an optimized resync
1854 		 * Therefore set OPT_NOT_DONE, setup the resync_bm and set
1855 		 * the submirrors as resync targets.
1856 		 */
1857 		if (!(ui->ui_tstate & MD_ABR_CAP) && un->un_pass_num) {
1858 			MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
1859 
1860 			(void) mddb_reread_rr(setno, un->un_rr_dirty_recid);
1861 			bcopy((caddr_t)un->un_dirty_bm,
1862 			    (caddr_t)un->un_resync_bm,
1863 			    howmany(un->un_rrd_num, NBBY));
1864 			for (i = 0; i < NMIRROR; i++) {
1865 				if ((SUBMIRROR_IS_READABLE(un, i)) ||
1866 				    SMS_BY_INDEX_IS(un, i,
1867 				    SMS_OFFLINE_RESYNC))
1868 					un->un_sm[i].sm_flags |=
1869 					    MD_SM_RESYNC_TARGET;
1870 			}
1871 		}
1872 		mutex_enter(&un->un_owner_mx);
1873 		un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE;
1874 		mutex_exit(&un->un_owner_mx);
1875 		ps = un->un_rs_prev_overlap;
1876 		if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
1877 			mirror_overlap_tree_remove(ps);
1878 			ps->ps_firstblk = 0;
1879 			ps->ps_lastblk = 0;
1880 		}
1881 		md_ioctl_writerexit(lock);
1882 		un = md_ioctl_readerlock(lock, ui);
1883 	}
1884 
1885 	mutex_enter(&un->un_owner_mx);
1886 	if (!(un->un_owner_state & MM_MN_BECOME_OWNER)) {
1887 		/*
1888 		 * If we are not trying to become owner ourselves check
1889 		 * to see if we have to change the owner
1890 		 */
1891 		if (un->un_mirror_owner == p->d.owner) {
1892 			/*
1893 			 * No need to change owner,
1894 			 * Clear/set PREVENT_CHANGE bit
1895 			 */
1896 			if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) {
1897 				un->un_owner_state |= MM_MN_PREVENT_CHANGE;
1898 			} else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) {
1899 				un->un_owner_state &= ~MM_MN_PREVENT_CHANGE;
1900 			}
1901 			mutex_exit(&un->un_owner_mx);
1902 			return (0);
1903 		}
1904 	}
1905 
1906 	/*
1907 	 * Disallow ownership change if previously requested to. This can only
1908 	 * be reset by issuing a request with MD_MN_MM_ALLOW_CHANGE set in the
1909 	 * flags field.
1910 	 */
1911 	if ((un->un_owner_state & MM_MN_PREVENT_CHANGE) &&
1912 	    !(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) {
1913 		mutex_exit(&un->un_owner_mx);
1914 #ifdef DEBUG
1915 		cmn_err(CE_WARN, "mirror_ioctl: Node %x attempted to become "
1916 		    "owner while node %x has exclusive access to %s",
1917 		    p->d.owner, un->un_mirror_owner, md_shortname(MD_SID(un)));
1918 #endif
1919 		return (EAGAIN);
1920 	}
1921 	if (p->d.owner == md_mn_mynode_id) {
1922 		/*
1923 		 * I'm becoming the mirror owner. Flag this so that the
1924 		 * message sender can change the in-core owner when all
1925 		 * nodes have processed this message
1926 		 */
1927 		un->un_owner_state &= ~MM_MN_OWNER_SENT;
1928 		un->un_owner_state |= MM_MN_BECOME_OWNER;
1929 		un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1930 		    MM_MN_PREVENT_CHANGE : 0;
1931 		un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1932 		    ~MM_MN_PREVENT_CHANGE : ~0;
1933 
1934 		mutex_exit(&un->un_owner_mx);
1935 	} else if ((un->un_mirror_owner == md_mn_mynode_id) ||
1936 	    un->un_owner_state & MM_MN_BECOME_OWNER) {
1937 		mutex_exit(&un->un_owner_mx);
1938 
1939 		/*
1940 		 * I'm releasing ownership. Block and drain i/o. This also
1941 		 * blocks until any in-progress resync record update completes.
1942 		 */
1943 		md_ioctl_readerexit(lock);
1944 		un = md_ioctl_writerlock(lock, ui);
1945 		/* Block the resync thread */
1946 		mutex_enter(&un->un_rs_thread_mx);
1947 		un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER;
1948 		mutex_exit(&un->un_rs_thread_mx);
1949 		mutex_enter(&un->un_owner_mx);
1950 		un->un_mirror_owner = p->d.owner;
1951 
1952 		/* Sets node owner of un_rr_dirty record */
1953 		if (un->un_rr_dirty_recid)
1954 			(void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
1955 		un->un_owner_state &= ~MM_MN_BECOME_OWNER;
1956 		un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1957 		    MM_MN_PREVENT_CHANGE : 0;
1958 		un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1959 		    ~MM_MN_PREVENT_CHANGE : ~0;
1960 		mutex_exit(&un->un_owner_mx);
1961 		/*
1962 		 * Allow further i/o to occur. Any write() from another node
1963 		 * will now cause another ownership change to occur.
1964 		 */
1965 		md_ioctl_writerexit(lock);
1966 	} else {
1967 		/* Update the in-core mirror owner */
1968 		un->un_mirror_owner = p->d.owner;
1969 		/* Sets node owner of un_rr_dirty record */
1970 		if (un->un_rr_dirty_recid)
1971 			(void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
1972 		un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1973 		    MM_MN_PREVENT_CHANGE : 0;
1974 		un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1975 		    ~MM_MN_PREVENT_CHANGE : ~0;
1976 		mutex_exit(&un->un_owner_mx);
1977 	}
1978 	return (0);
1979 }
1980 /*
1981  * mirror_allocate_hotspare:
1982  * ------------------------
1983  * Called to allocate a hotspare for a failed component. This function is
1984  * called by the MD_MN_ALLOCATE_HOTSPARE ioctl.
1985  */
1986 static int
mirror_allocate_hotspare(md_alloc_hotsp_params_t * p,IOLOCK * lockp)1987 mirror_allocate_hotspare(md_alloc_hotsp_params_t *p, IOLOCK *lockp)
1988 {
1989 	set_t		setno;
1990 	mm_unit_t	*un;
1991 
1992 #ifdef DEBUG
1993 	if (mirror_debug_flag)
1994 		printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n",
1995 		    p->mnum, p->sm, p->comp);
1996 #endif
1997 
1998 	if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
1999 		return (EINVAL);
2000 
2001 	/* This function is only valid for a multi-node set */
2002 	setno = MD_MIN2SET(p->mnum);
2003 	if (!MD_MNSET_SETNO(setno)) {
2004 		return (EINVAL);
2005 	}
2006 	(void) check_comp_4_hotspares(un, p->sm, p->comp, MD_HOTSPARE_NO_XMIT,
2007 	    p->hs_id, lockp);
2008 	md_ioctl_writerexit(lockp);
2009 	return (0);
2010 }
2011 
2012 /*
2013  * mirror_get_owner_status:
2014  * -----------------------
2015  * Return the status of a previously issued ioctl to change ownership. This is
2016  * required for soft-partition support as the request to change mirror owner
2017  * needs to be run from a separate daemon thread.
2018  *
2019  * Returns:
2020  *	0	Success (contents of un_mirror_owner_status placed in 'flags')
2021  *	EINVAL	Invalid unit
2022  */
2023 static int
mirror_get_owner_status(md_mn_own_status_t * p,IOLOCK * lock)2024 mirror_get_owner_status(md_mn_own_status_t *p, IOLOCK *lock)
2025 {
2026 	mm_unit_t	*un;
2027 	set_t		setno;
2028 
2029 	if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lock)) == NULL)
2030 		return (EINVAL);
2031 
2032 	setno = MD_MIN2SET(p->mnum);
2033 	if (!MD_MNSET_SETNO(setno)) {
2034 		return (EINVAL);
2035 	}
2036 
2037 	p->flags = un->un_mirror_owner_status;
2038 	return (0);
2039 }
2040 
2041 /*
2042  * mirror_set_state:
2043  * ---------------
2044  * Called to set the state of the component of a submirror to the specified
2045  * value. This function is called by the MD_MN_SET_STATE ioctl.
2046  */
static int
mirror_set_state(md_set_state_params_t *p, IOLOCK *lockp)
{
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	md_m_shared_t		*shared;
	set_t			setno;

#ifdef DEBUG
	if (mirror_debug_flag)
		printf("mirror_set_state: mnum,sm,comp,state, hs_id = %x, "
		    "%x, %x, %x %x\n", p->mnum, p->sm, p->comp,
		    p->state, p->hs_id);
#endif
	/* Take the unit writerlock; released via md_ioctl_writerexit below */
	if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
		return (EINVAL);

	/* This function is only valid for a multi-node set */
	setno = MD_MIN2SET(p->mnum);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}
	sm = &un->un_sm[p->sm];
	smic = &un->un_smic[p->sm];

	/* Set state in component and update ms_flags */
	shared = (md_m_shared_t *)
	    (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, p->comp);
	/*
	 * If a CS_ERRED state is being sent, verify that the sender
	 * has the same view of the component that this node currently has.
	 *
	 * There is a case where the sender was sending a CS_ERRED when a
	 * component was in error, but before the sender returns from
	 * ksend_message the component has been hotspared and resync'd.
	 *
	 * In this case, the hs_id will be different from the shared ms_hs_id,
	 * so the component has already been hotspared.  Just return in this
	 * case.
	 */
	if (p->state == CS_ERRED) {
		if (shared->ms_hs_id != p->hs_id) {
#ifdef DEBUG
			if (mirror_debug_flag) {
				printf("mirror_set_state: short circuit "
				    "hs_id=0x%x, ms_hs_id=0x%x\n",
				    p->hs_id, shared->ms_hs_id);
			}
#endif
			/* release the block on writes to the mirror */
			mirror_resume_writes(un);
			md_ioctl_writerexit(lockp);
			return (0);
		}
	}

	/*
	 * If the device is newly errored then make sure that it is
	 * closed. Closing the device allows for the RCM framework
	 * to unconfigure the device if required.
	 */
	if (!(shared->ms_state & CS_ERRED) && (p->state & CS_ERRED) &&
	    (shared->ms_flags & MDM_S_ISOPEN)) {
		void		(*get_dev)();
		ms_cd_info_t	cd;

		/* Look up the component's underlying device, then close it */
		get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
		    "get device", 0);
		(void) (*get_dev)(sm->sm_dev, sm, p->comp, &cd);

		md_layered_close(cd.cd_dev, MD_OFLG_NULL);
		shared->ms_flags &= ~MDM_S_ISOPEN;
	}

	/* Apply the requested state and timestamp the transition */
	shared->ms_state = p->state;
	uniqtime32(&shared->ms_timestamp);

	/* An errored component must not be written to */
	if (p->state == CS_ERRED) {
		shared->ms_flags |= MDM_S_NOWRITE;
	} else
		shared->ms_flags &= ~MDM_S_NOWRITE;

	shared->ms_flags &= ~MDM_S_IOERR;
	/* Bump the unit change count and record it against this component */
	un->un_changecnt++;
	shared->ms_lasterrcnt = un->un_changecnt;

	/* Update state in submirror */
	mirror_set_sm_state(sm, smic, SMS_RUNNING, 0);
	/*
	 * Commit the state change to the metadb, only the master will write
	 * to disk
	 */
	mirror_commit(un, SMI2BIT(p->sm), 0);

	/* release the block on writes to the mirror */
	mirror_resume_writes(un);

	/* generate NOTIFY events for error state changes */
	if (p->state == CS_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else if (p->state == CS_LAST_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}
	md_ioctl_writerexit(lockp);
	return (0);
}
2156 
2157 /*
2158  * mirror_suspend_writes:
2159  * ---------------------
2160  * Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is
2161  * tested in mirror_write_strategy, and if set all writes are blocked.
2162  * This function is called by the MD_MN_SUSPEND_WRITES ioctl.
2163  */
2164 static int
mirror_suspend_writes(md_suspend_wr_params_t * p)2165 mirror_suspend_writes(md_suspend_wr_params_t *p)
2166 {
2167 	set_t		setno;
2168 	mm_unit_t	*un;
2169 
2170 #ifdef DEBUG
2171 	if (mirror_debug_flag)
2172 		printf("mirror_suspend_writes: mnum = %x\n", p->mnum);
2173 #endif
2174 	if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL)
2175 		return (EINVAL); /* No unit */
2176 
2177 	/* This function is only valid for a multi-node set */
2178 	setno = MD_MIN2SET(p->mnum);
2179 	if (!MD_MNSET_SETNO(setno)) {
2180 		return (EINVAL);
2181 	}
2182 
2183 	/*
2184 	 * Mark the resync as blocked. This will stop any currently running
2185 	 * thread and will prevent a new resync from attempting to perform
2186 	 * i/o
2187 	 */
2188 	mutex_enter(&un->un_rs_thread_mx);
2189 	un->un_rs_thread_flags |= MD_RI_BLOCK;
2190 	mutex_exit(&un->un_rs_thread_mx);
2191 
2192 	mutex_enter(&un->un_suspend_wr_mx);
2193 	un->un_suspend_wr_flag = 1;
2194 	mutex_exit(&un->un_suspend_wr_mx);
2195 
2196 	return (0);
2197 }
2198 
2199 /*
2200  * mirror_set_capability:
2201  * ------------------------
2202  * Called to set or clear a capability for a mirror
2203  * called by the MD_MN_SET_CAP ioctl.
2204  */
2205 static int
mirror_set_capability(md_mn_setcap_params_t * p,IOLOCK * lockp)2206 mirror_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
2207 {
2208 	set_t		setno;
2209 	mm_unit_t	*un;
2210 	mdi_unit_t	*ui;
2211 
2212 #ifdef DEBUG
2213 	if (mirror_debug_flag)
2214 		printf("mirror_set_capability: mnum = %x\n", p->mnum);
2215 #endif
2216 	if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lockp)) == NULL)
2217 		return (EINVAL);
2218 
2219 	/* This function is only valid for a multi-node set */
2220 	setno = MD_MIN2SET(p->mnum);
2221 	if (!MD_MNSET_SETNO(setno)) {
2222 		return (EINVAL);
2223 	}
2224 	ui = MDI_UNIT(p->mnum);
2225 
2226 	if (p->sc_set & DKV_ABR_CAP) {
2227 		ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
2228 		/* Clear DRL and set owner to 0 if no resync active */
2229 		mirror_process_unit_resync(un);
2230 		if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) {
2231 			mutex_enter(&un->un_owner_mx);
2232 			un->un_mirror_owner = 0;
2233 			mutex_exit(&un->un_owner_mx);
2234 		}
2235 	} else {
2236 		ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
2237 	}
2238 	if (p->sc_set & DKV_DMR_CAP) {
2239 		ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
2240 	} else {
2241 		ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
2242 	}
2243 	return (0);
2244 }
2245 
2246 /*
2247  * mirror_choose_owner:
2248  * ------------------------
2249  * Called to choose an owner for a mirror resync. Can be called when starting
2250  * resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag
2251  * set. The ioctl is called with this flag set when we are in the cluster
2252  * reconfig and we wish to set a new owner for a resync whose owner has left
2253  * the cluster. We use a resync owner count to implement a round robin
2254  * allocation of resync owners. We send a message to the master including
2255  * this count and the message handler uses it to select an owner from the
2256  * nodelist and then sends a SET_MM_OWNER message to the chosen node to
2257  * become the owner.
2258  *
2259  * Input:
2260  *	un	- unit reference
2261  *	ownp	- owner information (if non-NULL)
2262  */
2263 int
mirror_choose_owner(mm_unit_t * un,md_mn_req_owner_t * ownp)2264 mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *ownp)
2265 {
2266 	set_t		setno;
2267 	md_mn_msg_chooseid_t	*msg;
2268 
2269 	/* This function is only valid for a multi-node set */
2270 	setno = MD_UN2SET(un);
2271 	if (!MD_MNSET_SETNO(setno)) {
2272 		return (EINVAL);
2273 	}
2274 
2275 
2276 #ifdef DEBUG
2277 	if (mirror_debug_flag)
2278 		printf("send choose owner message, mnum = %x,"
2279 		    "rcnt = %d\n", MD_SID(un), md_set[setno].s_rcnt);
2280 #endif
2281 
2282 	/*
2283 	 * setup message with current resync count
2284 	 * and then increment the count. If we're called with a non-NULL
2285 	 * owner then we are reestablishing the owner of the mirror. In this
2286 	 * case we have to flag this to the message handler and set rcnt to
2287 	 * the new owner node.
2288 	 */
2289 	msg = kmem_zalloc(sizeof (md_mn_msg_chooseid_t), KM_SLEEP);
2290 	msg->msg_chooseid_mnum = MD_SID(un);
2291 	if (ownp == NULL) {
2292 		mutex_enter(&md_mx);
2293 		msg->msg_chooseid_rcnt = md_set[setno].s_rcnt;
2294 		md_set[setno].s_rcnt++;
2295 		mutex_exit(&md_mx);
2296 		msg->msg_chooseid_set_node = B_FALSE;
2297 	} else {
2298 		msg->msg_chooseid_rcnt = ownp->owner;
2299 		msg->msg_chooseid_set_node = B_TRUE;
2300 	}
2301 
2302 	/*
2303 	 * Spawn a thread to issue the ksend_message() call so that we can
2304 	 * drop the ioctl lock hierarchy that is blocking further rpc.metad and
2305 	 * commd set ownership checking.
2306 	 */
2307 	if (thread_create(NULL, 0, mirror_choose_owner_thread, (caddr_t)msg,
2308 	    0, &p0, TS_RUN, 60) == NULL) {
2309 		kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
2310 		return (EFAULT);
2311 	} else {
2312 		return (0);
2313 	}
2314 }
2315 
2316 /*
2317  * mirror_get_status:
2318  * ----------------------------------
2319  * Called by nodes which are not the master node of the cluster. Obtains the
2320  * master abr state and the submirror status for each valid submirror of the
2321  * unit so that the status returned by metastat is consistent across the
2322  * cluster.
2323  * We update tstate for the mirror and both the sm_flag and the sm_state for
2324  * each submirror.
2325  *
2326  * Input:
2327  *	un	mirror to obtain status from
2328  *
2329  * Calling Convention:
2330  *	writerlock (either ioctl or unit) must be held
2331  */
void
mirror_get_status(mm_unit_t *un, IOLOCK *lockp)
{
	mm_submirror_t		*sm;
	int			smi;
	int			rval;
	md_mn_kresult_t		*kres;
	md_mn_msg_mir_state_t	msg;
	md_mn_msg_mir_state_res_t	*res;
	set_t			setno = MD_UN2SET(un);
	mdi_unit_t		*ui = MDI_UNIT(MD_SID(un));


	/* Caller must already hold the unit writerlock (see header comment) */
	ASSERT(ui->ui_lock & MD_UL_WRITER);

	/*
	 * Get all of the information for the mirror.
	 */
	bzero(&msg, sizeof (msg));
	msg.mir_state_mnum = MD_SID(un);

	/*
	 * Must drop the writerlock over ksend_message since another
	 * thread on this node could be running a higher class message
	 * and be trying grab the readerlock.
	 *
	 * If we are in the context of an ioctl, drop the ioctl lock.
	 * lockp holds the list of locks held.
	 */
	if (lockp) {
		IOLOCK_RETURN_RELEASE(0, lockp);
	} else {
		md_unit_writerexit(ui);
	}

	/* Ask the master node for the mirror/submirror state */
	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_GET_MIRROR_STATE,
	    MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)&msg,
	    sizeof (msg), kres);

	/* if the node hasn't yet joined, it's Ok. */
	if ((!MDMN_KSEND_MSG_OK(rval, kres)) &&
	    (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) {
		mdmn_ksend_show_error(rval, kres, "GET_MIRROR_STATE");
		cmn_err(CE_WARN, "ksend_message failure: GET_MIRROR_STATE");
	}

	/* if dropped the lock previously, regain it */
	if (lockp) {
		IOLOCK_RETURN_REACQUIRE(lockp);
	} else {
		/*
		 * Reacquire dropped locks and update acquirecnts
		 * appropriately.
		 */
		(void) md_unit_writerlock(ui);
	}

	/*
	 * Check to see if we've got a believable amount of returned data.
	 * If not, we simply return as there is no usable information.
	 */
	if (kres->kmmr_res_size < sizeof (*res)) {
		cmn_err(CE_WARN, "GET_MIRROR_STATE: returned %d bytes, expected"
		    " %d\n", kres->kmmr_res_size, (int)sizeof (*res));
		kmem_free(kres, sizeof (md_mn_kresult_t));
		return;
	}

	/*
	 * Copy the results from the call back into our sm_state/sm_flags
	 */
	res = (md_mn_msg_mir_state_res_t *)kres->kmmr_res_data;
#ifdef DEBUG
	if (mirror_debug_flag)
		printf("mirror_get_status: %s\n", md_shortname(MD_SID(un)));
#endif
	for (smi = 0; smi < NMIRROR; smi++) {
		sm = &un->un_sm[smi];
#ifdef DEBUG
		if (mirror_debug_flag) {
			printf("curr state %4x, new state %4x\n", sm->sm_state,
			    res->sm_state[smi]);
			printf("curr_flags %4x, new flags %4x\n", sm->sm_flags,
			    res->sm_flags[smi]);
		}
#endif
		/* Adopt the master's view of each submirror */
		sm->sm_state = res->sm_state[smi];
		sm->sm_flags = res->sm_flags[smi];
	}

	/* Set ABR if set on the Master node */
	ui->ui_tstate |= (res->mir_tstate & MD_ABR_CAP);

	kmem_free(kres, sizeof (md_mn_kresult_t));
}
2428 
2429 /*
2430  * mirror_get_mir_state:
2431  * -------------------
2432  * Obtain the ABR state of a mirror and the state of all submirrors from the
2433  * master node for the unit specified in sm_state->mnum.
2434  * Called by MD_MN_GET_MIRROR_STATE ioctl.
2435  */
2436 static int
mirror_get_mir_state(md_mn_get_mir_state_t * p,IOLOCK * lockp)2437 mirror_get_mir_state(md_mn_get_mir_state_t *p, IOLOCK *lockp)
2438 {
2439 	mm_unit_t	*un;
2440 	set_t		setno;
2441 	md_error_t	mde;
2442 
2443 	mdclrerror(&mde);
2444 
2445 	if ((un = mirror_getun(p->mnum, &mde, WR_LOCK, lockp)) == NULL) {
2446 		return (EINVAL);
2447 	}
2448 	setno = MD_MIN2SET(p->mnum);
2449 	if (!MD_MNSET_SETNO(setno)) {
2450 		return (EINVAL);
2451 	}
2452 
2453 	/*
2454 	 * We've now got a writerlock on the unit structure (so no-one can
2455 	 * modify the incore values) and we'll now send the message to the
2456 	 * master node. Since we're only called as part of a reconfig cycle
2457 	 * we don't need to release the unit locks across the ksend_message as
2458 	 * only the master node will process it, and we never send this to
2459 	 * ourselves if we're the master.
2460 	 */
2461 
2462 	mirror_get_status(un, lockp);
2463 
2464 	return (0);
2465 }
2466 
/*
 * mirror_admin_ioctl:
 * ------------------
 * Dispatcher for the administrative (MD_ADM_MINOR) mirror ioctls.  Each
 * case copies the argument structure into kernel memory, checks the open
 * mode for the required access, and calls the relevant handler.  After the
 * switch, any argument buffer (sz != 0) is copied back to userland on
 * success and freed.
 */
static int
mirror_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
{
	size_t	sz = 0;
	void	*d = NULL;
	int	err = 0;

	/* We can only handle 32-bit clients for internal commands */
	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
		return (EINVAL);
	}
	/* dispatch ioctl */
	switch (cmd) {

	case MD_IOCSET:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_set_params_t);

		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_set(d, mode);
		break;
	}

	case MD_IOCGET:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_i_get_t);

		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_get(d, mode, lockp);
		break;
	}

	case MD_IOCRESET:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_i_reset_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_reset((md_i_reset_t *)d);
		break;
	}

	case MD_IOCSETSYNC:
	case MD_MN_SETSYNC:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_resync_ioctl_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_ioctl_resync((md_resync_ioctl_t *)d, lockp);
		break;
	}

	case MD_IOCGETSYNC:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_resync_ioctl_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_get_resync((md_resync_ioctl_t *)d);
		break;
	}

	case MD_IOCREPLACE:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (replace_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = comp_replace((replace_params_t *)d, lockp);
		break;
	}

	case MD_IOCOFFLINE:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_i_off_on_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_offline((md_i_off_on_t *)d, lockp);
		break;
	}

	case MD_IOCONLINE:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_i_off_on_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_online((md_i_off_on_t *)d, lockp);
		break;
	}

	case MD_IOCDETACH:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_detach_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_detach((md_detach_params_t *)d, lockp);
		break;
	}

	case MD_IOCATTACH:
	{

		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_att_struct_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_attach((md_att_struct_t *)d, lockp);
		break;
	}

	case MD_IOCGET_DEVS:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_getdevs_params_t);

		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_getdevs(d, mode, lockp);
		break;
	}

	case MD_IOCGROW:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_grow_params_t);

		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_grow(d, lockp);
		break;
	}

	case MD_IOCCHANGE:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_mirror_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_change((md_mirror_params_t *)d, lockp);
		break;
	}

	case MD_IOCPROBE_DEV:
	{
		md_probedev_impl_t	*p = NULL;
		md_probedev_t		*ph = NULL;
		daemon_queue_t		*hdr = NULL;
		int			i;
		size_t			sz2 = 0;

		if (! (mode & FREAD))
			return (EACCES);


		sz = sizeof (md_probedev_t);
		d = kmem_alloc(sz, KM_SLEEP);

		/* now copy in the data */
		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			goto free_mem;
		}

		/*
		 * Sanity test the args. Test name should have the keyword
		 * probe.
		 */

		p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP);

		/* Start from a clean slate so free_mem can test each field */
		p->probe_sema = NULL;
		p->probe_mx = NULL;
		p->probe.mnum_list = (uint64_t)NULL;

		ph = (struct md_probedev *)d;

		p->probe.nmdevs = ph->nmdevs;
		(void) strcpy(p->probe.test_name, ph->test_name);
		bcopy(&ph->md_driver, &(p->probe.md_driver),
		    sizeof (md_driver_t));

		if ((p->probe.nmdevs < 1) ||
		    (strstr(p->probe.test_name, "probe") == NULL)) {
			err = EINVAL;
			goto free_mem;
		}


		/* Copy in the user-supplied list of minor numbers to probe */
		sz2 = sizeof (minor_t) * p->probe.nmdevs;
		p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz2,
		    KM_SLEEP);

		if (ddi_copyin((void *)(uintptr_t)ph->mnum_list,
		    (void *)(uintptr_t)p->probe.mnum_list, sz2, mode)) {
			err = EFAULT;
			goto free_mem;
		}

		if (err = md_init_probereq(p, &hdr))
			goto free_mem;

		/*
		 * put the request on the queue and wait.
		 */

		daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW);

		/* drop the ioctl lock while blocked on the probe semaphore */
		(void) IOLOCK_RETURN(0, lockp);
		/* wait for the events to occur */
		for (i = 0; i < p->probe.nmdevs; i++) {
			sema_p(PROBE_SEMA(p));
		}
		/* reacquire the ioctl lock, retrying if interrupted */
		while (md_ioctl_lock_enter() == EINTR)
		;

		/*
		 * clean up. The hdr list is freed in the probe routines
		 * since the list is NULL by the time we get here.
		 */
free_mem:
		if (p) {
			if (p->probe_sema != NULL) {
				sema_destroy(PROBE_SEMA(p));
				kmem_free(p->probe_sema, sizeof (ksema_t));
			}
			if (p->probe_mx != NULL) {
				mutex_destroy(PROBE_MX(p));
				kmem_free(p->probe_mx, sizeof (kmutex_t));
			}
			if ((uintptr_t)p->probe.mnum_list)
				kmem_free((void *)(uintptr_t)
				    p->probe.mnum_list, sz2);

			kmem_free(p, sizeof (md_probedev_impl_t));
		}
		break;
	}

	case MD_MN_SET_MM_OWNER:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_set_mmown_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode) != 0) {
			err = EFAULT;
			break;
		}

		err = mirror_set_owner((md_set_mmown_params_t *)d, lockp);
		break;
	}

	case MD_MN_GET_MM_OWNER:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_set_mmown_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode) != 0) {
			err = EFAULT;
			break;
		}

		err = mirror_get_owner((md_set_mmown_params_t *)d, lockp);
		break;
	}

	case MD_MN_MM_OWNER_STATUS:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_mn_own_status_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode) != 0) {
			err = EFAULT;
			break;
		}

		err = mirror_get_owner_status((md_mn_own_status_t *)d, lockp);
		break;
	}

	case MD_MN_SET_STATE:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_set_state_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err  = mirror_set_state((md_set_state_params_t *)d, lockp);
		break;
	}

	case MD_MN_SUSPEND_WRITES:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_suspend_wr_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode) != 0) {
			err = EFAULT;
			break;
		}

		err = mirror_suspend_writes((md_suspend_wr_params_t *)d);
		break;
	}

	case MD_MN_RESYNC:
	{
		sz = sizeof (md_mn_rs_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode) != 0) {
			err = EFAULT;
			break;
		}

		err = mirror_resync_message((md_mn_rs_params_t *)d, lockp);
		break;
	}

	case MD_MN_ALLOCATE_HOTSPARE:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_alloc_hotsp_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err  = mirror_allocate_hotspare((md_alloc_hotsp_params_t *)d,
		    lockp);
		break;
	}

	case MD_MN_POKE_HOTSPARES:
	{
		/* no argument structure: nothing copied in or out */
		(void) poke_hotspares();
		break;
	}

	case MD_MN_SET_CAP:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_mn_setcap_params_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err  = mirror_set_capability((md_mn_setcap_params_t *)d,
		    lockp);
		break;
	}

	case MD_MN_GET_MIRROR_STATE:
	{
		sz = sizeof (md_mn_get_mir_state_t);
		d = kmem_zalloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_get_mir_state((md_mn_get_mir_state_t *)d,
		    lockp);
		break;
	}

	case MD_MN_RR_DIRTY:
	{
		sz = sizeof (md_mn_rr_dirty_params_t);
		d = kmem_zalloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_set_dirty_rr((md_mn_rr_dirty_params_t *)d);
		break;
	}

	case MD_MN_RR_CLEAN:
	{
		md_mn_rr_clean_params_t tmp;

		/*
		 * Variable-sized argument: copy in the fixed-size header
		 * first so MDMN_RR_CLEAN_PARAMS_SIZE can compute the full
		 * length, then copy in the whole structure.
		 */
		/* get the first part of the structure to find the size */
		if (ddi_copyin(data, &tmp, sizeof (tmp), mode)) {
			err = EFAULT;
			break;
		}

		sz = MDMN_RR_CLEAN_PARAMS_SIZE(&tmp);
		d = kmem_zalloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = mirror_set_clean_rr((md_mn_rr_clean_params_t *)d);
		break;
	}

	default:
		return (ENOTTY);
	}

	/*
	 * copyout and free any args
	 */
	if (sz != 0) {
		if (err == 0) {
			if (ddi_copyout(d, data, sz, mode) != 0) {
				err = EFAULT;
			}
		}
		kmem_free(d, sz);
	}
	return (err);
}
3013 
3014 int
md_mirror_ioctl(dev_t ddi_dev,int cmd,void * data,int mode,IOLOCK * lockp)3015 md_mirror_ioctl(
3016 	dev_t		ddi_dev,
3017 	int		cmd,
3018 	void		*data,
3019 	int		mode,
3020 	IOLOCK		*lockp
3021 )
3022 {
3023 	minor_t		mnum = getminor(ddi_dev);
3024 	mm_unit_t	*un;
3025 	int		err = 0;
3026 
3027 	/* handle admin ioctls */
3028 	if (mnum == MD_ADM_MINOR)
3029 		return (mirror_admin_ioctl(cmd, data, mode, lockp));
3030 
3031 	/* check unit */
3032 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
3033 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
3034 	    ((un = MD_UNIT(mnum)) == NULL))
3035 		return (ENXIO);
3036 	/* is this a supported ioctl? */
3037 	err = md_check_ioctl_against_unit(cmd, un->c);
3038 	if (err != 0) {
3039 		return (err);
3040 	}
3041 
3042 	/* dispatch ioctl */
3043 	switch (cmd) {
3044 
3045 	case DKIOCINFO:
3046 	{
3047 		struct dk_cinfo	*p;
3048 
3049 		if (! (mode & FREAD))
3050 			return (EACCES);
3051 
3052 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
3053 
3054 		get_info(p, mnum);
3055 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
3056 			err = EFAULT;
3057 
3058 		kmem_free(p, sizeof (*p));
3059 		return (err);
3060 	}
3061 
3062 	case DKIOCGMEDIAINFO:
3063 	{
3064 		struct dk_minfo	p;
3065 
3066 		if (! (mode & FREAD))
3067 			return (EACCES);
3068 
3069 		get_minfo(&p, mnum);
3070 		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
3071 			err = EFAULT;
3072 
3073 		return (err);
3074 	}
3075 
3076 	case DKIOCGGEOM:
3077 	{
3078 		struct dk_geom	*p;
3079 
3080 		if (! (mode & FREAD))
3081 			return (EACCES);
3082 
3083 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
3084 
3085 		if ((err = mirror_get_geom(un, p)) == 0) {
3086 			if (ddi_copyout((caddr_t)p, data, sizeof (*p),
3087 			    mode) != 0)
3088 				err = EFAULT;
3089 		}
3090 
3091 		kmem_free(p, sizeof (*p));
3092 		return (err);
3093 	}
3094 
3095 	case DKIOCGVTOC:
3096 	{
3097 		struct vtoc	*vtoc;
3098 
3099 		if (! (mode & FREAD))
3100 			return (EACCES);
3101 
3102 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3103 
3104 		if ((err = mirror_get_vtoc(un, vtoc)) != 0) {
3105 			kmem_free(vtoc, sizeof (*vtoc));
3106 			return (err);
3107 		}
3108 
3109 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3110 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
3111 				err = EFAULT;
3112 		}
3113 #ifdef _SYSCALL32
3114 		else {
3115 			struct vtoc32	*vtoc32;
3116 
3117 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3118 
3119 			vtoctovtoc32((*vtoc), (*vtoc32));
3120 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
3121 				err = EFAULT;
3122 			kmem_free(vtoc32, sizeof (*vtoc32));
3123 		}
3124 #endif /* _SYSCALL32 */
3125 
3126 		kmem_free(vtoc, sizeof (*vtoc));
3127 		return (err);
3128 	}
3129 
3130 	case DKIOCSVTOC:
3131 	{
3132 		struct vtoc	*vtoc;
3133 
3134 		if (! (mode & FWRITE))
3135 			return (EACCES);
3136 
3137 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3138 
3139 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3140 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
3141 				err = EFAULT;
3142 			}
3143 		}
3144 #ifdef _SYSCALL32
3145 		else {
3146 			struct vtoc32	*vtoc32;
3147 
3148 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3149 
3150 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
3151 				err = EFAULT;
3152 			} else {
3153 				vtoc32tovtoc((*vtoc32), (*vtoc));
3154 			}
3155 			kmem_free(vtoc32, sizeof (*vtoc32));
3156 		}
3157 #endif /* _SYSCALL32 */
3158 
3159 		if (err == 0)
3160 			err = mirror_set_vtoc(un, vtoc);
3161 
3162 		kmem_free(vtoc, sizeof (*vtoc));
3163 		return (err);
3164 	}
3165 
3166 	case DKIOCGEXTVTOC:
3167 	{
3168 		struct extvtoc	*extvtoc;
3169 
3170 		if (! (mode & FREAD))
3171 			return (EACCES);
3172 
3173 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3174 
3175 		if ((err = mirror_get_extvtoc(un, extvtoc)) != 0) {
3176 			kmem_free(extvtoc, sizeof (*extvtoc));
3177 			return (err);
3178 		}
3179 
3180 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
3181 			err = EFAULT;
3182 
3183 		kmem_free(extvtoc, sizeof (*extvtoc));
3184 		return (err);
3185 	}
3186 
3187 	case DKIOCSEXTVTOC:
3188 	{
3189 		struct extvtoc	*extvtoc;
3190 
3191 		if (! (mode & FWRITE))
3192 			return (EACCES);
3193 
3194 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3195 
3196 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
3197 			err = EFAULT;
3198 		}
3199 
3200 		if (err == 0)
3201 			err = mirror_set_extvtoc(un, extvtoc);
3202 
3203 		kmem_free(extvtoc, sizeof (*extvtoc));
3204 		return (err);
3205 	}
3206 
3207 	case DKIOCGAPART:
3208 	{
3209 		struct dk_map	dmp;
3210 
3211 		if ((err = mirror_get_cgapart(un, &dmp)) != 0) {
3212 			return (err);
3213 		}
3214 
3215 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3216 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
3217 			    mode) != 0)
3218 				err = EFAULT;
3219 		}
3220 #ifdef _SYSCALL32
3221 		else {
3222 			struct dk_map32 dmp32;
3223 
3224 			dmp32.dkl_cylno = dmp.dkl_cylno;
3225 			dmp32.dkl_nblk = dmp.dkl_nblk;
3226 
3227 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
3228 			    mode) != 0)
3229 				err = EFAULT;
3230 		}
3231 #endif /* _SYSCALL32 */
3232 
3233 		return (err);
3234 	}
3235 	case DKIOCGETEFI:
3236 	{
3237 		/*
3238 		 * This one can be done centralized,
3239 		 * no need to put in the same code for all types of metadevices
3240 		 */
3241 		return (md_dkiocgetefi(mnum, data, mode));
3242 	}
3243 	case DKIOCSETEFI:
3244 	{
3245 		/*
3246 		 * This one can be done centralized,
3247 		 * no need to put in the same code for all types of metadevices
3248 		 */
3249 		return (md_dkiocsetefi(mnum, data, mode));
3250 	}
3251 	case DKIOCPARTITION:
3252 	{
3253 		return (md_dkiocpartition(mnum, data, mode));
3254 	}
3255 
3256 	case DKIOCGETVOLCAP:
3257 	{
3258 		volcap_t	vc;
3259 		mdi_unit_t	*ui;
3260 
3261 		/* Only valid for MN sets */
3262 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3263 			return (EINVAL);
3264 
3265 		ui = MDI_UNIT(mnum);
3266 		if (! (mode & FREAD))
3267 			return (EACCES);
3268 
3269 		vc.vc_info = DKV_ABR_CAP | DKV_DMR_CAP;
3270 		vc.vc_set = 0;
3271 		if (ui->ui_tstate & MD_ABR_CAP) {
3272 			vc.vc_set |= DKV_ABR_CAP;
3273 		}
3274 		if (ddi_copyout(&vc, data, sizeof (volcap_t), mode))
3275 			err = EFAULT;
3276 		return (err);
3277 	}
3278 
3279 	case DKIOCSETVOLCAP:
3280 	{
3281 		volcap_t	vc;
3282 		volcapset_t	volcap = 0;
3283 		mdi_unit_t	*ui;
3284 
3285 		/* Only valid for MN sets */
3286 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3287 			return (EINVAL);
3288 
3289 		ui = MDI_UNIT(mnum);
3290 		if (! (mode & FWRITE))
3291 			return (EACCES);
3292 
3293 		if (ddi_copyin(data, &vc, sizeof (volcap_t), mode))
3294 			return (EFAULT);
3295 
3296 		/* Not valid if a submirror is offline */
3297 		if (un->c.un_status & MD_UN_OFFLINE_SM) {
3298 			return (EINVAL);
3299 		}
3300 		if (ui->ui_tstate & MD_ABR_CAP)
3301 			volcap |= DKV_ABR_CAP;
3302 		/* Only send capability message if there is a change */
3303 		if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
3304 			err = mdmn_send_capability_message(mnum, vc, lockp);
3305 		return (err);
3306 	}
3307 
3308 	case DKIOCDMR:
3309 	{
3310 		vol_directed_rd_t	*vdr;
3311 
3312 #ifdef _MULTI_DATAMODEL
3313 		vol_directed_rd32_t	*vdr32;
3314 #endif	/* _MULTI_DATAMODEL */
3315 
3316 		/* Only valid for MN sets */
3317 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3318 			return (EINVAL);
3319 
3320 		vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
3321 		if (vdr == NULL)
3322 			return (ENOMEM);
3323 
3324 #ifdef _MULTI_DATAMODEL
3325 		vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
3326 		if (vdr32 == NULL) {
3327 			kmem_free(vdr, sizeof (vol_directed_rd_t));
3328 			return (ENOMEM);
3329 		}
3330 
3331 		switch (ddi_model_convert_from(mode & FMODELS)) {
3332 		case DDI_MODEL_ILP32:
3333 			/*
3334 			 * If we're called from a higher-level driver we don't
3335 			 * need to manipulate the data. Its already been done by
3336 			 * the caller.
3337 			 */
3338 			if (!(mode & FKIOCTL)) {
3339 				if (ddi_copyin(data, vdr32, sizeof (*vdr32),
3340 				    mode)) {
3341 					kmem_free(vdr, sizeof (*vdr));
3342 					return (EFAULT);
3343 				}
3344 				vdr->vdr_flags = vdr32->vdr_flags;
3345 				vdr->vdr_offset = vdr32->vdr_offset;
3346 				vdr->vdr_nbytes = vdr32->vdr_nbytes;
3347 				vdr->vdr_data =
3348 				    (void *)(uintptr_t)vdr32->vdr_data;
3349 				vdr->vdr_side = vdr32->vdr_side;
3350 				break;
3351 			}
3352 			/* FALLTHROUGH */
3353 
3354 		case DDI_MODEL_NONE:
3355 			if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3356 				kmem_free(vdr32, sizeof (*vdr32));
3357 				kmem_free(vdr, sizeof (*vdr));
3358 				return (EFAULT);
3359 			}
3360 			break;
3361 
3362 		default:
3363 			kmem_free(vdr32, sizeof (*vdr32));
3364 			kmem_free(vdr, sizeof (*vdr));
3365 			return (EFAULT);
3366 		}
3367 #else	/* ! _MULTI_DATAMODEL */
3368 		if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3369 			kmem_free(vdr, sizeof (*vdr));
3370 			return (EFAULT);
3371 		}
3372 #endif	/* _MULTI_DATAMODEL */
3373 
3374 		err = mirror_directed_read(ddi_dev, vdr, mode);
3375 
3376 		if (err == 0) {
3377 #ifdef _MULTI_DATAMODEL
3378 			switch (ddi_model_convert_from(mode & FMODELS)) {
3379 			case DDI_MODEL_ILP32:
3380 				if (!(mode & FKIOCTL)) {
3381 					vdr32->vdr_flags = vdr->vdr_flags;
3382 					vdr32->vdr_offset = vdr->vdr_offset;
3383 					vdr32->vdr_side = vdr->vdr_side;
3384 					vdr32->vdr_bytesread =
3385 					    vdr->vdr_bytesread;
3386 					bcopy(vdr->vdr_side_name,
3387 					    vdr32->vdr_side_name,
3388 					    sizeof (vdr32->vdr_side_name));
3389 
3390 					if (ddi_copyout(vdr32, data,
3391 					    sizeof (*vdr32), mode)) {
3392 						err = EFAULT;
3393 					}
3394 					break;
3395 				}
3396 				/* FALLTHROUGH */
3397 
3398 			case DDI_MODEL_NONE:
3399 				if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3400 					err = EFAULT;
3401 				break;
3402 			}
3403 #else	/* ! _MULTI_DATAMODEL */
3404 			if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3405 				err = EFAULT;
3406 #endif	/* _MULTI_DATAMODEL */
3407 			if (vdr->vdr_flags &  DKV_DMR_ERROR)
3408 				err = EIO;
3409 		}
3410 
3411 #ifdef _MULTI_DATAMODEL
3412 		kmem_free(vdr32, sizeof (*vdr32));
3413 #endif	/* _MULTI_DATAMODEL */
3414 
3415 		kmem_free(vdr, sizeof (*vdr));
3416 
3417 		return (err);
3418 	}
3419 
3420 	default:
3421 		return (ENOTTY);
3422 	}
3423 }
3424 
3425 /*
3426  * rename named service entry points and support functions
3427  */
3428 
3429 /*
3430  * rename/exchange role swap functions
3431  *
3432  * most of these are handled by generic role swap functions
3433  */
3434 
3435 /*
3436  * MDRNM_UPDATE_KIDS
3437  * rename/exchange of our child or grandchild
3438  */
3439 void
mirror_renexch_update_kids(md_rendelta_t * delta,md_rentxn_t * rtxnp)3440 mirror_renexch_update_kids(md_rendelta_t *delta, md_rentxn_t *rtxnp)
3441 {
3442 	mm_submirror_t		*sm;
3443 	int			smi;
3444 
3445 	ASSERT(rtxnp);
3446 	ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE));
3447 	ASSERT(rtxnp->recids);
3448 	ASSERT(delta);
3449 	ASSERT(delta->unp);
3450 	ASSERT(delta->old_role == MDRR_PARENT);
3451 	ASSERT(delta->new_role == MDRR_PARENT);
3452 
3453 	/*
3454 	 * since our role isn't changing (parent->parent)
3455 	 * one of our children must be changing
3456 	 * find the child being modified, and update
3457 	 * our notion of it
3458 	 */
3459 	for (smi = 0; smi < NMIRROR; smi++) {
3460 		mm_unit_t *un = (mm_unit_t *)delta->unp;
3461 
3462 		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3463 			continue;
3464 		}
3465 		sm = &un->un_sm[smi];
3466 
3467 		if (md_getminor(sm->sm_dev) == rtxnp->from.mnum) {
3468 			sm->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3469 			sm->sm_key = rtxnp->to.key;
3470 			break;
3471 		}
3472 	}
3473 
3474 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3475 }
3476 
/*
 * exchange down (self->child)
 *
 * We (the mirror, old role SELF) are exchanging identities with one of
 * our own submirrors: our unit takes on the "to" minor number, and the
 * submirror slot that referred to "to" is repointed at our old "from"
 * identity.  The stale "from" name-space key is then removed.
 */
void
mirror_exchange_self_update_from_down(
	md_rendelta_t	*delta,
	md_rentxn_t	*rtxnp
)
{
	int			smi;
	mm_submirror_t		*found;	/* submirror whose identity we take */
	minor_t			from_min, to_min;
	sv_dev_t		sv;	/* name-space entry to delete */

	ASSERT(rtxnp);
	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
	ASSERT(rtxnp->recids);
	ASSERT(rtxnp->rec_idx >= 0);
	ASSERT(delta);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT(delta->old_role == MDRR_SELF);
	ASSERT(delta->new_role == MDRR_CHILD);
	ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;

	/*
	 * self id changes in our own unit struct
	 */

	MD_SID(delta->unp) = to_min;

	/*
	 * parent identifier need not change
	 */

	/*
	 * point the set array pointers at the "new" unit and unit in-cores
	 * Note: the other half of this transfer (the from_min slots) is
	 * done in the "update_to" exchange named service.
	 */

	MDI_VOIDUNIT(to_min) = delta->uip;
	MD_VOIDUNIT(to_min) = delta->unp;

	/*
	 * transfer kstats
	 */

	delta->uip->ui_kstat = rtxnp->to.kstatp;

	/*
	 * the unit in-core reference to the get next link's id changes
	 */

	delta->uip->ui_link.ln_id = to_min;

	/*
	 * find the child whose identity we're assuming
	 */

	for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
		mm_submirror_t		*sm;
		mm_unit_t		*un = (mm_unit_t *)delta->unp;

		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		sm = &un->un_sm[smi];

		if (md_getminor(sm->sm_dev) == to_min) {
			found = sm;
		}
	}
	ASSERT(found);

	/*
	 * Update the sub-mirror's identity: it now carries our old
	 * "from" device and name-space key.
	 */
	found->sm_dev = md_makedevice(md_major, rtxnp->from.mnum);
	sv.key = found->sm_key;

	ASSERT(rtxnp->from.key != MD_KEYWILD);
	ASSERT(rtxnp->from.key != MD_KEYBAD);

	found->sm_key = rtxnp->from.key;

	/*
	 * delete the key for the old sub-mirror from the name space
	 * (sv.key was captured above, before being overwritten)
	 */

	sv.setno = MD_MIN2SET(from_min);
	md_rem_names(&sv, 1);

	/*
	 * and store the record id (from the unit struct) into recids
	 */

	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
}
3579 
/*
 * exchange down (parent->self)
 *
 * We (the mirror's parent side of the exchange, new role SELF) assume
 * the "from" identity: our unit takes on the "from" minor number, and
 * the submirror slot that referred to "from" is repointed at the "to"
 * identity.  The stale "to" name-space key is then removed.
 */
void
mirror_exchange_parent_update_to(
		md_rendelta_t	*delta,
		md_rentxn_t	*rtxnp
)
{
	int			smi;
	mm_submirror_t		*found;	/* submirror being repointed */
	minor_t			from_min, to_min;
	sv_dev_t		sv;	/* name-space entry to delete */

	ASSERT(rtxnp);
	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
	ASSERT(rtxnp->recids);
	ASSERT(rtxnp->rec_idx >= 0);
	ASSERT(delta);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT(delta->old_role == MDRR_PARENT);
	ASSERT(delta->new_role == MDRR_SELF);
	ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;

	/*
	 * self id changes in our own unit struct
	 */

	MD_SID(delta->unp) = from_min;

	/*
	 * parent identifier need not change
	 */

	/*
	 * point the set array pointers at the "new" unit and unit in-cores
	 * Note: this is the "update_to" half; the to_min slots are
	 * transferred by the "self update from" exchange named service
	 * (see mirror_exchange_self_update_from_down).
	 */

	MDI_VOIDUNIT(from_min) = delta->uip;
	MD_VOIDUNIT(from_min) = delta->unp;

	/*
	 * transfer kstats
	 */

	delta->uip->ui_kstat = rtxnp->from.kstatp;

	/*
	 * the unit in-core reference to the get next link's id changes
	 */

	delta->uip->ui_link.ln_id = from_min;

	/*
	 * find the child whose identity we're assuming
	 */

	for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
		mm_submirror_t		*sm;
		mm_unit_t		*un = (mm_unit_t *)delta->unp;

		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		sm = &un->un_sm[smi];

		if (md_getminor(sm->sm_dev) == from_min) {
			found = sm;
		}
	}
	ASSERT(found);

	/*
	 * Update the sub-mirror's identity: it now carries the "to"
	 * device and name-space key.
	 */
	found->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
	sv.key = found->sm_key;

	ASSERT(rtxnp->to.key != MD_KEYWILD);
	ASSERT(rtxnp->to.key != MD_KEYBAD);

	found->sm_key = rtxnp->to.key;

	/*
	 * delete the key for the old sub-mirror from the name space
	 * (sv.key was captured above, before being overwritten)
	 */

	sv.setno = MD_MIN2SET(to_min);
	md_rem_names(&sv, 1);

	/*
	 * and store the record id (from the unit struct) into recids
	 */

	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
}
3682 
3683 /*
3684  * MDRNM_LIST_URKIDS: named svc entry point
3685  * all all delta entries appropriate for our children onto the
3686  * deltalist pointd to by dlpp
3687  */
3688 int
mirror_rename_listkids(md_rendelta_t ** dlpp,md_rentxn_t * rtxnp)3689 mirror_rename_listkids(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
3690 {
3691 	minor_t			from_min, to_min;
3692 	mm_unit_t		*from_un;
3693 	md_rendelta_t		*new, *p;
3694 	int			smi;
3695 	int			n_children;
3696 	mm_submirror_t		*sm;
3697 
3698 	ASSERT(rtxnp);
3699 	ASSERT(dlpp);
3700 	ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
3701 
3702 	from_min = rtxnp->from.mnum;
3703 	to_min = rtxnp->to.mnum;
3704 	n_children = 0;
3705 
3706 	if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
3707 		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
3708 		return (-1);
3709 	}
3710 
3711 	for (p = *dlpp; p && p->next != NULL; p = p->next) {
3712 		/* NULL */
3713 	}
3714 
3715 	for (smi = 0; smi < NMIRROR; smi++) {
3716 		minor_t	child_min;
3717 
3718 		if (!SMS_BY_INDEX_IS(from_un, smi, SMS_INUSE)) {
3719 			continue;
3720 		}
3721 
3722 		sm = &from_un->un_sm[smi];
3723 		child_min = md_getminor(sm->sm_dev);
3724 
3725 		p = new = md_build_rendelta(MDRR_CHILD,
3726 		    to_min == child_min? MDRR_SELF: MDRR_CHILD,
3727 		    sm->sm_dev, p,
3728 		    MD_UNIT(child_min), MDI_UNIT(child_min),
3729 		    &rtxnp->mde);
3730 
3731 		if (!new) {
3732 			if (mdisok(&rtxnp->mde)) {
3733 				(void) mdsyserror(&rtxnp->mde, ENOMEM);
3734 			}
3735 			return (-1);
3736 		}
3737 		++n_children;
3738 	}
3739 
3740 	return (n_children);
3741 }
3742 
3743 /*
3744  * support routine for MDRNM_CHECK
3745  */
3746 static int
mirror_may_renexch_self(mm_unit_t * un,mdi_unit_t * ui,md_rentxn_t * rtxnp)3747 mirror_may_renexch_self(
3748 	mm_unit_t	*un,
3749 	mdi_unit_t	*ui,
3750 	md_rentxn_t	*rtxnp)
3751 {
3752 	minor_t			 from_min;
3753 	minor_t			 to_min;
3754 	bool_t			 toplevel;
3755 	bool_t			 related;
3756 	int			 smi;
3757 	mm_submirror_t		*sm;
3758 
3759 	from_min = rtxnp->from.mnum;
3760 	to_min = rtxnp->to.mnum;
3761 
3762 	if (!un || !ui) {
3763 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3764 		    from_min);
3765 		return (EINVAL);
3766 	}
3767 
3768 	ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
3769 	if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
3770 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3771 		return (EINVAL);
3772 	}
3773 
3774 	if (MD_PARENT(un) == MD_MULTI_PARENT) {
3775 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3776 		return (EINVAL);
3777 	}
3778 
3779 	toplevel = !MD_HAS_PARENT(MD_PARENT(un));
3780 
3781 	/* we're related if trying to swap with our parent */
3782 	related = (!toplevel) && (MD_PARENT(un) == to_min);
3783 
3784 	switch (rtxnp->op) {
3785 	case MDRNOP_EXCHANGE:
3786 		/*
3787 		 * check for a swap with our child
3788 		 */
3789 		for (smi = 0; smi < NMIRROR; smi++) {
3790 
3791 			if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3792 				continue;
3793 			}
3794 
3795 			sm = &un->un_sm[smi];
3796 			if (md_getminor(sm->sm_dev) == to_min) {
3797 				related |= TRUE;
3798 			}
3799 		}
3800 		if (!related) {
3801 			(void) mdmderror(&rtxnp->mde,
3802 			    MDE_RENAME_TARGET_UNRELATED, to_min);
3803 			return (EINVAL);
3804 		}
3805 
3806 		break;
3807 
3808 	case MDRNOP_RENAME:
3809 		/*
3810 		 * if from is top-level and is open, then the kernel is using
3811 		 * the md_dev64_t.
3812 		 */
3813 
3814 		if (toplevel && md_unit_isopen(ui)) {
3815 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
3816 			    from_min);
3817 			return (EBUSY);
3818 		}
3819 		break;
3820 
3821 	default:
3822 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3823 		    from_min);
3824 		return (EINVAL);
3825 	}
3826 
3827 	return (0);	/* ok */
3828 }
3829 
3830 /*
3831  * Named service entry point: MDRNM_CHECK
3832  */
3833 intptr_t
mirror_rename_check(md_rendelta_t * delta,md_rentxn_t * rtxnp)3834 mirror_rename_check(
3835 	md_rendelta_t	*delta,
3836 	md_rentxn_t	*rtxnp)
3837 {
3838 	mm_submirror_t		*sm;
3839 	mm_submirror_ic_t	*smic;
3840 	md_m_shared_t		*shared;
3841 	int			ci;
3842 	int			i;
3843 	int			compcnt;
3844 	mm_unit_t		*un;
3845 	int			err = 0;
3846 
3847 	ASSERT(delta);
3848 	ASSERT(rtxnp);
3849 	ASSERT(delta->unp);
3850 	ASSERT(delta->uip);
3851 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
3852 
3853 	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
3854 		(void) mdsyserror(&rtxnp->mde, EINVAL);
3855 		return (EINVAL);
3856 	}
3857 
3858 	un = (mm_unit_t *)delta->unp;
3859 
3860 	for (i = 0; i < NMIRROR; i++) {
3861 		sm = &un->un_sm[i];
3862 		smic = &un->un_smic[i];
3863 
3864 		if (!SMS_IS(sm, SMS_INUSE))
3865 			continue;
3866 
3867 		ASSERT(smic->sm_get_component_count);
3868 		if (!smic->sm_get_component_count) {
3869 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3870 			    md_getminor(delta->dev));
3871 			return (ENXIO);
3872 		}
3873 
3874 		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);
3875 
3876 		for (ci = 0; ci < compcnt; ci++) {
3877 
3878 			ASSERT(smic->sm_shared_by_indx);
3879 			if (!smic->sm_shared_by_indx) {
3880 				(void) mdmderror(&rtxnp->mde,
3881 				    MDE_RENAME_CONFIG_ERROR,
3882 				    md_getminor(delta->dev));
3883 				return (ENXIO);
3884 			}
3885 
3886 			shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
3887 			    (sm->sm_dev, sm, ci);
3888 
3889 			ASSERT(shared);
3890 			if (!shared) {
3891 				(void) mdmderror(&rtxnp->mde,
3892 				    MDE_RENAME_CONFIG_ERROR,
3893 				    md_getminor(delta->dev));
3894 				return (ENXIO);
3895 			}
3896 
3897 			if (shared->ms_hs_id != 0) {
3898 				(void) mdmderror(&rtxnp->mde,
3899 				    MDE_SM_FAILED_COMPS,
3900 				    md_getminor(delta->dev));
3901 				return (EIO);
3902 			}
3903 
3904 			switch (shared->ms_state) {
3905 			case CS_OKAY:
3906 				break;
3907 
3908 			case CS_RESYNC:
3909 				(void) mdmderror(&rtxnp->mde,
3910 				    MDE_RESYNC_ACTIVE,
3911 				    md_getminor(delta->dev));
3912 				return (EBUSY);
3913 
3914 			default:
3915 				(void) mdmderror(&rtxnp->mde,
3916 				    MDE_SM_FAILED_COMPS,
3917 				    md_getminor(delta->dev));
3918 				return (EINVAL);
3919 			}
3920 
3921 		}
3922 	}
3923 
3924 	/* self does additional checks */
3925 	if (delta->old_role == MDRR_SELF) {
3926 		err = mirror_may_renexch_self(un, delta->uip, rtxnp);
3927 	}
3928 
3929 	return (err);
3930 }
3931 
3932 /* end of rename/exchange */
3933