xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_raid.c (revision 391647d5ee9b25dc5307abb55f583388e08b2dd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 /*
28  * Just in case we're not in a build environment, make sure that
29  * TEXT_DOMAIN gets set to something.
30  */
31 #if !defined(TEXT_DOMAIN)
32 #define	TEXT_DOMAIN "SYS_TEST"
33 #endif
34 
35 /*
36  * RAID operations
37  */
38 
39 #include <stdlib.h>
40 #include <meta.h>
41 #include <sys/lvm/md_raid.h>
42 #include <sys/lvm/mdvar.h>
43 #include <sys/lvm/md_convert.h>
44 #include <stddef.h>
45 
46 /*
47  * FUNCTION:    meta_get_raid_names()
48  * INPUT:       sp      - the set name to get raid from
49  *              options - options from the command line
50  * OUTPUT:      nlpp    - list of all raid names
51  *              ep      - return error pointer
52  * RETURNS:     int     - -1 if error, 0 success
53  * PURPOSE:     returns a list of all raid in the metadb
54  *              for all devices in the specified set
55  */
56 int
57 meta_get_raid_names(
58 	mdsetname_t	*sp,
59 	mdnamelist_t	**nlpp,
60 	int		options,
61 	md_error_t	*ep
62 )
63 {
64 	return (meta_get_names(MD_RAID, sp, nlpp, options, ep));
65 }
66 
67 /*
68  * free raid unit
69  */
70 void
71 meta_free_raid(
72 	md_raid_t	*raidp
73 )
74 {
75 	if (raidp->cols.cols_val != NULL) {
76 		assert(raidp->cols.cols_len > 0);
77 		Free(raidp->cols.cols_val);
78 	}
79 	Free(raidp);
80 }
81 
82 /*
83  * get raid (common)
84  */
85 md_raid_t *
86 meta_get_raid_common(
87 	mdsetname_t		*sp,
88 	mdname_t		*raidnp,
89 	int			fast,
90 	md_error_t		*ep
91 )
92 {
93 	mddrivename_t		*dnp = raidnp->drivenamep;
94 	char			*miscname;
95 	mr_unit_t		*mr;
96 	md_raid_t		*raidp;
97 	uint_t			ncol;
98 	uint_t			col;
99 	md_resync_ioctl_t	ri;
100 
101 	/* must have set */
102 	assert(sp != NULL);
103 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
104 
105 	/* short circuit */
106 	if (dnp->unitp != NULL) {
107 		assert(dnp->unitp->type == MD_METARAID);
108 		return ((md_raid_t *)dnp->unitp);
109 	}
110 
111 	/* get miscname and unit */
112 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
113 		return (NULL);
114 	if (strcmp(miscname, MD_RAID) != 0) {
115 		(void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
116 		    raidnp->cname);
117 		return (NULL);
118 	}
119 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
120 		return (NULL);
121 	assert(mr->c.un_type == MD_METARAID);
122 
123 	/* allocate raid */
124 	raidp = Zalloc(sizeof (*raidp));
125 
126 	/* allocate columns */
127 	ncol = mr->un_totalcolumncnt;
128 	assert(ncol >= MD_RAID_MIN);
129 	raidp->cols.cols_len = ncol;
130 	raidp->cols.cols_val = Zalloc(raidp->cols.cols_len *
131 	    sizeof (*raidp->cols.cols_val));
132 
133 	/* get common info */
134 	raidp->common.namep = raidnp;
135 	raidp->common.type = mr->c.un_type;
136 	raidp->common.state = mr->c.un_status;
137 	raidp->common.capabilities = mr->c.un_capabilities;
138 	raidp->common.parent = mr->c.un_parent;
139 	raidp->common.size = mr->c.un_total_blocks;
140 	raidp->common.user_flags = mr->c.un_user_flags;
141 	raidp->common.revision = mr->c.un_revision;
142 
143 	/* get options */
144 	raidp->state = mr->un_state;
145 	raidp->timestamp = mr->un_timestamp;
146 	raidp->interlace = mr->un_segsize;
147 	raidp->orig_ncol = mr->un_origcolumncnt;
148 	raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
149 	raidp->pw_count = mr->un_pwcnt;
150 	assert(raidp->orig_ncol <= ncol);
151 	if ((mr->un_hsp_id != MD_HSP_NONE) &&
152 	    ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id,
153 	    ep)) == NULL)) {
154 		goto out;
155 	}
156 
157 	/* get columns, update unit state */
158 	for (col = 0; (col < ncol); ++col) {
159 		mr_column_t	*rcp = &mr->un_column[col];
160 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
161 
162 		/* get column name */
163 		mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep);
164 		if (mdrcp->colnamep == NULL)
165 			goto out;
166 
167 		/* override any start_blk */
168 #ifdef	DEBUG
169 		if (metagetstart(sp, mdrcp->colnamep, ep) !=
170 		    MD_DISKADDR_ERROR) {
171 			assert(mdrcp->colnamep->start_blk <=
172 			    rcp->un_orig_devstart);
173 		} else {
174 			mdclrerror(ep);
175 		}
176 #endif	/* DEBUG */
177 		mdrcp->colnamep->start_blk = rcp->un_orig_devstart;
178 
179 		/* if hotspared */
180 		if (HOTSPARED(mr, col)) {
181 			/* get hotspare name */
182 			mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key,
183 			    fast, ep);
184 			if (mdrcp->hsnamep == NULL)
185 				goto out;
186 
187 			if (getenv("META_DEBUG_START_BLK") != NULL) {
188 				if (metagetstart(sp, mdrcp->hsnamep, ep) ==
189 				    MD_DISKADDR_ERROR)
190 					mdclrerror(ep);
191 
192 				if ((mdrcp->hsnamep->start_blk == 0) &&
193 				    (rcp->un_hs_pwstart != 0))
194 					md_eprintf(dgettext(TEXT_DOMAIN,
195 					    "%s: suspected bad start block,"
196 					    " seems labelled [raid]\n"),
197 					    mdrcp->hsnamep->cname);
198 
199 				if ((mdrcp->hsnamep->start_blk > 0) &&
200 				    (rcp->un_hs_pwstart == 0))
201 					md_eprintf(dgettext(TEXT_DOMAIN,
202 					    "%s: suspected bad start block, "
203 					    " seems unlabelled [raid]\n"),
204 					    mdrcp->hsnamep->cname);
205 			}
206 
207 			/* override any start_blk */
208 			mdrcp->hsnamep->start_blk = rcp->un_hs_devstart;
209 		}
210 
211 		/* get state, flags, and timestamp */
212 		mdrcp->state = rcp->un_devstate;
213 		mdrcp->flags = rcp->un_devflags;
214 		mdrcp->timestamp = rcp->un_devtimestamp;
215 	}
216 
217 	/* get resync info */
218 	(void) memset(&ri, 0, sizeof (ri));
219 	ri.ri_mnum = meta_getminor(raidnp->dev);
220 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
221 	if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) {
222 		(void) mdstealerror(ep, &ri.mde);
223 		goto out;
224 	}
225 	raidp->resync_flags = ri.ri_flags;
226 	raidp->percent_dirty = ri.ri_percent_dirty;
227 	raidp->percent_done = ri.ri_percent_done;
228 
229 	/* cleanup, return success */
230 	Free(mr);
231 	dnp->unitp = (md_common_t *)raidp;
232 	return (raidp);
233 
234 	/* cleanup, return error */
235 out:
236 	Free(mr);
237 	meta_free_raid(raidp);
238 	return (NULL);
239 }
240 
241 /*
242  * get raid
243  */
244 md_raid_t *
245 meta_get_raid(
246 	mdsetname_t		*sp,
247 	mdname_t		*raidnp,
248 	md_error_t		*ep
249 )
250 {
251 	return (meta_get_raid_common(sp, raidnp, 0, ep));
252 }
253 
254 /*
255  * check raid for dev
256  */
257 static int
258 in_raid(
259 	mdsetname_t	*sp,
260 	mdname_t	*raidnp,
261 	mdname_t	*np,
262 	diskaddr_t	slblk,
263 	diskaddr_t	nblks,
264 	md_error_t	*ep
265 )
266 {
267 	md_raid_t	*raidp;
268 	uint_t		col;
269 
270 	/* should be in the same set */
271 	assert(sp != NULL);
272 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
273 
274 	/* get unit */
275 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
276 		return (-1);
277 
278 	/* look in columns */
279 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
280 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
281 		mdname_t	*colnp = cp->colnamep;
282 		diskaddr_t	col_sblk;
283 		int		err;
284 
285 		/* check same drive since metagetstart() can fail */
286 		if ((err = meta_check_samedrive(np, colnp, ep)) < 0)
287 			return (-1);
288 		else if (err == 0)
289 			continue;
290 
291 		/* check overlap */
292 		if ((col_sblk = metagetstart(sp, colnp, ep)) ==
293 		    MD_DISKADDR_ERROR)
294 			return (-1);
295 		if (meta_check_overlap(raidnp->cname, np, slblk, nblks,
296 		    colnp, col_sblk, -1, ep) != 0) {
297 			return (-1);
298 		}
299 	}
300 
301 	/* return success */
302 	return (0);
303 }
304 
305 /*
306  * check to see if we're in a raid
307  */
308 int
309 meta_check_inraid(
310 	mdsetname_t	*sp,
311 	mdname_t	*np,
312 	diskaddr_t	slblk,
313 	diskaddr_t	nblks,
314 	md_error_t	*ep
315 )
316 {
317 	mdnamelist_t	*raidnlp = NULL;
318 	mdnamelist_t	*p;
319 	int		rval = 0;
320 
321 	/* should have a set */
322 	assert(sp != NULL);
323 
324 	/* for each raid */
325 	if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
326 		return (-1);
327 	for (p = raidnlp; (p != NULL); p = p->next) {
328 		mdname_t	*raidnp = p->namep;
329 
330 		/* check raid */
331 		if (in_raid(sp, raidnp, np, slblk, nblks, ep) != 0) {
332 			rval = -1;
333 			break;
334 		}
335 	}
336 
337 	/* cleanup, return success */
338 	metafreenamelist(raidnlp);
339 	return (rval);
340 }
341 
342 /*
343  * check column
344  */
345 int
346 meta_check_column(
347 	mdsetname_t	*sp,
348 	mdname_t	*np,
349 	md_error_t	*ep
350 )
351 {
352 	mdchkopts_t	options = (MDCHK_ALLOW_MDDB);
353 
354 	/* check for soft partitions */
355 	if (meta_sp_issp(sp, np, ep) != 0) {
356 		/* make sure we have a disk */
357 		if (metachkcomp(np, ep) != 0)
358 			return (-1);
359 	}
360 
361 	/* check to ensure that it is not already in use */
362 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
363 		return (-1);
364 	}
365 
366 	/* make sure it is in the set */
367 	if (meta_check_inset(sp, np, ep) != 0)
368 		return (-1);
369 
370 	/* make sure its not in a metadevice */
371 	if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
372 		return (-1);
373 
374 	/* return success */
375 	return (0);
376 }
377 
378 /*
379  * print raid
380  */
381 static int
382 raid_print(
383 	md_raid_t	*raidp,
384 	char		*fname,
385 	FILE		*fp,
386 	mdprtopts_t	options,
387 	md_error_t	*ep
388 )
389 {
390 	uint_t		col;
391 	int		rval = -1;
392 
393 
394 	if (options & PRINT_LARGEDEVICES) {
395 		if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) {
396 			rval = 0;
397 			goto out;
398 		}
399 	}
400 
401 	if (options & PRINT_FN) {
402 		if ((raidp->common.revision & MD_FN_META_DEV) == 0) {
403 			rval = 0;
404 			goto out;
405 		}
406 	}
407 
408 	/* print name and -r */
409 	if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF)
410 		goto out;
411 
412 	/*
413 	 * Print columns. Always print the full path.
414 	 */
415 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
416 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
417 
418 		if (fprintf(fp, " %s", mdrcp->colnamep->rname) == EOF)
419 			goto out;
420 	}
421 
422 	if (fprintf(fp, " -k") == EOF)
423 		goto out;
424 
425 	/* print options */
426 	if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF)
427 		goto out;
428 
429 	if (raidp->pw_count != PWCNT_MIN)
430 		if (fprintf(fp, " -w %d", raidp->pw_count) == EOF)
431 			goto out;
432 
433 	if (raidp->hspnamep != NULL) {
434 		if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF)
435 			goto out;
436 	}
437 	if (raidp->orig_ncol != raidp->cols.cols_len) {
438 		assert(raidp->orig_ncol < raidp->cols.cols_len);
439 		if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF)
440 			goto out;
441 	}
442 
443 	/* terminate last line */
444 	if (fprintf(fp, "\n") == EOF)
445 		goto out;
446 
447 	/* success */
448 	rval = 0;
449 
450 	/* cleanup, return error */
451 out:
452 	if (rval != 0)
453 		(void) mdsyserror(ep, errno, fname);
454 	return (rval);
455 }
456 
457 static int
458 find_resyncing_column(
459 	md_raid_t *raidp
460 )
461 {
462 	int		col;
463 
464 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
465 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
466 		if (cp->state & RCS_RESYNC)
467 			return (col);
468 	}
469 
470 	/* No resyncing columns */
471 	return (-1);
472 }
473 
474 /*
475  * convert raid state to name
476  */
477 char *
478 raid_state_to_name(
479 	md_raid_t	*raidp,
480 	md_timeval32_t	*tvp,
481 	uint_t		tstate /* Errored tstate flags */
482 )
483 {
484 
485 	/* grab time */
486 	if (tvp != NULL)
487 		*tvp = raidp->timestamp;
488 
489 	/*
490 	 * If the device has a transient error state (due to it being DR'ed or
491 	 * failed) and there has been no I/O to it (the actual device is still
492 	 * marked as 'Okay') then we cannot know what the state is or what
493 	 * action to take on it. Therefore report the device as 'Unavailable'.
494 	 * A subsequent I/O to the device will cause the 'Okay' status to
495 	 * disappear if the device is actually gone and then we will print out
496 	 * the appropriate status.  The MD_INACCESSIBLE state is only set
497 	 * on the raid when we open it or probe it.  One the raid is open
498 	 * then we will just have regular error status on the device.
499 	 */
500 	if (tstate & MD_INACCESSIBLE) {
501 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
502 	}
503 
504 	/* resyncing */
505 	if (find_resyncing_column(raidp) >= 0)
506 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
507 
508 	/* everything else */
509 	switch (raidp->state) {
510 		case RUS_INIT :
511 			return (dgettext(TEXT_DOMAIN, "Initializing"));
512 		case RUS_OKAY :
513 			return (dgettext(TEXT_DOMAIN, "Okay"));
514 		case RUS_ERRED :
515 		/*FALLTHROUGH*/
516 		case RUS_LAST_ERRED :
517 			return (dgettext(TEXT_DOMAIN, "Needs Maintenance"));
518 		case RUS_DOI :
519 			return (dgettext(TEXT_DOMAIN, "Initialization Failed"));
520 		case RUS_REGEN :
521 			return (dgettext(TEXT_DOMAIN, "Regen"));
522 		default :
523 			return (dgettext(TEXT_DOMAIN, "invalid"));
524 	} /* switch */
525 }
526 
527 static int
528 find_erred_column(md_raid_t *raidp, rcs_state_t state)
529 {
530 	int		col;
531 
532 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
533 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
534 		if (cp->state & state)
535 			return (col);
536 	}
537 
538 	/* No erred columns */
539 	return (-1);
540 }
541 
542 /*
543  * convert raid state to repair action
544  */
545 char *
546 raid_state_to_action(md_raid_t *raidp)
547 {
548 	static char	emsg[1024];
549 	mdname_t	*raidnp = raidp->common.namep;
550 	int		err_col;
551 
552 	/* first check for full init failure */
553 	if (raidp->state & RUS_DOI) {
554 		(void) snprintf(emsg, sizeof (emsg),
555 		    "metaclear -f %s", raidnp->cname);
556 		return (emsg);
557 	}
558 
559 	/* replace errored or init errored raid column */
560 	if ((err_col = find_erred_column(raidp,
561 	    (RCS_ERRED | RCS_INIT_ERRED))) >= 0) {
562 		mdname_t	*colnp;
563 
564 		/* get column with error */
565 		assert(err_col < raidp->cols.cols_len);
566 		colnp = raidp->cols.cols_val[err_col].colnamep;
567 		(void) snprintf(emsg, sizeof (emsg),
568 		    "metareplace %s%s %s <%s>",
569 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
570 		    raidnp->cname, colnp->cname,
571 		    dgettext(TEXT_DOMAIN, "new device"));
572 		return (emsg);
573 	}
574 
575 
576 	/* replace last errored raid column */
577 	if ((err_col = find_erred_column(raidp, RCS_LAST_ERRED)) >= 0) {
578 		mdname_t	*colnp;
579 
580 		assert(err_col < raidp->cols.cols_len);
581 		colnp = raidp->cols.cols_val[err_col].colnamep;
582 		(void) snprintf(emsg, sizeof (emsg),
583 		    "metareplace %s %s %s <%s>",
584 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
585 		    raidnp->cname, colnp->cname,
586 		    dgettext(TEXT_DOMAIN, "new device"));
587 		return (emsg);
588 	}
589 
590 	/* OK */
591 	return (NULL);
592 }
593 
594 /*
595  * get printable raid column state
596  */
597 char *
598 raid_col_state_to_name(
599 	md_raidcol_t	*colp,
600 	md_timeval32_t	*tvp,
601 	uint_t		tstate
602 )
603 {
604 	/* grab time */
605 	if (tvp != NULL)
606 		*tvp = colp->timestamp;
607 
608 	if (tstate != 0) {
609 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
610 	}
611 
612 	/* everything else */
613 	switch (colp->state) {
614 	case RCS_INIT:
615 		return (dgettext(TEXT_DOMAIN, "Initializing"));
616 
617 	case RCS_OKAY:
618 		return (dgettext(TEXT_DOMAIN, "Okay"));
619 
620 	case RCS_INIT_ERRED:
621 	/*FALLTHROUGH*/
622 	case RCS_ERRED:
623 		return (dgettext(TEXT_DOMAIN, "Maintenance"));
624 
625 	case RCS_LAST_ERRED:
626 		return (dgettext(TEXT_DOMAIN, "Last Erred"));
627 
628 	case RCS_RESYNC:
629 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
630 
631 	default:
632 		return (dgettext(TEXT_DOMAIN, "Unknown"));
633 	}
634 }
635 
636 /*
637  * print raid column
638  */
639 static int
640 display_raid_device_info(
641 	mdsetname_t	*sp,
642 	md_raidcol_t	*colp,
643 	char		*fname,
644 	FILE		*fp,
645 	mdprtopts_t	options,
646 	int		print_len,
647 	uint_t		top_tstate, /* Errored tstate flags */
648 	md_error_t	*ep
649 )
650 {
651 	mdname_t	*namep = ((colp->hsnamep != NULL) ?
652 	    colp->hsnamep : colp->colnamep);
653 	char 		*devid = "";
654 	char		*cname = colp->colnamep->cname;
655 	diskaddr_t	start_blk;
656 	int		has_mddb;
657 	char		*has_mddb_str;
658 	char		*col_state;
659 	md_timeval32_t	tv;
660 	char		*hsname = ((colp->hsnamep != NULL) ?
661 	    colp->hsnamep->cname : "");
662 	int		rval = -1;
663 	mdname_t	*didnp = NULL;
664 	ddi_devid_t	dtp;
665 	uint_t		tstate = 0;
666 
667 	/* get info */
668 	if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR)
669 		return (-1);
670 	if ((has_mddb = metahasmddb(sp, namep, ep)) < 0)
671 		return (-1);
672 	if (has_mddb)
673 		has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
674 	else
675 		has_mddb_str = dgettext(TEXT_DOMAIN, "No");
676 
677 	if (metaismeta(namep)) {
678 		if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
679 			return (-1);
680 		col_state = raid_col_state_to_name(colp, &tv,
681 		    tstate & MD_DEV_ERRORED);
682 	} else {
683 		/*
684 		 * if top_tstate is set, that implies that you have
685 		 * a ctd type device with an unavailable metadevice
686 		 * on top of it. If so, print a - for it's state
687 		 */
688 		if (top_tstate != 0)
689 			col_state = "-";
690 		else
691 			col_state = raid_col_state_to_name(colp, &tv, tstate);
692 	}
693 
694 	/* populate the key in the name_p structure */
695 	if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL)
696 		return (-1);
697 
698 	/* determine if devid does NOT exist */
699 	if (options & PRINT_DEVID) {
700 		if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
701 		    didnp->key, ep)) == NULL)
702 			devid = dgettext(TEXT_DOMAIN, "No ");
703 		else {
704 			devid = dgettext(TEXT_DOMAIN, "Yes");
705 			free(dtp);
706 		}
707 	}
708 	/* print column */
709 	/*
710 	 * Building a format string on the fly that will
711 	 * be used in (f)printf. This allows the length
712 	 * of the ctd to vary from small to large without
713 	 * looking horrible.
714 	 */
715 	if (! (options & PRINT_TIMES)) {
716 		if (fprintf(fp,
717 		    "\t%-*.*s %8lld     %5.5s %12.12s %5.5s %s\n",
718 		    print_len, print_len, cname, start_blk, has_mddb_str,
719 		    col_state, devid, hsname) == EOF) {
720 			goto out;
721 		}
722 	} else {
723 		char	*timep = meta_print_time(&tv);
724 
725 		if (fprintf(fp,
726 		    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
727 		    print_len, cname, start_blk, has_mddb_str,
728 		    col_state, devid, hsname, timep) == EOF) {
729 			goto out;
730 		}
731 	}
732 
733 	/* success */
734 	rval = 0;
735 
736 	/* cleanup, return error */
737 out:
738 	if (rval != 0)
739 		(void) mdsyserror(ep, errno, fname);
740 
741 	return (rval);
742 }
743 
744 /*
745  * print raid options
746  */
747 int
748 meta_print_raid_options(
749 	mdhspname_t	*hspnamep,
750 	char		*fname,
751 	FILE		*fp,
752 	md_error_t	*ep
753 )
754 {
755 	char		*hspname = ((hspnamep != NULL) ? hspnamep->hspname :
756 	    dgettext(TEXT_DOMAIN, "none"));
757 	int		rval = -1;
758 
759 	/* print options */
760 	if (fprintf(fp, dgettext(TEXT_DOMAIN,
761 	    "    Hot spare pool: %s\n"), hspname) == EOF) {
762 		goto out;
763 	}
764 
765 	/* success */
766 	rval = 0;
767 
768 	/* cleanup, return error */
769 out:
770 	if (rval != 0)
771 		(void) mdsyserror(ep, errno, fname);
772 	return (rval);
773 }
774 
775 /*
776  * report raid
777  */
778 static int
779 raid_report(
780 	mdsetname_t	*sp,
781 	md_raid_t	*raidp,
782 	char		*fname,
783 	FILE		*fp,
784 	mdprtopts_t	options,
785 	md_error_t	*ep
786 )
787 {
788 	char		*p;
789 	uint_t		ncol = raidp->cols.cols_len;
790 	uint_t		orig_ncol = raidp->orig_ncol;
791 	diskaddr_t	column_size = raidp->column_size;
792 	char		*raid_state;
793 	md_timeval32_t	tv;
794 	char		*timep;
795 	uint_t		col;
796 	int		rval = -1;
797 	int		len = 0;
798 	uint_t		tstate = 0;
799 
800 	if (options & PRINT_LARGEDEVICES) {
801 		if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) {
802 			rval = 0;
803 			goto out;
804 		}
805 	}
806 
807 	if (options & PRINT_FN) {
808 		if ((raidp->common.revision & MD_FN_META_DEV) == 0) {
809 			rval = 0;
810 			goto out;
811 		}
812 	}
813 
814 	/* print header */
815 	if (options & PRINT_HEADER) {
816 		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"),
817 		    raidp->common.namep->cname) == EOF) {
818 			goto out;
819 		}
820 
821 	}
822 
823 	/* print state */
824 	if (metaismeta(raidp->common.namep)) {
825 		if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0)
826 			return (-1);
827 	}
828 	tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */
829 	raid_state = raid_state_to_name(raidp, &tv, tstate);
830 	if (options & PRINT_TIMES) {
831 		timep = meta_print_time(&tv);
832 	} else {
833 		timep = "";
834 	}
835 
836 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
837 	    raid_state, timep) == EOF) {
838 		goto out;
839 	}
840 
841 	/*
842 	 * Display recovery action if we're marked in the Unavailable state.
843 	 */
844 	if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) {
845 		/* print what to do */
846 		if (tstate & MD_INACCESSIBLE) {
847 			char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */
848 
849 			if (metaislocalset(sp)) {
850 				sname[0] = '\0';
851 			} else {
852 				(void) snprintf(sname, MD_MAX_SETNAME + 3,
853 				    "-s %s", sp->setname);
854 			}
855 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
856 			    "    Invoke: metastat -i %s\n"), sname) == EOF) {
857 				goto out;
858 			}
859 		} else if ((p = raid_state_to_action(raidp)) != NULL) {
860 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
861 			    "    Invoke: %s\n"), p) == EOF) {
862 				goto out;
863 			}
864 		}
865 
866 		/* resync status */
867 		if (raidp->resync_flags & MD_RI_INPROGRESS) {
868 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
869 			    "    Resync in progress: %2d.%1d%% done\n"),
870 			    raidp->percent_done/10,
871 			    raidp->percent_done % 10) == EOF) {
872 				goto out;
873 			}
874 		} else if (raidp->resync_flags & MD_GROW_INPROGRESS) {
875 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
876 			    "    Initialization in progress: %2d.%1d%% "
877 			    "done\n"),
878 			    raidp->percent_done/10,
879 			    raidp->percent_done % 10) == EOF) {
880 				goto out;
881 			}
882 		} else if (raidp->state & RUS_REGEN) {
883 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
884 			    "    Parity regeneration in progress: %2d.%1d%% "
885 			    "done\n"),
886 			    raidp->percent_done/10,
887 			    raidp->percent_done % 10) == EOF) {
888 				goto out;
889 			}
890 		}
891 	}
892 
893 	/* print hotspare pool */
894 	if (raidp->hspnamep != NULL) {
895 		if (meta_print_raid_options(raidp->hspnamep,
896 		    fname, fp, ep) != 0) {
897 			return (-1);
898 		}
899 	}
900 
901 	/* print interlace */
902 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Interlace: %lld blocks\n"),
903 	    raidp->interlace) == EOF) {
904 		goto out;
905 	}
906 
907 	/* print size */
908 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
909 	    raidp->common.size,
910 	    meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) {
911 		goto out;
912 	}
913 
914 	/* MD_DEBUG stuff */
915 	if (options & PRINT_DEBUG) {
916 		mdname_t	*raidnp = raidp->common.namep;
917 		mr_unit_t	*mr;
918 
919 		/* get additional info */
920 		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
921 			return (-1);
922 		assert(mr->c.un_type == MD_METARAID);
923 
924 		/* print prewrite count and size */
925 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
926 		    "    Prewrite Count: %u slots\n"),
927 		    mr->un_pwcnt) == EOF) {
928 			Free(mr);
929 			goto out;
930 		}
931 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
932 		    "    Prewrite Slot Size: %u blocks\n"),
933 		    (mr->un_pwsize / mr->un_pwcnt)) == EOF) {
934 			Free(mr);
935 			goto out;
936 		}
937 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
938 		    "    Prewrite Total Size: %u blocks\n"),
939 		    mr->un_pwsize) == EOF) {
940 			Free(mr);
941 			goto out;
942 		}
943 		Free(mr);
944 	}
945 
946 	/* print original devices */
947 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF)
948 		goto out;
949 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
950 	    column_size * (orig_ncol - 1),
951 	    meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE))
952 	    == EOF) {
953 		goto out;
954 	}
955 	/*
956 	 * Building a format string on the fly that will
957 	 * be used in (f)printf. This allows the length
958 	 * of the ctd to vary from small to large without
959 	 * looking horrible.
960 	 */
961 	for (col = 0; (col < orig_ncol); ++col) {
962 		len = max(len,
963 		    strlen(raidp->cols.cols_val[col].colnamep->cname));
964 	}
965 
966 	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
967 	len += 2;
968 
969 	if (! (options & PRINT_TIMES)) {
970 		if (fprintf(fp,
971 		    "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s  %s\n",
972 		    len, len,
973 		    dgettext(TEXT_DOMAIN, "Device"),
974 		    dgettext(TEXT_DOMAIN, "Start Block"),
975 		    dgettext(TEXT_DOMAIN, "Dbase"),
976 		    dgettext(TEXT_DOMAIN, "State"),
977 		    dgettext(TEXT_DOMAIN, "Reloc"),
978 		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
979 			goto out;
980 		}
981 	} else {
982 		if (fprintf(fp,
983 		    "\t%-*s  %5s  %-5s  %-11s  %-5s   %-9s  %s\n",
984 		    len,
985 		    dgettext(TEXT_DOMAIN, "Device"),
986 		    dgettext(TEXT_DOMAIN, "Start"),
987 		    dgettext(TEXT_DOMAIN, "Dbase"),
988 		    dgettext(TEXT_DOMAIN, "State"),
989 		    dgettext(TEXT_DOMAIN, "Reloc"),
990 		    dgettext(TEXT_DOMAIN, "Hot Spare"),
991 		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
992 			goto out;
993 		}
994 	}
995 	for (col = 0; (col < orig_ncol); ++col) {
996 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
997 
998 		if (display_raid_device_info(sp, mdrcp, fname, fp, options,
999 		    len, tstate, ep) != 0) {
1000 			return (-1);
1001 		}
1002 	}
1003 
1004 	/* print concatenated devices */
1005 	if (col < ncol) {
1006 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
1007 		    "Concatenated Devices:\n")) == EOF) {
1008 			goto out;
1009 		}
1010 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
1011 		    "    Size: %lld blocks (%s)\n"),
1012 		    column_size * (ncol - orig_ncol),
1013 		    meta_number_to_string(column_size * (ncol - orig_ncol),
1014 		    DEV_BSIZE))
1015 		    == EOF) {
1016 			goto out;
1017 		}
1018 		/*
1019 		 * This allows the length
1020 		 * of the ctd to vary from small to large without
1021 		 * looking horrible.
1022 		 */
1023 		if (! (options & PRINT_TIMES)) {
1024 			if (fprintf(fp,
1025 			    "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
1026 			    len, len,
1027 			    dgettext(TEXT_DOMAIN, "Device"),
1028 			    dgettext(TEXT_DOMAIN, "Start Block"),
1029 			    dgettext(TEXT_DOMAIN, "Dbase"),
1030 			    dgettext(TEXT_DOMAIN, "State"),
1031 			    dgettext(TEXT_DOMAIN, "Reloc"),
1032 			    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
1033 				goto out;
1034 			}
1035 		} else {
1036 			if (fprintf(fp,
1037 			    "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
1038 			    len,
1039 			    dgettext(TEXT_DOMAIN, "Device"),
1040 			    dgettext(TEXT_DOMAIN, "Start"),
1041 			    dgettext(TEXT_DOMAIN, "Dbase"),
1042 			    dgettext(TEXT_DOMAIN, "State"),
1043 			    dgettext(TEXT_DOMAIN, "Reloc"),
1044 			    dgettext(TEXT_DOMAIN, "Hot Spare"),
1045 			    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
1046 				goto out;
1047 			}
1048 		}
1049 		assert(col == orig_ncol);
1050 		for (/* void */; (col < ncol); col++) {
1051 			md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
1052 
1053 			if (display_raid_device_info(sp, mdrcp, fname, fp,
1054 			    options, len, tstate, ep) != 0) {
1055 				return (-1);
1056 			}
1057 		}
1058 	}
1059 
1060 	/* add extra line */
1061 	if (fprintf(fp, "\n") == EOF)
1062 		goto out;
1063 
1064 	/* success */
1065 	rval = 0;
1066 
1067 	/* cleanup, return error */
1068 out:
1069 	if (rval != 0)
1070 		(void) mdsyserror(ep, errno, fname);
1071 	return (rval);
1072 }
1073 
1074 /*
1075  * print/report raid
1076  */
1077 int
1078 meta_raid_print(
1079 	mdsetname_t	*sp,
1080 	mdname_t	*raidnp,
1081 	mdnamelist_t	**nlpp,
1082 	char		*fname,
1083 	FILE		*fp,
1084 	mdprtopts_t	options,
1085 	md_error_t	*ep
1086 )
1087 {
1088 	md_raid_t	*raidp;
1089 	int		col;
1090 
1091 	/* should have same set */
1092 	assert(sp != NULL);
1093 	assert((raidnp == NULL) ||
1094 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
1095 
1096 	/* print all raids */
1097 	if (raidnp == NULL) {
1098 		mdnamelist_t	*nlp = NULL;
1099 		mdnamelist_t	*p;
1100 		int		cnt;
1101 		int		rval = 0;
1102 
1103 		/* get list */
1104 		if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0)
1105 			return (-1);
1106 		else if (cnt == 0)
1107 			return (0);
1108 
1109 		/* recurse */
1110 		for (p = nlp; (p != NULL); p = p->next) {
1111 			mdname_t	*np = p->namep;
1112 
1113 			if (meta_raid_print(sp, np, nlpp, fname, fp,
1114 			    options, ep) != 0)
1115 				rval = -1;
1116 		}
1117 
1118 		/* cleanup, return success */
1119 		metafreenamelist(nlp);
1120 		return (rval);
1121 	}
1122 
1123 	/* get unit structure */
1124 	if ((raidp = meta_get_raid_common(sp, raidnp,
1125 	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
1126 		return (-1);
1127 
1128 	/* check for parented */
1129 	if ((! (options & PRINT_SUBDEVS)) &&
1130 	    (MD_HAS_PARENT(raidp->common.parent))) {
1131 		return (0);
1132 	}
1133 
1134 	/* print appropriate detail */
1135 	if (options & PRINT_SHORT) {
1136 		if (raid_print(raidp, fname, fp, options, ep) != 0)
1137 			return (-1);
1138 	} else {
1139 		if (raid_report(sp, raidp, fname, fp, options, ep) != 0)
1140 			return (-1);
1141 	}
1142 
1143 	/* Recurse on components that are metadevices */
1144 	for (col = 0; col < raidp->cols.cols_len; ++col) {
1145 		md_raidcol_t	*colp = &raidp->cols.cols_val[col];
1146 		mdname_t	*namep = colp->colnamep;
1147 
1148 		if ((metaismeta(namep)) &&
1149 		    (meta_print_name(sp, namep, nlpp, fname, fp,
1150 		    (options | PRINT_HEADER | PRINT_SUBDEVS),
1151 		    NULL, ep) != 0)) {
1152 			return (-1);
1153 		}
1154 	}
1155 
1156 	return (0);
1157 }
1158 
1159 /*
1160  * adjust raid geometry
1161  */
1162 static int
1163 adjust_geom(
1164 	mdname_t	*raidnp,
1165 	mdname_t	*colnp,
1166 	mr_unit_t	*mr,
1167 	md_error_t	*ep
1168 )
1169 {
1170 	uint_t		round_cyl = 1;
1171 	mdgeom_t	*geomp;
1172 
1173 	/* get reinstructs */
1174 	if ((geomp = metagetgeom(colnp, ep)) == NULL)
1175 		return (-1);
1176 
1177 	/* adjust geometry */
1178 	if (meta_adjust_geom((md_unit_t *)mr, raidnp, geomp->write_reinstruct,
1179 	    geomp->read_reinstruct, round_cyl, ep) != 0)
1180 		return (-1);
1181 
1182 	/* return success */
1183 	return (0);
1184 }
1185 
1186 /*
1187  * add another column to the raid unit structure
1188  */
1189 static int
1190 attach_raid_col(
1191 	mdsetname_t	*sp,
1192 	mdname_t	*raidnp,
1193 	mr_unit_t	*mr,
1194 	mr_column_t	*mdc,
1195 	mdname_t	*colnp,
1196 	rcs_state_t	state,
1197 	mdnamelist_t	**keynlpp,
1198 	mdcmdopts_t	options,
1199 	md_error_t	*ep
1200 )
1201 {
1202 	diskaddr_t	column_size = mr->un_segsize * mr->un_segsincolumn;
1203 	diskaddr_t	size;
1204 	uint_t		 maxio;
1205 	mdcinfo_t	*cinfop;
1206 	md_timeval32_t	tmp_time;
1207 
1208 	/* setup state and timestamp */
1209 	mdc->un_devstate = state;
1210 	if (meta_gettimeofday(&tmp_time) == -1)
1211 		return (mdsyserror(ep, errno, NULL));
1212 
1213 	mdc->un_devtimestamp = tmp_time;
1214 	/* get start, size, and maxio */
1215 	if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
1216 	    MD_DISKADDR_ERROR)
1217 		return (-1);
1218 	if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
1219 		return (-1);
1220 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
1221 		return (-1);
1222 	maxio = cinfop->maxtransfer;
1223 
1224 	/* adjust start and size by prewrite */
1225 	mdc->un_orig_pwstart = mdc->un_orig_devstart;
1226 	mdc->un_orig_devstart += mr->un_pwsize;
1227 
1228 	/* make sure we still have something left */
1229 	if ((mdc->un_orig_devstart >= size) ||
1230 	    ((size - mdc->un_orig_devstart) < column_size)) {
1231 		return (mdsyserror(ep, ENOSPC, colnp->cname));
1232 	}
1233 	size -= mdc->un_orig_devstart;
1234 	if (maxio < mr->un_maxio) {
1235 		return (mdcomperror(ep, MDE_MAXIO,
1236 		    meta_getminor(raidnp->dev), colnp->dev, colnp->cname));
1237 	}
1238 
1239 	if (options & MDCMD_DOIT) {
1240 		/* store name in namespace */
1241 		if (add_key_name(sp, colnp, keynlpp, ep) != 0)
1242 			return (-1);
1243 	}
1244 
1245 	/* setup column */
1246 	mdc->un_orig_dev = colnp->dev;
1247 	mdc->un_orig_key = colnp->key;
1248 	mdc->un_dev = colnp->dev;
1249 	mdc->un_pwstart = mdc->un_orig_pwstart;
1250 	mdc->un_devstart = mdc->un_orig_devstart;
1251 	mdc->un_alt_dev = NODEV64;
1252 	mdc->un_alt_pwstart = 0;
1253 	mdc->un_alt_devstart = 0;
1254 	mdc->un_hs_id = 0;
1255 
1256 	/* add the size (we use) of the device to the total */
1257 	mr->c.un_actual_tb += column_size;
1258 
1259 	/* adjust geometry */
1260 	if (adjust_geom(raidnp, colnp, mr, ep) != 0)
1261 		return (-1);
1262 
1263 	/* count column */
1264 	mr->un_totalcolumncnt++;
1265 
1266 	/* return success */
1267 	return (0);
1268 }
1269 
1270 /*
1271  * invalidate column names
1272  */
1273 static int
1274 invalidate_columns(
1275 	mdsetname_t	*sp,
1276 	mdname_t	*raidnp,
1277 	md_error_t	*ep
1278 )
1279 {
1280 	md_raid_t	*raidp;
1281 	uint_t		col;
1282 
1283 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
1284 		return (-1);
1285 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
1286 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
1287 		mdname_t	*colnp = cp->colnamep;
1288 
1289 		meta_invalidate_name(colnp);
1290 	}
1291 	return (0);
1292 }
1293 
1294 /*
1295  * attach columns to raid
1296  */
1297 int
1298 meta_raid_attach(
1299 	mdsetname_t		*sp,
1300 	mdname_t		*raidnp,
1301 	mdnamelist_t		*colnlp,
1302 	mdcmdopts_t		options,
1303 	md_error_t		*ep
1304 )
1305 {
1306 	uint_t			concat_cnt = 0;
1307 	mdnamelist_t		*p;
1308 	mr_unit_t		*old_mr;
1309 	mr_unit_t		*new_mr;
1310 	size_t			old_rusize;
1311 	size_t			new_rusize;
1312 	mdnamelist_t		*keynlp = NULL;
1313 	md_grow_params_t	mgp;
1314 	int			rval = -1;
1315 	int			create_flag = MD_CRO_32BIT;
1316 
1317 	/* should have a set */
1318 	assert(sp != NULL);
1319 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1320 
1321 	/* check type */
1322 	if (metachkmeta(raidnp, ep) != 0)
1323 		return (-1);
1324 
1325 	/* check and count new columns */
1326 	for (p = colnlp; (p != NULL); p = p->next) {
1327 		mdname_t	*np = p->namep;
1328 		mdnamelist_t	*p2;
1329 
1330 		/* check against existing devices */
1331 		if (meta_check_column(sp, np, ep) != 0)
1332 			return (-1);
1333 
1334 		/* check against ourselves */
1335 		for (p2 = p->next; (p2 != NULL); p2 = p2->next) {
1336 			if (meta_check_overlap(np->cname, np, 0, -1,
1337 			    p2->namep, 0, -1, ep) != 0) {
1338 				return (-1);
1339 			}
1340 		}
1341 
1342 		/* count */
1343 		++concat_cnt;
1344 	}
1345 
1346 	/* get old unit */
1347 	if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1348 		return (-1);
1349 
1350 	/*
1351 	 * calculate the size needed for the new raid unit and allocate
1352 	 * the appropriate structure. allocate new unit.
1353 	 */
1354 	old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]);
1355 	old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]);
1356 	new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]);
1357 	new_rusize += (old_mr->un_totalcolumncnt + concat_cnt)
1358 	    * sizeof (new_mr->un_column[0]);
1359 	new_mr = Zalloc(new_rusize);
1360 	(void) memcpy(new_mr, old_mr, old_rusize);
1361 
1362 	/* We always want a do-it, this is for attach_raid_col below */
1363 	options |= MDCMD_DOIT;
1364 
1365 	/* build new unit structure */
1366 	for (p = colnlp; (p != NULL); p = p->next) {
1367 		mdname_t	*colnp = p->namep;
1368 		mr_column_t	*mdc;
1369 
1370 		/* attach column */
1371 		mdc = &new_mr->un_column[new_mr->un_totalcolumncnt];
1372 		if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp,
1373 		    RCS_INIT, &keynlp, options, ep) != 0) {
1374 			goto out;
1375 		}
1376 	}
1377 	assert(new_mr->un_totalcolumncnt
1378 	    == (old_mr->un_totalcolumncnt + concat_cnt));
1379 
1380 
1381 	create_flag = meta_check_devicesize(new_mr->c.un_total_blocks);
1382 
1383 	/* grow raid */
1384 	(void) memset(&mgp, 0, sizeof (mgp));
1385 	mgp.mnum = MD_SID(new_mr);
1386 	MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno);
1387 	mgp.size = new_rusize;
1388 	mgp.mdp = (uintptr_t)new_mr;
1389 
1390 	if (create_flag == MD_CRO_32BIT) {
1391 		mgp.options = MD_CRO_32BIT;
1392 		new_mr->c.un_revision &= ~MD_64BIT_META_DEV;
1393 	} else {
1394 		mgp.options = MD_CRO_64BIT;
1395 		new_mr->c.un_revision |= MD_64BIT_META_DEV;
1396 	}
1397 	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
1398 		(void) mdstealerror(ep, &mgp.mde);
1399 		goto out;
1400 	}
1401 
1402 	/* clear cache */
1403 	if (invalidate_columns(sp, raidnp, ep) != 0)
1404 		goto out;
1405 	meta_invalidate_name(raidnp);
1406 
1407 	/* let em know */
1408 	if (options & MDCMD_PRINT) {
1409 		if (concat_cnt == 1) {
1410 			(void) printf(dgettext(TEXT_DOMAIN,
1411 			    "%s: component is attached\n"),
1412 			    raidnp->cname);
1413 		} else {
1414 			(void) printf(dgettext(TEXT_DOMAIN,
1415 			    "%s: components are attached\n"),
1416 			    raidnp->cname);
1417 		}
1418 		(void) fflush(stdout);
1419 	}
1420 
1421 
1422 	/* grow any parents */
1423 	if (meta_concat_parent(sp, raidnp, ep) != 0)
1424 		goto out;
1425 	rval = 0;	/* success */
1426 
1427 	/* cleanup, return error */
1428 out:
1429 	Free(old_mr);
1430 	Free(new_mr);
1431 	if (rval != 0)
1432 		(void) del_key_names(sp, keynlp, NULL);
1433 	metafreenamelist(keynlp);
1434 	return (rval);
1435 }
1436 
1437 /*
1438  * get raid parameters
1439  */
1440 int
1441 meta_raid_get_params(
1442 	mdsetname_t	*sp,
1443 	mdname_t	*raidnp,
1444 	mr_params_t	*paramsp,
1445 	md_error_t	*ep
1446 )
1447 {
1448 	md_raid_t	*raidp;
1449 
1450 	/* should have a set */
1451 	assert(sp != NULL);
1452 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1453 
1454 	/* check name */
1455 	if (metachkmeta(raidnp, ep) != 0)
1456 		return (-1);
1457 
1458 	/* get unit */
1459 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
1460 		return (-1);
1461 
1462 	/* return parameters */
1463 	(void) memset(paramsp, 0, sizeof (*paramsp));
1464 	if (raidp->hspnamep == NULL)
1465 		paramsp->hsp_id = MD_HSP_NONE;
1466 	else
1467 		paramsp->hsp_id = raidp->hspnamep->hsp;
1468 	return (0);
1469 }
1470 
1471 /*
1472  * set raid parameters
1473  */
1474 int
1475 meta_raid_set_params(
1476 	mdsetname_t		*sp,
1477 	mdname_t		*raidnp,
1478 	mr_params_t		*paramsp,
1479 	md_error_t		*ep
1480 )
1481 {
1482 	md_raid_params_t	msp;
1483 
1484 	/* should have a set */
1485 	assert(sp != NULL);
1486 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1487 
1488 	/* check name */
1489 	if (metachkmeta(raidnp, ep) != 0)
1490 		return (-1);
1491 
1492 	/* set parameters */
1493 	(void) memset(&msp, 0, sizeof (msp));
1494 	MD_SETDRIVERNAME(&msp, MD_RAID, sp->setno);
1495 	msp.mnum = meta_getminor(raidnp->dev);
1496 	msp.params = *paramsp;
1497 	if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, raidnp->cname) != 0)
1498 		return (mdstealerror(ep, &msp.mde));
1499 
1500 	/* clear cache */
1501 	meta_invalidate_name(raidnp);
1502 
1503 	/* return success */
1504 	return (0);
1505 }
1506 
1507 /*
1508  * validate raid replace column
1509  */
1510 static int
1511 validate_new_raid(
1512 	mdsetname_t	*sp,
1513 	mdname_t	*raidnp,
1514 	mdname_t	*colnp,
1515 	replace_params_t *paramsp,
1516 	int		dup_ok,
1517 	md_error_t	*ep
1518 )
1519 {
1520 	mr_unit_t	*mr;
1521 	diskaddr_t	column_size;
1522 	diskaddr_t	label;
1523 	mdcinfo_t	*cinfop;
1524 	int		rval = -1;
1525 
1526 	/* get raid unit */
1527 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1528 		return (-1);
1529 	column_size = mr->un_segsize * mr->un_segsincolumn;
1530 
1531 	/* check it out */
1532 	if (meta_check_column(sp, colnp, ep) != 0) {
1533 		if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY)))
1534 			goto out;
1535 		mdclrerror(ep);
1536 	}
1537 	if ((paramsp->number_blks = metagetsize(colnp, ep)) ==
1538 	    MD_DISKADDR_ERROR)
1539 		goto out;
1540 	if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR)
1541 		goto out;
1542 	paramsp->has_label = ((label > 0) ? 1 : 0);
1543 	if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) ==
1544 	    MD_DISKADDR_ERROR)
1545 		goto out;
1546 	if ((paramsp->number_blks - paramsp->start_blk) < column_size) {
1547 		(void) mdsyserror(ep, ENOSPC, colnp->cname);
1548 		goto out;
1549 	}
1550 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
1551 		goto out;
1552 	if (cinfop->maxtransfer < mr->un_maxio) {
1553 		(void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev),
1554 		    colnp->dev, colnp->cname);
1555 		goto out;
1556 	}
1557 
1558 	/* success */
1559 	rval = 0;
1560 
1561 	/* cleanup, return error */
1562 out:
1563 	Free(mr);
1564 	return (rval);
1565 }
1566 
1567 /*
1568  * replace raid column
1569  */
1570 int
1571 meta_raid_replace(
1572 	mdsetname_t		*sp,
1573 	mdname_t		*raidnp,
1574 	mdname_t		*oldnp,
1575 	mdname_t		*newnp,
1576 	mdcmdopts_t		options,
1577 	md_error_t		*ep
1578 )
1579 {
1580 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
1581 	replace_params_t	params;
1582 	md_dev64_t		old_dev, new_dev;
1583 	diskaddr_t		new_start_blk, new_end_blk;
1584 	int			rebind;
1585 	char			*new_devidp = NULL;
1586 	md_error_t		xep = mdnullerror;
1587 	int			ret;
1588 	md_set_desc		*sd;
1589 	uint_t			tstate;
1590 
1591 	/* should have same set */
1592 	assert(sp != NULL);
1593 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1594 
1595 	/* check name */
1596 	if (metachkmeta(raidnp, ep) != 0)
1597 		return (-1);
1598 
1599 	/* save new binding incase this is a rebind where oldnp==newnp */
1600 	new_dev = newnp->dev;
1601 	new_start_blk = newnp->start_blk;
1602 	new_end_blk = newnp->end_blk;
1603 
1604 	/* invalidate, then get the raid (fill in oldnp from metadb) */
1605 	meta_invalidate_name(raidnp);
1606 	if (meta_get_raid(sp, raidnp, ep) == NULL)
1607 		return (-1);
1608 
1609 	/* can't replace a component if the raid inaccessible */
1610 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
1611 		return (-1);
1612 	}
1613 	if (tstate & MD_INACCESSIBLE) {
1614 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
1615 		    meta_getminor(raidnp->dev), raidnp->cname));
1616 	}
1617 
1618 	/* the old device binding is now established */
1619 	if ((old_dev = oldnp->dev) == NODEV64)
1620 		return (mdsyserror(ep, ENODEV, oldnp->cname));
1621 
1622 
1623 	/* setup raid info */
1624 	(void) memset(&params, 0, sizeof (params));
1625 	params.mnum = meta_getminor(raidnp->dev);
1626 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
1627 	params.old_dev = old_dev;
1628 	params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP;
1629 
1630 	if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
1631 	    (old_dev != new_dev)) {
1632 		rebind = 1;
1633 	} else {
1634 		rebind = 0;
1635 	}
1636 	if (rebind) {
1637 		newnp->dev = new_dev;
1638 		newnp->start_blk = new_start_blk;
1639 		newnp->end_blk = new_end_blk;
1640 	}
1641 
1642 	/*
1643 	 * Save a copy of the devid associated with the new disk, the
1644 	 * reason is that the checks for the column (meta_check_column)
1645 	 * via validate_new_raid(), could cause the disk's devid to be
1646 	 * changed to that of the devid that is currently stored in the
1647 	 * replica namespace for the disk in question. This devid could
1648 	 * be stale if we are replacing the disk. The actual function
1649 	 * that overwrites the devid is dr2drivedesc().
1650 	 */
1651 
1652 	/* don't setup new_devid if no devid's or MN diskset */
1653 	if (newnp->drivenamep->devid != NULL)
1654 		new_devidp = Strdup(newnp->drivenamep->devid);
1655 
1656 	if (!metaislocalset(sp)) {
1657 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1658 			return (-1);
1659 		if (MD_MNSET_DESC(sd))
1660 			new_devidp = NULL;
1661 	}
1662 
1663 	/* check out new (sets up start_blk, has_label, number_blks) */
1664 	if (validate_new_raid(sp, raidnp, newnp, &params, rebind,
1665 	    ep) != 0) {
1666 		Free(new_devidp);
1667 		return (-1);
1668 	}
1669 
1670 	/*
1671 	 * Copy back the saved devid.
1672 	 */
1673 	Free(newnp->drivenamep->devid);
1674 	if (new_devidp) {
1675 		newnp->drivenamep->devid = Strdup(new_devidp);
1676 		Free(new_devidp);
1677 	}
1678 
1679 	/* store name in namespace, allocate new key */
1680 	if (add_key_name(sp, newnp, NULL, ep) != 0)
1681 		return (-1);
1682 
1683 	if (rebind && !metaislocalset(sp)) {
1684 		/*
1685 		 * We are 'rebind'ing a disk that is in a diskset so as well
1686 		 * as updating the diskset's namespace the local set needs
1687 		 * to be updated because it also contains a reference to the
1688 		 * disk in question.
1689 		 */
1690 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
1691 		    newnp->cname, ep);
1692 
1693 		if (ret != METADEVADM_SUCCESS) {
1694 			(void) del_key_name(sp, newnp, &xep);
1695 			return (-1);
1696 		}
1697 	}
1698 
1699 	/* replace column */
1700 	params.new_dev = new_dev;
1701 	params.new_key = newnp->key;
1702 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
1703 		(void) del_key_name(sp, newnp, ep);
1704 		return (mdstealerror(ep, &params.mde));
1705 	}
1706 
1707 	/* clear cache */
1708 	meta_invalidate_name(oldnp);
1709 	meta_invalidate_name(newnp);
1710 	meta_invalidate_name(raidnp);
1711 
1712 	/* let em know */
1713 	if (options & MDCMD_PRINT) {
1714 		(void) printf(dgettext(TEXT_DOMAIN,
1715 		    "%s: device %s is replaced with %s\n"),
1716 		    raidnp->cname, oldnp->cname, newnp->cname);
1717 		(void) fflush(stdout);
1718 	}
1719 
1720 	/* return success */
1721 	return (0);
1722 }
1723 
1724 /*
1725  * enable raid column
1726  */
1727 int
1728 meta_raid_enable(
1729 	mdsetname_t		*sp,
1730 	mdname_t		*raidnp,
1731 	mdname_t		*colnp,
1732 	mdcmdopts_t		options,
1733 	md_error_t		*ep
1734 )
1735 {
1736 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
1737 	replace_params_t	params;
1738 	md_dev64_t		fs_dev, del_dev;
1739 	int			err = 0;
1740 	char			*devnm;
1741 	int			ret;
1742 	uint_t			tstate;
1743 
1744 	/* should have same set */
1745 	assert(sp != NULL);
1746 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1747 
1748 	/* check name */
1749 	if (metachkmeta(raidnp, ep) != 0)
1750 		return (-1);
1751 
1752 	/* get the file_system dev binding */
1753 	if (meta_getdev(sp, colnp, ep) != 0)
1754 		return (-1);
1755 	fs_dev = colnp->dev;
1756 
1757 	/* get the raid unit (fill in colnp->dev with metadb version) */
1758 	meta_invalidate_name(raidnp);
1759 	if (meta_get_raid(sp, raidnp, ep) == NULL)
1760 		return (-1);
1761 
1762 	/* enabling a component can't work if the raid inaccessible */
1763 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
1764 		return (-1);
1765 	}
1766 	if (tstate & MD_INACCESSIBLE) {
1767 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
1768 		    meta_getminor(raidnp->dev), raidnp->cname));
1769 	}
1770 
1771 	/* the metadb device binding is now established */
1772 	if (colnp->dev == NODEV64)
1773 		return (mdsyserror(ep, ENODEV, colnp->cname));
1774 
1775 	/*
1776 	 * check for the case where the dev_t has changed between the
1777 	 * filesystem and the metadb.  This is called a rebind, and
1778 	 * is handled by meta_raid_replace.
1779 	 */
1780 	if (fs_dev != colnp->dev) {
1781 		/*
1782 		 * Save the devt of mddb version
1783 		 */
1784 		del_dev = colnp->dev;
1785 
1786 		/* establish file system binding with invalid start/end */
1787 		colnp->dev = fs_dev;
1788 		colnp->start_blk = -1;
1789 		colnp->end_blk = -1;
1790 		err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep);
1791 
1792 		/*
1793 		 * Don't do it if meta_raid_replace returns an error
1794 		 */
1795 		if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD,
1796 		    del_dev, NULL, NULL, &colnp->key, ep)) != NULL) {
1797 			(void) del_key_name(sp, colnp, ep);
1798 			Free(devnm);
1799 		}
1800 		return (err);
1801 	}
1802 
1803 	/* setup raid info */
1804 	(void) memset(&params, 0, sizeof (params));
1805 	params.mnum = meta_getminor(raidnp->dev);
1806 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
1807 	params.old_dev = params.new_dev = colnp->dev;
1808 	if (force)
1809 		params.cmd = FORCE_ENABLE_COMP;
1810 	else
1811 		params.cmd = ENABLE_COMP;
1812 
1813 	/* check it out */
1814 	if (validate_new_raid(sp, raidnp, colnp, &params, 1, ep) != 0)
1815 		return (-1);
1816 
1817 	/* enable column */
1818 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
1819 		return (mdstealerror(ep, &params.mde));
1820 
1821 	/*
1822 	 * are we dealing with a non-local set? If so need to update the
1823 	 * local namespace so that the disk record has the correct devid.
1824 	 */
1825 	if (!metaislocalset(sp)) {
1826 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname,
1827 		    ep);
1828 
1829 		if (ret != METADEVADM_SUCCESS) {
1830 			/*
1831 			 * Failed to update the local set. Nothing to do here
1832 			 * apart from report the error. The namespace is
1833 			 * most likely broken and some form of remedial
1834 			 * recovery is going to be required.
1835 			 */
1836 			mde_perror(ep, "");
1837 			mdclrerror(ep);
1838 		}
1839 	}
1840 
1841 	/* clear cache */
1842 	meta_invalidate_name(colnp);
1843 	meta_invalidate_name(raidnp);
1844 
1845 	/* let em know */
1846 	if (options & MDCMD_PRINT) {
1847 		(void) printf(dgettext(TEXT_DOMAIN,
1848 		    "%s: device %s is enabled\n"),
1849 		    raidnp->cname, colnp->cname);
1850 		(void) fflush(stdout);
1851 	}
1852 
1853 	/* return success */
1854 	return (0);
1855 }
1856 
1857 /*
1858  * check for dups in the raid itself
1859  */
1860 static int
1861 check_twice(
1862 	md_raid_t	*raidp,
1863 	uint_t		col,
1864 	md_error_t	*ep
1865 )
1866 {
1867 	mdname_t	*raidnp = raidp->common.namep;
1868 	mdname_t	*thisnp;
1869 	uint_t		c;
1870 
1871 	thisnp = raidp->cols.cols_val[col].colnamep;
1872 	for (c = 0; (c < col); ++c) {
1873 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[c];
1874 		mdname_t	*colnp = mdcp->colnamep;
1875 
1876 		if (meta_check_overlap(raidnp->cname, thisnp, 0, -1,
1877 		    colnp, 0, -1, ep) != 0) {
1878 			return (-1);
1879 		}
1880 	}
1881 	return (0);
1882 }
1883 
1884 /*
1885  * default raid interlace
1886  */
1887 diskaddr_t
1888 meta_default_raid_interlace(void)
1889 {
1890 	diskaddr_t	interlace;
1891 
1892 	/* default to 512k, round up if necessary */
1893 	interlace = btodb(512 * 1024);
1894 	if (interlace < lbtodb(MININTERLACE))
1895 		interlace = roundup(MININTERLACE, interlace);
1896 	return (interlace);
1897 }
1898 
1899 /*
1900  * convert interlaces
1901  */
1902 int
1903 meta_raid_check_interlace(
1904 	diskaddr_t	interlace,
1905 	char		*uname,
1906 	md_error_t	*ep
1907 )
1908 {
1909 	if ((interlace < btodb(RAID_MIN_INTERLACE)) ||
1910 	    (interlace > btodb(MAXINTERLACE))) {
1911 		return (mderror(ep, MDE_BAD_INTERLACE, uname));
1912 	}
1913 	return (0);
1914 }
1915 
1916 /*
1917  * check raid
1918  */
1919 int
1920 meta_check_raid(
1921 	mdsetname_t	*sp,
1922 	md_raid_t	*raidp,
1923 	mdcmdopts_t	options,
1924 	md_error_t	*ep
1925 )
1926 {
1927 	mdname_t	*raidnp = raidp->common.namep;
1928 	int		doit = ((options & MDCMD_DOIT) ? 1 : 0);
1929 	int		updateit = ((options & MDCMD_UPDATE) ? 1 : 0);
1930 	uint_t		ncol;
1931 	uint_t		col;
1932 	minor_t		mnum = meta_getminor(raidnp->dev);
1933 
1934 	/* check number */
1935 	if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) ||
1936 	    (raidp->orig_ncol > ncol)) {
1937 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
1938 	}
1939 
1940 	/* compute default interlace */
1941 	if (raidp->interlace == 0) {
1942 		raidp->interlace = meta_default_raid_interlace();
1943 	}
1944 
1945 	/* check state */
1946 	switch (raidp->state) {
1947 	case RUS_INIT:
1948 	case RUS_OKAY:
1949 		break;
1950 
1951 	default:
1952 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
1953 	}
1954 
1955 	/* check interlace */
1956 	if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0)
1957 		return (-1);
1958 
1959 	/* check hotspare pool name */
1960 	if (doit) {
1961 		if ((raidp->hspnamep != NULL) &&
1962 		    (metachkhsp(sp, raidp->hspnamep, ep) != 0)) {
1963 			return (-1);
1964 		}
1965 	}
1966 
1967 	/* check columns */
1968 	for (col = 0; (col < ncol); ++col) {
1969 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
1970 		mdname_t	*colnp = mdcp->colnamep;
1971 		diskaddr_t	start_blk, size;
1972 
1973 		/* setup column */
1974 		if (raidp->state == RUS_INIT)
1975 			mdcp->state = RCS_INIT;
1976 		else
1977 			mdcp->state = RCS_OKAY;
1978 
1979 		/* check column */
1980 		if (!updateit) {
1981 			if (meta_check_column(sp, colnp, ep) != 0)
1982 				return (-1);
1983 			if (((start_blk = metagetstart(sp, colnp, ep)) ==
1984 			    MD_DISKADDR_ERROR) || ((size = metagetsize(colnp,
1985 			    ep)) == MD_DISKADDR_ERROR)) {
1986 				return (-1);
1987 			}
1988 			if (start_blk >= size)
1989 				return (mdsyserror(ep, ENOSPC, colnp->cname));
1990 			size -= start_blk;
1991 			size = rounddown(size, raidp->interlace);
1992 			if (size == 0)
1993 				return (mdsyserror(ep, ENOSPC, colnp->cname));
1994 		}
1995 
1996 		/* check this raid too */
1997 		if (check_twice(raidp, col, ep) != 0)
1998 			return (-1);
1999 	}
2000 
2001 	/* return success */
2002 	return (0);
2003 }
2004 
2005 /*
2006  * setup raid geometry
2007  */
2008 static int
2009 raid_geom(
2010 	md_raid_t	*raidp,
2011 	mr_unit_t	*mr,
2012 	md_error_t	*ep
2013 )
2014 {
2015 	uint_t		write_reinstruct = 0;
2016 	uint_t		read_reinstruct = 0;
2017 	uint_t		round_cyl = 1;
2018 	uint_t		col;
2019 	mdgeom_t	*geomp;
2020 
2021 	/* get worst reinstructs */
2022 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
2023 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
2024 		mdname_t	*colnp = mdcp->colnamep;
2025 
2026 		if ((geomp = metagetgeom(colnp, ep)) == NULL)
2027 			return (-1);
2028 		if (geomp->write_reinstruct > write_reinstruct)
2029 			write_reinstruct = geomp->write_reinstruct;
2030 		if (geomp->read_reinstruct > read_reinstruct)
2031 			read_reinstruct = geomp->read_reinstruct;
2032 	}
2033 
2034 	/* setup geometry from first column */
2035 	assert(raidp->cols.cols_len > 0);
2036 	if ((geomp = metagetgeom(raidp->cols.cols_val[0].colnamep,
2037 	    ep)) == NULL) {
2038 		return (-1);
2039 	}
2040 	if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp,
2041 	    write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
2042 		return (-1);
2043 
2044 	/* return success */
2045 	return (0);
2046 }
2047 
2048 int
2049 meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state)
2050 {
2051 	int 	statecnt = 0;
2052 	int	col;
2053 
2054 	for (col = 0; col < mr->un_totalcolumncnt; col++)
2055 		if (mr->un_column[col].un_devstate & state)
2056 			statecnt++;
2057 	return (statecnt);
2058 }
2059 /*
2060  * validate that a raid device being created with the -k flag is a real
2061  * raid device
2062  */
2063 int
2064 meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr)
2065 {
2066 	long long	buf[DEV_BSIZE / sizeof (long long)];
2067 	raid_pwhdr_t	pwhdr;
2068 	raid_pwhdr_t	*rpw = &pwhdr;
2069 	minor_t		mnum;
2070 	int		col;
2071 	int		fd;
2072 
2073 	for (col = 0; col < mr->un_totalcolumncnt; col++) {
2074 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2075 		mdname_t	*colnp = cp->colnamep;
2076 
2077 		if ((fd = open(colnp->rname, O_RDONLY)) < 0)
2078 			goto error_exit;
2079 
2080 		if (lseek64(fd,
2081 		    (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0)
2082 			goto error_exit;
2083 
2084 		if (read(fd, buf, DEV_BSIZE) < 0)
2085 			goto error_exit;
2086 
2087 		/*
2088 		 * If our raid device is a 64 bit device, we can accept the
2089 		 * pw header we just read in.
2090 		 * Otherwise it's of type raid_pwhdr32_od_t and has to
2091 		 * be converted.
2092 		 */
2093 		if (mr->c.un_revision & MD_64BIT_META_DEV) {
2094 			rpw = (raid_pwhdr_t *)buf;
2095 		} else {
2096 			RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw);
2097 		}
2098 
2099 		if (rpw->rpw_column != col)
2100 			goto error_exit;
2101 
2102 		if (col == 0)
2103 			mnum = rpw->rpw_unit;
2104 
2105 		if (rpw->rpw_unit != mnum)
2106 			goto error_exit;
2107 
2108 		if (rpw->rpw_magic_ext == RAID_PWMAGIC) {
2109 			/* 4.1 prewrite header */
2110 			if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) ||
2111 			    (rpw->rpw_totalcolumncnt !=
2112 			    mr->un_totalcolumncnt) ||
2113 			    (rpw->rpw_segsize != mr->un_segsize) ||
2114 			    (rpw->rpw_segsincolumn != mr->un_segsincolumn) ||
2115 			    (rpw->rpw_pwcnt != mr->un_pwcnt) ||
2116 			    (rpw->rpw_pwstart !=
2117 			    mr->un_column[col].un_pwstart) ||
2118 			    (rpw->rpw_devstart !=
2119 			    mr->un_column[col].un_devstart) ||
2120 			    (rpw->rpw_pwsize != mr->un_pwsize))
2121 				goto error_exit;
2122 		}
2123 		/*
2124 		 * this is an old prewrite header (4.0) the unit structure
2125 		 * will have to be trusted.
2126 		 */
2127 		(void) close(fd);
2128 	}
2129 
2130 	return (0);
2131 
2132 error_exit:
2133 	(void) close(fd);
2134 	return (-1);
2135 }
2136 
2137 /*
2138  * create raid
2139  */
2140 int
2141 meta_create_raid(
2142 	mdsetname_t	*sp,
2143 	md_raid_t	*raidp,
2144 	mdcmdopts_t	options,
2145 	md_error_t	*ep
2146 )
2147 {
2148 	mdname_t	*raidnp = raidp->common.namep;
2149 	uint_t		ncol = raidp->cols.cols_len;
2150 	uint_t		orig_ncol = raidp->orig_ncol;
2151 	size_t		rdsize;
2152 	mr_unit_t	*mr;
2153 	uint_t		col;
2154 	diskaddr_t	disk_size = 0;
2155 	uint_t		disk_maxio = 0;
2156 	uint_t		pwes;
2157 	diskaddr_t	non_pw_blks, column_size;
2158 	mdnamelist_t	*keynlp = NULL;
2159 	md_set_params_t	set_params;
2160 	int		rval = -1;
2161 	md_timeval32_t	creation_time;
2162 	int		create_flag = MD_CRO_32BIT;
2163 
2164 	/* validate raid */
2165 	if (meta_check_raid(sp, raidp, options, ep) != 0)
2166 		return (-1);
2167 
2168 	/* allocate raid unit */
2169 	rdsize = sizeof (*mr) - sizeof (mr->un_column[0]);
2170 	rdsize += ncol * sizeof (mr->un_column[0]);
2171 	mr = Zalloc(rdsize);
2172 
2173 	if (meta_gettimeofday(&creation_time) == -1)
2174 		return (mdsyserror(ep, errno, NULL));
2175 	/*
2176 	 * initialize the top level mr_unit_t structure
2177 	 * setup the unit state to indicate whether to retain
2178 	 * any data currently on the metadevice or to clear it
2179 	 */
2180 	mr->c.un_type = MD_METARAID;
2181 	MD_SID(mr) = meta_getminor(raidnp->dev);
2182 	mr->c.un_size = rdsize;
2183 	mr->un_magic = RAID_UNMAGIC;
2184 	mr->un_state = raidp->state;
2185 	mr->un_timestamp = creation_time;
2186 	mr->un_origcolumncnt = orig_ncol;
2187 	mr->un_segsize = (uint_t)raidp->interlace;
2188 	if (raidp->hspnamep != NULL) {
2189 		mr->un_hsp_id = raidp->hspnamep->hsp;
2190 	} else {
2191 		mr->un_hsp_id = MD_HSP_NONE;
2192 	}
2193 	/*
2194 	 * setup original columns, saving start_block and
2195 	 * finding smallest size and maxio
2196 	 */
2197 	for (col = 0; (col < orig_ncol); ++col) {
2198 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2199 		mdname_t	*colnp = cp->colnamep;
2200 		mr_column_t	*mdc = &mr->un_column[col];
2201 		diskaddr_t	size;
2202 		uint_t		maxio;
2203 		mdcinfo_t	*cinfop;
2204 
2205 		/* setup state */
2206 		mdc->un_devstate = cp->state;
2207 
2208 		/* setup creation time */
2209 		mdc->un_devtimestamp = creation_time;
2210 
2211 		/* get start, size, and maxio */
2212 		if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
2213 		    MD_DISKADDR_ERROR)
2214 			goto out;
2215 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
2216 			goto out;
2217 		size -= mdc->un_orig_devstart;
2218 		if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
2219 			goto out;
2220 		maxio = cinfop->maxtransfer;
2221 
2222 		if (options & MDCMD_DOIT) {
2223 			/* store name in namespace */
2224 			if (add_key_name(sp, colnp, &keynlp, ep) != 0)
2225 				goto out;
2226 		}
2227 
2228 		/* setup column */
2229 		mdc->un_orig_key = colnp->key;
2230 		mdc->un_orig_dev = colnp->dev;
2231 		mdc->un_dev = mdc->un_orig_dev;
2232 		mdc->un_pwstart = mdc->un_orig_pwstart;
2233 		mdc->un_devstart = mdc->un_orig_devstart;
2234 		mdc->un_alt_dev = NODEV64;
2235 		mdc->un_alt_pwstart = 0;
2236 		mdc->un_alt_devstart = 0;
2237 		mdc->un_hs_id = 0;
2238 		if (mr->un_state == RUS_INIT)
2239 			mdc->un_devstate = RCS_INIT;
2240 		else
2241 			mdc->un_devstate = RCS_OKAY;
2242 
2243 		/* adjust for smallest disk */
2244 		if (disk_size == 0) {
2245 			disk_size = size;
2246 		} else if (size < disk_size) {
2247 			disk_size = size;
2248 		}
2249 		if (disk_maxio == 0) {
2250 			disk_maxio = maxio;
2251 		} else if (maxio < disk_maxio) {
2252 			disk_maxio = maxio;
2253 		}
2254 	}
2255 	assert(col == mr->un_origcolumncnt);
2256 
2257 	/*
2258 	 * before processing any of the attached column(s)
2259 	 * set up the composition of the metadevice for column
2260 	 * sizes and pre-write information
2261 	 */
2262 	mr->un_maxio = disk_maxio;	/* smallest maxio */
2263 	mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1));
2264 	pwes = mr->un_iosize;
2265 	if (raidp->pw_count)
2266 		mr->un_pwcnt = raidp->pw_count;
2267 	else
2268 		mr->un_pwcnt = PWCNT_MIN;
2269 	if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) {
2270 		(void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname);
2271 		goto out;
2272 	}
2273 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
2274 
2275 	/* now calculate the number of segments per column */
2276 	non_pw_blks = disk_size - mr->un_pwsize;	/* smallest disk */
2277 	if ((mr->un_pwsize > disk_size) ||
2278 	    (non_pw_blks < (diskaddr_t)mr->un_segsize)) {
2279 		(void) mdsyserror(ep, ENOSPC, raidnp->cname);
2280 		goto out;
2281 	}
2282 	mr->un_segsincolumn = non_pw_blks / mr->un_segsize;
2283 	column_size = mr->un_segsize * mr->un_segsincolumn;
2284 
2285 	/*
2286 	 * adjust the pw_cnt, pw_size, to fit into any fragmentation
2287 	 * left over after column_size has been computed
2288 	 */
2289 	mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2);
2290 	mr->un_pwcnt = mr->un_pwsize / pwes;
2291 	assert(mr->un_pwcnt >= PWCNT_MIN);
2292 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
2293 	assert((mr->un_pwsize + column_size) <= disk_size);
2294 
2295 	/*
2296 	 * calculate the actual block count available based on the
2297 	 * segment size and the number of segments per column ...
2298 	 * ... and adjust for the number of parity segments
2299 	 */
2300 	mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1);
2301 
2302 	if (raid_geom(raidp, mr, ep) != 0)
2303 		goto out;
2304 
2305 	create_flag = meta_check_devicesize(mr->c.un_total_blocks);
2306 
2307 	/*
2308 	 * now calculate the pre-write offset and update the column
2309 	 * structures to include the address of the individual pre-write
2310 	 * areas
2311 	 */
2312 	for (col = 0; (col < orig_ncol); ++col) {
2313 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2314 		mdname_t	*colnp = cp->colnamep;
2315 		mr_column_t	*mdc = &mr->un_column[col];
2316 		diskaddr_t	size;
2317 
2318 		/* get size */
2319 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
2320 			goto out;
2321 
2322 		/* adjust start and size by prewrite */
2323 		mdc->un_orig_pwstart = mdc->un_orig_devstart;
2324 		mdc->un_orig_devstart += mr->un_pwsize;
2325 		mdc->un_pwstart = mdc->un_orig_pwstart;
2326 		mdc->un_devstart = mdc->un_orig_devstart;
2327 
2328 		assert(size >= mdc->un_orig_devstart);
2329 		size -= mdc->un_orig_devstart;
2330 
2331 		/* make sure we still have something left */
2332 		assert(size >= column_size);
2333 	}
2334 
2335 	/* do concat cols */
2336 	mr->un_totalcolumncnt = mr->un_origcolumncnt;
2337 	assert(col == mr->un_origcolumncnt);
2338 	for (col = orig_ncol; (col < ncol); ++col) {
2339 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2340 		mdname_t	*colnp = cp->colnamep;
2341 		mr_column_t	*mdc = &mr->un_column[col];
2342 
2343 		/* attach column */
2344 		if (attach_raid_col(sp, raidnp, mr, mdc, colnp,
2345 		    cp->state, &keynlp, options, ep) != 0) {
2346 			goto out;
2347 		}
2348 	}
2349 	assert(mr->un_totalcolumncnt == ncol);
2350 
2351 	/* fill in the size of the raid */
2352 	if (options & MDCMD_UPDATE) {
2353 		raidp->common.size = mr->c.un_total_blocks;
2354 		raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
2355 	}
2356 
2357 	/* if we're not doing anything, return success */
2358 	if (! (options & MDCMD_DOIT)) {
2359 		rval = 0;	/* success */
2360 		goto out;
2361 	}
2362 
2363 	if ((mr->un_state & RUS_OKAY) &&
2364 	    (meta_raid_valid(raidp, mr) != 0)) {
2365 		(void) mderror(ep, MDE_RAID_INVALID, raidnp->cname);
2366 		goto out;
2367 	}
2368 
2369 	/* create raid */
2370 	(void) memset(&set_params, 0, sizeof (set_params));
2371 	/* did the user tell us to generate a large device? */
2372 	if (create_flag == MD_CRO_64BIT) {
2373 		mr->c.un_revision |= MD_64BIT_META_DEV;
2374 		set_params.options = MD_CRO_64BIT;
2375 	} else {
2376 		mr->c.un_revision &= ~MD_64BIT_META_DEV;
2377 		set_params.options = MD_CRO_32BIT;
2378 	}
2379 	set_params.mnum = MD_SID(mr);
2380 	set_params.size = mr->c.un_size;
2381 	set_params.mdp = (uintptr_t)mr;
2382 	MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum));
2383 	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
2384 	    raidnp->cname) != 0) {
2385 		(void) mdstealerror(ep, &set_params.mde);
2386 		goto out;
2387 	}
2388 	rval = 0;	/* success */
2389 
2390 	/* cleanup, return success */
2391 out:
2392 	Free(mr);
2393 	if (rval != 0) {
2394 		(void) del_key_names(sp, keynlp, NULL);
2395 	}
2396 	metafreenamelist(keynlp);
2397 	if ((rval == 0) && (options & MDCMD_DOIT)) {
2398 		if (invalidate_columns(sp, raidnp, ep) != 0)
2399 			rval = -1;
2400 		meta_invalidate_name(raidnp);
2401 	}
2402 	return (rval);
2403 }
2404 
2405 /*
2406  * initialize raid
2407  * NOTE: this functions is metainit(1m)'s command line parser!
2408  */
2409 int
2410 meta_init_raid(
2411 	mdsetname_t	**spp,
2412 	int		argc,
2413 	char		*argv[],
2414 	mdcmdopts_t	options,
2415 	md_error_t	*ep
2416 )
2417 {
2418 	char		*uname = argv[0];
2419 	mdname_t	*raidnp = NULL;
2420 	int		old_optind;
2421 	int		c;
2422 	md_raid_t	*raidp = NULL;
2423 	uint_t		ncol, col;
2424 	int		rval = -1;
2425 	md_set_desc	*sd;
2426 
2427 	/* get raid name */
2428 	assert(argc > 0);
2429 	if (argc < 1)
2430 		goto syntax;
2431 	if ((raidnp = metaname(spp, uname, META_DEVICE, ep)) == NULL)
2432 		goto out;
2433 	assert(*spp != NULL);
2434 
2435 	/*
2436 	 * Raid metadevice not allowed on multi-node diskset.
2437 	 */
2438 	if (! metaislocalset(*spp)) {
2439 		if ((sd = metaget_setdesc(*spp, ep)) == NULL)
2440 			goto out;
2441 		if (MD_MNSET_DESC(sd)) {
2442 			rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname,
2443 			    argc, argv);
2444 			goto out;
2445 		}
2446 	}
2447 
2448 	uname = raidnp->cname;
2449 	if (metachkmeta(raidnp, ep) != 0)
2450 		goto out;
2451 
2452 	if (!(options & MDCMD_NOLOCK)) {
2453 		/* grab set lock */
2454 		if (meta_lock(*spp, TRUE, ep) != 0)
2455 			goto out;
2456 
2457 		if (meta_check_ownership(*spp, ep) != 0)
2458 			goto out;
2459 	}
2460 
2461 	/* see if it exists already */
2462 	if (metagetmiscname(raidnp, ep) != NULL) {
2463 		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
2464 		    meta_getminor(raidnp->dev), uname);
2465 		goto out;
2466 	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
2467 		goto out;
2468 	} else {
2469 		mdclrerror(ep);
2470 	}
2471 	--argc, ++argv;
2472 
2473 	/* grab -r */
2474 	if ((argc < 1) || (strcmp(argv[0], "-r") != 0))
2475 		goto syntax;
2476 	--argc, ++argv;
2477 
2478 	/* parse general options */
2479 	optind = 0;
2480 	opterr = 0;
2481 	if (getopt(argc, argv, "") != -1)
2482 		goto options;
2483 
2484 	/* allocate raid */
2485 	raidp = Zalloc(sizeof (*raidp));
2486 
2487 	/* setup common */
2488 	raidp->common.namep = raidnp;
2489 	raidp->common.type = MD_METARAID;
2490 	raidp->state = RUS_INIT;
2491 
2492 	/* allocate and parse cols */
2493 	for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol)
2494 		;
2495 	raidp->cols.cols_len = ncol;
2496 	if (ncol != 0) {
2497 		raidp->cols.cols_val =
2498 		    Zalloc(ncol * sizeof (*raidp->cols.cols_val));
2499 	}
2500 	for (col = 0; ((argc > 0) && (col < ncol)); ++col) {
2501 		md_raidcol_t	*mdc = &raidp->cols.cols_val[col];
2502 		mdname_t	*colnp;
2503 
2504 		/* parse column name */
2505 		if ((colnp = metaname(spp, argv[0], UNKNOWN, ep)) == NULL)
2506 			goto out;
2507 		/* check for soft partitions */
2508 		if (meta_sp_issp(*spp, colnp, ep) != 0) {
2509 			/* check disks */
2510 			if (metachkcomp(colnp, ep) != 0)
2511 				goto out;
2512 		}
2513 		mdc->colnamep = colnp;
2514 		--argc, ++argv;
2515 	}
2516 
2517 	/* parse raid options */
2518 	old_optind = optind = 0;
2519 	opterr = 0;
2520 	while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) {
2521 		switch (c) {
2522 		case 'h':
2523 			if ((raidp->hspnamep = metahspname(spp, optarg,
2524 			    ep)) == NULL) {
2525 				goto out;
2526 			}
2527 
2528 			/*
2529 			 * Get out if the specified hotspare pool really
2530 			 * doesn't exist.
2531 			 */
2532 			if (raidp->hspnamep->hsp == MD_HSP_NONE) {
2533 				(void) mdhsperror(ep, MDE_INVAL_HSP,
2534 				    raidp->hspnamep->hsp, optarg);
2535 				goto out;
2536 			}
2537 			break;
2538 
2539 		case 'i':
2540 			if (parse_interlace(uname, optarg, &raidp->interlace,
2541 			    ep) != 0) {
2542 				goto out;
2543 			}
2544 			if (meta_raid_check_interlace(raidp->interlace,
2545 			    uname, ep))
2546 				goto out;
2547 			break;
2548 
2549 		case 'k':
2550 			raidp->state = RUS_OKAY;
2551 			break;
2552 
2553 		case 'o':
2554 			if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) ||
2555 			    ((int)raidp->orig_ncol < 0)) {
2556 				goto syntax;
2557 			}
2558 			if ((raidp->orig_ncol < MD_RAID_MIN) ||
2559 			    (raidp->orig_ncol > ncol)) {
2560 				rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname);
2561 				goto out;
2562 			}
2563 			break;
2564 		case 'w':
2565 			if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) ||
2566 			    ((int)raidp->pw_count < 0))
2567 				goto syntax;
2568 			if (((int)raidp->pw_count < PWCNT_MIN) ||
2569 			    ((int)raidp->pw_count > PWCNT_MAX)) {
2570 				rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname);
2571 				goto out;
2572 			}
2573 			break;
2574 		default:
2575 			argc += old_optind;
2576 			argv -= old_optind;
2577 			goto options;
2578 		}
2579 		old_optind = optind;
2580 	}
2581 	argc -= optind;
2582 	argv += optind;
2583 
2584 	/* we should be at the end */
2585 	if (argc != 0)
2586 		goto syntax;
2587 
2588 	/* default to all original columns */
2589 	if (raidp->orig_ncol == 0)
2590 		raidp->orig_ncol = ncol;
2591 
2592 	/* create raid */
2593 	if (meta_create_raid(*spp, raidp, options, ep) != 0)
2594 		goto out;
2595 	rval = 0;	/* success */
2596 
2597 	/* let em know */
2598 	if (options & MDCMD_PRINT) {
2599 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"),
2600 		    uname);
2601 		(void) fflush(stdout);
2602 	}
2603 	goto out;
2604 
2605 	/* syntax error */
2606 syntax:
2607 	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
2608 	goto out;
2609 
2610 	/* options error */
2611 options:
2612 	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
2613 	goto out;
2614 
2615 	/* cleanup, return error */
2616 out:
2617 	if (raidp != NULL)
2618 		meta_free_raid(raidp);
2619 	return (rval);
2620 }
2621 
2622 /*
2623  * reset RAIDs
2624  */
2625 int
2626 meta_raid_reset(
2627 	mdsetname_t	*sp,
2628 	mdname_t	*raidnp,
2629 	mdcmdopts_t	options,
2630 	md_error_t	*ep
2631 )
2632 {
2633 	md_raid_t	*raidp;
2634 	int		rval = -1;
2635 	int		col;
2636 
2637 	/* should have same set */
2638 	assert(sp != NULL);
2639 	assert((raidnp == NULL) ||
2640 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
2641 
2642 	/* reset all raids */
2643 	if (raidnp == NULL) {
2644 		mdnamelist_t	*raidnlp = NULL;
2645 		mdnamelist_t	*p;
2646 
2647 		/* for each raid */
2648 		rval = 0;
2649 		if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
2650 			return (-1);
2651 		for (p = raidnlp; (p != NULL); p = p->next) {
2652 			/* reset RAID */
2653 			raidnp = p->namep;
2654 			if (meta_raid_reset(sp, raidnp, options, ep) != 0) {
2655 				rval = -1;
2656 				break;
2657 			}
2658 		}
2659 
2660 		/* cleanup, return success */
2661 		metafreenamelist(raidnlp);
2662 		return (rval);
2663 	}
2664 
2665 	/* check name */
2666 	if (metachkmeta(raidnp, ep) != 0)
2667 		return (-1);
2668 
2669 	/* get unit structure */
2670 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
2671 		return (-1);
2672 
2673 	/* make sure nobody owns us */
2674 	if (MD_HAS_PARENT(raidp->common.parent)) {
2675 		return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev),
2676 		    raidnp->cname));
2677 	}
2678 
2679 	/* clear subdevices cache */
2680 	if (invalidate_columns(sp, raidnp, ep) != 0)
2681 		return (-1);
2682 
2683 	/* clear metadevice */
2684 	if (meta_reset(sp, raidnp, options, ep) != 0)
2685 		goto out;
2686 	rval = 0;	/* success */
2687 
2688 	/* let em know */
2689 	if (options & MDCMD_PRINT) {
2690 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is cleared\n"),
2691 		    raidnp->cname);
2692 		(void) fflush(stdout);
2693 	}
2694 
2695 	/* clear subdevices */
2696 	if (! (options & MDCMD_RECURSE))
2697 		goto out;
2698 
2699 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
2700 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2701 		mdname_t	*colnp = cp->colnamep;
2702 
2703 		/* only recurse on metadevices */
2704 		if (! metaismeta(colnp))
2705 			continue;
2706 
2707 		if (meta_reset_by_name(sp, colnp, options, ep) != 0)
2708 			rval = -1;
2709 	}
2710 
2711 	/* cleanup, return success */
2712 out:
2713 	meta_invalidate_name(raidnp);
2714 	return (rval);
2715 }
2716 
2717 /*
2718  * reports TRUE if any RAID component is in error
2719  */
2720 int
2721 meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names)
2722 {
2723 	mdnamelist_t	*nlp;
2724 	md_error_t	  status	= mdnullerror;
2725 	md_error_t	 *ep		= &status;
2726 	int		  any_errs	= FALSE;
2727 
2728 	for (nlp = raid_names; nlp; nlp = nlp->next) {
2729 		md_raid_t	*raidp;
2730 
2731 		if ((raidp = meta_get_raid(sp, nlp->namep, ep)) == NULL) {
2732 			any_errs |= TRUE;
2733 			goto out;
2734 		}
2735 		if (raidp->state != RUS_OKAY && raidp->state != RUS_INIT) {
2736 			any_errs |= TRUE;
2737 			goto out;
2738 		}
2739 	}
2740 out:
2741 	if (!mdisok(ep))
2742 		mdclrerror(ep);
2743 
2744 	return (any_errs);
2745 }
2746 /*
2747  * regen parity on a raid
2748  */
2749 int
2750 meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size,
2751 	md_error_t *ep)
2752 {
2753 	char			*miscname;
2754 	md_resync_ioctl_t	ri;
2755 
2756 	/* should have a set */
2757 	assert(sp != NULL);
2758 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
2759 
2760 	/* make sure we have a raid */
2761 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
2762 		return (-1);
2763 	if (strcmp(miscname, MD_RAID) != 0) {
2764 		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
2765 		    raidnp->cname));
2766 	}
2767 
2768 	/* start resync */
2769 	(void) memset(&ri, 0, sizeof (ri));
2770 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
2771 	ri.ri_mnum = meta_getminor(raidnp->dev);
2772 	ri.ri_copysize = size;
2773 	if (metaioctl(MD_IOCSETREGEN, &ri, &ri.mde, raidnp->cname) != 0)
2774 		return (mdstealerror(ep, &ri.mde));
2775 
2776 	/* return success */
2777 	return (0);
2778 }
2779 
2780 int
2781 meta_raid_check_component(
2782 	mdsetname_t	*sp,
2783 	mdname_t	*np,
2784 	md_dev64_t	mydevs,
2785 	md_error_t	*ep
2786 )
2787 {
2788 	md_raid_t	 *raid;
2789 	mdnm_params_t	nm;
2790 	md_getdevs_params_t	mgd;
2791 	side_t	sideno;
2792 	char	*miscname;
2793 	md_dev64_t	*mydev = NULL;
2794 	mdkey_t	key;
2795 	char	*pname, *t;
2796 	char	*ctd_name;
2797 	char	*devname;
2798 	int	len;
2799 	int	i;
2800 	int	rval = -1;
2801 
2802 	(void) memset(&nm, '\0', sizeof (nm));
2803 	if ((raid = meta_get_raid_common(sp, np, 0, ep)) == NULL)
2804 		return (-1);
2805 
2806 	if ((miscname = metagetmiscname(np, ep)) == NULL)
2807 		return (-1);
2808 
2809 	sideno = getmyside(sp, ep);
2810 
2811 	/* get count of underlying devices */
2812 
2813 	(void) memset(&mgd, '\0', sizeof (mgd));
2814 	MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
2815 	mgd.mnum = meta_getminor(np->dev);
2816 	mgd.cnt = 0;
2817 	mgd.devs = NULL;
2818 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
2819 		(void) mdstealerror(ep, &mgd.mde);
2820 		rval = 0;
2821 		goto out;
2822 	} else if (mgd.cnt <= 0) {
2823 		assert(mgd.cnt >= 0);
2824 		rval = 0;
2825 		goto out;
2826 	}
2827 
2828 	/*
2829 	 * Now get the data from the unit structure.
2830 	 * The compnamep stuff contains the data from
2831 	 * the namespace and we need the un_dev
2832 	 * from the unit structure.
2833 	 */
2834 	mydev = Zalloc(sizeof (*mydev) * mgd.cnt);
2835 	mgd.devs = (uintptr_t)mydev;
2836 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
2837 		(void) mdstealerror(ep, &mgd.mde);
2838 		rval = 0;
2839 		goto out;
2840 	} else if (mgd.cnt <= 0) {
2841 		assert(mgd.cnt >= 0);
2842 		rval = 0;
2843 		goto out;
2844 	}
2845 
2846 	for (i = 0; i < raid->orig_ncol; i++) {
2847 		md_raidcol_t	*colp = &raid->cols.cols_val[i];
2848 		mdname_t	*compnp = colp->colnamep;
2849 
2850 		if (mydevs == mydev[i]) {
2851 			/* Get the devname from the name space. */
2852 			if ((devname = meta_getnmentbydev(sp->setno, sideno,
2853 			    compnp->dev, NULL, NULL, &key, ep)) == NULL) {
2854 				goto out;
2855 			}
2856 
2857 			if (compnp->dev != meta_getminor(mydev[i])) {
2858 				/*
2859 				 * The minor numbers are different. Update
2860 				 * the namespace with the information from
2861 				 * the component.
2862 				 */
2863 
2864 				t = strrchr(devname, '/');
2865 				t++;
2866 				ctd_name = Strdup(t);
2867 
2868 				len = strlen(devname);
2869 				t = strrchr(devname, '/');
2870 				t++;
2871 				pname = Zalloc((len - strlen(t)) + 1);
2872 				(void) strncpy(pname, devname,
2873 				    (len - strlen(t)));
2874 
2875 				if (meta_update_namespace(sp->setno, sideno,
2876 				    ctd_name, mydev[i], key, pname,
2877 				    ep) != 0) {
2878 					goto out;
2879 				}
2880 			}
2881 			rval = 0;
2882 			break;
2883 		} /* End of if (mydevs == mydev[i]) */
2884 	} /* end of for loop */
2885 out:
2886 	if (pname != NULL)
2887 		Free(pname);
2888 	if (ctd_name != NULL)
2889 		Free(ctd_name);
2890 	if (devname != NULL)
2891 		Free(devname);
2892 	if (mydev != NULL)
2893 		Free(mydev);
2894 	return (rval);
2895 }
2896