xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_raid.c (revision 3a8ad3333e0bc7ad2934d6fcdb575f3499633aff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Just in case we're not in a build environment, make sure that
28  * TEXT_DOMAIN gets set to something.
29  */
30 #if !defined(TEXT_DOMAIN)
31 #define	TEXT_DOMAIN "SYS_TEST"
32 #endif
33 
34 /*
35  * RAID operations
36  */
37 
38 #include <stdlib.h>
39 #include <meta.h>
40 #include <sys/lvm/md_raid.h>
41 #include <sys/lvm/mdvar.h>
42 #include <sys/lvm/md_convert.h>
43 #include <stddef.h>
44 
45 /*
46  * FUNCTION:    meta_get_raid_names()
47  * INPUT:       sp      - the set name to get raid from
48  *              options - options from the command line
49  * OUTPUT:      nlpp    - list of all raid names
50  *              ep      - return error pointer
51  * RETURNS:     int     - -1 if error, 0 success
52  * PURPOSE:     returns a list of all raid in the metadb
53  *              for all devices in the specified set
54  */
55 int
56 meta_get_raid_names(
57 	mdsetname_t	*sp,
58 	mdnamelist_t	**nlpp,
59 	int		options,
60 	md_error_t	*ep
61 )
62 {
63 	return (meta_get_names(MD_RAID, sp, nlpp, options, ep));
64 }
65 
66 /*
67  * free raid unit
68  */
69 void
70 meta_free_raid(
71 	md_raid_t	*raidp
72 )
73 {
74 	if (raidp->cols.cols_val != NULL) {
75 		assert(raidp->cols.cols_len > 0);
76 		Free(raidp->cols.cols_val);
77 	}
78 	Free(raidp);
79 }
80 
81 /*
82  * get raid (common)
83  */
84 md_raid_t *
85 meta_get_raid_common(
86 	mdsetname_t		*sp,
87 	mdname_t		*raidnp,
88 	int			fast,
89 	md_error_t		*ep
90 )
91 {
92 	mddrivename_t		*dnp = raidnp->drivenamep;
93 	char			*miscname;
94 	mr_unit_t		*mr;
95 	md_raid_t		*raidp;
96 	uint_t			ncol;
97 	uint_t			col;
98 	md_resync_ioctl_t	ri;
99 
100 	/* must have set */
101 	assert(sp != NULL);
102 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
103 
104 	/* short circuit */
105 	if (dnp->unitp != NULL) {
106 		assert(dnp->unitp->type == MD_METARAID);
107 		return ((md_raid_t *)dnp->unitp);
108 	}
109 
110 	/* get miscname and unit */
111 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
112 		return (NULL);
113 	if (strcmp(miscname, MD_RAID) != 0) {
114 		(void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
115 		    raidnp->cname);
116 		return (NULL);
117 	}
118 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
119 		return (NULL);
120 	assert(mr->c.un_type == MD_METARAID);
121 
122 	/* allocate raid */
123 	raidp = Zalloc(sizeof (*raidp));
124 
125 	/* allocate columns */
126 	ncol = mr->un_totalcolumncnt;
127 	assert(ncol >= MD_RAID_MIN);
128 	raidp->cols.cols_len = ncol;
129 	raidp->cols.cols_val = Zalloc(raidp->cols.cols_len *
130 	    sizeof (*raidp->cols.cols_val));
131 
132 	/* get common info */
133 	raidp->common.namep = raidnp;
134 	raidp->common.type = mr->c.un_type;
135 	raidp->common.state = mr->c.un_status;
136 	raidp->common.capabilities = mr->c.un_capabilities;
137 	raidp->common.parent = mr->c.un_parent;
138 	raidp->common.size = mr->c.un_total_blocks;
139 	raidp->common.user_flags = mr->c.un_user_flags;
140 	raidp->common.revision = mr->c.un_revision;
141 
142 	/* get options */
143 	raidp->state = mr->un_state;
144 	raidp->timestamp = mr->un_timestamp;
145 	raidp->interlace = mr->un_segsize;
146 	raidp->orig_ncol = mr->un_origcolumncnt;
147 	raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
148 	raidp->pw_count = mr->un_pwcnt;
149 	assert(raidp->orig_ncol <= ncol);
150 	if ((mr->un_hsp_id != MD_HSP_NONE) &&
151 	    ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id,
152 	    ep)) == NULL)) {
153 		goto out;
154 	}
155 
156 	/* get columns, update unit state */
157 	for (col = 0; (col < ncol); ++col) {
158 		mr_column_t	*rcp = &mr->un_column[col];
159 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
160 
161 		/* get column name */
162 		mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep);
163 		if (mdrcp->colnamep == NULL)
164 			goto out;
165 
166 		/* override any start_blk */
167 #ifdef	DEBUG
168 		if (metagetstart(sp, mdrcp->colnamep, ep) !=
169 		    MD_DISKADDR_ERROR) {
170 			assert(mdrcp->colnamep->start_blk <=
171 			    rcp->un_orig_devstart);
172 		} else {
173 			mdclrerror(ep);
174 		}
175 #endif	/* DEBUG */
176 		mdrcp->colnamep->start_blk = rcp->un_orig_devstart;
177 
178 		/* if hotspared */
179 		if (HOTSPARED(mr, col)) {
180 			/* get hotspare name */
181 			mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key,
182 			    fast, ep);
183 			if (mdrcp->hsnamep == NULL)
184 				goto out;
185 
186 			if (getenv("META_DEBUG_START_BLK") != NULL) {
187 				if (metagetstart(sp, mdrcp->hsnamep, ep) ==
188 				    MD_DISKADDR_ERROR)
189 					mdclrerror(ep);
190 
191 				if ((mdrcp->hsnamep->start_blk == 0) &&
192 				    (rcp->un_hs_pwstart != 0))
193 					md_eprintf(dgettext(TEXT_DOMAIN,
194 					    "%s: suspected bad start block,"
195 					    " seems labelled [raid]\n"),
196 					    mdrcp->hsnamep->cname);
197 
198 				if ((mdrcp->hsnamep->start_blk > 0) &&
199 				    (rcp->un_hs_pwstart == 0))
200 					md_eprintf(dgettext(TEXT_DOMAIN,
201 					    "%s: suspected bad start block, "
202 					    " seems unlabelled [raid]\n"),
203 					    mdrcp->hsnamep->cname);
204 			}
205 
206 			/* override any start_blk */
207 			mdrcp->hsnamep->start_blk = rcp->un_hs_devstart;
208 		}
209 
210 		/* get state, flags, and timestamp */
211 		mdrcp->state = rcp->un_devstate;
212 		mdrcp->flags = rcp->un_devflags;
213 		mdrcp->timestamp = rcp->un_devtimestamp;
214 	}
215 
216 	/* get resync info */
217 	(void) memset(&ri, 0, sizeof (ri));
218 	ri.ri_mnum = meta_getminor(raidnp->dev);
219 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
220 	if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) {
221 		(void) mdstealerror(ep, &ri.mde);
222 		goto out;
223 	}
224 	raidp->resync_flags = ri.ri_flags;
225 	raidp->percent_dirty = ri.ri_percent_dirty;
226 	raidp->percent_done = ri.ri_percent_done;
227 
228 	/* cleanup, return success */
229 	Free(mr);
230 	dnp->unitp = (md_common_t *)raidp;
231 	return (raidp);
232 
233 	/* cleanup, return error */
234 out:
235 	Free(mr);
236 	meta_free_raid(raidp);
237 	return (NULL);
238 }
239 
240 /*
241  * get raid
242  */
243 md_raid_t *
244 meta_get_raid(
245 	mdsetname_t		*sp,
246 	mdname_t		*raidnp,
247 	md_error_t		*ep
248 )
249 {
250 	return (meta_get_raid_common(sp, raidnp, 0, ep));
251 }
252 
253 /*
254  * check raid for dev
255  */
256 static int
257 in_raid(
258 	mdsetname_t	*sp,
259 	mdname_t	*raidnp,
260 	mdname_t	*np,
261 	diskaddr_t	slblk,
262 	diskaddr_t	nblks,
263 	md_error_t	*ep
264 )
265 {
266 	md_raid_t	*raidp;
267 	uint_t		col;
268 
269 	/* should be in the same set */
270 	assert(sp != NULL);
271 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
272 
273 	/* get unit */
274 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
275 		return (-1);
276 
277 	/* look in columns */
278 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
279 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
280 		mdname_t	*colnp = cp->colnamep;
281 		diskaddr_t	col_sblk;
282 		int		err;
283 
284 		/* check same drive since metagetstart() can fail */
285 		if ((err = meta_check_samedrive(np, colnp, ep)) < 0)
286 			return (-1);
287 		else if (err == 0)
288 			continue;
289 
290 		/* check overlap */
291 		if ((col_sblk = metagetstart(sp, colnp, ep)) ==
292 		    MD_DISKADDR_ERROR)
293 			return (-1);
294 		if (meta_check_overlap(raidnp->cname, np, slblk, nblks,
295 		    colnp, col_sblk, -1, ep) != 0) {
296 			return (-1);
297 		}
298 	}
299 
300 	/* return success */
301 	return (0);
302 }
303 
304 /*
305  * check to see if we're in a raid
306  */
307 int
308 meta_check_inraid(
309 	mdsetname_t	*sp,
310 	mdname_t	*np,
311 	diskaddr_t	slblk,
312 	diskaddr_t	nblks,
313 	md_error_t	*ep
314 )
315 {
316 	mdnamelist_t	*raidnlp = NULL;
317 	mdnamelist_t	*p;
318 	int		rval = 0;
319 
320 	/* should have a set */
321 	assert(sp != NULL);
322 
323 	/* for each raid */
324 	if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
325 		return (-1);
326 	for (p = raidnlp; (p != NULL); p = p->next) {
327 		mdname_t	*raidnp = p->namep;
328 
329 		/* check raid */
330 		if (in_raid(sp, raidnp, np, slblk, nblks, ep) != 0) {
331 			rval = -1;
332 			break;
333 		}
334 	}
335 
336 	/* cleanup, return success */
337 	metafreenamelist(raidnlp);
338 	return (rval);
339 }
340 
341 /*
342  * check column
343  */
344 int
345 meta_check_column(
346 	mdsetname_t	*sp,
347 	mdname_t	*np,
348 	md_error_t	*ep
349 )
350 {
351 	mdchkopts_t	options = (MDCHK_ALLOW_MDDB);
352 
353 	/* check for soft partitions */
354 	if (meta_sp_issp(sp, np, ep) != 0) {
355 		/* make sure we have a disk */
356 		if (metachkcomp(np, ep) != 0)
357 			return (-1);
358 	}
359 
360 	/* check to ensure that it is not already in use */
361 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
362 		return (-1);
363 	}
364 
365 	/* make sure it is in the set */
366 	if (meta_check_inset(sp, np, ep) != 0)
367 		return (-1);
368 
369 	/* make sure its not in a metadevice */
370 	if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
371 		return (-1);
372 
373 	/* return success */
374 	return (0);
375 }
376 
377 /*
378  * print raid
379  */
380 static int
381 raid_print(
382 	md_raid_t	*raidp,
383 	char		*fname,
384 	FILE		*fp,
385 	mdprtopts_t	options,
386 	md_error_t	*ep
387 )
388 {
389 	uint_t		col;
390 	int		rval = -1;
391 
392 
393 	if (options & PRINT_LARGEDEVICES) {
394 		if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) {
395 			rval = 0;
396 			goto out;
397 		}
398 	}
399 
400 	if (options & PRINT_FN) {
401 		if ((raidp->common.revision & MD_FN_META_DEV) == 0) {
402 			rval = 0;
403 			goto out;
404 		}
405 	}
406 
407 	/* print name and -r */
408 	if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF)
409 		goto out;
410 
411 	/*
412 	 * Print columns. Always print the full path.
413 	 */
414 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
415 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
416 
417 		if (fprintf(fp, " %s", mdrcp->colnamep->rname) == EOF)
418 			goto out;
419 	}
420 
421 	if (fprintf(fp, " -k") == EOF)
422 		goto out;
423 
424 	/* print options */
425 	if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF)
426 		goto out;
427 
428 	if (raidp->pw_count != PWCNT_MIN)
429 		if (fprintf(fp, " -w %d", raidp->pw_count) == EOF)
430 			goto out;
431 
432 	if (raidp->hspnamep != NULL) {
433 		if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF)
434 			goto out;
435 	}
436 	if (raidp->orig_ncol != raidp->cols.cols_len) {
437 		assert(raidp->orig_ncol < raidp->cols.cols_len);
438 		if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF)
439 			goto out;
440 	}
441 
442 	/* terminate last line */
443 	if (fprintf(fp, "\n") == EOF)
444 		goto out;
445 
446 	/* success */
447 	rval = 0;
448 
449 	/* cleanup, return error */
450 out:
451 	if (rval != 0)
452 		(void) mdsyserror(ep, errno, fname);
453 	return (rval);
454 }
455 
456 static int
457 find_resyncing_column(
458 	md_raid_t *raidp
459 )
460 {
461 	int		col;
462 
463 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
464 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
465 		if (cp->state & RCS_RESYNC)
466 			return (col);
467 	}
468 
469 	/* No resyncing columns */
470 	return (-1);
471 }
472 
473 /*
474  * convert raid state to name
475  */
476 char *
477 raid_state_to_name(
478 	md_raid_t	*raidp,
479 	md_timeval32_t	*tvp,
480 	uint_t		tstate /* Errored tstate flags */
481 )
482 {
483 
484 	/* grab time */
485 	if (tvp != NULL)
486 		*tvp = raidp->timestamp;
487 
488 	/*
489 	 * If the device has a transient error state (due to it being DR'ed or
490 	 * failed) and there has been no I/O to it (the actual device is still
491 	 * marked as 'Okay') then we cannot know what the state is or what
492 	 * action to take on it. Therefore report the device as 'Unavailable'.
493 	 * A subsequent I/O to the device will cause the 'Okay' status to
494 	 * disappear if the device is actually gone and then we will print out
495 	 * the appropriate status.  The MD_INACCESSIBLE state is only set
496 	 * on the raid when we open it or probe it.  One the raid is open
497 	 * then we will just have regular error status on the device.
498 	 */
499 	if (tstate & MD_INACCESSIBLE) {
500 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
501 	}
502 
503 	/* resyncing */
504 	if (find_resyncing_column(raidp) >= 0)
505 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
506 
507 	/* everything else */
508 	switch (raidp->state) {
509 		case RUS_INIT :
510 			return (dgettext(TEXT_DOMAIN, "Initializing"));
511 		case RUS_OKAY :
512 			return (dgettext(TEXT_DOMAIN, "Okay"));
513 		case RUS_ERRED :
514 		/*FALLTHROUGH*/
515 		case RUS_LAST_ERRED :
516 			return (dgettext(TEXT_DOMAIN, "Needs Maintenance"));
517 		case RUS_DOI :
518 			return (dgettext(TEXT_DOMAIN, "Initialization Failed"));
519 		case RUS_REGEN :
520 			return (dgettext(TEXT_DOMAIN, "Regen"));
521 		default :
522 			return (dgettext(TEXT_DOMAIN, "invalid"));
523 	} /* switch */
524 }
525 
526 static int
527 find_erred_column(md_raid_t *raidp, rcs_state_t state)
528 {
529 	int		col;
530 
531 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
532 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
533 		if (cp->state & state)
534 			return (col);
535 	}
536 
537 	/* No erred columns */
538 	return (-1);
539 }
540 
541 /*
542  * convert raid state to repair action
543  */
544 char *
545 raid_state_to_action(md_raid_t *raidp)
546 {
547 	static char	emsg[1024];
548 	mdname_t	*raidnp = raidp->common.namep;
549 	int		err_col;
550 
551 	/* first check for full init failure */
552 	if (raidp->state & RUS_DOI) {
553 		(void) snprintf(emsg, sizeof (emsg),
554 		    "metaclear -f %s", raidnp->cname);
555 		return (emsg);
556 	}
557 
558 	/* replace errored or init errored raid column */
559 	if ((err_col = find_erred_column(raidp,
560 	    (RCS_ERRED | RCS_INIT_ERRED))) >= 0) {
561 		mdname_t	*colnp;
562 
563 		/* get column with error */
564 		assert(err_col < raidp->cols.cols_len);
565 		colnp = raidp->cols.cols_val[err_col].colnamep;
566 		(void) snprintf(emsg, sizeof (emsg),
567 		    "metareplace %s%s %s <%s>",
568 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
569 		    raidnp->cname, colnp->cname,
570 		    dgettext(TEXT_DOMAIN, "new device"));
571 		return (emsg);
572 	}
573 
574 
575 	/* replace last errored raid column */
576 	if ((err_col = find_erred_column(raidp, RCS_LAST_ERRED)) >= 0) {
577 		mdname_t	*colnp;
578 
579 		assert(err_col < raidp->cols.cols_len);
580 		colnp = raidp->cols.cols_val[err_col].colnamep;
581 		(void) snprintf(emsg, sizeof (emsg),
582 		    "metareplace %s %s %s <%s>",
583 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
584 		    raidnp->cname, colnp->cname,
585 		    dgettext(TEXT_DOMAIN, "new device"));
586 		return (emsg);
587 	}
588 
589 	/* OK */
590 	return (NULL);
591 }
592 
593 /*
594  * get printable raid column state
595  */
596 char *
597 raid_col_state_to_name(
598 	md_raidcol_t	*colp,
599 	md_timeval32_t	*tvp,
600 	uint_t		tstate
601 )
602 {
603 	/* grab time */
604 	if (tvp != NULL)
605 		*tvp = colp->timestamp;
606 
607 	if (tstate != 0) {
608 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
609 	}
610 
611 	/* everything else */
612 	switch (colp->state) {
613 	case RCS_INIT:
614 		return (dgettext(TEXT_DOMAIN, "Initializing"));
615 
616 	case RCS_OKAY:
617 		return (dgettext(TEXT_DOMAIN, "Okay"));
618 
619 	case RCS_INIT_ERRED:
620 	/*FALLTHROUGH*/
621 	case RCS_ERRED:
622 		return (dgettext(TEXT_DOMAIN, "Maintenance"));
623 
624 	case RCS_LAST_ERRED:
625 		return (dgettext(TEXT_DOMAIN, "Last Erred"));
626 
627 	case RCS_RESYNC:
628 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
629 
630 	default:
631 		return (dgettext(TEXT_DOMAIN, "Unknown"));
632 	}
633 }
634 
635 /*
636  * print raid column
637  */
638 static int
639 display_raid_device_info(
640 	mdsetname_t	*sp,
641 	md_raidcol_t	*colp,
642 	char		*fname,
643 	FILE		*fp,
644 	mdprtopts_t	options,
645 	int		print_len,
646 	uint_t		top_tstate, /* Errored tstate flags */
647 	md_error_t	*ep
648 )
649 {
650 	mdname_t	*namep = ((colp->hsnamep != NULL) ?
651 	    colp->hsnamep : colp->colnamep);
652 	char 		*devid = "";
653 	char		*cname = colp->colnamep->cname;
654 	diskaddr_t	start_blk;
655 	int		has_mddb;
656 	char		*has_mddb_str;
657 	char		*col_state;
658 	md_timeval32_t	tv;
659 	char		*hsname = ((colp->hsnamep != NULL) ?
660 	    colp->hsnamep->cname : "");
661 	int		rval = -1;
662 	mdname_t	*didnp = NULL;
663 	ddi_devid_t	dtp;
664 	uint_t		tstate = 0;
665 
666 	/* get info */
667 	if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR)
668 		return (-1);
669 	if ((has_mddb = metahasmddb(sp, namep, ep)) < 0)
670 		return (-1);
671 	if (has_mddb)
672 		has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
673 	else
674 		has_mddb_str = dgettext(TEXT_DOMAIN, "No");
675 
676 	if (metaismeta(namep)) {
677 		if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
678 			return (-1);
679 		col_state = raid_col_state_to_name(colp, &tv,
680 		    tstate & MD_DEV_ERRORED);
681 	} else {
682 		/*
683 		 * if top_tstate is set, that implies that you have
684 		 * a ctd type device with an unavailable metadevice
685 		 * on top of it. If so, print a - for it's state
686 		 */
687 		if (top_tstate != 0)
688 			col_state = "-";
689 		else
690 			col_state = raid_col_state_to_name(colp, &tv, tstate);
691 	}
692 
693 	/* populate the key in the name_p structure */
694 	if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL)
695 		return (-1);
696 
697 	/* determine if devid does NOT exist */
698 	if (options & PRINT_DEVID) {
699 		if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
700 		    didnp->key, ep)) == NULL)
701 			devid = dgettext(TEXT_DOMAIN, "No ");
702 		else {
703 			devid = dgettext(TEXT_DOMAIN, "Yes");
704 			free(dtp);
705 		}
706 	}
707 	/* print column */
708 	/*
709 	 * Building a format string on the fly that will
710 	 * be used in (f)printf. This allows the length
711 	 * of the ctd to vary from small to large without
712 	 * looking horrible.
713 	 */
714 	if (! (options & PRINT_TIMES)) {
715 		if (fprintf(fp,
716 		    "\t%-*.*s %8lld     %5.5s %12.12s %5.5s %s\n",
717 		    print_len, print_len, cname, start_blk, has_mddb_str,
718 		    col_state, devid, hsname) == EOF) {
719 			goto out;
720 		}
721 	} else {
722 		char	*timep = meta_print_time(&tv);
723 
724 		if (fprintf(fp,
725 		    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
726 		    print_len, cname, start_blk, has_mddb_str,
727 		    col_state, devid, hsname, timep) == EOF) {
728 			goto out;
729 		}
730 	}
731 
732 	/* success */
733 	rval = 0;
734 
735 	/* cleanup, return error */
736 out:
737 	if (rval != 0)
738 		(void) mdsyserror(ep, errno, fname);
739 
740 	return (rval);
741 }
742 
743 /*
744  * print raid options
745  */
746 int
747 meta_print_raid_options(
748 	mdhspname_t	*hspnamep,
749 	char		*fname,
750 	FILE		*fp,
751 	md_error_t	*ep
752 )
753 {
754 	char		*hspname = ((hspnamep != NULL) ? hspnamep->hspname :
755 	    dgettext(TEXT_DOMAIN, "none"));
756 	int		rval = -1;
757 
758 	/* print options */
759 	if (fprintf(fp, dgettext(TEXT_DOMAIN,
760 	    "    Hot spare pool: %s\n"), hspname) == EOF) {
761 		goto out;
762 	}
763 
764 	/* success */
765 	rval = 0;
766 
767 	/* cleanup, return error */
768 out:
769 	if (rval != 0)
770 		(void) mdsyserror(ep, errno, fname);
771 	return (rval);
772 }
773 
774 /*
775  * report raid
776  */
777 static int
778 raid_report(
779 	mdsetname_t	*sp,
780 	md_raid_t	*raidp,
781 	char		*fname,
782 	FILE		*fp,
783 	mdprtopts_t	options,
784 	md_error_t	*ep
785 )
786 {
787 	char		*p;
788 	uint_t		ncol = raidp->cols.cols_len;
789 	uint_t		orig_ncol = raidp->orig_ncol;
790 	diskaddr_t	column_size = raidp->column_size;
791 	char		*raid_state;
792 	md_timeval32_t	tv;
793 	char		*timep;
794 	uint_t		col;
795 	int		rval = -1;
796 	int		len = 0;
797 	uint_t		tstate = 0;
798 
799 	if (options & PRINT_LARGEDEVICES) {
800 		if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) {
801 			rval = 0;
802 			goto out;
803 		}
804 	}
805 
806 	if (options & PRINT_FN) {
807 		if ((raidp->common.revision & MD_FN_META_DEV) == 0) {
808 			rval = 0;
809 			goto out;
810 		}
811 	}
812 
813 	/* print header */
814 	if (options & PRINT_HEADER) {
815 		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"),
816 		    raidp->common.namep->cname) == EOF) {
817 			goto out;
818 		}
819 
820 	}
821 
822 	/* print state */
823 	if (metaismeta(raidp->common.namep)) {
824 		if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0)
825 			return (-1);
826 	}
827 	tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */
828 	raid_state = raid_state_to_name(raidp, &tv, tstate);
829 	if (options & PRINT_TIMES) {
830 		timep = meta_print_time(&tv);
831 	} else {
832 		timep = "";
833 	}
834 
835 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
836 	    raid_state, timep) == EOF) {
837 		goto out;
838 	}
839 
840 	/*
841 	 * Display recovery action if we're marked in the Unavailable state.
842 	 */
843 	if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) {
844 		/* print what to do */
845 		if (tstate & MD_INACCESSIBLE) {
846 			char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */
847 
848 			if (metaislocalset(sp)) {
849 				sname[0] = '\0';
850 			} else {
851 				(void) snprintf(sname, MD_MAX_SETNAME + 3,
852 				    "-s %s", sp->setname);
853 			}
854 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
855 			    "    Invoke: metastat -i %s\n"), sname) == EOF) {
856 				goto out;
857 			}
858 		} else if ((p = raid_state_to_action(raidp)) != NULL) {
859 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
860 			    "    Invoke: %s\n"), p) == EOF) {
861 				goto out;
862 			}
863 		}
864 
865 		/* resync status */
866 		if (raidp->resync_flags & MD_RI_INPROGRESS) {
867 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
868 			    "    Resync in progress: %2d.%1d%% done\n"),
869 			    raidp->percent_done/10,
870 			    raidp->percent_done % 10) == EOF) {
871 				goto out;
872 			}
873 		} else if (raidp->resync_flags & MD_GROW_INPROGRESS) {
874 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
875 			    "    Initialization in progress: %2d.%1d%% "
876 			    "done\n"),
877 			    raidp->percent_done/10,
878 			    raidp->percent_done % 10) == EOF) {
879 				goto out;
880 			}
881 		} else if (raidp->state & RUS_REGEN) {
882 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
883 			    "    Parity regeneration in progress: %2d.%1d%% "
884 			    "done\n"),
885 			    raidp->percent_done/10,
886 			    raidp->percent_done % 10) == EOF) {
887 				goto out;
888 			}
889 		}
890 	}
891 
892 	/* print hotspare pool */
893 	if (raidp->hspnamep != NULL) {
894 		if (meta_print_raid_options(raidp->hspnamep,
895 		    fname, fp, ep) != 0) {
896 			return (-1);
897 		}
898 	}
899 
900 	/* print interlace */
901 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Interlace: %lld blocks\n"),
902 	    raidp->interlace) == EOF) {
903 		goto out;
904 	}
905 
906 	/* print size */
907 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
908 	    raidp->common.size,
909 	    meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) {
910 		goto out;
911 	}
912 
913 	/* MD_DEBUG stuff */
914 	if (options & PRINT_DEBUG) {
915 		mdname_t	*raidnp = raidp->common.namep;
916 		mr_unit_t	*mr;
917 
918 		/* get additional info */
919 		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
920 			return (-1);
921 		assert(mr->c.un_type == MD_METARAID);
922 
923 		/* print prewrite count and size */
924 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
925 		    "    Prewrite Count: %u slots\n"),
926 		    mr->un_pwcnt) == EOF) {
927 			Free(mr);
928 			goto out;
929 		}
930 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
931 		    "    Prewrite Slot Size: %u blocks\n"),
932 		    (mr->un_pwsize / mr->un_pwcnt)) == EOF) {
933 			Free(mr);
934 			goto out;
935 		}
936 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
937 		    "    Prewrite Total Size: %u blocks\n"),
938 		    mr->un_pwsize) == EOF) {
939 			Free(mr);
940 			goto out;
941 		}
942 		Free(mr);
943 	}
944 
945 	/* print original devices */
946 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF)
947 		goto out;
948 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
949 	    column_size * (orig_ncol - 1),
950 	    meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE))
951 	    == EOF) {
952 		goto out;
953 	}
954 	/*
955 	 * Building a format string on the fly that will
956 	 * be used in (f)printf. This allows the length
957 	 * of the ctd to vary from small to large without
958 	 * looking horrible.
959 	 */
960 	for (col = 0; (col < orig_ncol); ++col) {
961 		len = max(len,
962 		    strlen(raidp->cols.cols_val[col].colnamep->cname));
963 	}
964 
965 	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
966 	len += 2;
967 
968 	if (! (options & PRINT_TIMES)) {
969 		if (fprintf(fp,
970 		    "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s  %s\n",
971 		    len, len,
972 		    dgettext(TEXT_DOMAIN, "Device"),
973 		    dgettext(TEXT_DOMAIN, "Start Block"),
974 		    dgettext(TEXT_DOMAIN, "Dbase"),
975 		    dgettext(TEXT_DOMAIN, "State"),
976 		    dgettext(TEXT_DOMAIN, "Reloc"),
977 		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
978 			goto out;
979 		}
980 	} else {
981 		if (fprintf(fp,
982 		    "\t%-*s  %5s  %-5s  %-11s  %-5s   %-9s  %s\n",
983 		    len,
984 		    dgettext(TEXT_DOMAIN, "Device"),
985 		    dgettext(TEXT_DOMAIN, "Start"),
986 		    dgettext(TEXT_DOMAIN, "Dbase"),
987 		    dgettext(TEXT_DOMAIN, "State"),
988 		    dgettext(TEXT_DOMAIN, "Reloc"),
989 		    dgettext(TEXT_DOMAIN, "Hot Spare"),
990 		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
991 			goto out;
992 		}
993 	}
994 	for (col = 0; (col < orig_ncol); ++col) {
995 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
996 
997 		if (display_raid_device_info(sp, mdrcp, fname, fp, options,
998 		    len, tstate, ep) != 0) {
999 			return (-1);
1000 		}
1001 	}
1002 
1003 	/* print concatenated devices */
1004 	if (col < ncol) {
1005 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
1006 		    "Concatenated Devices:\n")) == EOF) {
1007 			goto out;
1008 		}
1009 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
1010 		    "    Size: %lld blocks (%s)\n"),
1011 		    column_size * (ncol - orig_ncol),
1012 		    meta_number_to_string(column_size * (ncol - orig_ncol),
1013 		    DEV_BSIZE))
1014 		    == EOF) {
1015 			goto out;
1016 		}
1017 		/*
1018 		 * This allows the length
1019 		 * of the ctd to vary from small to large without
1020 		 * looking horrible.
1021 		 */
1022 		if (! (options & PRINT_TIMES)) {
1023 			if (fprintf(fp,
1024 			    "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
1025 			    len, len,
1026 			    dgettext(TEXT_DOMAIN, "Device"),
1027 			    dgettext(TEXT_DOMAIN, "Start Block"),
1028 			    dgettext(TEXT_DOMAIN, "Dbase"),
1029 			    dgettext(TEXT_DOMAIN, "State"),
1030 			    dgettext(TEXT_DOMAIN, "Reloc"),
1031 			    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
1032 				goto out;
1033 			}
1034 		} else {
1035 			if (fprintf(fp,
1036 			    "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
1037 			    len,
1038 			    dgettext(TEXT_DOMAIN, "Device"),
1039 			    dgettext(TEXT_DOMAIN, "Start"),
1040 			    dgettext(TEXT_DOMAIN, "Dbase"),
1041 			    dgettext(TEXT_DOMAIN, "State"),
1042 			    dgettext(TEXT_DOMAIN, "Reloc"),
1043 			    dgettext(TEXT_DOMAIN, "Hot Spare"),
1044 			    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
1045 				goto out;
1046 			}
1047 		}
1048 		assert(col == orig_ncol);
1049 		for (/* void */; (col < ncol); col++) {
1050 			md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
1051 
1052 			if (display_raid_device_info(sp, mdrcp, fname, fp,
1053 			    options, len, tstate, ep) != 0) {
1054 				return (-1);
1055 			}
1056 		}
1057 	}
1058 
1059 	/* add extra line */
1060 	if (fprintf(fp, "\n") == EOF)
1061 		goto out;
1062 
1063 	/* success */
1064 	rval = 0;
1065 
1066 	/* cleanup, return error */
1067 out:
1068 	if (rval != 0)
1069 		(void) mdsyserror(ep, errno, fname);
1070 	return (rval);
1071 }
1072 
1073 /*
1074  * print/report raid
1075  */
1076 int
1077 meta_raid_print(
1078 	mdsetname_t	*sp,
1079 	mdname_t	*raidnp,
1080 	mdnamelist_t	**nlpp,
1081 	char		*fname,
1082 	FILE		*fp,
1083 	mdprtopts_t	options,
1084 	md_error_t	*ep
1085 )
1086 {
1087 	md_raid_t	*raidp;
1088 	int		col;
1089 
1090 	/* should have same set */
1091 	assert(sp != NULL);
1092 	assert((raidnp == NULL) ||
1093 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
1094 
1095 	/* print all raids */
1096 	if (raidnp == NULL) {
1097 		mdnamelist_t	*nlp = NULL;
1098 		mdnamelist_t	*p;
1099 		int		cnt;
1100 		int		rval = 0;
1101 
1102 		/* get list */
1103 		if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0)
1104 			return (-1);
1105 		else if (cnt == 0)
1106 			return (0);
1107 
1108 		/* recurse */
1109 		for (p = nlp; (p != NULL); p = p->next) {
1110 			mdname_t	*np = p->namep;
1111 
1112 			if (meta_raid_print(sp, np, nlpp, fname, fp,
1113 			    options, ep) != 0)
1114 				rval = -1;
1115 		}
1116 
1117 		/* cleanup, return success */
1118 		metafreenamelist(nlp);
1119 		return (rval);
1120 	}
1121 
1122 	/* get unit structure */
1123 	if ((raidp = meta_get_raid_common(sp, raidnp,
1124 	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
1125 		return (-1);
1126 
1127 	/* check for parented */
1128 	if ((! (options & PRINT_SUBDEVS)) &&
1129 	    (MD_HAS_PARENT(raidp->common.parent))) {
1130 		return (0);
1131 	}
1132 
1133 	/* print appropriate detail */
1134 	if (options & PRINT_SHORT) {
1135 		if (raid_print(raidp, fname, fp, options, ep) != 0)
1136 			return (-1);
1137 	} else {
1138 		if (raid_report(sp, raidp, fname, fp, options, ep) != 0)
1139 			return (-1);
1140 	}
1141 
1142 	/* Recurse on components that are metadevices */
1143 	for (col = 0; col < raidp->cols.cols_len; ++col) {
1144 		md_raidcol_t	*colp = &raidp->cols.cols_val[col];
1145 		mdname_t	*namep = colp->colnamep;
1146 
1147 		if ((metaismeta(namep)) &&
1148 		    (meta_print_name(sp, namep, nlpp, fname, fp,
1149 		    (options | PRINT_HEADER | PRINT_SUBDEVS),
1150 		    NULL, ep) != 0)) {
1151 			return (-1);
1152 		}
1153 	}
1154 
1155 	return (0);
1156 }
1157 
1158 /*
1159  * adjust raid geometry
1160  */
1161 static int
1162 adjust_geom(
1163 	mdname_t	*raidnp,
1164 	mdname_t	*colnp,
1165 	mr_unit_t	*mr,
1166 	md_error_t	*ep
1167 )
1168 {
1169 	uint_t		round_cyl = 1;
1170 	mdgeom_t	*geomp;
1171 
1172 	/* get reinstructs */
1173 	if ((geomp = metagetgeom(colnp, ep)) == NULL)
1174 		return (-1);
1175 
1176 	/* adjust geometry */
1177 	if (meta_adjust_geom((md_unit_t *)mr, raidnp, geomp->write_reinstruct,
1178 	    geomp->read_reinstruct, round_cyl, ep) != 0)
1179 		return (-1);
1180 
1181 	/* return success */
1182 	return (0);
1183 }
1184 
1185 /*
1186  * add another column to the raid unit structure
1187  */
1188 static int
1189 attach_raid_col(
1190 	mdsetname_t	*sp,
1191 	mdname_t	*raidnp,
1192 	mr_unit_t	*mr,
1193 	mr_column_t	*mdc,
1194 	mdname_t	*colnp,
1195 	rcs_state_t	state,
1196 	mdnamelist_t	**keynlpp,
1197 	mdcmdopts_t	options,
1198 	md_error_t	*ep
1199 )
1200 {
1201 	diskaddr_t	column_size = mr->un_segsize * mr->un_segsincolumn;
1202 	diskaddr_t	size;
1203 	uint_t		 maxio;
1204 	mdcinfo_t	*cinfop;
1205 	md_timeval32_t	tmp_time;
1206 
1207 	/* setup state and timestamp */
1208 	mdc->un_devstate = state;
1209 	if (meta_gettimeofday(&tmp_time) == -1)
1210 		return (mdsyserror(ep, errno, NULL));
1211 
1212 	mdc->un_devtimestamp = tmp_time;
1213 	/* get start, size, and maxio */
1214 	if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
1215 	    MD_DISKADDR_ERROR)
1216 		return (-1);
1217 	if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
1218 		return (-1);
1219 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
1220 		return (-1);
1221 	maxio = cinfop->maxtransfer;
1222 
1223 	/* adjust start and size by prewrite */
1224 	mdc->un_orig_pwstart = mdc->un_orig_devstart;
1225 	mdc->un_orig_devstart += mr->un_pwsize;
1226 
1227 	/* make sure we still have something left */
1228 	if ((mdc->un_orig_devstart >= size) ||
1229 	    ((size - mdc->un_orig_devstart) < column_size)) {
1230 		return (mdsyserror(ep, ENOSPC, colnp->cname));
1231 	}
1232 	size -= mdc->un_orig_devstart;
1233 	if (maxio < mr->un_maxio) {
1234 		return (mdcomperror(ep, MDE_MAXIO,
1235 		    meta_getminor(raidnp->dev), colnp->dev, colnp->cname));
1236 	}
1237 
1238 	if (options & MDCMD_DOIT) {
1239 		/* store name in namespace */
1240 		if (add_key_name(sp, colnp, keynlpp, ep) != 0)
1241 			return (-1);
1242 	}
1243 
1244 	/* setup column */
1245 	mdc->un_orig_dev = colnp->dev;
1246 	mdc->un_orig_key = colnp->key;
1247 	mdc->un_dev = colnp->dev;
1248 	mdc->un_pwstart = mdc->un_orig_pwstart;
1249 	mdc->un_devstart = mdc->un_orig_devstart;
1250 	mdc->un_alt_dev = NODEV64;
1251 	mdc->un_alt_pwstart = 0;
1252 	mdc->un_alt_devstart = 0;
1253 	mdc->un_hs_id = 0;
1254 
1255 	/* add the size (we use) of the device to the total */
1256 	mr->c.un_actual_tb += column_size;
1257 
1258 	/* adjust geometry */
1259 	if (adjust_geom(raidnp, colnp, mr, ep) != 0)
1260 		return (-1);
1261 
1262 	/* count column */
1263 	mr->un_totalcolumncnt++;
1264 
1265 	/* return success */
1266 	return (0);
1267 }
1268 
1269 /*
1270  * invalidate column names
1271  */
1272 static int
1273 invalidate_columns(
1274 	mdsetname_t	*sp,
1275 	mdname_t	*raidnp,
1276 	md_error_t	*ep
1277 )
1278 {
1279 	md_raid_t	*raidp;
1280 	uint_t		col;
1281 
1282 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
1283 		return (-1);
1284 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
1285 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
1286 		mdname_t	*colnp = cp->colnamep;
1287 
1288 		meta_invalidate_name(colnp);
1289 	}
1290 	return (0);
1291 }
1292 
1293 /*
1294  * attach columns to raid
1295  */
1296 int
1297 meta_raid_attach(
1298 	mdsetname_t		*sp,
1299 	mdname_t		*raidnp,
1300 	mdnamelist_t		*colnlp,
1301 	mdcmdopts_t		options,
1302 	md_error_t		*ep
1303 )
1304 {
1305 	uint_t			concat_cnt = 0;
1306 	mdnamelist_t		*p;
1307 	mr_unit_t		*old_mr;
1308 	mr_unit_t		*new_mr;
1309 	size_t			old_rusize;
1310 	size_t			new_rusize;
1311 	mdnamelist_t		*keynlp = NULL;
1312 	md_grow_params_t	mgp;
1313 	int			rval = -1;
1314 	int			create_flag = MD_CRO_32BIT;
1315 
1316 	/* should have a set */
1317 	assert(sp != NULL);
1318 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1319 
1320 	/* check type */
1321 	if (metachkmeta(raidnp, ep) != 0)
1322 		return (-1);
1323 
1324 	/* check and count new columns */
1325 	for (p = colnlp; (p != NULL); p = p->next) {
1326 		mdname_t	*np = p->namep;
1327 		mdnamelist_t	*p2;
1328 
1329 		/* check against existing devices */
1330 		if (meta_check_column(sp, np, ep) != 0)
1331 			return (-1);
1332 
1333 		/* check against ourselves */
1334 		for (p2 = p->next; (p2 != NULL); p2 = p2->next) {
1335 			if (meta_check_overlap(np->cname, np, 0, -1,
1336 			    p2->namep, 0, -1, ep) != 0) {
1337 				return (-1);
1338 			}
1339 		}
1340 
1341 		/* count */
1342 		++concat_cnt;
1343 	}
1344 
1345 	/* get old unit */
1346 	if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1347 		return (-1);
1348 
1349 	/*
1350 	 * calculate the size needed for the new raid unit and allocate
1351 	 * the appropriate structure. allocate new unit.
1352 	 */
1353 	old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]);
1354 	old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]);
1355 	new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]);
1356 	new_rusize += (old_mr->un_totalcolumncnt + concat_cnt)
1357 	    * sizeof (new_mr->un_column[0]);
1358 	new_mr = Zalloc(new_rusize);
1359 	(void) memcpy(new_mr, old_mr, old_rusize);
1360 
1361 	/* We always want a do-it, this is for attach_raid_col below */
1362 	options |= MDCMD_DOIT;
1363 
1364 	/* build new unit structure */
1365 	for (p = colnlp; (p != NULL); p = p->next) {
1366 		mdname_t	*colnp = p->namep;
1367 		mr_column_t	*mdc;
1368 
1369 		/* attach column */
1370 		mdc = &new_mr->un_column[new_mr->un_totalcolumncnt];
1371 		if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp,
1372 		    RCS_INIT, &keynlp, options, ep) != 0) {
1373 			goto out;
1374 		}
1375 	}
1376 	assert(new_mr->un_totalcolumncnt
1377 	    == (old_mr->un_totalcolumncnt + concat_cnt));
1378 
1379 
1380 	create_flag = meta_check_devicesize(new_mr->c.un_total_blocks);
1381 
1382 	/* grow raid */
1383 	(void) memset(&mgp, 0, sizeof (mgp));
1384 	mgp.mnum = MD_SID(new_mr);
1385 	MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno);
1386 	mgp.size = new_rusize;
1387 	mgp.mdp = (uintptr_t)new_mr;
1388 
1389 	if (create_flag == MD_CRO_32BIT) {
1390 		mgp.options = MD_CRO_32BIT;
1391 		new_mr->c.un_revision &= ~MD_64BIT_META_DEV;
1392 	} else {
1393 		mgp.options = MD_CRO_64BIT;
1394 		new_mr->c.un_revision |= MD_64BIT_META_DEV;
1395 	}
1396 	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
1397 		(void) mdstealerror(ep, &mgp.mde);
1398 		goto out;
1399 	}
1400 
1401 	/* clear cache */
1402 	if (invalidate_columns(sp, raidnp, ep) != 0)
1403 		goto out;
1404 	meta_invalidate_name(raidnp);
1405 
1406 	/* let em know */
1407 	if (options & MDCMD_PRINT) {
1408 		if (concat_cnt == 1) {
1409 			(void) printf(dgettext(TEXT_DOMAIN,
1410 			    "%s: component is attached\n"),
1411 			    raidnp->cname);
1412 		} else {
1413 			(void) printf(dgettext(TEXT_DOMAIN,
1414 			    "%s: components are attached\n"),
1415 			    raidnp->cname);
1416 		}
1417 		(void) fflush(stdout);
1418 	}
1419 
1420 
1421 	/* grow any parents */
1422 	if (meta_concat_parent(sp, raidnp, ep) != 0)
1423 		goto out;
1424 	rval = 0;	/* success */
1425 
1426 	/* cleanup, return error */
1427 out:
1428 	Free(old_mr);
1429 	Free(new_mr);
1430 	if (rval != 0)
1431 		(void) del_key_names(sp, keynlp, NULL);
1432 	metafreenamelist(keynlp);
1433 	return (rval);
1434 }
1435 
1436 /*
1437  * get raid parameters
1438  */
1439 int
1440 meta_raid_get_params(
1441 	mdsetname_t	*sp,
1442 	mdname_t	*raidnp,
1443 	mr_params_t	*paramsp,
1444 	md_error_t	*ep
1445 )
1446 {
1447 	md_raid_t	*raidp;
1448 
1449 	/* should have a set */
1450 	assert(sp != NULL);
1451 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1452 
1453 	/* check name */
1454 	if (metachkmeta(raidnp, ep) != 0)
1455 		return (-1);
1456 
1457 	/* get unit */
1458 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
1459 		return (-1);
1460 
1461 	/* return parameters */
1462 	(void) memset(paramsp, 0, sizeof (*paramsp));
1463 	if (raidp->hspnamep == NULL)
1464 		paramsp->hsp_id = MD_HSP_NONE;
1465 	else
1466 		paramsp->hsp_id = raidp->hspnamep->hsp;
1467 	return (0);
1468 }
1469 
1470 /*
1471  * set raid parameters
1472  */
1473 int
1474 meta_raid_set_params(
1475 	mdsetname_t		*sp,
1476 	mdname_t		*raidnp,
1477 	mr_params_t		*paramsp,
1478 	md_error_t		*ep
1479 )
1480 {
1481 	md_raid_params_t	msp;
1482 
1483 	/* should have a set */
1484 	assert(sp != NULL);
1485 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1486 
1487 	/* check name */
1488 	if (metachkmeta(raidnp, ep) != 0)
1489 		return (-1);
1490 
1491 	/* set parameters */
1492 	(void) memset(&msp, 0, sizeof (msp));
1493 	MD_SETDRIVERNAME(&msp, MD_RAID, sp->setno);
1494 	msp.mnum = meta_getminor(raidnp->dev);
1495 	msp.params = *paramsp;
1496 	if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, raidnp->cname) != 0)
1497 		return (mdstealerror(ep, &msp.mde));
1498 
1499 	/* clear cache */
1500 	meta_invalidate_name(raidnp);
1501 
1502 	/* return success */
1503 	return (0);
1504 }
1505 
1506 /*
1507  * validate raid replace column
1508  */
1509 static int
1510 validate_new_raid(
1511 	mdsetname_t	*sp,
1512 	mdname_t	*raidnp,
1513 	mdname_t	*colnp,
1514 	replace_params_t *paramsp,
1515 	int		dup_ok,
1516 	md_error_t	*ep
1517 )
1518 {
1519 	mr_unit_t	*mr;
1520 	diskaddr_t	column_size;
1521 	diskaddr_t	label;
1522 	mdcinfo_t	*cinfop;
1523 	int		rval = -1;
1524 
1525 	/* get raid unit */
1526 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1527 		return (-1);
1528 	column_size = mr->un_segsize * mr->un_segsincolumn;
1529 
1530 	/* check it out */
1531 	if (meta_check_column(sp, colnp, ep) != 0) {
1532 		if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY)))
1533 			goto out;
1534 		mdclrerror(ep);
1535 	}
1536 	if ((paramsp->number_blks = metagetsize(colnp, ep)) ==
1537 	    MD_DISKADDR_ERROR)
1538 		goto out;
1539 	if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR)
1540 		goto out;
1541 	paramsp->has_label = ((label > 0) ? 1 : 0);
1542 	if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) ==
1543 	    MD_DISKADDR_ERROR)
1544 		goto out;
1545 	if ((paramsp->number_blks - paramsp->start_blk) < column_size) {
1546 		(void) mdsyserror(ep, ENOSPC, colnp->cname);
1547 		goto out;
1548 	}
1549 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
1550 		goto out;
1551 	if (cinfop->maxtransfer < mr->un_maxio) {
1552 		(void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev),
1553 		    colnp->dev, colnp->cname);
1554 		goto out;
1555 	}
1556 
1557 	/* success */
1558 	rval = 0;
1559 
1560 	/* cleanup, return error */
1561 out:
1562 	Free(mr);
1563 	return (rval);
1564 }
1565 
1566 /*
1567  * replace raid column
1568  */
1569 int
1570 meta_raid_replace(
1571 	mdsetname_t		*sp,
1572 	mdname_t		*raidnp,
1573 	mdname_t		*oldnp,
1574 	mdname_t		*newnp,
1575 	mdcmdopts_t		options,
1576 	md_error_t		*ep
1577 )
1578 {
1579 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
1580 	replace_params_t	params;
1581 	md_dev64_t		old_dev, new_dev;
1582 	diskaddr_t		new_start_blk, new_end_blk;
1583 	int			rebind;
1584 	char			*new_devidp = NULL;
1585 	md_error_t		xep = mdnullerror;
1586 	int			ret;
1587 	md_set_desc		*sd;
1588 	uint_t			tstate;
1589 
1590 	/* should have same set */
1591 	assert(sp != NULL);
1592 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1593 
1594 	/* check name */
1595 	if (metachkmeta(raidnp, ep) != 0)
1596 		return (-1);
1597 
1598 	/* save new binding incase this is a rebind where oldnp==newnp */
1599 	new_dev = newnp->dev;
1600 	new_start_blk = newnp->start_blk;
1601 	new_end_blk = newnp->end_blk;
1602 
1603 	/* invalidate, then get the raid (fill in oldnp from metadb) */
1604 	meta_invalidate_name(raidnp);
1605 	if (meta_get_raid(sp, raidnp, ep) == NULL)
1606 		return (-1);
1607 
1608 	/* can't replace a component if the raid inaccessible */
1609 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
1610 		return (-1);
1611 	}
1612 	if (tstate & MD_INACCESSIBLE) {
1613 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
1614 		    meta_getminor(raidnp->dev), raidnp->cname));
1615 	}
1616 
1617 	/* the old device binding is now established */
1618 	if ((old_dev = oldnp->dev) == NODEV64)
1619 		return (mdsyserror(ep, ENODEV, oldnp->cname));
1620 
1621 
1622 	/* setup raid info */
1623 	(void) memset(&params, 0, sizeof (params));
1624 	params.mnum = meta_getminor(raidnp->dev);
1625 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
1626 	params.old_dev = old_dev;
1627 	params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP;
1628 
1629 	if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
1630 	    (old_dev != new_dev)) {
1631 		rebind = 1;
1632 	} else {
1633 		rebind = 0;
1634 	}
1635 	if (rebind) {
1636 		newnp->dev = new_dev;
1637 		newnp->start_blk = new_start_blk;
1638 		newnp->end_blk = new_end_blk;
1639 	}
1640 
1641 	/*
1642 	 * Save a copy of the devid associated with the new disk, the
1643 	 * reason is that the checks for the column (meta_check_column)
1644 	 * via validate_new_raid(), could cause the disk's devid to be
1645 	 * changed to that of the devid that is currently stored in the
1646 	 * replica namespace for the disk in question. This devid could
1647 	 * be stale if we are replacing the disk. The actual function
1648 	 * that overwrites the devid is dr2drivedesc().
1649 	 */
1650 
1651 	/* don't setup new_devid if no devid's or MN diskset */
1652 	if (newnp->drivenamep->devid != NULL)
1653 		new_devidp = Strdup(newnp->drivenamep->devid);
1654 
1655 	if (!metaislocalset(sp)) {
1656 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1657 			return (-1);
1658 		if (MD_MNSET_DESC(sd))
1659 			new_devidp = NULL;
1660 	}
1661 
1662 	/* check out new (sets up start_blk, has_label, number_blks) */
1663 	if (validate_new_raid(sp, raidnp, newnp, &params, rebind,
1664 	    ep) != 0) {
1665 		Free(new_devidp);
1666 		return (-1);
1667 	}
1668 
1669 	/*
1670 	 * Copy back the saved devid.
1671 	 */
1672 	Free(newnp->drivenamep->devid);
1673 	if (new_devidp) {
1674 		newnp->drivenamep->devid = Strdup(new_devidp);
1675 		Free(new_devidp);
1676 	}
1677 
1678 	/* store name in namespace, allocate new key */
1679 	if (add_key_name(sp, newnp, NULL, ep) != 0)
1680 		return (-1);
1681 
1682 	if (rebind && !metaislocalset(sp)) {
1683 		/*
1684 		 * We are 'rebind'ing a disk that is in a diskset so as well
1685 		 * as updating the diskset's namespace the local set needs
1686 		 * to be updated because it also contains a reference to the
1687 		 * disk in question.
1688 		 */
1689 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
1690 		    newnp->cname, ep);
1691 
1692 		if (ret != METADEVADM_SUCCESS) {
1693 			(void) del_key_name(sp, newnp, &xep);
1694 			return (-1);
1695 		}
1696 	}
1697 
1698 	/* replace column */
1699 	params.new_dev = new_dev;
1700 	params.new_key = newnp->key;
1701 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
1702 		(void) del_key_name(sp, newnp, ep);
1703 		return (mdstealerror(ep, &params.mde));
1704 	}
1705 
1706 	/* clear cache */
1707 	meta_invalidate_name(oldnp);
1708 	meta_invalidate_name(newnp);
1709 	meta_invalidate_name(raidnp);
1710 
1711 	/* let em know */
1712 	if (options & MDCMD_PRINT) {
1713 		(void) printf(dgettext(TEXT_DOMAIN,
1714 		    "%s: device %s is replaced with %s\n"),
1715 		    raidnp->cname, oldnp->cname, newnp->cname);
1716 		(void) fflush(stdout);
1717 	}
1718 
1719 	/* return success */
1720 	return (0);
1721 }
1722 
1723 /*
1724  * enable raid column
1725  */
1726 int
1727 meta_raid_enable(
1728 	mdsetname_t		*sp,
1729 	mdname_t		*raidnp,
1730 	mdname_t		*colnp,
1731 	mdcmdopts_t		options,
1732 	md_error_t		*ep
1733 )
1734 {
1735 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
1736 	replace_params_t	params;
1737 	md_dev64_t		fs_dev, del_dev;
1738 	int			err = 0;
1739 	char			*devnm;
1740 	int			ret;
1741 	uint_t			tstate;
1742 
1743 	/* should have same set */
1744 	assert(sp != NULL);
1745 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1746 
1747 	/* check name */
1748 	if (metachkmeta(raidnp, ep) != 0)
1749 		return (-1);
1750 
1751 	/* get the file_system dev binding */
1752 	if (meta_getdev(sp, colnp, ep) != 0)
1753 		return (-1);
1754 	fs_dev = colnp->dev;
1755 
1756 	/* get the raid unit (fill in colnp->dev with metadb version) */
1757 	meta_invalidate_name(raidnp);
1758 	if (meta_get_raid(sp, raidnp, ep) == NULL)
1759 		return (-1);
1760 
1761 	/* enabling a component can't work if the raid inaccessible */
1762 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
1763 		return (-1);
1764 	}
1765 	if (tstate & MD_INACCESSIBLE) {
1766 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
1767 		    meta_getminor(raidnp->dev), raidnp->cname));
1768 	}
1769 
1770 	/* the metadb device binding is now established */
1771 	if (colnp->dev == NODEV64)
1772 		return (mdsyserror(ep, ENODEV, colnp->cname));
1773 
1774 	/*
1775 	 * check for the case where the dev_t has changed between the
1776 	 * filesystem and the metadb.  This is called a rebind, and
1777 	 * is handled by meta_raid_replace.
1778 	 */
1779 	if (fs_dev != colnp->dev) {
1780 		/*
1781 		 * Save the devt of mddb version
1782 		 */
1783 		del_dev = colnp->dev;
1784 
1785 		/* establish file system binding with invalid start/end */
1786 		colnp->dev = fs_dev;
1787 		colnp->start_blk = -1;
1788 		colnp->end_blk = -1;
1789 		err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep);
1790 
1791 		/*
1792 		 * Don't do it if meta_raid_replace returns an error
1793 		 */
1794 		if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD,
1795 		    del_dev, NULL, NULL, &colnp->key, ep)) != NULL) {
1796 			(void) del_key_name(sp, colnp, ep);
1797 			Free(devnm);
1798 		}
1799 		return (err);
1800 	}
1801 
1802 	/* setup raid info */
1803 	(void) memset(&params, 0, sizeof (params));
1804 	params.mnum = meta_getminor(raidnp->dev);
1805 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
1806 	params.old_dev = params.new_dev = colnp->dev;
1807 	if (force)
1808 		params.cmd = FORCE_ENABLE_COMP;
1809 	else
1810 		params.cmd = ENABLE_COMP;
1811 
1812 	/* check it out */
1813 	if (validate_new_raid(sp, raidnp, colnp, &params, 1, ep) != 0)
1814 		return (-1);
1815 
1816 	/* enable column */
1817 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
1818 		return (mdstealerror(ep, &params.mde));
1819 
1820 	/*
1821 	 * are we dealing with a non-local set? If so need to update the
1822 	 * local namespace so that the disk record has the correct devid.
1823 	 */
1824 	if (!metaislocalset(sp)) {
1825 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname,
1826 		    ep);
1827 
1828 		if (ret != METADEVADM_SUCCESS) {
1829 			/*
1830 			 * Failed to update the local set. Nothing to do here
1831 			 * apart from report the error. The namespace is
1832 			 * most likely broken and some form of remedial
1833 			 * recovery is going to be required.
1834 			 */
1835 			mde_perror(ep, "");
1836 			mdclrerror(ep);
1837 		}
1838 	}
1839 
1840 	/* clear cache */
1841 	meta_invalidate_name(colnp);
1842 	meta_invalidate_name(raidnp);
1843 
1844 	/* let em know */
1845 	if (options & MDCMD_PRINT) {
1846 		(void) printf(dgettext(TEXT_DOMAIN,
1847 		    "%s: device %s is enabled\n"),
1848 		    raidnp->cname, colnp->cname);
1849 		(void) fflush(stdout);
1850 	}
1851 
1852 	/* return success */
1853 	return (0);
1854 }
1855 
1856 /*
1857  * check for dups in the raid itself
1858  */
1859 static int
1860 check_twice(
1861 	md_raid_t	*raidp,
1862 	uint_t		col,
1863 	md_error_t	*ep
1864 )
1865 {
1866 	mdname_t	*raidnp = raidp->common.namep;
1867 	mdname_t	*thisnp;
1868 	uint_t		c;
1869 
1870 	thisnp = raidp->cols.cols_val[col].colnamep;
1871 	for (c = 0; (c < col); ++c) {
1872 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[c];
1873 		mdname_t	*colnp = mdcp->colnamep;
1874 
1875 		if (meta_check_overlap(raidnp->cname, thisnp, 0, -1,
1876 		    colnp, 0, -1, ep) != 0) {
1877 			return (-1);
1878 		}
1879 	}
1880 	return (0);
1881 }
1882 
1883 /*
1884  * default raid interlace
1885  */
1886 diskaddr_t
1887 meta_default_raid_interlace(void)
1888 {
1889 	diskaddr_t	interlace;
1890 
1891 	/* default to 512k, round up if necessary */
1892 	interlace = btodb(512 * 1024);
1893 	if (interlace < lbtodb(MININTERLACE))
1894 		interlace = roundup(MININTERLACE, interlace);
1895 	return (interlace);
1896 }
1897 
1898 /*
1899  * convert interlaces
1900  */
1901 int
1902 meta_raid_check_interlace(
1903 	diskaddr_t	interlace,
1904 	char		*uname,
1905 	md_error_t	*ep
1906 )
1907 {
1908 	if ((interlace < btodb(RAID_MIN_INTERLACE)) ||
1909 	    (interlace > btodb(MAXINTERLACE))) {
1910 		return (mderror(ep, MDE_BAD_INTERLACE, uname));
1911 	}
1912 	return (0);
1913 }
1914 
1915 /*
1916  * check raid
1917  */
1918 int
1919 meta_check_raid(
1920 	mdsetname_t	*sp,
1921 	md_raid_t	*raidp,
1922 	mdcmdopts_t	options,
1923 	md_error_t	*ep
1924 )
1925 {
1926 	mdname_t	*raidnp = raidp->common.namep;
1927 	int		doit = ((options & MDCMD_DOIT) ? 1 : 0);
1928 	int		updateit = ((options & MDCMD_UPDATE) ? 1 : 0);
1929 	uint_t		ncol;
1930 	uint_t		col;
1931 	minor_t		mnum = meta_getminor(raidnp->dev);
1932 
1933 	/* check number */
1934 	if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) ||
1935 	    (raidp->orig_ncol > ncol)) {
1936 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
1937 	}
1938 
1939 	/* compute default interlace */
1940 	if (raidp->interlace == 0) {
1941 		raidp->interlace = meta_default_raid_interlace();
1942 	}
1943 
1944 	/* check state */
1945 	switch (raidp->state) {
1946 	case RUS_INIT:
1947 	case RUS_OKAY:
1948 		break;
1949 
1950 	default:
1951 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
1952 	}
1953 
1954 	/* check interlace */
1955 	if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0)
1956 		return (-1);
1957 
1958 	/* check hotspare pool name */
1959 	if (doit) {
1960 		if ((raidp->hspnamep != NULL) &&
1961 		    (metachkhsp(sp, raidp->hspnamep, ep) != 0)) {
1962 			return (-1);
1963 		}
1964 	}
1965 
1966 	/* check columns */
1967 	for (col = 0; (col < ncol); ++col) {
1968 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
1969 		mdname_t	*colnp = mdcp->colnamep;
1970 		diskaddr_t	start_blk, size;
1971 
1972 		/* setup column */
1973 		if (raidp->state == RUS_INIT)
1974 			mdcp->state = RCS_INIT;
1975 		else
1976 			mdcp->state = RCS_OKAY;
1977 
1978 		/* check column */
1979 		if (!updateit) {
1980 			if (meta_check_column(sp, colnp, ep) != 0)
1981 				return (-1);
1982 			if (((start_blk = metagetstart(sp, colnp, ep)) ==
1983 			    MD_DISKADDR_ERROR) || ((size = metagetsize(colnp,
1984 			    ep)) == MD_DISKADDR_ERROR)) {
1985 				return (-1);
1986 			}
1987 			if (start_blk >= size)
1988 				return (mdsyserror(ep, ENOSPC, colnp->cname));
1989 			size -= start_blk;
1990 			size = rounddown(size, raidp->interlace);
1991 			if (size == 0)
1992 				return (mdsyserror(ep, ENOSPC, colnp->cname));
1993 		}
1994 
1995 		/* check this raid too */
1996 		if (check_twice(raidp, col, ep) != 0)
1997 			return (-1);
1998 	}
1999 
2000 	/* return success */
2001 	return (0);
2002 }
2003 
2004 /*
2005  * setup raid geometry
2006  */
2007 static int
2008 raid_geom(
2009 	md_raid_t	*raidp,
2010 	mr_unit_t	*mr,
2011 	md_error_t	*ep
2012 )
2013 {
2014 	uint_t		write_reinstruct = 0;
2015 	uint_t		read_reinstruct = 0;
2016 	uint_t		round_cyl = 1;
2017 	uint_t		col;
2018 	mdgeom_t	*geomp;
2019 
2020 	/* get worst reinstructs */
2021 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
2022 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
2023 		mdname_t	*colnp = mdcp->colnamep;
2024 
2025 		if ((geomp = metagetgeom(colnp, ep)) == NULL)
2026 			return (-1);
2027 		if (geomp->write_reinstruct > write_reinstruct)
2028 			write_reinstruct = geomp->write_reinstruct;
2029 		if (geomp->read_reinstruct > read_reinstruct)
2030 			read_reinstruct = geomp->read_reinstruct;
2031 	}
2032 
2033 	/* setup geometry from first column */
2034 	assert(raidp->cols.cols_len > 0);
2035 	if ((geomp = metagetgeom(raidp->cols.cols_val[0].colnamep,
2036 	    ep)) == NULL) {
2037 		return (-1);
2038 	}
2039 	if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp,
2040 	    write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
2041 		return (-1);
2042 
2043 	/* return success */
2044 	return (0);
2045 }
2046 
2047 int
2048 meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state)
2049 {
2050 	int 	statecnt = 0;
2051 	int	col;
2052 
2053 	for (col = 0; col < mr->un_totalcolumncnt; col++)
2054 		if (mr->un_column[col].un_devstate & state)
2055 			statecnt++;
2056 	return (statecnt);
2057 }
2058 /*
2059  * validate that a raid device being created with the -k flag is a real
2060  * raid device
2061  */
2062 int
2063 meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr)
2064 {
2065 	long long	buf[DEV_BSIZE / sizeof (long long)];
2066 	raid_pwhdr_t	pwhdr;
2067 	raid_pwhdr_t	*rpw = &pwhdr;
2068 	minor_t		mnum;
2069 	int		col;
2070 	int		fd;
2071 
2072 	for (col = 0; col < mr->un_totalcolumncnt; col++) {
2073 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2074 		mdname_t	*colnp = cp->colnamep;
2075 
2076 		if ((fd = open(colnp->rname, O_RDONLY)) < 0)
2077 			goto error_exit;
2078 
2079 		if (lseek64(fd,
2080 		    (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0)
2081 			goto error_exit;
2082 
2083 		if (read(fd, buf, DEV_BSIZE) < 0)
2084 			goto error_exit;
2085 
2086 		/*
2087 		 * If our raid device is a 64 bit device, we can accept the
2088 		 * pw header we just read in.
2089 		 * Otherwise it's of type raid_pwhdr32_od_t and has to
2090 		 * be converted.
2091 		 */
2092 		if (mr->c.un_revision & MD_64BIT_META_DEV) {
2093 			rpw = (raid_pwhdr_t *)buf;
2094 		} else {
2095 			RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw);
2096 		}
2097 
2098 		if (rpw->rpw_column != col)
2099 			goto error_exit;
2100 
2101 		if (col == 0)
2102 			mnum = rpw->rpw_unit;
2103 
2104 		if (rpw->rpw_unit != mnum)
2105 			goto error_exit;
2106 
2107 		if (rpw->rpw_magic_ext == RAID_PWMAGIC) {
2108 			/* 4.1 prewrite header */
2109 			if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) ||
2110 			    (rpw->rpw_totalcolumncnt !=
2111 			    mr->un_totalcolumncnt) ||
2112 			    (rpw->rpw_segsize != mr->un_segsize) ||
2113 			    (rpw->rpw_segsincolumn != mr->un_segsincolumn) ||
2114 			    (rpw->rpw_pwcnt != mr->un_pwcnt) ||
2115 			    (rpw->rpw_pwstart !=
2116 			    mr->un_column[col].un_pwstart) ||
2117 			    (rpw->rpw_devstart !=
2118 			    mr->un_column[col].un_devstart) ||
2119 			    (rpw->rpw_pwsize != mr->un_pwsize))
2120 				goto error_exit;
2121 		}
2122 		/*
2123 		 * this is an old prewrite header (4.0) the unit structure
2124 		 * will have to be trusted.
2125 		 */
2126 		(void) close(fd);
2127 	}
2128 
2129 	return (0);
2130 
2131 error_exit:
2132 	(void) close(fd);
2133 	return (-1);
2134 }
2135 
2136 /*
2137  * create raid
2138  */
2139 int
2140 meta_create_raid(
2141 	mdsetname_t	*sp,
2142 	md_raid_t	*raidp,
2143 	mdcmdopts_t	options,
2144 	md_error_t	*ep
2145 )
2146 {
2147 	mdname_t	*raidnp = raidp->common.namep;
2148 	uint_t		ncol = raidp->cols.cols_len;
2149 	uint_t		orig_ncol = raidp->orig_ncol;
2150 	size_t		rdsize;
2151 	mr_unit_t	*mr;
2152 	uint_t		col;
2153 	diskaddr_t	disk_size = 0;
2154 	uint_t		disk_maxio = 0;
2155 	uint_t		pwes;
2156 	diskaddr_t	non_pw_blks, column_size;
2157 	mdnamelist_t	*keynlp = NULL;
2158 	md_set_params_t	set_params;
2159 	int		rval = -1;
2160 	md_timeval32_t	creation_time;
2161 	int		create_flag = MD_CRO_32BIT;
2162 
2163 	/* validate raid */
2164 	if (meta_check_raid(sp, raidp, options, ep) != 0)
2165 		return (-1);
2166 
2167 	/* allocate raid unit */
2168 	rdsize = sizeof (*mr) - sizeof (mr->un_column[0]);
2169 	rdsize += ncol * sizeof (mr->un_column[0]);
2170 	mr = Zalloc(rdsize);
2171 
2172 	if (meta_gettimeofday(&creation_time) == -1)
2173 		return (mdsyserror(ep, errno, NULL));
2174 	/*
2175 	 * initialize the top level mr_unit_t structure
2176 	 * setup the unit state to indicate whether to retain
2177 	 * any data currently on the metadevice or to clear it
2178 	 */
2179 	mr->c.un_type = MD_METARAID;
2180 	MD_SID(mr) = meta_getminor(raidnp->dev);
2181 	mr->c.un_size = rdsize;
2182 	mr->un_magic = RAID_UNMAGIC;
2183 	mr->un_state = raidp->state;
2184 	mr->un_timestamp = creation_time;
2185 	mr->un_origcolumncnt = orig_ncol;
2186 	mr->un_segsize = (uint_t)raidp->interlace;
2187 	if (raidp->hspnamep != NULL) {
2188 		mr->un_hsp_id = raidp->hspnamep->hsp;
2189 	} else {
2190 		mr->un_hsp_id = MD_HSP_NONE;
2191 	}
2192 	/*
2193 	 * setup original columns, saving start_block and
2194 	 * finding smallest size and maxio
2195 	 */
2196 	for (col = 0; (col < orig_ncol); ++col) {
2197 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2198 		mdname_t	*colnp = cp->colnamep;
2199 		mr_column_t	*mdc = &mr->un_column[col];
2200 		diskaddr_t	size;
2201 		uint_t		maxio;
2202 		mdcinfo_t	*cinfop;
2203 
2204 		/* setup state */
2205 		mdc->un_devstate = cp->state;
2206 
2207 		/* setup creation time */
2208 		mdc->un_devtimestamp = creation_time;
2209 
2210 		/* get start, size, and maxio */
2211 		if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
2212 		    MD_DISKADDR_ERROR)
2213 			goto out;
2214 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
2215 			goto out;
2216 		size -= mdc->un_orig_devstart;
2217 		if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
2218 			goto out;
2219 		maxio = cinfop->maxtransfer;
2220 
2221 		if (options & MDCMD_DOIT) {
2222 			/* store name in namespace */
2223 			if (add_key_name(sp, colnp, &keynlp, ep) != 0)
2224 				goto out;
2225 		}
2226 
2227 		/* setup column */
2228 		mdc->un_orig_key = colnp->key;
2229 		mdc->un_orig_dev = colnp->dev;
2230 		mdc->un_dev = mdc->un_orig_dev;
2231 		mdc->un_pwstart = mdc->un_orig_pwstart;
2232 		mdc->un_devstart = mdc->un_orig_devstart;
2233 		mdc->un_alt_dev = NODEV64;
2234 		mdc->un_alt_pwstart = 0;
2235 		mdc->un_alt_devstart = 0;
2236 		mdc->un_hs_id = 0;
2237 		if (mr->un_state == RUS_INIT)
2238 			mdc->un_devstate = RCS_INIT;
2239 		else
2240 			mdc->un_devstate = RCS_OKAY;
2241 
2242 		/* adjust for smallest disk */
2243 		if (disk_size == 0) {
2244 			disk_size = size;
2245 		} else if (size < disk_size) {
2246 			disk_size = size;
2247 		}
2248 		if (disk_maxio == 0) {
2249 			disk_maxio = maxio;
2250 		} else if (maxio < disk_maxio) {
2251 			disk_maxio = maxio;
2252 		}
2253 	}
2254 	assert(col == mr->un_origcolumncnt);
2255 
2256 	/*
2257 	 * before processing any of the attached column(s)
2258 	 * set up the composition of the metadevice for column
2259 	 * sizes and pre-write information
2260 	 */
2261 	mr->un_maxio = disk_maxio;	/* smallest maxio */
2262 	mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1));
2263 	pwes = mr->un_iosize;
2264 	if (raidp->pw_count)
2265 		mr->un_pwcnt = raidp->pw_count;
2266 	else
2267 		mr->un_pwcnt = PWCNT_MIN;
2268 	if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) {
2269 		(void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname);
2270 		goto out;
2271 	}
2272 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
2273 
2274 	/* now calculate the number of segments per column */
2275 	non_pw_blks = disk_size - mr->un_pwsize;	/* smallest disk */
2276 	if ((mr->un_pwsize > disk_size) ||
2277 	    (non_pw_blks < (diskaddr_t)mr->un_segsize)) {
2278 		(void) mdsyserror(ep, ENOSPC, raidnp->cname);
2279 		goto out;
2280 	}
2281 	mr->un_segsincolumn = non_pw_blks / mr->un_segsize;
2282 	column_size = mr->un_segsize * mr->un_segsincolumn;
2283 
2284 	/*
2285 	 * adjust the pw_cnt, pw_size, to fit into any fragmentation
2286 	 * left over after column_size has been computed
2287 	 */
2288 	mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2);
2289 	mr->un_pwcnt = mr->un_pwsize / pwes;
2290 	assert(mr->un_pwcnt >= PWCNT_MIN);
2291 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
2292 	assert((mr->un_pwsize + column_size) <= disk_size);
2293 
2294 	/*
2295 	 * calculate the actual block count available based on the
2296 	 * segment size and the number of segments per column ...
2297 	 * ... and adjust for the number of parity segments
2298 	 */
2299 	mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1);
2300 
2301 	if (raid_geom(raidp, mr, ep) != 0)
2302 		goto out;
2303 
2304 	create_flag = meta_check_devicesize(mr->c.un_total_blocks);
2305 
2306 	/*
2307 	 * now calculate the pre-write offset and update the column
2308 	 * structures to include the address of the individual pre-write
2309 	 * areas
2310 	 */
2311 	for (col = 0; (col < orig_ncol); ++col) {
2312 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2313 		mdname_t	*colnp = cp->colnamep;
2314 		mr_column_t	*mdc = &mr->un_column[col];
2315 		diskaddr_t	size;
2316 
2317 		/* get size */
2318 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
2319 			goto out;
2320 
2321 		/* adjust start and size by prewrite */
2322 		mdc->un_orig_pwstart = mdc->un_orig_devstart;
2323 		mdc->un_orig_devstart += mr->un_pwsize;
2324 		mdc->un_pwstart = mdc->un_orig_pwstart;
2325 		mdc->un_devstart = mdc->un_orig_devstart;
2326 
2327 		assert(size >= mdc->un_orig_devstart);
2328 		size -= mdc->un_orig_devstart;
2329 
2330 		/* make sure we still have something left */
2331 		assert(size >= column_size);
2332 	}
2333 
2334 	/* do concat cols */
2335 	mr->un_totalcolumncnt = mr->un_origcolumncnt;
2336 	assert(col == mr->un_origcolumncnt);
2337 	for (col = orig_ncol; (col < ncol); ++col) {
2338 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2339 		mdname_t	*colnp = cp->colnamep;
2340 		mr_column_t	*mdc = &mr->un_column[col];
2341 
2342 		/* attach column */
2343 		if (attach_raid_col(sp, raidnp, mr, mdc, colnp,
2344 		    cp->state, &keynlp, options, ep) != 0) {
2345 			goto out;
2346 		}
2347 	}
2348 	assert(mr->un_totalcolumncnt == ncol);
2349 
2350 	/* fill in the size of the raid */
2351 	if (options & MDCMD_UPDATE) {
2352 		raidp->common.size = mr->c.un_total_blocks;
2353 		raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
2354 	}
2355 
2356 	/* if we're not doing anything, return success */
2357 	if (! (options & MDCMD_DOIT)) {
2358 		rval = 0;	/* success */
2359 		goto out;
2360 	}
2361 
2362 	if ((mr->un_state & RUS_OKAY) &&
2363 	    (meta_raid_valid(raidp, mr) != 0)) {
2364 		(void) mderror(ep, MDE_RAID_INVALID, raidnp->cname);
2365 		goto out;
2366 	}
2367 
2368 	/* create raid */
2369 	(void) memset(&set_params, 0, sizeof (set_params));
2370 	/* did the user tell us to generate a large device? */
2371 	if (create_flag == MD_CRO_64BIT) {
2372 		mr->c.un_revision |= MD_64BIT_META_DEV;
2373 		set_params.options = MD_CRO_64BIT;
2374 	} else {
2375 		mr->c.un_revision &= ~MD_64BIT_META_DEV;
2376 		set_params.options = MD_CRO_32BIT;
2377 	}
2378 	set_params.mnum = MD_SID(mr);
2379 	set_params.size = mr->c.un_size;
2380 	set_params.mdp = (uintptr_t)mr;
2381 	MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum));
2382 	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
2383 	    raidnp->cname) != 0) {
2384 		(void) mdstealerror(ep, &set_params.mde);
2385 		goto out;
2386 	}
2387 	rval = 0;	/* success */
2388 
2389 	/* cleanup, return success */
2390 out:
2391 	Free(mr);
2392 	if (rval != 0) {
2393 		(void) del_key_names(sp, keynlp, NULL);
2394 	}
2395 	metafreenamelist(keynlp);
2396 	if ((rval == 0) && (options & MDCMD_DOIT)) {
2397 		if (invalidate_columns(sp, raidnp, ep) != 0)
2398 			rval = -1;
2399 		meta_invalidate_name(raidnp);
2400 	}
2401 	return (rval);
2402 }
2403 
2404 /*
2405  * initialize raid
2406  * NOTE: this functions is metainit(1m)'s command line parser!
2407  */
2408 int
2409 meta_init_raid(
2410 	mdsetname_t	**spp,
2411 	int		argc,
2412 	char		*argv[],
2413 	mdcmdopts_t	options,
2414 	md_error_t	*ep
2415 )
2416 {
2417 	char		*uname = argv[0];
2418 	mdname_t	*raidnp = NULL;
2419 	int		old_optind;
2420 	int		c;
2421 	md_raid_t	*raidp = NULL;
2422 	uint_t		ncol, col;
2423 	int		rval = -1;
2424 	md_set_desc	*sd;
2425 
2426 	/* get raid name */
2427 	assert(argc > 0);
2428 	if (argc < 1)
2429 		goto syntax;
2430 	if ((raidnp = metaname(spp, uname, META_DEVICE, ep)) == NULL)
2431 		goto out;
2432 	assert(*spp != NULL);
2433 
2434 	/*
2435 	 * Raid metadevice not allowed on multi-node diskset.
2436 	 */
2437 	if (! metaislocalset(*spp)) {
2438 		if ((sd = metaget_setdesc(*spp, ep)) == NULL)
2439 			goto out;
2440 		if (MD_MNSET_DESC(sd)) {
2441 			rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname,
2442 			    argc, argv);
2443 			goto out;
2444 		}
2445 	}
2446 
2447 	uname = raidnp->cname;
2448 	if (metachkmeta(raidnp, ep) != 0)
2449 		goto out;
2450 
2451 	if (!(options & MDCMD_NOLOCK)) {
2452 		/* grab set lock */
2453 		if (meta_lock(*spp, TRUE, ep) != 0)
2454 			goto out;
2455 
2456 		if (meta_check_ownership(*spp, ep) != 0)
2457 			goto out;
2458 	}
2459 
2460 	/* see if it exists already */
2461 	if (metagetmiscname(raidnp, ep) != NULL) {
2462 		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
2463 		    meta_getminor(raidnp->dev), uname);
2464 		goto out;
2465 	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
2466 		goto out;
2467 	} else {
2468 		mdclrerror(ep);
2469 	}
2470 	--argc, ++argv;
2471 
2472 	/* grab -r */
2473 	if ((argc < 1) || (strcmp(argv[0], "-r") != 0))
2474 		goto syntax;
2475 	--argc, ++argv;
2476 
2477 	/* parse general options */
2478 	optind = 0;
2479 	opterr = 0;
2480 	if (getopt(argc, argv, "") != -1)
2481 		goto options;
2482 
2483 	/* allocate raid */
2484 	raidp = Zalloc(sizeof (*raidp));
2485 
2486 	/* setup common */
2487 	raidp->common.namep = raidnp;
2488 	raidp->common.type = MD_METARAID;
2489 	raidp->state = RUS_INIT;
2490 
2491 	/* allocate and parse cols */
2492 	for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol)
2493 		;
2494 	raidp->cols.cols_len = ncol;
2495 	if (ncol != 0) {
2496 		raidp->cols.cols_val =
2497 		    Zalloc(ncol * sizeof (*raidp->cols.cols_val));
2498 	}
2499 	for (col = 0; ((argc > 0) && (col < ncol)); ++col) {
2500 		md_raidcol_t	*mdc = &raidp->cols.cols_val[col];
2501 		mdname_t	*colnp;
2502 
2503 		/* parse column name */
2504 		if ((colnp = metaname(spp, argv[0], UNKNOWN, ep)) == NULL)
2505 			goto out;
2506 		/* check for soft partitions */
2507 		if (meta_sp_issp(*spp, colnp, ep) != 0) {
2508 			/* check disks */
2509 			if (metachkcomp(colnp, ep) != 0)
2510 				goto out;
2511 		}
2512 		mdc->colnamep = colnp;
2513 		--argc, ++argv;
2514 	}
2515 
2516 	/* parse raid options */
2517 	old_optind = optind = 0;
2518 	opterr = 0;
2519 	while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) {
2520 		switch (c) {
2521 		case 'h':
2522 			if ((raidp->hspnamep = metahspname(spp, optarg,
2523 			    ep)) == NULL) {
2524 				goto out;
2525 			}
2526 
2527 			/*
2528 			 * Get out if the specified hotspare pool really
2529 			 * doesn't exist.
2530 			 */
2531 			if (raidp->hspnamep->hsp == MD_HSP_NONE) {
2532 				(void) mdhsperror(ep, MDE_INVAL_HSP,
2533 				    raidp->hspnamep->hsp, optarg);
2534 				goto out;
2535 			}
2536 			break;
2537 
2538 		case 'i':
2539 			if (parse_interlace(uname, optarg, &raidp->interlace,
2540 			    ep) != 0) {
2541 				goto out;
2542 			}
2543 			if (meta_raid_check_interlace(raidp->interlace,
2544 			    uname, ep))
2545 				goto out;
2546 			break;
2547 
2548 		case 'k':
2549 			raidp->state = RUS_OKAY;
2550 			break;
2551 
2552 		case 'o':
2553 			if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) ||
2554 			    ((int)raidp->orig_ncol < 0)) {
2555 				goto syntax;
2556 			}
2557 			if ((raidp->orig_ncol < MD_RAID_MIN) ||
2558 			    (raidp->orig_ncol > ncol)) {
2559 				rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname);
2560 				goto out;
2561 			}
2562 			break;
2563 		case 'w':
2564 			if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) ||
2565 			    ((int)raidp->pw_count < 0))
2566 				goto syntax;
2567 			if (((int)raidp->pw_count < PWCNT_MIN) ||
2568 			    ((int)raidp->pw_count > PWCNT_MAX)) {
2569 				rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname);
2570 				goto out;
2571 			}
2572 			break;
2573 		default:
2574 			argc += old_optind;
2575 			argv -= old_optind;
2576 			goto options;
2577 		}
2578 		old_optind = optind;
2579 	}
2580 	argc -= optind;
2581 	argv += optind;
2582 
2583 	/* we should be at the end */
2584 	if (argc != 0)
2585 		goto syntax;
2586 
2587 	/* default to all original columns */
2588 	if (raidp->orig_ncol == 0)
2589 		raidp->orig_ncol = ncol;
2590 
2591 	/* create raid */
2592 	if (meta_create_raid(*spp, raidp, options, ep) != 0)
2593 		goto out;
2594 	rval = 0;	/* success */
2595 
2596 	/* let em know */
2597 	if (options & MDCMD_PRINT) {
2598 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"),
2599 		    uname);
2600 		(void) fflush(stdout);
2601 	}
2602 	goto out;
2603 
2604 	/* syntax error */
2605 syntax:
2606 	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
2607 	goto out;
2608 
2609 	/* options error */
2610 options:
2611 	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
2612 	goto out;
2613 
2614 	/* cleanup, return error */
2615 out:
2616 	if (raidp != NULL)
2617 		meta_free_raid(raidp);
2618 	return (rval);
2619 }
2620 
2621 /*
2622  * reset RAIDs
2623  */
2624 int
2625 meta_raid_reset(
2626 	mdsetname_t	*sp,
2627 	mdname_t	*raidnp,
2628 	mdcmdopts_t	options,
2629 	md_error_t	*ep
2630 )
2631 {
2632 	md_raid_t	*raidp;
2633 	int		rval = -1;
2634 	int		col;
2635 
2636 	/* should have same set */
2637 	assert(sp != NULL);
2638 	assert((raidnp == NULL) ||
2639 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
2640 
2641 	/* reset all raids */
2642 	if (raidnp == NULL) {
2643 		mdnamelist_t	*raidnlp = NULL;
2644 		mdnamelist_t	*p;
2645 
2646 		/* for each raid */
2647 		rval = 0;
2648 		if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
2649 			return (-1);
2650 		for (p = raidnlp; (p != NULL); p = p->next) {
2651 			/* reset RAID */
2652 			raidnp = p->namep;
2653 			if (meta_raid_reset(sp, raidnp, options, ep) != 0) {
2654 				rval = -1;
2655 				break;
2656 			}
2657 		}
2658 
2659 		/* cleanup, return success */
2660 		metafreenamelist(raidnlp);
2661 		return (rval);
2662 	}
2663 
2664 	/* check name */
2665 	if (metachkmeta(raidnp, ep) != 0)
2666 		return (-1);
2667 
2668 	/* get unit structure */
2669 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
2670 		return (-1);
2671 
2672 	/* make sure nobody owns us */
2673 	if (MD_HAS_PARENT(raidp->common.parent)) {
2674 		return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev),
2675 		    raidnp->cname));
2676 	}
2677 
2678 	/* clear subdevices cache */
2679 	if (invalidate_columns(sp, raidnp, ep) != 0)
2680 		return (-1);
2681 
2682 	/* clear metadevice */
2683 	if (meta_reset(sp, raidnp, options, ep) != 0)
2684 		goto out;
2685 	rval = 0;	/* success */
2686 
2687 	/* let em know */
2688 	if (options & MDCMD_PRINT) {
2689 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is cleared\n"),
2690 		    raidnp->cname);
2691 		(void) fflush(stdout);
2692 	}
2693 
2694 	/* clear subdevices */
2695 	if (! (options & MDCMD_RECURSE))
2696 		goto out;
2697 
2698 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
2699 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2700 		mdname_t	*colnp = cp->colnamep;
2701 
2702 		/* only recurse on metadevices */
2703 		if (! metaismeta(colnp))
2704 			continue;
2705 
2706 		if (meta_reset_by_name(sp, colnp, options, ep) != 0)
2707 			rval = -1;
2708 	}
2709 
2710 	/* cleanup, return success */
2711 out:
2712 	meta_invalidate_name(raidnp);
2713 	return (rval);
2714 }
2715 
2716 /*
2717  * reports TRUE if any RAID component is in error
2718  */
2719 int
2720 meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names)
2721 {
2722 	mdnamelist_t	*nlp;
2723 	md_error_t	  status	= mdnullerror;
2724 	md_error_t	 *ep		= &status;
2725 	int		  any_errs	= FALSE;
2726 
2727 	for (nlp = raid_names; nlp; nlp = nlp->next) {
2728 		md_raid_t	*raidp;
2729 
2730 		if ((raidp = meta_get_raid(sp, nlp->namep, ep)) == NULL) {
2731 			any_errs |= TRUE;
2732 			goto out;
2733 		}
2734 		if (raidp->state != RUS_OKAY && raidp->state != RUS_INIT) {
2735 			any_errs |= TRUE;
2736 			goto out;
2737 		}
2738 	}
2739 out:
2740 	if (!mdisok(ep))
2741 		mdclrerror(ep);
2742 
2743 	return (any_errs);
2744 }
2745 /*
2746  * regen parity on a raid
2747  */
2748 int
2749 meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size,
2750 	md_error_t *ep)
2751 {
2752 	char			*miscname;
2753 	md_resync_ioctl_t	ri;
2754 
2755 	/* should have a set */
2756 	assert(sp != NULL);
2757 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
2758 
2759 	/* make sure we have a raid */
2760 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
2761 		return (-1);
2762 	if (strcmp(miscname, MD_RAID) != 0) {
2763 		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
2764 		    raidnp->cname));
2765 	}
2766 
2767 	/* start resync */
2768 	(void) memset(&ri, 0, sizeof (ri));
2769 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
2770 	ri.ri_mnum = meta_getminor(raidnp->dev);
2771 	ri.ri_copysize = size;
2772 	if (metaioctl(MD_IOCSETREGEN, &ri, &ri.mde, raidnp->cname) != 0)
2773 		return (mdstealerror(ep, &ri.mde));
2774 
2775 	/* return success */
2776 	return (0);
2777 }
2778 
2779 int
2780 meta_raid_check_component(
2781 	mdsetname_t	*sp,
2782 	mdname_t	*np,
2783 	md_dev64_t	mydevs,
2784 	md_error_t	*ep
2785 )
2786 {
2787 	md_raid_t	 *raid;
2788 	mdnm_params_t	nm;
2789 	md_getdevs_params_t	mgd;
2790 	side_t	sideno;
2791 	char	*miscname;
2792 	md_dev64_t	*mydev = NULL;
2793 	mdkey_t	key;
2794 	char	*pname = NULL, *t;
2795 	char	*ctd_name = NULL;
2796 	char	*devname = NULL;
2797 	int	len;
2798 	int	i;
2799 	int	rval = -1;
2800 
2801 	(void) memset(&nm, '\0', sizeof (nm));
2802 	if ((raid = meta_get_raid_common(sp, np, 0, ep)) == NULL)
2803 		return (-1);
2804 
2805 	if ((miscname = metagetmiscname(np, ep)) == NULL)
2806 		return (-1);
2807 
2808 	sideno = getmyside(sp, ep);
2809 
2810 	/* get count of underlying devices */
2811 
2812 	(void) memset(&mgd, '\0', sizeof (mgd));
2813 	MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
2814 	mgd.mnum = meta_getminor(np->dev);
2815 	mgd.cnt = 0;
2816 	mgd.devs = NULL;
2817 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
2818 		(void) mdstealerror(ep, &mgd.mde);
2819 		rval = 0;
2820 		goto out;
2821 	} else if (mgd.cnt <= 0) {
2822 		assert(mgd.cnt >= 0);
2823 		rval = 0;
2824 		goto out;
2825 	}
2826 
2827 	/*
2828 	 * Now get the data from the unit structure.
2829 	 * The compnamep stuff contains the data from
2830 	 * the namespace and we need the un_dev
2831 	 * from the unit structure.
2832 	 */
2833 	mydev = Zalloc(sizeof (*mydev) * mgd.cnt);
2834 	mgd.devs = (uintptr_t)mydev;
2835 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
2836 		(void) mdstealerror(ep, &mgd.mde);
2837 		rval = 0;
2838 		goto out;
2839 	} else if (mgd.cnt <= 0) {
2840 		assert(mgd.cnt >= 0);
2841 		rval = 0;
2842 		goto out;
2843 	}
2844 
2845 	for (i = 0; i < raid->orig_ncol; i++) {
2846 		md_raidcol_t	*colp = &raid->cols.cols_val[i];
2847 		mdname_t	*compnp = colp->colnamep;
2848 
2849 		if (mydevs == mydev[i]) {
2850 			/* Get the devname from the name space. */
2851 			if ((devname = meta_getnmentbydev(sp->setno, sideno,
2852 			    compnp->dev, NULL, NULL, &key, ep)) == NULL) {
2853 				goto out;
2854 			}
2855 
2856 			if (compnp->dev != meta_getminor(mydev[i])) {
2857 				/*
2858 				 * The minor numbers are different. Update
2859 				 * the namespace with the information from
2860 				 * the component.
2861 				 */
2862 
2863 				t = strrchr(devname, '/');
2864 				t++;
2865 				ctd_name = Strdup(t);
2866 
2867 				len = strlen(devname);
2868 				t = strrchr(devname, '/');
2869 				t++;
2870 				pname = Zalloc((len - strlen(t)) + 1);
2871 				(void) strncpy(pname, devname,
2872 				    (len - strlen(t)));
2873 
2874 				if (meta_update_namespace(sp->setno, sideno,
2875 				    ctd_name, mydev[i], key, pname,
2876 				    ep) != 0) {
2877 					goto out;
2878 				}
2879 			}
2880 			rval = 0;
2881 			break;
2882 		} /* End of if (mydevs == mydev[i]) */
2883 	} /* end of for loop */
2884 out:
2885 	if (pname != NULL)
2886 		Free(pname);
2887 	if (ctd_name != NULL)
2888 		Free(ctd_name);
2889 	if (devname != NULL)
2890 		Free(devname);
2891 	if (mydev != NULL)
2892 		Free(mydev);
2893 	return (rval);
2894 }
2895