xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_raid.c (revision 32529ec11ac8e93a62985721612a18ee6bb8659a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 /*
29  * Just in case we're not in a build environment, make sure that
30  * TEXT_DOMAIN gets set to something.
31  */
32 #if !defined(TEXT_DOMAIN)
33 #define	TEXT_DOMAIN "SYS_TEST"
34 #endif
35 
36 /*
37  * RAID operations
38  */
39 
40 #include <stdlib.h>
41 #include <meta.h>
42 #include <sys/lvm/md_raid.h>
43 #include <sys/lvm/mdvar.h>
44 #include <sys/lvm/md_convert.h>
45 #include <stddef.h>
46 
47 /*
48  * FUNCTION:    meta_get_raid_names()
49  * INPUT:       sp      - the set name to get raid from
50  *              options - options from the command line
51  * OUTPUT:      nlpp    - list of all raid names
52  *              ep      - return error pointer
53  * RETURNS:     int     - -1 if error, 0 success
54  * PURPOSE:     returns a list of all raid in the metadb
55  *              for all devices in the specified set
56  */
57 int
58 meta_get_raid_names(
59 	mdsetname_t	*sp,
60 	mdnamelist_t	**nlpp,
61 	int		options,
62 	md_error_t	*ep
63 )
64 {
65 	return (meta_get_names(MD_RAID, sp, nlpp, options, ep));
66 }
67 
68 /*
69  * free raid unit
70  */
71 void
72 meta_free_raid(
73 	md_raid_t	*raidp
74 )
75 {
76 	if (raidp->cols.cols_val != NULL) {
77 		assert(raidp->cols.cols_len > 0);
78 		Free(raidp->cols.cols_val);
79 	}
80 	Free(raidp);
81 }
82 
83 /*
84  * get raid (common)
85  */
86 md_raid_t *
87 meta_get_raid_common(
88 	mdsetname_t		*sp,
89 	mdname_t		*raidnp,
90 	int			fast,
91 	md_error_t		*ep
92 )
93 {
94 	mddrivename_t		*dnp = raidnp->drivenamep;
95 	char			*miscname;
96 	mr_unit_t		*mr;
97 	md_raid_t		*raidp;
98 	uint_t			ncol;
99 	uint_t			col;
100 	md_resync_ioctl_t	ri;
101 
102 	/* must have set */
103 	assert(sp != NULL);
104 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
105 
106 	/* short circuit */
107 	if (dnp->unitp != NULL) {
108 		assert(dnp->unitp->type == MD_METARAID);
109 		return ((md_raid_t *)dnp->unitp);
110 	}
111 
112 	/* get miscname and unit */
113 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
114 		return (NULL);
115 	if (strcmp(miscname, MD_RAID) != 0) {
116 		(void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
117 		    raidnp->cname);
118 		return (NULL);
119 	}
120 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
121 		return (NULL);
122 	assert(mr->c.un_type == MD_METARAID);
123 
124 	/* allocate raid */
125 	raidp = Zalloc(sizeof (*raidp));
126 
127 	/* allocate columns */
128 	ncol = mr->un_totalcolumncnt;
129 	assert(ncol >= MD_RAID_MIN);
130 	raidp->cols.cols_len = ncol;
131 	raidp->cols.cols_val = Zalloc(raidp->cols.cols_len *
132 	    sizeof (*raidp->cols.cols_val));
133 
134 	/* get common info */
135 	raidp->common.namep = raidnp;
136 	raidp->common.type = mr->c.un_type;
137 	raidp->common.state = mr->c.un_status;
138 	raidp->common.capabilities = mr->c.un_capabilities;
139 	raidp->common.parent = mr->c.un_parent;
140 	raidp->common.size = mr->c.un_total_blocks;
141 	raidp->common.user_flags = mr->c.un_user_flags;
142 	raidp->common.revision = mr->c.un_revision;
143 
144 	/* get options */
145 	raidp->state = mr->un_state;
146 	raidp->timestamp = mr->un_timestamp;
147 	raidp->interlace = mr->un_segsize;
148 	raidp->orig_ncol = mr->un_origcolumncnt;
149 	raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
150 	raidp->pw_count = mr->un_pwcnt;
151 	assert(raidp->orig_ncol <= ncol);
152 	if ((mr->un_hsp_id != MD_HSP_NONE) &&
153 	    ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id,
154 	    ep)) == NULL)) {
155 		goto out;
156 	}
157 
158 	/* get columns, update unit state */
159 	for (col = 0; (col < ncol); ++col) {
160 		mr_column_t	*rcp = &mr->un_column[col];
161 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
162 
163 		/* get column name */
164 		mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep);
165 		if (mdrcp->colnamep == NULL)
166 			goto out;
167 
168 		/* override any start_blk */
169 #ifdef	DEBUG
170 		if (metagetstart(sp, mdrcp->colnamep, ep) !=
171 		    MD_DISKADDR_ERROR) {
172 			assert(mdrcp->colnamep->start_blk <=
173 			    rcp->un_orig_devstart);
174 		} else {
175 			mdclrerror(ep);
176 		}
177 #endif	/* DEBUG */
178 		mdrcp->colnamep->start_blk = rcp->un_orig_devstart;
179 
180 		/* if hotspared */
181 		if (HOTSPARED(mr, col)) {
182 			/* get hotspare name */
183 			mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key,
184 			    fast, ep);
185 			if (mdrcp->hsnamep == NULL)
186 				goto out;
187 
188 			if (getenv("META_DEBUG_START_BLK") != NULL) {
189 				if (metagetstart(sp, mdrcp->hsnamep, ep) ==
190 				    MD_DISKADDR_ERROR)
191 					mdclrerror(ep);
192 
193 				if ((mdrcp->hsnamep->start_blk == 0) &&
194 				    (rcp->un_hs_pwstart != 0))
195 					md_eprintf(dgettext(TEXT_DOMAIN,
196 					    "%s: suspected bad start block,"
197 					    " seems labelled [raid]\n"),
198 					    mdrcp->hsnamep->cname);
199 
200 				if ((mdrcp->hsnamep->start_blk > 0) &&
201 				    (rcp->un_hs_pwstart == 0))
202 					md_eprintf(dgettext(TEXT_DOMAIN,
203 					    "%s: suspected bad start block, "
204 					    " seems unlabelled [raid]\n"),
205 					    mdrcp->hsnamep->cname);
206 			}
207 
208 			/* override any start_blk */
209 			mdrcp->hsnamep->start_blk = rcp->un_hs_devstart;
210 		}
211 
212 		/* get state, flags, and timestamp */
213 		mdrcp->state = rcp->un_devstate;
214 		mdrcp->flags = rcp->un_devflags;
215 		mdrcp->timestamp = rcp->un_devtimestamp;
216 	}
217 
218 	/* get resync info */
219 	(void) memset(&ri, 0, sizeof (ri));
220 	ri.ri_mnum = meta_getminor(raidnp->dev);
221 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
222 	if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) {
223 		(void) mdstealerror(ep, &ri.mde);
224 		goto out;
225 	}
226 	raidp->resync_flags = ri.ri_flags;
227 	raidp->percent_dirty = ri.ri_percent_dirty;
228 	raidp->percent_done = ri.ri_percent_done;
229 
230 	/* cleanup, return success */
231 	Free(mr);
232 	dnp->unitp = (md_common_t *)raidp;
233 	return (raidp);
234 
235 	/* cleanup, return error */
236 out:
237 	Free(mr);
238 	meta_free_raid(raidp);
239 	return (NULL);
240 }
241 
242 /*
243  * get raid
244  */
245 md_raid_t *
246 meta_get_raid(
247 	mdsetname_t		*sp,
248 	mdname_t		*raidnp,
249 	md_error_t		*ep
250 )
251 {
252 	return (meta_get_raid_common(sp, raidnp, 0, ep));
253 }
254 
255 /*
256  * check raid for dev
257  */
258 static int
259 in_raid(
260 	mdsetname_t	*sp,
261 	mdname_t	*raidnp,
262 	mdname_t	*np,
263 	diskaddr_t	slblk,
264 	diskaddr_t	nblks,
265 	md_error_t	*ep
266 )
267 {
268 	md_raid_t	*raidp;
269 	uint_t		col;
270 
271 	/* should be in the same set */
272 	assert(sp != NULL);
273 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
274 
275 	/* get unit */
276 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
277 		return (-1);
278 
279 	/* look in columns */
280 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
281 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
282 		mdname_t	*colnp = cp->colnamep;
283 		diskaddr_t	col_sblk;
284 		int		err;
285 
286 		/* check same drive since metagetstart() can fail */
287 		if ((err = meta_check_samedrive(np, colnp, ep)) < 0)
288 			return (-1);
289 		else if (err == 0)
290 			continue;
291 
292 		/* check overlap */
293 		if ((col_sblk = metagetstart(sp, colnp, ep)) ==
294 		    MD_DISKADDR_ERROR)
295 			return (-1);
296 		if (meta_check_overlap(raidnp->cname, np, slblk, nblks,
297 		    colnp, col_sblk, -1, ep) != 0) {
298 			return (-1);
299 		}
300 	}
301 
302 	/* return success */
303 	return (0);
304 }
305 
306 /*
307  * check to see if we're in a raid
308  */
309 int
310 meta_check_inraid(
311 	mdsetname_t	*sp,
312 	mdname_t	*np,
313 	diskaddr_t	slblk,
314 	diskaddr_t	nblks,
315 	md_error_t	*ep
316 )
317 {
318 	mdnamelist_t	*raidnlp = NULL;
319 	mdnamelist_t	*p;
320 	int		rval = 0;
321 
322 	/* should have a set */
323 	assert(sp != NULL);
324 
325 	/* for each raid */
326 	if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
327 		return (-1);
328 	for (p = raidnlp; (p != NULL); p = p->next) {
329 		mdname_t	*raidnp = p->namep;
330 
331 		/* check raid */
332 		if (in_raid(sp, raidnp, np, slblk, nblks, ep) != 0) {
333 			rval = -1;
334 			break;
335 		}
336 	}
337 
338 	/* cleanup, return success */
339 	metafreenamelist(raidnlp);
340 	return (rval);
341 }
342 
343 /*
344  * check column
345  */
346 int
347 meta_check_column(
348 	mdsetname_t	*sp,
349 	mdname_t	*np,
350 	md_error_t	*ep
351 )
352 {
353 	mdchkopts_t	options = (MDCHK_ALLOW_MDDB);
354 
355 	/* check for soft partitions */
356 	if (meta_sp_issp(sp, np, ep) != 0) {
357 		/* make sure we have a disk */
358 		if (metachkcomp(np, ep) != 0)
359 			return (-1);
360 	}
361 
362 	/* check to ensure that it is not already in use */
363 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
364 		return (-1);
365 	}
366 
367 	/* make sure it is in the set */
368 	if (meta_check_inset(sp, np, ep) != 0)
369 		return (-1);
370 
371 	/* make sure its not in a metadevice */
372 	if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
373 		return (-1);
374 
375 	/* return success */
376 	return (0);
377 }
378 
379 /*
380  * print raid
381  */
382 static int
383 raid_print(
384 	md_raid_t	*raidp,
385 	char		*fname,
386 	FILE		*fp,
387 	mdprtopts_t	options,
388 	md_error_t	*ep
389 )
390 {
391 	uint_t		col;
392 	int		rval = -1;
393 
394 
395 	if (options & PRINT_LARGEDEVICES) {
396 		if (raidp->common.revision != MD_64BIT_META_DEV) {
397 			rval = 0;
398 			goto out;
399 		}
400 	}
401 
402 	/* print name and -r */
403 	if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF)
404 		goto out;
405 
406 	/* print columns */
407 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
408 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
409 
410 		/* print column */
411 		/*
412 		 * If the path is our standard /dev/rdsk or /dev/md/rdsk
413 		 * then just print out the cxtxdxsx or the dx, metainit
414 		 * will assume the default, otherwise we need the full
415 		 * pathname to make sure this works as we intend.
416 		 */
417 		if ((strstr(mdrcp->colnamep->rname, "/dev/rdsk") == NULL) &&
418 		    (strstr(mdrcp->colnamep->rname, "/dev/md/rdsk") == NULL) &&
419 		    (strstr(mdrcp->colnamep->rname, "/dev/td/") == NULL)) {
420 			/* not standard path, print full pathname */
421 			if (fprintf(fp, " %s", mdrcp->colnamep->rname) == EOF)
422 				goto out;
423 		} else {
424 			/* standard path so print ctd or d number */
425 			if (fprintf(fp, " %s", mdrcp->colnamep->cname) == EOF)
426 				goto out;
427 		}
428 	}
429 
430 	if (fprintf(fp, " -k") == EOF)
431 		goto out;
432 
433 	/* print options */
434 	if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF)
435 		goto out;
436 
437 	if (raidp->pw_count != PWCNT_MIN)
438 		if (fprintf(fp, " -w %d", raidp->pw_count) == EOF)
439 			goto out;
440 
441 	if (raidp->hspnamep != NULL) {
442 		if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF)
443 			goto out;
444 	}
445 	if (raidp->orig_ncol != raidp->cols.cols_len) {
446 		assert(raidp->orig_ncol < raidp->cols.cols_len);
447 		if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF)
448 			goto out;
449 	}
450 
451 	/* terminate last line */
452 	if (fprintf(fp, "\n") == EOF)
453 		goto out;
454 
455 	/* success */
456 	rval = 0;
457 
458 	/* cleanup, return error */
459 out:
460 	if (rval != 0)
461 		(void) mdsyserror(ep, errno, fname);
462 	return (rval);
463 }
464 
465 static int
466 find_resyncing_column(
467 	md_raid_t *raidp
468 )
469 {
470 	int		col;
471 
472 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
473 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
474 		if (cp->state & RCS_RESYNC)
475 			return (col);
476 	}
477 
478 	/* No resyncing columns */
479 	return (-1);
480 }
481 
482 /*
483  * convert raid state to name
484  */
485 char *
486 raid_state_to_name(
487 	md_raid_t	*raidp,
488 	md_timeval32_t	*tvp,
489 	uint_t		tstate /* Errored tstate flags */
490 )
491 {
492 
493 	/* grab time */
494 	if (tvp != NULL)
495 		*tvp = raidp->timestamp;
496 
497 	/*
498 	 * If the device has a transient error state (due to it being DR'ed or
499 	 * failed) and there has been no I/O to it (the actual device is still
500 	 * marked as 'Okay') then we cannot know what the state is or what
501 	 * action to take on it. Therefore report the device as 'Unavailable'.
502 	 * A subsequent I/O to the device will cause the 'Okay' status to
503 	 * disappear if the device is actually gone and then we will print out
504 	 * the appropriate status.  The MD_INACCESSIBLE state is only set
505 	 * on the raid when we open it or probe it.  One the raid is open
506 	 * then we will just have regular error status on the device.
507 	 */
508 	if (tstate & MD_INACCESSIBLE) {
509 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
510 	}
511 
512 	/* resyncing */
513 	if (find_resyncing_column(raidp) >= 0)
514 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
515 
516 	/* everything else */
517 	switch (raidp->state) {
518 		case RUS_INIT :
519 			return (dgettext(TEXT_DOMAIN, "Initializing"));
520 		case RUS_OKAY :
521 			return (dgettext(TEXT_DOMAIN, "Okay"));
522 		case RUS_ERRED :
523 		/*FALLTHROUGH*/
524 		case RUS_LAST_ERRED :
525 			return (dgettext(TEXT_DOMAIN, "Needs Maintenance"));
526 		case RUS_DOI :
527 			return (dgettext(TEXT_DOMAIN, "Initialization Failed"));
528 		case RUS_REGEN :
529 			return (dgettext(TEXT_DOMAIN, "Regen"));
530 		default :
531 			return (dgettext(TEXT_DOMAIN, "invalid"));
532 	} /* switch */
533 }
534 
535 static int
536 find_erred_column(md_raid_t *raidp, rcs_state_t state)
537 {
538 	int		col;
539 
540 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
541 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
542 		if (cp->state & state)
543 			return (col);
544 	}
545 
546 	/* No erred columns */
547 	return (-1);
548 }
549 
550 /*
551  * convert raid state to repair action
552  */
553 char *
554 raid_state_to_action(md_raid_t *raidp)
555 {
556 	static char	emsg[1024];
557 	mdname_t	*raidnp = raidp->common.namep;
558 	int		err_col;
559 
560 	/* first check for full init failure */
561 	if (raidp->state & RUS_DOI) {
562 		(void) snprintf(emsg, sizeof (emsg),
563 		    "metaclear -f %s", raidnp->cname);
564 		return (emsg);
565 	}
566 
567 	/* replace errored or init errored raid column */
568 	if ((err_col = find_erred_column(raidp,
569 	    (RCS_ERRED | RCS_INIT_ERRED))) >= 0) {
570 		mdname_t	*colnp;
571 
572 		/* get column with error */
573 		assert(err_col < raidp->cols.cols_len);
574 		colnp = raidp->cols.cols_val[err_col].colnamep;
575 		(void) snprintf(emsg, sizeof (emsg),
576 		    "metareplace %s%s %s <%s>",
577 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
578 		    raidnp->cname, colnp->cname,
579 		    dgettext(TEXT_DOMAIN, "new device"));
580 		return (emsg);
581 	}
582 
583 
584 	/* replace last errored raid column */
585 	if ((err_col = find_erred_column(raidp, RCS_LAST_ERRED)) >= 0) {
586 		mdname_t	*colnp;
587 
588 		assert(err_col < raidp->cols.cols_len);
589 		colnp = raidp->cols.cols_val[err_col].colnamep;
590 		(void) snprintf(emsg, sizeof (emsg),
591 		    "metareplace %s %s %s <%s>",
592 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
593 		    raidnp->cname, colnp->cname,
594 		    dgettext(TEXT_DOMAIN, "new device"));
595 		return (emsg);
596 	}
597 
598 	/* OK */
599 	return (NULL);
600 }
601 
602 /*
603  * get printable raid column state
604  */
605 char *
606 raid_col_state_to_name(
607 	md_raidcol_t	*colp,
608 	md_timeval32_t	*tvp,
609 	uint_t		tstate
610 )
611 {
612 	/* grab time */
613 	if (tvp != NULL)
614 		*tvp = colp->timestamp;
615 
616 	if (tstate != 0) {
617 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
618 	}
619 
620 	/* everything else */
621 	switch (colp->state) {
622 	case RCS_INIT:
623 		return (dgettext(TEXT_DOMAIN, "Initializing"));
624 
625 	case RCS_OKAY:
626 		return (dgettext(TEXT_DOMAIN, "Okay"));
627 
628 	case RCS_INIT_ERRED:
629 	/*FALLTHROUGH*/
630 	case RCS_ERRED:
631 		return (dgettext(TEXT_DOMAIN, "Maintenance"));
632 
633 	case RCS_LAST_ERRED:
634 		return (dgettext(TEXT_DOMAIN, "Last Erred"));
635 
636 	case RCS_RESYNC:
637 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
638 
639 	default:
640 		return (dgettext(TEXT_DOMAIN, "Unknown"));
641 	}
642 }
643 
644 /*
645  * print raid column
646  */
647 static int
648 display_raid_device_info(
649 	mdsetname_t	*sp,
650 	md_raidcol_t	*colp,
651 	char		*fname,
652 	FILE		*fp,
653 	mdprtopts_t	options,
654 	int		print_len,
655 	uint_t		top_tstate, /* Errored tstate flags */
656 	md_error_t	*ep
657 )
658 {
659 	mdname_t	*namep = ((colp->hsnamep != NULL) ?
660 				    colp->hsnamep : colp->colnamep);
661 	char 		*devid = "";
662 	char		*cname = colp->colnamep->cname;
663 	diskaddr_t	start_blk;
664 	int		has_mddb;
665 	char		*has_mddb_str;
666 	char		*col_state;
667 	md_timeval32_t	tv;
668 	char		*hsname = ((colp->hsnamep != NULL) ?
669 			    colp->hsnamep->cname : "");
670 	int		rval = -1;
671 	mdname_t	*didnp = NULL;
672 	ddi_devid_t	dtp;
673 	uint_t		tstate = 0;
674 
675 	/* get info */
676 	if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR)
677 		return (-1);
678 	if ((has_mddb = metahasmddb(sp, namep, ep)) < 0)
679 		return (-1);
680 	if (has_mddb)
681 		has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
682 	else
683 		has_mddb_str = dgettext(TEXT_DOMAIN, "No");
684 
685 	if (metaismeta(namep)) {
686 		if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
687 			return (-1);
688 		col_state = raid_col_state_to_name(colp, &tv,
689 		    tstate & MD_DEV_ERRORED);
690 	} else {
691 		/*
692 		 * if top_tstate is set, that implies that you have
693 		 * a ctd type device with an unavailable metadevice
694 		 * on top of it. If so, print a - for it's state
695 		 */
696 		if (top_tstate != 0)
697 			col_state = "-";
698 		else
699 			col_state = raid_col_state_to_name(colp, &tv, tstate);
700 	}
701 
702 	/* populate the key in the name_p structure */
703 	if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL)
704 		return (-1);
705 
706 	/* determine if devid does NOT exist */
707 	if (options & PRINT_DEVID) {
708 		if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
709 			didnp->key, ep)) == NULL)
710 			devid = dgettext(TEXT_DOMAIN, "No ");
711 		else {
712 			devid = dgettext(TEXT_DOMAIN, "Yes");
713 			free(dtp);
714 		}
715 	}
716 	/* print column */
717 	/*
718 	 * Building a format string on the fly that will
719 	 * be used in (f)printf. This allows the length
720 	 * of the ctd to vary from small to large without
721 	 * looking horrible.
722 	 */
723 	if (! (options & PRINT_TIMES)) {
724 		if (fprintf(fp,
725 		    "\t%-*.*s %8lld     %5.5s %12.12s %5.5s %s\n",
726 		    print_len, print_len, cname, start_blk, has_mddb_str,
727 		    col_state, devid, hsname) == EOF) {
728 			goto out;
729 		}
730 	} else {
731 		char	*timep = meta_print_time(&tv);
732 
733 		if (fprintf(fp,
734 		    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
735 		    print_len, cname, start_blk, has_mddb_str,
736 		    col_state, devid, hsname, timep) == EOF) {
737 			goto out;
738 		}
739 	}
740 
741 	/* success */
742 	rval = 0;
743 
744 	/* cleanup, return error */
745 out:
746 	if (rval != 0)
747 		(void) mdsyserror(ep, errno, fname);
748 
749 	return (rval);
750 }
751 
752 /*
753  * print raid options
754  */
755 int
756 meta_print_raid_options(
757 	mdhspname_t	*hspnamep,
758 	char		*fname,
759 	FILE		*fp,
760 	md_error_t	*ep
761 )
762 {
763 	char		*hspname = ((hspnamep != NULL) ? hspnamep->hspname :
764 					dgettext(TEXT_DOMAIN, "none"));
765 	int		rval = -1;
766 
767 	/* print options */
768 	if (fprintf(fp, dgettext(TEXT_DOMAIN,
769 	    "    Hot spare pool: %s\n"), hspname) == EOF) {
770 		goto out;
771 	}
772 
773 	/* success */
774 	rval = 0;
775 
776 	/* cleanup, return error */
777 out:
778 	if (rval != 0)
779 		(void) mdsyserror(ep, errno, fname);
780 	return (rval);
781 }
782 
783 /*
784  * report raid
785  */
786 static int
787 raid_report(
788 	mdsetname_t	*sp,
789 	md_raid_t	*raidp,
790 	char		*fname,
791 	FILE		*fp,
792 	mdprtopts_t	options,
793 	md_error_t	*ep
794 )
795 {
796 	char		*p;
797 	uint_t		ncol = raidp->cols.cols_len;
798 	uint_t		orig_ncol = raidp->orig_ncol;
799 	diskaddr_t	column_size = raidp->column_size;
800 	char		*raid_state;
801 	md_timeval32_t	tv;
802 	char		*timep;
803 	uint_t		col;
804 	int		rval = -1;
805 	int		len = 0;
806 	uint_t		tstate = 0;
807 
808 	if (options & PRINT_LARGEDEVICES) {
809 		if (raidp->common.revision != MD_64BIT_META_DEV) {
810 			rval = 0;
811 			goto out;
812 		}
813 	}
814 
815 	/* print header */
816 	if (options & PRINT_HEADER) {
817 		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"),
818 		    raidp->common.namep->cname) == EOF) {
819 			goto out;
820 		}
821 
822 	}
823 
824 	/* print state */
825 	if (metaismeta(raidp->common.namep)) {
826 		if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0)
827 			return (-1);
828 	}
829 	tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */
830 	raid_state = raid_state_to_name(raidp, &tv, tstate);
831 	if (options & PRINT_TIMES) {
832 		timep = meta_print_time(&tv);
833 	} else {
834 		timep = "";
835 	}
836 
837 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
838 	    raid_state, timep) == EOF) {
839 		goto out;
840 	}
841 
842 	/*
843 	 * Display recovery action if we're marked in the Unavailable state.
844 	 */
845 	if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) {
846 		/* print what to do */
847 		if (tstate & MD_INACCESSIBLE) {
848 			char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */
849 
850 			if (metaislocalset(sp)) {
851 				sname[0] = '\0';
852 			} else {
853 				(void) snprintf(sname, MD_MAX_SETNAME + 3,
854 				    "-s %s", sp->setname);
855 			}
856 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
857 			    "    Invoke: metastat -i %s\n"), sname) == EOF) {
858 				goto out;
859 			}
860 		} else if ((p = raid_state_to_action(raidp)) != NULL) {
861 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
862 			    "    Invoke: %s\n"), p) == EOF) {
863 				goto out;
864 			}
865 		}
866 
867 		/* resync status */
868 		if (raidp->resync_flags & MD_RI_INPROGRESS) {
869 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
870 			    "    Resync in progress: %2d.%1d%% done\n"),
871 			    raidp->percent_done/10,
872 			    raidp->percent_done % 10) == EOF) {
873 				goto out;
874 			}
875 		} else if (raidp->resync_flags & MD_GROW_INPROGRESS) {
876 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
877 			    "    Initialization in progress: %2d.%1d%% "
878 			    "done\n"),
879 			    raidp->percent_done/10,
880 			    raidp->percent_done % 10) == EOF) {
881 				goto out;
882 			}
883 		} else if (raidp->state & RUS_REGEN) {
884 			if (fprintf(fp, dgettext(TEXT_DOMAIN,
885 			    "    Parity regeneration in progress: %2d.%1d%% "
886 			    "done\n"),
887 			    raidp->percent_done/10,
888 			    raidp->percent_done % 10) == EOF) {
889 				goto out;
890 			}
891 		}
892 	}
893 
894 	/* print hotspare pool */
895 	if (raidp->hspnamep != NULL) {
896 		if (meta_print_raid_options(raidp->hspnamep,
897 		    fname, fp, ep) != 0) {
898 			return (-1);
899 		}
900 	}
901 
902 	/* print interlace */
903 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Interlace: %lld blocks\n"),
904 	    raidp->interlace) == EOF) {
905 		goto out;
906 	}
907 
908 	/* print size */
909 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
910 	    raidp->common.size,
911 	    meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) {
912 		goto out;
913 	}
914 
915 	/* MD_DEBUG stuff */
916 	if (options & PRINT_DEBUG) {
917 		mdname_t	*raidnp = raidp->common.namep;
918 		mr_unit_t	*mr;
919 
920 		/* get additional info */
921 		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
922 			return (-1);
923 		assert(mr->c.un_type == MD_METARAID);
924 
925 		/* print prewrite count and size */
926 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
927 		    "    Prewrite Count: %u slots\n"),
928 		    mr->un_pwcnt) == EOF) {
929 			Free(mr);
930 			goto out;
931 		}
932 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
933 		    "    Prewrite Slot Size: %u blocks\n"),
934 		    (mr->un_pwsize / mr->un_pwcnt)) == EOF) {
935 			Free(mr);
936 			goto out;
937 		}
938 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
939 		    "    Prewrite Total Size: %u blocks\n"),
940 		    mr->un_pwsize) == EOF) {
941 			Free(mr);
942 			goto out;
943 		}
944 		Free(mr);
945 	}
946 
947 	/* print original devices */
948 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF)
949 		goto out;
950 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
951 	    column_size * (orig_ncol - 1),
952 	    meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE))
953 	    == EOF) {
954 		goto out;
955 	}
956 	/*
957 	 * Building a format string on the fly that will
958 	 * be used in (f)printf. This allows the length
959 	 * of the ctd to vary from small to large without
960 	 * looking horrible.
961 	 */
962 	for (col = 0; (col < orig_ncol); ++col) {
963 		len = max(len,
964 		    strlen(raidp->cols.cols_val[col].colnamep->cname));
965 	}
966 
967 	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
968 	len += 2;
969 
970 	if (! (options & PRINT_TIMES)) {
971 		if (fprintf(fp,
972 		    "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s  %s\n",
973 		    len, len,
974 		    dgettext(TEXT_DOMAIN, "Device"),
975 		    dgettext(TEXT_DOMAIN, "Start Block"),
976 		    dgettext(TEXT_DOMAIN, "Dbase"),
977 		    dgettext(TEXT_DOMAIN, "State"),
978 		    dgettext(TEXT_DOMAIN, "Reloc"),
979 		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
980 			goto out;
981 		}
982 	} else {
983 		if (fprintf(fp,
984 		    "\t%-*s  %5s  %-5s  %-11s  %-5s   %-9s  %s\n",
985 		    len,
986 		    dgettext(TEXT_DOMAIN, "Device"),
987 		    dgettext(TEXT_DOMAIN, "Start"),
988 		    dgettext(TEXT_DOMAIN, "Dbase"),
989 		    dgettext(TEXT_DOMAIN, "State"),
990 		    dgettext(TEXT_DOMAIN, "Reloc"),
991 		    dgettext(TEXT_DOMAIN, "Hot Spare"),
992 		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
993 			goto out;
994 		}
995 	}
996 	for (col = 0; (col < orig_ncol); ++col) {
997 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
998 
999 		if (display_raid_device_info(sp, mdrcp, fname, fp, options,
1000 		    len, tstate, ep) != 0) {
1001 			return (-1);
1002 		}
1003 	}
1004 
1005 	/* print concatenated devices */
1006 	if (col < ncol) {
1007 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
1008 		    "Concatenated Devices:\n")) == EOF) {
1009 			goto out;
1010 		}
1011 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
1012 		    "    Size: %lld blocks (%s)\n"),
1013 		    column_size * (ncol - orig_ncol),
1014 		    meta_number_to_string(column_size * (ncol - orig_ncol),
1015 		    DEV_BSIZE))
1016 		    == EOF) {
1017 			goto out;
1018 		}
1019 		/*
1020 		 * This allows the length
1021 		 * of the ctd to vary from small to large without
1022 		 * looking horrible.
1023 		 */
1024 		if (! (options & PRINT_TIMES)) {
1025 			if (fprintf(fp,
1026 			    "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
1027 			    len, len,
1028 			    dgettext(TEXT_DOMAIN, "Device"),
1029 			    dgettext(TEXT_DOMAIN, "Start Block"),
1030 			    dgettext(TEXT_DOMAIN, "Dbase"),
1031 			    dgettext(TEXT_DOMAIN, "State"),
1032 			    dgettext(TEXT_DOMAIN, "Reloc"),
1033 			    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
1034 				goto out;
1035 			}
1036 		} else {
1037 			if (fprintf(fp,
1038 			    "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
1039 			    len,
1040 			    dgettext(TEXT_DOMAIN, "Device"),
1041 			    dgettext(TEXT_DOMAIN, "Start"),
1042 			    dgettext(TEXT_DOMAIN, "Dbase"),
1043 			    dgettext(TEXT_DOMAIN, "State"),
1044 			    dgettext(TEXT_DOMAIN, "Reloc"),
1045 			    dgettext(TEXT_DOMAIN, "Hot Spare"),
1046 			    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
1047 				goto out;
1048 			}
1049 		}
1050 		assert(col == orig_ncol);
1051 		for (/* void */; (col < ncol); col++) {
1052 			md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
1053 
1054 			if (display_raid_device_info(sp, mdrcp, fname, fp,
1055 			    options, len, tstate, ep) != 0) {
1056 				return (-1);
1057 			}
1058 		}
1059 	}
1060 
1061 	/* add extra line */
1062 	if (fprintf(fp, "\n") == EOF)
1063 		goto out;
1064 
1065 	/* success */
1066 	rval = 0;
1067 
1068 	/* cleanup, return error */
1069 out:
1070 	if (rval != 0)
1071 		(void) mdsyserror(ep, errno, fname);
1072 	return (rval);
1073 }
1074 
1075 /*
1076  * print/report raid
1077  */
1078 int
1079 meta_raid_print(
1080 	mdsetname_t	*sp,
1081 	mdname_t	*raidnp,
1082 	mdnamelist_t	**nlpp,
1083 	char		*fname,
1084 	FILE		*fp,
1085 	mdprtopts_t	options,
1086 	md_error_t	*ep
1087 )
1088 {
1089 	md_raid_t	*raidp;
1090 	int		col;
1091 
1092 	/* should have same set */
1093 	assert(sp != NULL);
1094 	assert((raidnp == NULL) ||
1095 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
1096 
1097 	/* print all raids */
1098 	if (raidnp == NULL) {
1099 		mdnamelist_t	*nlp = NULL;
1100 		mdnamelist_t	*p;
1101 		int		cnt;
1102 		int		rval = 0;
1103 
1104 		/* get list */
1105 		if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0)
1106 			return (-1);
1107 		else if (cnt == 0)
1108 			return (0);
1109 
1110 		/* recurse */
1111 		for (p = nlp; (p != NULL); p = p->next) {
1112 			mdname_t	*np = p->namep;
1113 
1114 			if (meta_raid_print(sp, np, nlpp, fname, fp,
1115 			    options, ep) != 0)
1116 				rval = -1;
1117 		}
1118 
1119 		/* cleanup, return success */
1120 		metafreenamelist(nlp);
1121 		return (rval);
1122 	}
1123 
1124 	/* get unit structure */
1125 	if ((raidp = meta_get_raid_common(sp, raidnp,
1126 	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
1127 		return (-1);
1128 
1129 	/* check for parented */
1130 	if ((! (options & PRINT_SUBDEVS)) &&
1131 	    (MD_HAS_PARENT(raidp->common.parent))) {
1132 		return (0);
1133 	}
1134 
1135 	/* print appropriate detail */
1136 	if (options & PRINT_SHORT) {
1137 		if (raid_print(raidp, fname, fp, options, ep) != 0)
1138 			return (-1);
1139 	} else {
1140 		if (raid_report(sp, raidp, fname, fp, options, ep) != 0)
1141 			return (-1);
1142 	}
1143 
1144 	/* Recurse on components that are metadevices */
1145 	for (col = 0; col < raidp->cols.cols_len; ++col) {
1146 		md_raidcol_t	*colp = &raidp->cols.cols_val[col];
1147 		mdname_t	*namep = colp->colnamep;
1148 
1149 		if ((metaismeta(namep)) &&
1150 		    (meta_print_name(sp, namep, nlpp, fname, fp,
1151 		    (options | PRINT_HEADER | PRINT_SUBDEVS),
1152 		    NULL, ep) != 0)) {
1153 			return (-1);
1154 		}
1155 	}
1156 
1157 	return (0);
1158 }
1159 
1160 /*
1161  * adjust raid geometry
1162  */
1163 static int
1164 adjust_geom(
1165 	mdname_t	*raidnp,
1166 	mdname_t	*colnp,
1167 	mr_unit_t	*mr,
1168 	md_error_t	*ep
1169 )
1170 {
1171 	uint_t		round_cyl = 1;
1172 	mdgeom_t	*geomp;
1173 
1174 	/* get reinstructs */
1175 	if ((geomp = metagetgeom(colnp, ep)) == NULL)
1176 		return (-1);
1177 
1178 	/* adjust geometry */
1179 	if (meta_adjust_geom((md_unit_t *)mr, raidnp, geomp->write_reinstruct,
1180 	    geomp->read_reinstruct, round_cyl, ep) != 0)
1181 		return (-1);
1182 
1183 	/* return success */
1184 	return (0);
1185 }
1186 
1187 /*
1188  * add another column to the raid unit structure
1189  */
1190 static int
1191 attach_raid_col(
1192 	mdsetname_t	*sp,
1193 	mdname_t	*raidnp,
1194 	mr_unit_t	*mr,
1195 	mr_column_t	*mdc,
1196 	mdname_t	*colnp,
1197 	rcs_state_t	state,
1198 	mdnamelist_t	**keynlpp,
1199 	mdcmdopts_t	options,
1200 	md_error_t	*ep
1201 )
1202 {
1203 	diskaddr_t	column_size = mr->un_segsize * mr->un_segsincolumn;
1204 	diskaddr_t	size;
1205 	uint_t		 maxio;
1206 	mdcinfo_t	*cinfop;
1207 	md_timeval32_t	tmp_time;
1208 
1209 	/* setup state and timestamp */
1210 	mdc->un_devstate = state;
1211 	if (meta_gettimeofday(&tmp_time) == -1)
1212 		return (mdsyserror(ep, errno, NULL));
1213 
1214 	mdc->un_devtimestamp = tmp_time;
1215 	/* get start, size, and maxio */
1216 	if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
1217 	    MD_DISKADDR_ERROR)
1218 		return (-1);
1219 	if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
1220 		return (-1);
1221 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
1222 		return (-1);
1223 	maxio = cinfop->maxtransfer;
1224 
1225 	/* adjust start and size by prewrite */
1226 	mdc->un_orig_pwstart = mdc->un_orig_devstart;
1227 	mdc->un_orig_devstart += mr->un_pwsize;
1228 
1229 	/* make sure we still have something left */
1230 	if ((mdc->un_orig_devstart >= size) ||
1231 	    ((size - mdc->un_orig_devstart) < column_size)) {
1232 		return (mdsyserror(ep, ENOSPC, colnp->cname));
1233 	}
1234 	size -= mdc->un_orig_devstart;
1235 	if (maxio < mr->un_maxio) {
1236 		return (mdcomperror(ep, MDE_MAXIO,
1237 		    meta_getminor(raidnp->dev), colnp->dev, colnp->cname));
1238 	}
1239 
1240 	if (options & MDCMD_DOIT) {
1241 		/* store name in namespace */
1242 		if (add_key_name(sp, colnp, keynlpp, ep) != 0)
1243 			return (-1);
1244 	}
1245 
1246 	/* setup column */
1247 	mdc->un_orig_dev = colnp->dev;
1248 	mdc->un_orig_key = colnp->key;
1249 	mdc->un_dev = colnp->dev;
1250 	mdc->un_pwstart = mdc->un_orig_pwstart;
1251 	mdc->un_devstart = mdc->un_orig_devstart;
1252 	mdc->un_alt_dev = NODEV64;
1253 	mdc->un_alt_pwstart = 0;
1254 	mdc->un_alt_devstart = 0;
1255 	mdc->un_hs_id = 0;
1256 
1257 	/* add the size (we use) of the device to the total */
1258 	mr->c.un_actual_tb += column_size;
1259 
1260 	/* adjust geometry */
1261 	if (adjust_geom(raidnp, colnp, mr, ep) != 0)
1262 		return (-1);
1263 
1264 	/* count column */
1265 	mr->un_totalcolumncnt++;
1266 
1267 	/* return success */
1268 	return (0);
1269 }
1270 
1271 /*
1272  * invalidate column names
1273  */
1274 static int
1275 invalidate_columns(
1276 	mdsetname_t	*sp,
1277 	mdname_t	*raidnp,
1278 	md_error_t	*ep
1279 )
1280 {
1281 	md_raid_t	*raidp;
1282 	uint_t		col;
1283 
1284 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
1285 		return (-1);
1286 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
1287 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
1288 		mdname_t	*colnp = cp->colnamep;
1289 
1290 		meta_invalidate_name(colnp);
1291 	}
1292 	return (0);
1293 }
1294 
1295 /*
1296  * attach columns to raid
1297  */
1298 int
1299 meta_raid_attach(
1300 	mdsetname_t		*sp,
1301 	mdname_t		*raidnp,
1302 	mdnamelist_t		*colnlp,
1303 	mdcmdopts_t		options,
1304 	md_error_t		*ep
1305 )
1306 {
1307 	uint_t			concat_cnt = 0;
1308 	mdnamelist_t		*p;
1309 	mr_unit_t		*old_mr;
1310 	mr_unit_t		*new_mr;
1311 	size_t			old_rusize;
1312 	size_t			new_rusize;
1313 	mdnamelist_t		*keynlp = NULL;
1314 	md_grow_params_t	mgp;
1315 	int			rval = -1;
1316 	int			create_flag = MD_CRO_32BIT;
1317 
1318 	/* should have a set */
1319 	assert(sp != NULL);
1320 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1321 
1322 	/* check type */
1323 	if (metachkmeta(raidnp, ep) != 0)
1324 		return (-1);
1325 
1326 	/* check and count new columns */
1327 	for (p = colnlp; (p != NULL); p = p->next) {
1328 		mdname_t	*np = p->namep;
1329 		mdnamelist_t	*p2;
1330 
1331 		/* check against existing devices */
1332 		if (meta_check_column(sp, np, ep) != 0)
1333 			return (-1);
1334 
1335 		/* check against ourselves */
1336 		for (p2 = p->next; (p2 != NULL); p2 = p2->next) {
1337 			if (meta_check_overlap(np->cname, np, 0, -1,
1338 			    p2->namep, 0, -1, ep) != 0) {
1339 				return (-1);
1340 			}
1341 		}
1342 
1343 		/* count */
1344 		++concat_cnt;
1345 	}
1346 
1347 	/* get old unit */
1348 	if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1349 		return (-1);
1350 
1351 	/*
1352 	 * calculate the size needed for the new raid unit and allocate
1353 	 * the appropriate structure. allocate new unit.
1354 	 */
1355 	old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]);
1356 	old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]);
1357 	new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]);
1358 	new_rusize += (old_mr->un_totalcolumncnt + concat_cnt)
1359 	    * sizeof (new_mr->un_column[0]);
1360 	new_mr = Zalloc(new_rusize);
1361 	(void) memcpy(new_mr, old_mr, old_rusize);
1362 
1363 	/* We always want a do-it, this is for attach_raid_col below */
1364 	options |= MDCMD_DOIT;
1365 
1366 	/* build new unit structure */
1367 	for (p = colnlp; (p != NULL); p = p->next) {
1368 		mdname_t	*colnp = p->namep;
1369 		mr_column_t	*mdc;
1370 
1371 		/* attach column */
1372 		mdc = &new_mr->un_column[new_mr->un_totalcolumncnt];
1373 		if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp,
1374 		    RCS_INIT, &keynlp, options, ep) != 0) {
1375 			goto out;
1376 		}
1377 	}
1378 	assert(new_mr->un_totalcolumncnt
1379 	    == (old_mr->un_totalcolumncnt + concat_cnt));
1380 
1381 
1382 	create_flag = meta_check_devicesize(new_mr->c.un_total_blocks);
1383 
1384 	/* grow raid */
1385 	(void) memset(&mgp, 0, sizeof (mgp));
1386 	mgp.mnum = MD_SID(new_mr);
1387 	MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno);
1388 	mgp.size = new_rusize;
1389 	mgp.mdp = (uintptr_t)new_mr;
1390 
1391 	if (create_flag == MD_CRO_32BIT) {
1392 		mgp.options = MD_CRO_32BIT;
1393 		new_mr->c.un_revision = MD_32BIT_META_DEV;
1394 	} else {
1395 		mgp.options = MD_CRO_64BIT;
1396 		new_mr->c.un_revision = MD_64BIT_META_DEV;
1397 	}
1398 	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
1399 		(void) mdstealerror(ep, &mgp.mde);
1400 		goto out;
1401 	}
1402 
1403 	/* clear cache */
1404 	if (invalidate_columns(sp, raidnp, ep) != 0)
1405 		goto out;
1406 	meta_invalidate_name(raidnp);
1407 
1408 	/* let em know */
1409 	if (options & MDCMD_PRINT) {
1410 		if (concat_cnt == 1) {
1411 			(void) printf(dgettext(TEXT_DOMAIN,
1412 			    "%s: component is attached\n"),
1413 			    raidnp->cname);
1414 		} else {
1415 			(void) printf(dgettext(TEXT_DOMAIN,
1416 			    "%s: components are attached\n"),
1417 			    raidnp->cname);
1418 		}
1419 		(void) fflush(stdout);
1420 	}
1421 
1422 
1423 	/* grow any parents */
1424 	if (meta_concat_parent(sp, raidnp, ep) != 0)
1425 		goto out;
1426 	rval = 0;	/* success */
1427 
1428 	/* cleanup, return error */
1429 out:
1430 	Free(old_mr);
1431 	Free(new_mr);
1432 	if (rval != 0)
1433 		(void) del_key_names(sp, keynlp, NULL);
1434 	metafreenamelist(keynlp);
1435 	return (rval);
1436 }
1437 
1438 /*
1439  * get raid parameters
1440  */
1441 int
1442 meta_raid_get_params(
1443 	mdsetname_t	*sp,
1444 	mdname_t	*raidnp,
1445 	mr_params_t	*paramsp,
1446 	md_error_t	*ep
1447 )
1448 {
1449 	md_raid_t	*raidp;
1450 
1451 	/* should have a set */
1452 	assert(sp != NULL);
1453 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1454 
1455 	/* check name */
1456 	if (metachkmeta(raidnp, ep) != 0)
1457 		return (-1);
1458 
1459 	/* get unit */
1460 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
1461 		return (-1);
1462 
1463 	/* return parameters */
1464 	(void) memset(paramsp, 0, sizeof (*paramsp));
1465 	if (raidp->hspnamep == NULL)
1466 		paramsp->hsp_id = MD_HSP_NONE;
1467 	else
1468 		paramsp->hsp_id = raidp->hspnamep->hsp;
1469 	return (0);
1470 }
1471 
1472 /*
1473  * set raid parameters
1474  */
1475 int
1476 meta_raid_set_params(
1477 	mdsetname_t		*sp,
1478 	mdname_t		*raidnp,
1479 	mr_params_t		*paramsp,
1480 	md_error_t		*ep
1481 )
1482 {
1483 	md_raid_params_t	msp;
1484 
1485 	/* should have a set */
1486 	assert(sp != NULL);
1487 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1488 
1489 	/* check name */
1490 	if (metachkmeta(raidnp, ep) != 0)
1491 		return (-1);
1492 
1493 	/* set parameters */
1494 	(void) memset(&msp, 0, sizeof (msp));
1495 	MD_SETDRIVERNAME(&msp, MD_RAID, sp->setno);
1496 	msp.mnum = meta_getminor(raidnp->dev);
1497 	msp.params = *paramsp;
1498 	if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, raidnp->cname) != 0)
1499 		return (mdstealerror(ep, &msp.mde));
1500 
1501 	/* clear cache */
1502 	meta_invalidate_name(raidnp);
1503 
1504 	/* return success */
1505 	return (0);
1506 }
1507 
1508 /*
1509  * validate raid replace column
1510  */
1511 static int
1512 validate_new_raid(
1513 	mdsetname_t	*sp,
1514 	mdname_t	*raidnp,
1515 	mdname_t	*colnp,
1516 	replace_params_t *paramsp,
1517 	int		dup_ok,
1518 	md_error_t	*ep
1519 )
1520 {
1521 	mr_unit_t	*mr;
1522 	diskaddr_t	column_size;
1523 	diskaddr_t	label;
1524 	mdcinfo_t	*cinfop;
1525 	int		rval = -1;
1526 
1527 	/* get raid unit */
1528 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1529 		return (-1);
1530 	column_size = mr->un_segsize * mr->un_segsincolumn;
1531 
1532 	/* check it out */
1533 	if (meta_check_column(sp, colnp, ep) != 0) {
1534 		if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY)))
1535 			goto out;
1536 		mdclrerror(ep);
1537 	}
1538 	if ((paramsp->number_blks = metagetsize(colnp, ep)) ==
1539 	    MD_DISKADDR_ERROR)
1540 		goto out;
1541 	if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR)
1542 		goto out;
1543 	paramsp->has_label = ((label > 0) ? 1 : 0);
1544 	if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) ==
1545 	    MD_DISKADDR_ERROR)
1546 		goto out;
1547 	if ((paramsp->number_blks - paramsp->start_blk) < column_size) {
1548 		(void) mdsyserror(ep, ENOSPC, colnp->cname);
1549 		goto out;
1550 	}
1551 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
1552 		goto out;
1553 	if (cinfop->maxtransfer < mr->un_maxio) {
1554 		(void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev),
1555 		    colnp->dev, colnp->cname);
1556 		goto out;
1557 	}
1558 
1559 	/* success */
1560 	rval = 0;
1561 
1562 	/* cleanup, return error */
1563 out:
1564 	Free(mr);
1565 	return (rval);
1566 }
1567 
1568 /*
1569  * replace raid column
1570  */
1571 int
1572 meta_raid_replace(
1573 	mdsetname_t		*sp,
1574 	mdname_t		*raidnp,
1575 	mdname_t		*oldnp,
1576 	mdname_t		*newnp,
1577 	mdcmdopts_t		options,
1578 	md_error_t		*ep
1579 )
1580 {
1581 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
1582 	replace_params_t	params;
1583 	md_dev64_t		old_dev, new_dev;
1584 	diskaddr_t		new_start_blk, new_end_blk;
1585 	int			rebind;
1586 	mr_unit_t		*mr;
1587 	char			*new_devidp = NULL;
1588 	md_error_t		xep = mdnullerror;
1589 	int			ret;
1590 	md_set_desc		*sd;
1591 	uint_t			tstate;
1592 
1593 	/* should have same set */
1594 	assert(sp != NULL);
1595 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1596 
1597 	/* check name */
1598 	if (metachkmeta(raidnp, ep) != 0)
1599 		return (-1);
1600 
1601 	/* save new binding incase this is a rebind where oldnp==newnp */
1602 	new_dev = newnp->dev;
1603 	new_start_blk = newnp->start_blk;
1604 	new_end_blk = newnp->end_blk;
1605 
1606 	/* invalidate, then get the raid (fill in oldnp from metadb) */
1607 	meta_invalidate_name(raidnp);
1608 	if (meta_get_raid(sp, raidnp, ep) == NULL)
1609 		return (-1);
1610 
1611 	/* can't replace a component if the raid inaccessible */
1612 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
1613 		return (-1);
1614 	}
1615 	if (tstate & MD_INACCESSIBLE) {
1616 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
1617 		    meta_getminor(raidnp->dev), raidnp->cname));
1618 	}
1619 
1620 	/* the old device binding is now established */
1621 	if ((old_dev = oldnp->dev) == NODEV64)
1622 		return (mdsyserror(ep, ENODEV, oldnp->cname));
1623 
1624 
1625 	/* setup raid info */
1626 	(void) memset(&params, 0, sizeof (params));
1627 	params.mnum = meta_getminor(raidnp->dev);
1628 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
1629 	params.old_dev = old_dev;
1630 	params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP;
1631 
1632 	if (options & MDCMD_CLUSTER_REPLACE) {
1633 		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
1634 			return (NULL);
1635 		Free(mr);
1636 		params.options = MDIOCTL_NO_RESYNC_RAID;
1637 		params.number_blks = metagetsize(newnp, ep);
1638 		if ((metagetlabel(newnp, ep) == MD_DISKADDR_ERROR) ||
1639 		    (metagetlabel(newnp, ep) == 0))
1640 			params.has_label = 0;
1641 		else
1642 			params.has_label = 1;
1643 		params.start_blk = metagetstart(sp, newnp, ep);
1644 	} else {
1645 		if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
1646 		    (old_dev != new_dev)) {
1647 			rebind = 1;
1648 		} else {
1649 			rebind = 0;
1650 		}
1651 		if (rebind) {
1652 			newnp->dev = new_dev;
1653 			newnp->start_blk = new_start_blk;
1654 			newnp->end_blk = new_end_blk;
1655 		}
1656 
1657 		/*
1658 		 * Save a copy of the devid associated with the new disk, the
1659 		 * reason is that the checks for the column (meta_check_column)
1660 		 * via validate_new_raid(), could cause the disk's devid to be
1661 		 * changed to that of the devid that is currently stored in the
1662 		 * replica namespace for the disk in question. This devid could
1663 		 * be stale if we are replacing the disk. The actual function
1664 		 * that overwrites the devid is dr2drivedesc().
1665 		 */
1666 
1667 		/* don't setup new_devid if no devid's or MN diskset */
1668 		if (newnp->drivenamep->devid != NULL)
1669 			new_devidp = Strdup(newnp->drivenamep->devid);
1670 
1671 		if (!metaislocalset(sp)) {
1672 			if ((sd = metaget_setdesc(sp, ep)) == NULL)
1673 				return (-1);
1674 			if (MD_MNSET_DESC(sd))
1675 				new_devidp = NULL;
1676 		}
1677 
1678 		/* check out new (sets up start_blk, has_label, number_blks) */
1679 		if (validate_new_raid(sp, raidnp, newnp, &params, rebind,
1680 		    ep) != 0) {
1681 			Free(new_devidp);
1682 			return (-1);
1683 		}
1684 
1685 		/*
1686 		 * Copy back the saved devid.
1687 		 */
1688 		Free(newnp->drivenamep->devid);
1689 		if (new_devidp) {
1690 			newnp->drivenamep->devid = Strdup(new_devidp);
1691 			Free(new_devidp);
1692 		}
1693 	}
1694 
1695 	/* store name in namespace, allocate new key */
1696 	if (add_key_name(sp, newnp, NULL, ep) != 0)
1697 		return (-1);
1698 
1699 	if (rebind && !metaislocalset(sp)) {
1700 		/*
1701 		 * We are 'rebind'ing a disk that is in a diskset so as well
1702 		 * as updating the diskset's namespace the local set needs
1703 		 * to be updated because it also contains a reference to the
1704 		 * disk in question.
1705 		 */
1706 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
1707 		    newnp->cname, ep);
1708 
1709 		if (ret != METADEVADM_SUCCESS) {
1710 			(void) del_key_name(sp, newnp, &xep);
1711 			return (-1);
1712 		}
1713 	}
1714 
1715 	/* replace column */
1716 	params.new_dev = new_dev;
1717 	params.new_key = newnp->key;
1718 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
1719 		(void) del_key_name(sp, newnp, ep);
1720 		return (mdstealerror(ep, &params.mde));
1721 	}
1722 
1723 	/* clear cache */
1724 	meta_invalidate_name(oldnp);
1725 	meta_invalidate_name(newnp);
1726 	meta_invalidate_name(raidnp);
1727 
1728 	/* let em know */
1729 	if (options & MDCMD_PRINT) {
1730 		(void) printf(dgettext(TEXT_DOMAIN,
1731 		    "%s: device %s is replaced with %s\n"),
1732 		    raidnp->cname, oldnp->cname, newnp->cname);
1733 		(void) fflush(stdout);
1734 	}
1735 
1736 	/* return success */
1737 	return (0);
1738 }
1739 
1740 /*
1741  * enable raid column
1742  */
1743 int
1744 meta_raid_enable(
1745 	mdsetname_t		*sp,
1746 	mdname_t		*raidnp,
1747 	mdname_t		*colnp,
1748 	mdcmdopts_t		options,
1749 	md_error_t		*ep
1750 )
1751 {
1752 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
1753 	replace_params_t	params;
1754 	md_dev64_t		fs_dev, del_dev;
1755 	int			err = 0;
1756 	char			*devnm;
1757 	int			ret;
1758 	uint_t			tstate;
1759 
1760 	/* should have same set */
1761 	assert(sp != NULL);
1762 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
1763 
1764 	/* check name */
1765 	if (metachkmeta(raidnp, ep) != 0)
1766 		return (-1);
1767 
1768 	/* get the file_system dev binding */
1769 	if (meta_getdev(sp, colnp, ep) != 0)
1770 		return (-1);
1771 	fs_dev = colnp->dev;
1772 
1773 	/* get the raid unit (fill in colnp->dev with metadb version) */
1774 	meta_invalidate_name(raidnp);
1775 	if (meta_get_raid(sp, raidnp, ep) == NULL)
1776 		return (-1);
1777 
1778 	/* enabling a component can't work if the raid inaccessible */
1779 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
1780 		return (-1);
1781 	}
1782 	if (tstate & MD_INACCESSIBLE) {
1783 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
1784 		    meta_getminor(raidnp->dev), raidnp->cname));
1785 	}
1786 
1787 	/* the metadb device binding is now established */
1788 	if (colnp->dev == NODEV64)
1789 		return (mdsyserror(ep, ENODEV, colnp->cname));
1790 
1791 	/*
1792 	 * check for the case where the dev_t has changed between the
1793 	 * filesystem and the metadb.  This is called a rebind, and
1794 	 * is handled by meta_raid_replace.
1795 	 */
1796 	if (fs_dev != colnp->dev) {
1797 		/*
1798 		 * Save the devt of mddb version
1799 		 */
1800 		del_dev = colnp->dev;
1801 
1802 		/* establish file system binding with invalid start/end */
1803 		colnp->dev = fs_dev;
1804 		colnp->start_blk = -1;
1805 		colnp->end_blk = -1;
1806 		err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep);
1807 
1808 		/*
1809 		 * Don't do it if meta_raid_replace returns an error
1810 		 */
1811 		if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD,
1812 			del_dev, NULL, NULL, &colnp->key, ep)) != NULL) {
1813 			(void) del_key_name(sp, colnp, ep);
1814 			Free(devnm);
1815 		}
1816 		return (err);
1817 	}
1818 
1819 	/* setup raid info */
1820 	(void) memset(&params, 0, sizeof (params));
1821 	params.mnum = meta_getminor(raidnp->dev);
1822 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
1823 	params.old_dev = params.new_dev = colnp->dev;
1824 	if (force)
1825 		params.cmd = FORCE_ENABLE_COMP;
1826 	else
1827 		params.cmd = ENABLE_COMP;
1828 
1829 	/* check it out */
1830 	if (validate_new_raid(sp, raidnp, colnp, &params, 1, ep) != 0)
1831 		return (-1);
1832 
1833 	/* enable column */
1834 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
1835 		return (mdstealerror(ep, &params.mde));
1836 
1837 	/*
1838 	 * are we dealing with a non-local set? If so need to update the
1839 	 * local namespace so that the disk record has the correct devid.
1840 	 */
1841 	if (!metaislocalset(sp)) {
1842 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname,
1843 		    ep);
1844 
1845 		if (ret != METADEVADM_SUCCESS) {
1846 			/*
1847 			 * Failed to update the local set. Nothing to do here
1848 			 * apart from report the error. The namespace is
1849 			 * most likely broken and some form of remedial
1850 			 * recovery is going to be required.
1851 			 */
1852 			mde_perror(ep, "");
1853 			mdclrerror(ep);
1854 		}
1855 	}
1856 
1857 	/* clear cache */
1858 	meta_invalidate_name(colnp);
1859 	meta_invalidate_name(raidnp);
1860 
1861 	/* let em know */
1862 	if (options & MDCMD_PRINT) {
1863 		(void) printf(dgettext(TEXT_DOMAIN,
1864 		    "%s: device %s is enabled\n"),
1865 		    raidnp->cname, colnp->cname);
1866 		(void) fflush(stdout);
1867 	}
1868 
1869 	/* return success */
1870 	return (0);
1871 }
1872 
1873 /*
1874  * check for dups in the raid itself
1875  */
1876 static int
1877 check_twice(
1878 	md_raid_t	*raidp,
1879 	uint_t		col,
1880 	md_error_t	*ep
1881 )
1882 {
1883 	mdname_t	*raidnp = raidp->common.namep;
1884 	mdname_t	*thisnp;
1885 	uint_t		c;
1886 
1887 	thisnp = raidp->cols.cols_val[col].colnamep;
1888 	for (c = 0; (c < col); ++c) {
1889 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[c];
1890 		mdname_t	*colnp = mdcp->colnamep;
1891 
1892 		if (meta_check_overlap(raidnp->cname, thisnp, 0, -1,
1893 		    colnp, 0, -1, ep) != 0) {
1894 			return (-1);
1895 		}
1896 	}
1897 	return (0);
1898 }
1899 
1900 /*
1901  * default raid interlace
1902  */
1903 diskaddr_t
1904 meta_default_raid_interlace(void)
1905 {
1906 	diskaddr_t	interlace;
1907 
1908 	/* default to 512k, round up if necessary */
1909 	interlace = btodb(512 * 1024);
1910 	if (interlace < lbtodb(MININTERLACE))
1911 		interlace = roundup(MININTERLACE, interlace);
1912 	return (interlace);
1913 }
1914 
1915 /*
1916  * convert interlaces
1917  */
1918 int
1919 meta_raid_check_interlace(
1920 	diskaddr_t	interlace,
1921 	char		*uname,
1922 	md_error_t	*ep
1923 )
1924 {
1925 	if ((interlace < btodb(RAID_MIN_INTERLACE)) ||
1926 	    (interlace > btodb(MAXINTERLACE))) {
1927 		return (mderror(ep, MDE_BAD_INTERLACE, uname));
1928 	}
1929 	return (0);
1930 }
1931 
1932 /*
1933  * check raid
1934  */
1935 int
1936 meta_check_raid(
1937 	mdsetname_t	*sp,
1938 	md_raid_t	*raidp,
1939 	mdcmdopts_t	options,
1940 	md_error_t	*ep
1941 )
1942 {
1943 	mdname_t	*raidnp = raidp->common.namep;
1944 	int		doit = ((options & MDCMD_DOIT) ? 1 : 0);
1945 	int		updateit = ((options & MDCMD_UPDATE) ? 1 : 0);
1946 	uint_t		ncol;
1947 	uint_t		col;
1948 	minor_t		mnum = meta_getminor(raidnp->dev);
1949 
1950 	/* check number */
1951 	if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) ||
1952 	    (raidp->orig_ncol > ncol)) {
1953 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
1954 	}
1955 
1956 	/* compute default interlace */
1957 	if (raidp->interlace == 0) {
1958 		raidp->interlace = meta_default_raid_interlace();
1959 	}
1960 
1961 	/* check state */
1962 	switch (raidp->state) {
1963 	case RUS_INIT:
1964 	case RUS_OKAY:
1965 		break;
1966 
1967 	default:
1968 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
1969 	}
1970 
1971 	/* check interlace */
1972 	if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0)
1973 		return (-1);
1974 
1975 	/* check hotspare pool name */
1976 	if (doit) {
1977 		if ((raidp->hspnamep != NULL) &&
1978 		    (metachkhsp(sp, raidp->hspnamep, ep) != 0)) {
1979 			return (-1);
1980 		}
1981 	}
1982 
1983 	/* check columns */
1984 	for (col = 0; (col < ncol); ++col) {
1985 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
1986 		mdname_t	*colnp = mdcp->colnamep;
1987 		diskaddr_t	start_blk, size;
1988 
1989 		/* setup column */
1990 		if (raidp->state == RUS_INIT)
1991 			mdcp->state = RCS_INIT;
1992 		else
1993 			mdcp->state = RCS_OKAY;
1994 
1995 		/* check column */
1996 		if (!updateit) {
1997 			if (meta_check_column(sp, colnp, ep) != 0)
1998 				return (-1);
1999 			if (((start_blk = metagetstart(sp, colnp, ep)) ==
2000 			    MD_DISKADDR_ERROR) || ((size = metagetsize(colnp,
2001 			    ep)) == MD_DISKADDR_ERROR)) {
2002 				return (-1);
2003 			}
2004 			if (start_blk >= size)
2005 				return (mdsyserror(ep, ENOSPC, colnp->cname));
2006 			size -= start_blk;
2007 			size = rounddown(size, raidp->interlace);
2008 			if (size == 0)
2009 				return (mdsyserror(ep, ENOSPC, colnp->cname));
2010 		}
2011 
2012 		/* check this raid too */
2013 		if (check_twice(raidp, col, ep) != 0)
2014 			return (-1);
2015 	}
2016 
2017 	/* return success */
2018 	return (0);
2019 }
2020 
2021 /*
2022  * setup raid geometry
2023  */
2024 static int
2025 raid_geom(
2026 	md_raid_t	*raidp,
2027 	mr_unit_t	*mr,
2028 	md_error_t	*ep
2029 )
2030 {
2031 	uint_t		write_reinstruct = 0;
2032 	uint_t		read_reinstruct = 0;
2033 	uint_t		round_cyl = 1;
2034 	uint_t		col;
2035 	mdgeom_t	*geomp;
2036 
2037 	/* get worst reinstructs */
2038 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
2039 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
2040 		mdname_t	*colnp = mdcp->colnamep;
2041 
2042 		if ((geomp = metagetgeom(colnp, ep)) == NULL)
2043 			return (-1);
2044 		if (geomp->write_reinstruct > write_reinstruct)
2045 			write_reinstruct = geomp->write_reinstruct;
2046 		if (geomp->read_reinstruct > read_reinstruct)
2047 			read_reinstruct = geomp->read_reinstruct;
2048 	}
2049 
2050 	/* setup geometry from first column */
2051 	assert(raidp->cols.cols_len > 0);
2052 	if ((geomp = metagetgeom(raidp->cols.cols_val[0].colnamep,
2053 	    ep)) == NULL) {
2054 		return (-1);
2055 	}
2056 	if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp,
2057 	    write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
2058 		return (-1);
2059 
2060 	/* return success */
2061 	return (0);
2062 }
2063 
2064 int
2065 meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state)
2066 {
2067 	int 	statecnt = 0;
2068 	int	col;
2069 
2070 	for (col = 0; col < mr->un_totalcolumncnt; col++)
2071 		if (mr->un_column[col].un_devstate & state)
2072 			statecnt++;
2073 	return (statecnt);
2074 }
2075 /*
2076  * validate that a raid device being created with the -k flag is a real
2077  * raid device
2078  */
2079 int
2080 meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr)
2081 {
2082 	long long	buf[DEV_BSIZE / sizeof (long long)];
2083 	raid_pwhdr_t	pwhdr;
2084 	raid_pwhdr_t	*rpw = &pwhdr;
2085 	minor_t		mnum;
2086 	int		col;
2087 	int		fd;
2088 
2089 	for (col = 0; col < mr->un_totalcolumncnt; col++) {
2090 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2091 		mdname_t	*colnp = cp->colnamep;
2092 
2093 		if ((fd = open(colnp->rname, O_RDONLY)) < 0)
2094 			goto error_exit;
2095 
2096 		if (lseek64(fd,
2097 		    (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0)
2098 			goto error_exit;
2099 
2100 		if (read(fd, buf, DEV_BSIZE) < 0)
2101 			goto error_exit;
2102 
2103 		/*
2104 		 * If our raid device is a 64 bit device, we can accept the
2105 		 * pw header we just read in.
2106 		 * Otherwise it's of type raid_pwhdr32_od_t and has to
2107 		 * be converted.
2108 		 */
2109 		if (mr->c.un_revision == MD_64BIT_META_DEV) {
2110 			rpw = (raid_pwhdr_t *)buf;
2111 		} else {
2112 			RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw);
2113 		}
2114 
2115 		if (rpw->rpw_column != col)
2116 			goto error_exit;
2117 
2118 		if (col == 0)
2119 			mnum = rpw->rpw_unit;
2120 
2121 		if (rpw->rpw_unit != mnum)
2122 			goto error_exit;
2123 
2124 		if (rpw->rpw_magic_ext == RAID_PWMAGIC) {
2125 			/* 4.1 prewrite header */
2126 			if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) ||
2127 			    (rpw->rpw_totalcolumncnt
2128 				!= mr->un_totalcolumncnt) ||
2129 			    (rpw->rpw_segsize != mr->un_segsize) ||
2130 			    (rpw->rpw_segsincolumn != mr->un_segsincolumn) ||
2131 			    (rpw->rpw_pwcnt != mr->un_pwcnt) ||
2132 			    (rpw->rpw_pwstart !=
2133 				mr->un_column[col].un_pwstart) ||
2134 			    (rpw->rpw_devstart !=
2135 				mr->un_column[col].un_devstart) ||
2136 			    (rpw->rpw_pwsize != mr->un_pwsize))
2137 				goto error_exit;
2138 		}
2139 		/*
2140 		 * this is an old prewrite header (4.0) the unit structure
2141 		 * will have to be trusted.
2142 		 */
2143 		(void) close(fd);
2144 	}
2145 
2146 	return (0);
2147 
2148 error_exit:
2149 	(void) close(fd);
2150 	return (-1);
2151 }
2152 
2153 /*
2154  * create raid
2155  */
2156 int
2157 meta_create_raid(
2158 	mdsetname_t	*sp,
2159 	md_raid_t	*raidp,
2160 	mdcmdopts_t	options,
2161 	md_error_t	*ep
2162 )
2163 {
2164 	mdname_t	*raidnp = raidp->common.namep;
2165 	uint_t		ncol = raidp->cols.cols_len;
2166 	uint_t		orig_ncol = raidp->orig_ncol;
2167 	size_t		rdsize;
2168 	mr_unit_t	*mr;
2169 	uint_t		col;
2170 	diskaddr_t	disk_size = 0;
2171 	uint_t		disk_maxio = 0;
2172 	uint_t		pwes;
2173 	diskaddr_t	non_pw_blks, column_size;
2174 	mdnamelist_t	*keynlp = NULL;
2175 	md_set_params_t	set_params;
2176 	int		rval = -1;
2177 	md_timeval32_t	creation_time;
2178 	int		create_flag = MD_CRO_32BIT;
2179 
2180 	/* validate raid */
2181 	if (meta_check_raid(sp, raidp, options, ep) != 0)
2182 		return (-1);
2183 
2184 	/* allocate raid unit */
2185 	rdsize = sizeof (*mr) - sizeof (mr->un_column[0]);
2186 	rdsize += ncol * sizeof (mr->un_column[0]);
2187 	mr = Zalloc(rdsize);
2188 
2189 	if (meta_gettimeofday(&creation_time) == -1)
2190 		return (mdsyserror(ep, errno, NULL));
2191 	/*
2192 	 * initialize the top level mr_unit_t structure
2193 	 * setup the unit state to indicate whether to retain
2194 	 * any data currently on the metadevice or to clear it
2195 	 */
2196 	mr->c.un_type = MD_METARAID;
2197 	MD_SID(mr) = meta_getminor(raidnp->dev);
2198 	mr->c.un_size = rdsize;
2199 	mr->un_magic = RAID_UNMAGIC;
2200 	mr->un_state = raidp->state;
2201 	mr->un_timestamp = creation_time;
2202 	mr->un_origcolumncnt = orig_ncol;
2203 	mr->un_segsize = (uint_t)raidp->interlace;
2204 	if (raidp->hspnamep != NULL) {
2205 		mr->un_hsp_id = raidp->hspnamep->hsp;
2206 	} else {
2207 		mr->un_hsp_id = MD_HSP_NONE;
2208 	}
2209 	/*
2210 	 * setup original columns, saving start_block and
2211 	 * finding smallest size and maxio
2212 	 */
2213 	for (col = 0; (col < orig_ncol); ++col) {
2214 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2215 		mdname_t	*colnp = cp->colnamep;
2216 		mr_column_t	*mdc = &mr->un_column[col];
2217 		diskaddr_t	size;
2218 		uint_t		maxio;
2219 		mdcinfo_t	*cinfop;
2220 
2221 		/* setup state */
2222 		mdc->un_devstate = cp->state;
2223 
2224 		/* setup creation time */
2225 		mdc->un_devtimestamp = creation_time;
2226 
2227 		/* get start, size, and maxio */
2228 		if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
2229 		    MD_DISKADDR_ERROR)
2230 			goto out;
2231 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
2232 			goto out;
2233 		size -= mdc->un_orig_devstart;
2234 		if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
2235 			goto out;
2236 		maxio = cinfop->maxtransfer;
2237 
2238 		if (options & MDCMD_DOIT) {
2239 			/* store name in namespace */
2240 			if (add_key_name(sp, colnp, &keynlp, ep) != 0)
2241 				goto out;
2242 		}
2243 
2244 		/* setup column */
2245 		mdc->un_orig_key = colnp->key;
2246 		mdc->un_orig_dev = colnp->dev;
2247 		mdc->un_dev = mdc->un_orig_dev;
2248 		mdc->un_pwstart = mdc->un_orig_pwstart;
2249 		mdc->un_devstart = mdc->un_orig_devstart;
2250 		mdc->un_alt_dev = NODEV64;
2251 		mdc->un_alt_pwstart = 0;
2252 		mdc->un_alt_devstart = 0;
2253 		mdc->un_hs_id = 0;
2254 		if (mr->un_state == RUS_INIT)
2255 			mdc->un_devstate = RCS_INIT;
2256 		else
2257 			mdc->un_devstate = RCS_OKAY;
2258 
2259 		/* adjust for smallest disk */
2260 		if (disk_size == 0) {
2261 			disk_size = size;
2262 		} else if (size < disk_size) {
2263 			disk_size = size;
2264 		}
2265 		if (disk_maxio == 0) {
2266 			disk_maxio = maxio;
2267 		} else if (maxio < disk_maxio) {
2268 			disk_maxio = maxio;
2269 		}
2270 	}
2271 	assert(col == mr->un_origcolumncnt);
2272 
2273 	/*
2274 	 * before processing any of the attached column(s)
2275 	 * set up the composition of the metadevice for column
2276 	 * sizes and pre-write information
2277 	 */
2278 	mr->un_maxio = disk_maxio;	/* smallest maxio */
2279 	mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1));
2280 	pwes = mr->un_iosize;
2281 	if (raidp->pw_count)
2282 		mr->un_pwcnt = raidp->pw_count;
2283 	else
2284 		mr->un_pwcnt = PWCNT_MIN;
2285 	if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) {
2286 		(void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname);
2287 		goto out;
2288 	}
2289 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
2290 
2291 	/* now calculate the number of segments per column */
2292 	non_pw_blks = disk_size - mr->un_pwsize;	/* smallest disk */
2293 	if ((mr->un_pwsize > disk_size) ||
2294 	    (non_pw_blks < (diskaddr_t)mr->un_segsize)) {
2295 		(void) mdsyserror(ep, ENOSPC, raidnp->cname);
2296 		goto out;
2297 	}
2298 	mr->un_segsincolumn = non_pw_blks / mr->un_segsize;
2299 	column_size = mr->un_segsize * mr->un_segsincolumn;
2300 
2301 	/*
2302 	 * adjust the pw_cnt, pw_size, to fit into any fragmentation
2303 	 * left over after column_size has been computed
2304 	 */
2305 	mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2);
2306 	mr->un_pwcnt = mr->un_pwsize / pwes;
2307 	assert(mr->un_pwcnt >= PWCNT_MIN);
2308 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
2309 	assert((mr->un_pwsize + column_size) <= disk_size);
2310 
2311 	/*
2312 	 * calculate the actual block count available based on the
2313 	 * segment size and the number of segments per column ...
2314 	 * ... and adjust for the number of parity segments
2315 	 */
2316 	mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1);
2317 
2318 	if (raid_geom(raidp, mr, ep) != 0)
2319 		goto out;
2320 
2321 	create_flag = meta_check_devicesize(mr->c.un_total_blocks);
2322 
2323 	/*
2324 	 * now calculate the pre-write offset and update the column
2325 	 * structures to include the address of the individual pre-write
2326 	 * areas
2327 	 */
2328 	for (col = 0; (col < orig_ncol); ++col) {
2329 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2330 		mdname_t	*colnp = cp->colnamep;
2331 		mr_column_t	*mdc = &mr->un_column[col];
2332 		diskaddr_t	size;
2333 
2334 		/* get size */
2335 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
2336 			goto out;
2337 
2338 		/* adjust start and size by prewrite */
2339 		mdc->un_orig_pwstart = mdc->un_orig_devstart;
2340 		mdc->un_orig_devstart += mr->un_pwsize;
2341 		mdc->un_pwstart = mdc->un_orig_pwstart;
2342 		mdc->un_devstart = mdc->un_orig_devstart;
2343 
2344 		assert(size >= mdc->un_orig_devstart);
2345 		size -= mdc->un_orig_devstart;
2346 
2347 		/* make sure we still have something left */
2348 		assert(size >= column_size);
2349 	}
2350 
2351 	/* do concat cols */
2352 	mr->un_totalcolumncnt = mr->un_origcolumncnt;
2353 	assert(col == mr->un_origcolumncnt);
2354 	for (col = orig_ncol; (col < ncol); ++col) {
2355 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2356 		mdname_t	*colnp = cp->colnamep;
2357 		mr_column_t	*mdc = &mr->un_column[col];
2358 
2359 		/* attach column */
2360 		if (attach_raid_col(sp, raidnp, mr, mdc, colnp,
2361 		    cp->state, &keynlp, options, ep) != 0) {
2362 			goto out;
2363 		}
2364 	}
2365 	assert(mr->un_totalcolumncnt == ncol);
2366 
2367 	/* fill in the size of the raid */
2368 	if (options & MDCMD_UPDATE) {
2369 		raidp->common.size = mr->c.un_total_blocks;
2370 		raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
2371 	}
2372 
2373 	/* if we're not doing anything, return success */
2374 	if (! (options & MDCMD_DOIT)) {
2375 		rval = 0;	/* success */
2376 		goto out;
2377 	}
2378 
2379 	if ((mr->un_state & RUS_OKAY) &&
2380 	    (meta_raid_valid(raidp, mr) != 0)) {
2381 		(void) mderror(ep, MDE_RAID_INVALID, raidnp->cname);
2382 		goto out;
2383 	}
2384 
2385 	/* create raid */
2386 	(void) memset(&set_params, 0, sizeof (set_params));
2387 	/* did the user tell us to generate a large device? */
2388 	if (create_flag == MD_CRO_64BIT) {
2389 		mr->c.un_revision = MD_64BIT_META_DEV;
2390 		set_params.options = MD_CRO_64BIT;
2391 	} else {
2392 		mr->c.un_revision = MD_32BIT_META_DEV;
2393 		set_params.options = MD_CRO_32BIT;
2394 	}
2395 	set_params.mnum = MD_SID(mr);
2396 	set_params.size = mr->c.un_size;
2397 	set_params.mdp = (uintptr_t)mr;
2398 	MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum));
2399 	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
2400 	    raidnp->cname) != 0) {
2401 		(void) mdstealerror(ep, &set_params.mde);
2402 		goto out;
2403 	}
2404 	rval = 0;	/* success */
2405 
2406 	/* cleanup, return success */
2407 out:
2408 	Free(mr);
2409 	if (rval != 0) {
2410 		(void) del_key_names(sp, keynlp, NULL);
2411 	}
2412 	metafreenamelist(keynlp);
2413 	if ((rval == 0) && (options & MDCMD_DOIT)) {
2414 		if (invalidate_columns(sp, raidnp, ep) != 0)
2415 			rval = -1;
2416 		meta_invalidate_name(raidnp);
2417 	}
2418 	return (rval);
2419 }
2420 
2421 /*
2422  * initialize raid
2423  * NOTE: this functions is metainit(1m)'s command line parser!
2424  */
2425 int
2426 meta_init_raid(
2427 	mdsetname_t	**spp,
2428 	int		argc,
2429 	char		*argv[],
2430 	mdcmdopts_t	options,
2431 	md_error_t	*ep
2432 )
2433 {
2434 	char		*uname = argv[0];
2435 	mdname_t	*raidnp = NULL;
2436 	int		old_optind;
2437 	int		c;
2438 	md_raid_t	*raidp = NULL;
2439 	uint_t		ncol, col;
2440 	int		rval = -1;
2441 	md_set_desc	*sd;
2442 
2443 	/* get raid name */
2444 	assert(argc > 0);
2445 	if (argc < 1)
2446 		goto syntax;
2447 	if ((raidnp = metaname(spp, uname, ep)) == NULL)
2448 		goto out;
2449 	assert(*spp != NULL);
2450 
2451 	/*
2452 	 * Raid metadevice not allowed on multi-node diskset.
2453 	 */
2454 	if (! metaislocalset(*spp)) {
2455 		if ((sd = metaget_setdesc(*spp, ep)) == NULL)
2456 			goto out;
2457 		if (MD_MNSET_DESC(sd)) {
2458 			rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname,
2459 						argc, argv);
2460 			goto out;
2461 		}
2462 	}
2463 
2464 	uname = raidnp->cname;
2465 	if (metachkmeta(raidnp, ep) != 0)
2466 		goto out;
2467 
2468 	if (!(options & MDCMD_NOLOCK)) {
2469 		/* grab set lock */
2470 		if (meta_lock(*spp, TRUE, ep) != 0)
2471 			goto out;
2472 
2473 		if (meta_check_ownership(*spp, ep) != 0)
2474 			goto out;
2475 	}
2476 
2477 	/* see if it exists already */
2478 	if (metagetmiscname(raidnp, ep) != NULL) {
2479 		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
2480 		    meta_getminor(raidnp->dev), uname);
2481 		goto out;
2482 	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
2483 		goto out;
2484 	} else {
2485 		mdclrerror(ep);
2486 	}
2487 	--argc, ++argv;
2488 
2489 	/* grab -r */
2490 	if ((argc < 1) || (strcmp(argv[0], "-r") != 0))
2491 		goto syntax;
2492 	--argc, ++argv;
2493 
2494 	/* parse general options */
2495 	optind = 0;
2496 	opterr = 0;
2497 	if (getopt(argc, argv, "") != -1)
2498 		goto options;
2499 
2500 	/* allocate raid */
2501 	raidp = Zalloc(sizeof (*raidp));
2502 
2503 	/* setup common */
2504 	raidp->common.namep = raidnp;
2505 	raidp->common.type = MD_METARAID;
2506 	raidp->state = RUS_INIT;
2507 
2508 	/* allocate and parse cols */
2509 	for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol)
2510 		;
2511 	raidp->cols.cols_len = ncol;
2512 	if (ncol != 0) {
2513 		raidp->cols.cols_val =
2514 		    Zalloc(ncol * sizeof (*raidp->cols.cols_val));
2515 	}
2516 	for (col = 0; ((argc > 0) && (col < ncol)); ++col) {
2517 		md_raidcol_t	*mdc = &raidp->cols.cols_val[col];
2518 		mdname_t	*colnp;
2519 
2520 		/* parse column name */
2521 		if ((colnp = metaname(spp, argv[0], ep)) == NULL)
2522 			goto out;
2523 		/* check for soft partitions */
2524 		if (meta_sp_issp(*spp, colnp, ep) != 0) {
2525 			/* check disks */
2526 			if (metachkcomp(colnp, ep) != 0)
2527 				goto out;
2528 		}
2529 		mdc->colnamep = colnp;
2530 		--argc, ++argv;
2531 	}
2532 
2533 	/* parse raid options */
2534 	old_optind = optind = 0;
2535 	opterr = 0;
2536 	while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) {
2537 		switch (c) {
2538 		case 'h':
2539 			if ((raidp->hspnamep = metahspname(spp, optarg,
2540 			    ep)) == NULL) {
2541 				goto out;
2542 			}
2543 			break;
2544 
2545 		case 'i':
2546 			if (parse_interlace(uname, optarg, &raidp->interlace,
2547 			    ep) != 0) {
2548 				goto out;
2549 			}
2550 			if (meta_raid_check_interlace(raidp->interlace,
2551 			    uname, ep))
2552 				goto out;
2553 			break;
2554 
2555 		case 'k':
2556 			raidp->state = RUS_OKAY;
2557 			break;
2558 
2559 		case 'o':
2560 			if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) ||
2561 			    ((int)raidp->orig_ncol < 0)) {
2562 				goto syntax;
2563 			}
2564 			if ((raidp->orig_ncol < MD_RAID_MIN) ||
2565 			    (raidp->orig_ncol > ncol)) {
2566 				rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname);
2567 				goto out;
2568 			}
2569 			break;
2570 		case 'w':
2571 			if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) ||
2572 			    ((int)raidp->pw_count < 0))
2573 				goto syntax;
2574 			if (((int)raidp->pw_count < PWCNT_MIN) ||
2575 			    ((int)raidp->pw_count > PWCNT_MAX)) {
2576 				rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname);
2577 				goto out;
2578 			}
2579 			break;
2580 		default:
2581 			argc += old_optind;
2582 			argv -= old_optind;
2583 			goto options;
2584 		}
2585 		old_optind = optind;
2586 	}
2587 	argc -= optind;
2588 	argv += optind;
2589 
2590 	/* we should be at the end */
2591 	if (argc != 0)
2592 		goto syntax;
2593 
2594 	/* default to all original columns */
2595 	if (raidp->orig_ncol == 0)
2596 		raidp->orig_ncol = ncol;
2597 
2598 	/* create raid */
2599 	if (meta_create_raid(*spp, raidp, options, ep) != 0)
2600 		goto out;
2601 	rval = 0;	/* success */
2602 
2603 	/* let em know */
2604 	if (options & MDCMD_PRINT) {
2605 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"),
2606 		    uname);
2607 		(void) fflush(stdout);
2608 	}
2609 	goto out;
2610 
2611 	/* syntax error */
2612 syntax:
2613 	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
2614 	goto out;
2615 
2616 	/* options error */
2617 options:
2618 	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
2619 	goto out;
2620 
2621 	/* cleanup, return error */
2622 out:
2623 	if (raidp != NULL)
2624 		meta_free_raid(raidp);
2625 	return (rval);
2626 }
2627 
2628 /*
2629  * reset RAIDs
2630  */
2631 int
2632 meta_raid_reset(
2633 	mdsetname_t	*sp,
2634 	mdname_t	*raidnp,
2635 	mdcmdopts_t	options,
2636 	md_error_t	*ep
2637 )
2638 {
2639 	md_raid_t	*raidp;
2640 	int		rval = -1;
2641 	int		col;
2642 
2643 	/* should have same set */
2644 	assert(sp != NULL);
2645 	assert((raidnp == NULL) ||
2646 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
2647 
2648 	/* reset all raids */
2649 	if (raidnp == NULL) {
2650 		mdnamelist_t	*raidnlp = NULL;
2651 		mdnamelist_t	*p;
2652 
2653 		/* for each raid */
2654 		rval = 0;
2655 		if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
2656 			return (-1);
2657 		for (p = raidnlp; (p != NULL); p = p->next) {
2658 			/* reset RAID */
2659 			raidnp = p->namep;
2660 			if (meta_raid_reset(sp, raidnp, options, ep) != 0) {
2661 				rval = -1;
2662 				break;
2663 			}
2664 		}
2665 
2666 		/* cleanup, return success */
2667 		metafreenamelist(raidnlp);
2668 		return (rval);
2669 	}
2670 
2671 	/* check name */
2672 	if (metachkmeta(raidnp, ep) != 0)
2673 		return (-1);
2674 
2675 	/* get unit structure */
2676 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
2677 		return (-1);
2678 
2679 	/* make sure nobody owns us */
2680 	if (MD_HAS_PARENT(raidp->common.parent)) {
2681 		return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev),
2682 		    raidnp->cname));
2683 	}
2684 
2685 	/* clear subdevices cache */
2686 	if (invalidate_columns(sp, raidnp, ep) != 0)
2687 		return (-1);
2688 
2689 	/* clear metadevice */
2690 	if (meta_reset(sp, raidnp, options, ep) != 0)
2691 		goto out;
2692 	rval = 0;	/* success */
2693 
2694 	/* let em know */
2695 	if (options & MDCMD_PRINT) {
2696 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is cleared\n"),
2697 		    raidnp->cname);
2698 		(void) fflush(stdout);
2699 	}
2700 
2701 	/* clear subdevices */
2702 	if (! (options & MDCMD_RECURSE))
2703 		goto out;
2704 
2705 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
2706 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
2707 		mdname_t	*colnp = cp->colnamep;
2708 
2709 		/* only recurse on metadevices */
2710 		if (! metaismeta(colnp))
2711 			continue;
2712 
2713 		if (meta_reset_by_name(sp, colnp, options, ep) != 0)
2714 			rval = -1;
2715 	}
2716 
2717 	/* cleanup, return success */
2718 out:
2719 	meta_invalidate_name(raidnp);
2720 	return (rval);
2721 }
2722 
2723 /*
2724  * reports TRUE if any RAID component is in error
2725  */
2726 int
2727 meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names)
2728 {
2729 	mdnamelist_t	*nlp;
2730 	md_error_t	  status	= mdnullerror;
2731 	md_error_t	 *ep		= &status;
2732 	int		  any_errs	= FALSE;
2733 
2734 	for (nlp = raid_names; nlp; nlp = nlp->next) {
2735 		md_raid_t	*raidp;
2736 
2737 		if ((raidp = meta_get_raid(sp, nlp->namep, ep)) == NULL) {
2738 			any_errs |= TRUE;
2739 			goto out;
2740 		}
2741 		if (raidp->state != RUS_OKAY && raidp->state != RUS_INIT) {
2742 			any_errs |= TRUE;
2743 			goto out;
2744 		}
2745 	}
2746 out:
2747 	if (!mdisok(ep))
2748 		mdclrerror(ep);
2749 
2750 	return (any_errs);
2751 }
2752 /*
2753  * regen parity on a raid
2754  */
2755 int
2756 meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size,
2757 	md_error_t *ep)
2758 {
2759 	char			*miscname;
2760 	md_resync_ioctl_t	ri;
2761 
2762 	/* should have a set */
2763 	assert(sp != NULL);
2764 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
2765 
2766 	/* make sure we have a raid */
2767 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
2768 		return (-1);
2769 	if (strcmp(miscname, MD_RAID) != 0) {
2770 		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
2771 		    raidnp->cname));
2772 	}
2773 
2774 	/* start resync */
2775 	(void) memset(&ri, 0, sizeof (ri));
2776 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
2777 	ri.ri_mnum = meta_getminor(raidnp->dev);
2778 	ri.ri_copysize = size;
2779 	if (metaioctl(MD_IOCSETREGEN, &ri, &ri.mde, raidnp->cname) != 0)
2780 		return (mdstealerror(ep, &ri.mde));
2781 
2782 	/* return success */
2783 	return (0);
2784 }
2785