xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_db.c (revision c33df7ede245a3815b726e3eb38752e85ebb081f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Just in case we're not in a build environment, make sure that
30  * TEXT_DOMAIN gets set to something.
31  */
32 #if !defined(TEXT_DOMAIN)
33 #define	TEXT_DOMAIN "SYS_TEST"
34 #endif
35 
36 /*
37  * Metadevice database interfaces.
38  */
39 
40 #define	MDDB
41 
42 #include <meta.h>
43 #include <sys/lvm/md_mddb.h>
44 #include <sys/lvm/md_crc.h>
45 #include <sys/lvm/mdio.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <ctype.h>
49 
/*
 * One entry per daemon that has to be signalled explicitly: the name of
 * the daemon and the name of the signal paired with it.
 */
struct svm_daemon {
	char *svmd_name;
	char *svmd_kill_val;
};

/*
 * Daemons which the SVM smf(5) services do not stop.  mdmonitord is
 * started via svc:/system/mdmonitor:default, but no contract(4) is
 * constructed for it and so smf(5) does not stop it.
 */
struct svm_daemon svmd_kill_list[] = {
	{ "mdmonitord",	"HUP" },
	{ "mddoors",	"KILL" }
};

/* Number of entries in svmd_kill_list[] */
#define	DAEMON_COUNT	\
	(sizeof (svmd_kill_list) / sizeof (svmd_kill_list[0]))
66 
67 extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
68 
/*
 * Are the locator blocks for the replicas using devids?
 *
 * Starts out FALSE.  meta_db_addsidenms() consults this flag: a device
 * name that splitname() reports as METASPLIT_LONGDISKNAME is rejected
 * with MDE_DISKNAMETOOLONG only while the flag is still FALSE.
 */
static int	devid_in_use = FALSE;
73 
74 static char *
75 getlongname(
76 	struct mddb_config	*c,
77 	md_error_t		*ep
78 )
79 {
80 	char		*diskname = NULL;
81 	char		*devid_str;
82 	devid_nmlist_t	*disklist = NULL;
83 
84 	c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
85 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
86 		(void) mdstealerror(ep, &c->c_mde);
87 		return (NULL);
88 	}
89 
90 	if (c->c_locator.l_devid_flags & MDDB_DEVID_SZ) {
91 		c->c_locator.l_devid = (uintptr_t)
92 		    Malloc(c->c_locator.l_devid_sz);
93 		c->c_locator.l_devid_flags =
94 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
95 	} else {
96 		(void) mderror(ep, MDE_NODEVID, "");
97 		goto out;
98 	}
99 
100 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
101 		(void) mdstealerror(ep, &c->c_mde);
102 		goto out;
103 	}
104 
105 	if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
106 		(void) mderror(ep, MDE_NODEVID, "");
107 		goto out;
108 	}
109 
110 	if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) {
111 		(void) mdstealerror(ep, &c->c_mde);
112 		goto out;
113 	}
114 
115 	if (c->c_locator.l_devid != NULL) {
116 		if (meta_deviceid_to_nmlist("/dev/dsk",
117 		    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
118 		    c->c_locator.l_minor_name, &disklist) != 0) {
119 			devid_str = devid_str_encode(
120 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL);
121 			(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
122 			mderrorextra(ep, devid_str);
123 			if (devid_str != NULL)
124 				devid_str_free(devid_str);
125 			goto out;
126 		}
127 		diskname = Strdup(disklist[0].devname);
128 	}
129 
130 out:
131 	if (disklist != NULL)
132 		devid_free_nmlist(disklist);
133 
134 	if (c->c_locator.l_devid != NULL)
135 		Free((void *)(uintptr_t)c->c_locator.l_devid);
136 
137 	return (diskname);
138 }
139 
140 /*
141  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
142  */
143 md_timeval32_t
144 meta_get_lb_inittime(
145 	mdsetname_t	*sp,
146 	md_error_t	*ep
147 )
148 {
149 	mddb_config_t	c;
150 
151 	(void) memset(&c, 0, sizeof (c));
152 
153 	/* Fill in setno, setname, and sideno */
154 	c.c_setno = sp->setno;
155 
156 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
157 		(void) mdstealerror(ep, &c.c_mde);
158 	}
159 
160 	return (c.c_timestamp);
161 }
162 
163 /*
164  * mkmasterblks writes out the master blocks of the mddb to the replica.
165  *
166  * In a MN diskset, this is called by the node that is adding this replica
167  * to the diskset.
168  */
169 
170 #define	MDDB_VERIFY_SIZE	8192
171 
172 static int
173 mkmasterblks(
174 	mdsetname_t	*sp,
175 	mdname_t	*np,
176 	int		fd,
177 	daddr_t		firstblk,
178 	int		dbsize,
179 	md_timeval32_t	inittime,
180 	md_error_t	*ep
181 )
182 {
183 	int		consecutive;
184 	md_timeval32_t	tp;
185 	struct mddb_mb	*mb;
186 	char		*buffer;
187 	int		iosize;
188 	md_set_desc	*sd;
189 	int		mn_set = 0;
190 	daddr_t		startblk;
191 	int		cnt;
192 	ddi_devid_t	devid;
193 
194 	if (! metaislocalset(sp)) {
195 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
196 			return (-1);
197 
198 		if (MD_MNSET_DESC(sd)) {
199 			mn_set = 1;		/* Used later */
200 		}
201 	}
202 
203 	/*
204 	 * Loop to verify the entire mddb region on disk is read/writable.
205 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
206 	 * chunks.
207 	 *
208 	 * A side-effect of this loop is to zero out the entire mddb region
209 	 */
210 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
211 		return (mdsyserror(ep, ENOMEM, np->rname));
212 
213 	startblk = firstblk;
214 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
215 
216 		if (cnt > MDDB_VERIFY_SIZE)
217 			consecutive = MDDB_VERIFY_SIZE;
218 		else
219 			consecutive = cnt;
220 
221 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
222 			Free(buffer);
223 			return (mdsyserror(ep, errno, np->rname));
224 		}
225 
226 		iosize = DEV_BSIZE * consecutive;
227 		if (write(fd, buffer, iosize) != iosize) {
228 			Free(buffer);
229 			return (mdsyserror(ep, errno, np->rname));
230 		}
231 
232 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
233 			Free(buffer);
234 			return (mdsyserror(ep, errno, np->rname));
235 		}
236 
237 		if (read(fd, buffer, iosize) != iosize) {
238 			Free(buffer);
239 			return (mdsyserror(ep, errno, np->rname));
240 		}
241 
242 		startblk += consecutive;
243 	}
244 
245 	Free(buffer);
246 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
247 		return (mdsyserror(ep, ENOMEM, np->rname));
248 
249 	if (meta_gettimeofday(&tp) == -1) {
250 		Free(mb);
251 		return (mdsyserror(ep, errno, np->rname));
252 	}
253 
254 	mb->mb_magic = MDDB_MAGIC_MB;
255 	/*
256 	 * If a MN diskset, set master block revision for a MN set.
257 	 * Even though the master block structure is no different
258 	 * for a MN set, setting the revision field to a different
259 	 * number keeps any pre-MN_diskset code from accessing
260 	 * this diskset.  It also allows for an early determination
261 	 * of a MN diskset when reading in from disk so that the
262 	 * proper size locator block and locator names structure
263 	 * can be read in thus saving time on diskset startup.
264 	 */
265 	if (mn_set)
266 		mb->mb_revision = MDDB_REV_MNMB;
267 	else
268 		mb->mb_revision = MDDB_REV_MB;
269 	mb->mb_timestamp = tp;
270 	mb->mb_setno = sp->setno;
271 	mb->mb_blkcnt = dbsize - 1;
272 	mb->mb_blkno = firstblk;
273 	mb->mb_nextblk = 0;
274 
275 	mb->mb_blkmap.m_firstblk = firstblk + 1;
276 	mb->mb_blkmap.m_consecutive = dbsize - 1;
277 	if (! metaislocalset(sp)) {
278 		mb->mb_setcreatetime = inittime;
279 	}
280 
281 	/*
282 	 * We try to save the disks device ID into the remaining bytes in
283 	 * the master block. The saved devid is used to provide a mapping
284 	 * between this disk's devid and the devid stored into the master
285 	 * block. This allows the disk image to be self-identifying
286 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
287 	 * when we try to import these disks on the remote copied image.
288 	 * If we cannot save the disks device ID onto the master block that is
289 	 * ok.  The disk is just not self-identifying and won't be importable
290 	 * in the remote copy scenario.
291 	 */
292 	if (devid_get(fd, &devid) == 0) {
293 		size_t len;
294 
295 		len = devid_sizeof(devid);
296 		if (len <= DEV_BSIZE - sizeof (*mb)) {
297 			/* there is enough space to store the devid */
298 			mb->mb_devid_magic = MDDB_MAGIC_DE;
299 			mb->mb_devid_len = len;
300 			(void) memcpy(mb->mb_devid, devid, len);
301 		}
302 		devid_free(devid);
303 	}
304 
305 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
306 	    (crc_skip_t *)NULL);
307 
308 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
309 		Free(mb);
310 		return (mdsyserror(ep, errno, np->rname));
311 	}
312 
313 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
314 		Free(mb);
315 		return (mdsyserror(ep, errno, np->rname));
316 	}
317 
318 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
319 		Free(mb);
320 		return (mdsyserror(ep, errno, np->rname));
321 	}
322 
323 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
324 		Free(mb);
325 		return (mdsyserror(ep, errno, np->rname));
326 	}
327 
328 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
329 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
330 		Free(mb);
331 		return (mdmddberror(ep, MDE_NOTVERIFIED,
332 		    meta_getminor(np->dev), sp->setno, 0, np->rname));
333 	}
334 
335 	Free(mb);
336 	return (0);
337 }
338 
339 void
340 meta_mkdummymaster(
341 	mdsetname_t	*sp,
342 	int		fd,
343 	daddr_t		firstblk
344 )
345 {
346 	md_timeval32_t	tp;
347 	struct mddb_mb	*mb;
348 	ddi_devid_t	devid;
349 	md_set_desc	*sd;
350 	md_error_t	ep = mdnullerror;
351 	md_timeval32_t	inittime;
352 
353 	/*
354 	 * No dummy master blocks are written for a MN diskset since devids
355 	 * are not supported in MN disksets.
356 	 */
357 	if (! metaislocalset(sp)) {
358 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
359 			return;
360 
361 		if (MD_MNSET_DESC(sd))
362 			return;
363 	}
364 
365 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
366 		return;
367 
368 	mb->mb_magic = MDDB_MAGIC_DU;
369 	mb->mb_revision = MDDB_REV_MB;
370 	mb->mb_setno = sp->setno;
371 	inittime = meta_get_lb_inittime(sp, &ep);
372 	mb->mb_setcreatetime = inittime;
373 
374 	if (meta_gettimeofday(&tp) != -1)
375 		mb->mb_timestamp = tp;
376 
377 	/*
378 	 * We try to save the disks device ID into the remaining bytes in
379 	 * the master block.  This allows the disk image to be self-identifying
380 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
381 	 * when we try to import these disks on the remote copied image.
382 	 * If we cannot save the disks device ID onto the master block that is
383 	 * ok.  The disk is just not self-identifying and won't be importable
384 	 * in the remote copy scenario.
385 	 */
386 	if (devid_get(fd, &devid) == 0) {
387 		int len;
388 
389 		len = devid_sizeof(devid);
390 		if (len <= DEV_BSIZE - sizeof (*mb)) {
391 			/* there is enough space to store the devid */
392 			mb->mb_devid_magic = MDDB_MAGIC_DE;
393 			mb->mb_devid_len = len;
394 			(void) memcpy(mb->mb_devid, (char *)devid, len);
395 		}
396 		devid_free(devid);
397 	}
398 
399 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
400 	    (crc_skip_t *)NULL);
401 
402 	/*
403 	 * If any of these operations fail, we need to inform the
404 	 * user that the disk won't be self identifying. When support
405 	 * for importing remotely replicated disksets is added, we
406 	 * want to add the error messages here.
407 	 */
408 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
409 		goto out;
410 
411 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
412 		goto out;
413 
414 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
415 		goto out;
416 
417 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
418 		goto out;
419 
420 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
421 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
422 		goto out;
423 
424 out:
425 	Free(mb);
426 }
427 
428 static int
429 buildconf(mdsetname_t *sp, md_error_t *ep)
430 {
431 	md_replicalist_t	*rlp = NULL;
432 	md_replicalist_t	*rl;
433 	FILE			*cfp = NULL;
434 	FILE			*mfp = NULL;
435 	struct stat		sbuf;
436 	int			rval = 0;
437 	int			in_miniroot = 0;
438 	char			line[MDDB_BOOTLIST_MAX_LEN];
439 	char			*tname = NULL;
440 
441 	/* get list of local replicas */
442 	if (! metaislocalset(sp))
443 		return (0);
444 
445 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
446 		return (-1);
447 
448 	/* open tempfile, copy permissions of original file */
449 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
450 		/*
451 		 * On the miniroot tmp files must be created in /var/tmp.
452 		 * If we get a EROFS error, we assume that we are in the
453 		 * miniroot.
454 		 */
455 		if (errno != EROFS)
456 			goto error;
457 		in_miniroot = 1;
458 		errno = 0;
459 		tname = tempnam("/var/tmp", "slvm_");
460 		if (tname == NULL && errno == EROFS) {
461 			/*
462 			 * If we are booted on a read-only root because
463 			 * of mddb quorum problems we don't want to emit
464 			 * any scary error messages.
465 			 */
466 			errno = 0;
467 			goto out;
468 		}
469 
470 		/* open tempfile, copy permissions of original file */
471 		if ((cfp = fopen(tname, "w+")) == NULL)
472 			goto error;
473 	}
474 	if (stat(META_DBCONF, &sbuf) == 0) {
475 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
476 			goto error;
477 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
478 			goto error;
479 	}
480 
481 	/* print header */
482 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
483 		goto error;
484 	if (fprintf(cfp, "do not hand edit\n") < 0)
485 		goto error;
486 	if (fprintf(cfp,
487 	    "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
488 		goto error;
489 
490 	/* dump replicas */
491 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
492 		md_replica_t	*r = rl->rl_repp;
493 		int		checksum = 42;
494 		int		i;
495 		char		*devidp;
496 		minor_t		min;
497 
498 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
499 		/* If devid code can't encode devidp - skip entry */
500 		if (devidp == NULL) {
501 			continue;
502 		}
503 
504 		/* compute checksum */
505 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
506 		    (i < sizeof (r->r_driver_name))); i++) {
507 			checksum -= r->r_driver_name[i];
508 		}
509 		min = meta_getminor(r->r_namep->dev);
510 		checksum -= min;
511 		checksum -= r->r_blkno;
512 
513 		for (i = 0; i < strlen(devidp); i++) {
514 			checksum -= devidp[i];
515 		}
516 		/* print info */
517 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
518 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
519 			goto error;
520 		}
521 
522 		devid_str_free(devidp);
523 	}
524 
525 	/* close and rename to real file */
526 	if (fflush(cfp) != 0)
527 		goto error;
528 	if (fsync(fileno(cfp)) != 0)
529 		goto error;
530 	if (fclose(cfp) != 0) {
531 		cfp = NULL;
532 		goto error;
533 	}
534 	cfp = NULL;
535 
536 	/*
537 	 * Renames don't work in the miniroot since tmpfiles are
538 	 * created in /var/tmp. Hence we copy the data out.
539 	 */
540 
541 	if (! in_miniroot) {
542 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
543 			goto error;
544 	} else {
545 		if ((cfp = fopen(tname, "r")) == NULL)
546 			goto error;
547 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
548 			goto error;
549 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
550 			if (fputs(line, mfp) == NULL)
551 				goto error;
552 		}
553 		(void) fclose(cfp);
554 		cfp = NULL;
555 		if (fflush(mfp) != 0)
556 			goto error;
557 		if (fsync(fileno(mfp)) != 0)
558 			goto error;
559 		if (fclose(mfp) != 0) {
560 			mfp = NULL;
561 			goto error;
562 		}
563 		/* delete the tempfile */
564 		(void) unlink(tname);
565 	}
566 	/* success */
567 	rval = 0;
568 	goto out;
569 
570 	/* tempfile error */
571 error:
572 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
573 	    mdsyserror(ep, errno, META_DBCONFTMP);
574 
575 
576 	/* cleanup, return success */
577 out:
578 	if (rlp != NULL)
579 		metafreereplicalist(rlp);
580 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
581 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
582 		    mdsyserror(ep, errno, META_DBCONFTMP);
583 	}
584 	free(tname);
585 	return (rval);
586 }
587 
588 /*
589  * check replica for dev
590  */
591 static int
592 in_replica(
593 	mdsetname_t	*sp,
594 	md_replica_t	*rp,
595 	mdname_t	*np,
596 	diskaddr_t	slblk,
597 	diskaddr_t	nblks,
598 	md_error_t	*ep
599 )
600 {
601 	mdname_t	*repnp = rp->r_namep;
602 	diskaddr_t	rep_sblk = rp->r_blkno;
603 	diskaddr_t	rep_nblks = rp->r_nblk;
604 
605 	/* should be in the same set */
606 	assert(sp != NULL);
607 
608 	/* if error in master block, assume whole partition */
609 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
610 	    (rep_nblks == MD_DISKADDR_ERROR)) {
611 		rep_sblk = 0;
612 		rep_nblks = MD_DISKADDR_ERROR;
613 	}
614 
615 	/* check overlap */
616 	if (meta_check_overlap(
617 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
618 		return (-1);
619 	}
620 
621 	/* return success */
622 	return (0);
623 }
624 
625 /*
626  * check to see if we're in a replica
627  */
628 int
629 meta_check_inreplica(
630 	mdsetname_t		*sp,
631 	mdname_t		*np,
632 	diskaddr_t		slblk,
633 	diskaddr_t		nblks,
634 	md_error_t		*ep
635 )
636 {
637 	md_replicalist_t	*rlp = NULL;
638 	md_replicalist_t	*rl;
639 	int			rval = 0;
640 
641 	/* should have a set */
642 	assert(sp != NULL);
643 
644 	/* for each replica */
645 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
646 		return (-1);
647 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
648 		md_replica_t	*rp = rl->rl_repp;
649 
650 		/* check replica */
651 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
652 			rval = -1;
653 			break;
654 		}
655 	}
656 
657 	/* cleanup, return success */
658 	metafreereplicalist(rlp);
659 	return (rval);
660 }
661 
662 /*
663  * check replica
664  */
665 int
666 meta_check_replica(
667 	mdsetname_t	*sp,		/* set to check against */
668 	mdname_t	*np,		/* component to check against */
669 	mdchkopts_t	options,	/* option flags */
670 	diskaddr_t	slblk,		/* start logical block */
671 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
672 	md_error_t	*ep		/* error packet */
673 )
674 {
675 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
676 
677 	/* make sure we have a disk */
678 	if (metachkcomp(np, ep) != 0)
679 		return (-1);
680 
681 	/* check to ensure that it is not already in use */
682 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
683 		return (-1);
684 	}
685 
686 	if (options & MDCHK_ALLOW_NODBS)
687 		return (0);
688 
689 	if (options & MDCHK_DRVINSET)
690 		return (0);
691 
692 	/* make sure it is in the set */
693 	if (meta_check_inset(sp, np, ep) != 0)
694 		return (-1);
695 
696 	/* make sure its not in a metadevice */
697 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
698 		return (-1);
699 
700 	/* return success */
701 	return (0);
702 }
703 
704 static int
705 update_dbinfo_on_drives(
706 	mdsetname_t	*sp,
707 	md_drive_desc	*dd,
708 	int		set_locked,
709 	int		force,
710 	md_error_t	*ep
711 )
712 {
713 	md_set_desc		*sd;
714 	int			i;
715 	md_setkey_t		*cl_sk;
716 	int			rval = 0;
717 	md_mnnode_desc		*nd;
718 
719 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
720 		return (-1);
721 
722 	if (! set_locked) {
723 		if (MD_MNSET_DESC(sd)) {
724 			md_error_t xep = mdnullerror;
725 			sigset_t sigs;
726 			/* Make sure we are blocking all signals */
727 			if (procsigs(TRUE, &sigs, &xep) < 0)
728 				mdclrerror(&xep);
729 
730 			nd = sd->sd_nodelist;
731 			while (nd) {
732 				if (force && strcmp(nd->nd_nodename,
733 				    mynode()) != 0) {
734 					nd = nd->nd_next;
735 					continue;
736 				}
737 
738 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
739 					nd = nd->nd_next;
740 					continue;
741 				}
742 
743 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
744 					return (-1);
745 				nd = nd->nd_next;
746 			}
747 		} else {
748 			for (i = 0; i < MD_MAXSIDES; i++) {
749 				/* Skip empty slots */
750 				if (sd->sd_nodes[i][0] == '\0')
751 					continue;
752 
753 				if (force && strcmp(sd->sd_nodes[i],
754 				    mynode()) != 0)
755 					continue;
756 
757 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
758 					return (-1);
759 			}
760 		}
761 	}
762 
763 	if (MD_MNSET_DESC(sd)) {
764 		nd = sd->sd_nodelist;
765 		while (nd) {
766 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
767 				nd = nd->nd_next;
768 				continue;
769 			}
770 
771 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
772 				nd = nd->nd_next;
773 				continue;
774 			}
775 
776 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
777 			    == -1) {
778 				rval = -1;
779 				break;
780 			}
781 			nd = nd->nd_next;
782 		}
783 	} else {
784 		for (i = 0; i < MD_MAXSIDES; i++) {
785 			/* Skip empty slots */
786 			if (sd->sd_nodes[i][0] == '\0')
787 				continue;
788 
789 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
790 				continue;
791 
792 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
793 			    == -1) {
794 				rval = -1;
795 				break;
796 			}
797 		}
798 	}
799 
800 	if (! set_locked) {
801 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
802 		if (MD_MNSET_DESC(sd)) {
803 			nd = sd->sd_nodelist;
804 			while (nd) {
805 				if (force &&
806 				    strcmp(nd->nd_nodename, mynode()) != 0) {
807 					nd = nd->nd_next;
808 					continue;
809 				}
810 
811 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
812 					nd = nd->nd_next;
813 					continue;
814 				}
815 
816 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
817 				    ep)) {
818 					rval = -1;
819 					break;
820 				}
821 				nd = nd->nd_next;
822 			}
823 		} else {
824 			for (i = 0; i < MD_MAXSIDES; i++) {
825 				/* Skip empty slots */
826 				if (sd->sd_nodes[i][0] == '\0')
827 					continue;
828 
829 				if (force &&
830 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
831 					continue;
832 
833 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
834 				    ep)) {
835 					rval = -1;
836 					break;
837 				}
838 			}
839 
840 		}
841 		cl_set_setkey(NULL);
842 	}
843 
844 	return (rval);
845 }
846 
847 int
848 meta_db_addsidenms(
849 	mdsetname_t	*sp,
850 	mdname_t	*np,
851 	daddr_t		blkno,
852 	int		bcast,
853 	md_error_t	*ep
854 )
855 {
856 	side_t		sideno;
857 	char		*bname = NULL;
858 	char		*dname = NULL;
859 	minor_t		mnum;
860 	mddb_config_t	c;
861 	int		done;
862 	int		rval = 0;
863 	md_set_desc	*sd;
864 
865 	sideno = MD_SIDEWILD;
866 	/*CONSTCOND*/
867 	while (1) {
868 		if (bname != NULL) {
869 			Free(bname);
870 			bname = NULL;
871 		}
872 		if (dname != NULL) {
873 			Free(dname);
874 			dname = NULL;
875 		}
876 		if ((done = meta_getnextside_devinfo(sp, np->bname,
877 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
878 			rval = -1;
879 			break;
880 		}
881 
882 		if (done == 0)
883 			break;
884 
885 		if (! metaislocalset(sp)) {
886 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
887 				rval = -1;
888 				break;
889 			}
890 		}
891 
892 		/*
893 		 * Send addsidenms to all nodes using rpc.mdcommd if
894 		 * sidename is being added to MN diskset.
895 		 *
896 		 *   It's ok to broadcast this call to other nodes.
897 		 *
898 		 *   Note: The broadcast to other nodes isn't needed during
899 		 *   the addition of the first mddbs to the set since the
900 		 *   other nodes haven't been joined to the set yet.  All
901 		 *   nodes in a MN diskset are (implicitly) joined to the set
902 		 *   on the addition of the first mddb.
903 		 */
904 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
905 		    (bcast == DB_ADDSIDENMS_BCAST)) {
906 			md_mn_result_t			*resultp = NULL;
907 			md_mn_msg_meta_db_newside_t	db_ns;
908 			int				send_rval;
909 
910 			db_ns.msg_l_dev = np->dev;
911 			db_ns.msg_sideno = sideno;
912 			db_ns.msg_blkno = blkno;
913 			(void) strncpy(db_ns.msg_dname, dname,
914 			    sizeof (db_ns.msg_dname));
915 			(void) splitname(np->bname, &db_ns.msg_splitname);
916 			db_ns.msg_mnum = mnum;
917 
918 			/* Set devid to NULL until devids are supported */
919 			db_ns.msg_devid[0] = NULL;
920 
921 			/*
922 			 * If reconfig cycle has been started, this node is
923 			 * stuck in in the return step until this command has
924 			 * completed.  If mdcommd is suspended, ask
925 			 * send_message to fail (instead of retrying)
926 			 * so that metaset can finish allowing the reconfig
927 			 * cycle to proceed.
928 			 */
929 			send_rval = mdmn_send_message(sp->setno,
930 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
931 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
932 			    sizeof (md_mn_msg_meta_db_newside_t),
933 			    &resultp, ep);
934 			if (send_rval != 0) {
935 				rval = -1;
936 				if (resultp == NULL)
937 					(void) mddserror(ep,
938 					    MDE_DS_COMMD_SEND_FAIL,
939 					    sp->setno, NULL, NULL,
940 					    sp->setname);
941 				else {
942 					(void) mdstealerror(ep,
943 					    &(resultp->mmr_ep));
944 					if (mdisok(ep)) {
945 						(void) mddserror(ep,
946 						    MDE_DS_COMMD_SEND_FAIL,
947 						    sp->setno, NULL, NULL,
948 						    sp->setname);
949 					}
950 					free_result(resultp);
951 				}
952 				break;
953 			}
954 			if (resultp)
955 				free_result(resultp);
956 		} else {
957 			/*
958 			 * Let this side's  device name, minor # and driver name
959 			 * be known to the database replica.
960 			 */
961 			(void) memset(&c, 0, sizeof (c));
962 
963 			/* Fill in device/replica info */
964 			c.c_locator.l_dev = meta_cmpldev(np->dev);
965 			c.c_locator.l_blkno = blkno;
966 			(void) strncpy(c.c_locator.l_driver, dname,
967 			    sizeof (c.c_locator.l_driver));
968 			if (splitname(bname, &c.c_devname) ==
969 			    METASPLIT_LONGDISKNAME && devid_in_use == FALSE) {
970 				rval = mddeverror(ep, MDE_DISKNAMETOOLONG,
971 				    NODEV64, np->rname);
972 				break;
973 			}
974 
975 			c.c_locator.l_mnum = mnum;
976 
977 			/* Fill in setno, setname, and sideno */
978 			c.c_setno = sp->setno;
979 			(void) strncpy(c.c_setname, sp->setname,
980 			    sizeof (c.c_setname));
981 			c.c_sideno = sideno;
982 
983 			/*
984 			 * Don't need device id information from this ioctl
985 			 * Kernel determines device id from dev_t, which
986 			 * is just what this code would do.
987 			 */
988 			c.c_locator.l_devid = (uint64_t)0;
989 			c.c_locator.l_devid_flags = 0;
990 
991 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
992 				rval = mdstealerror(ep, &c.c_mde);
993 				break;
994 			}
995 		}
996 	}
997 
998 	/* cleanup, return success */
999 	if (bname != NULL) {
1000 		Free(bname);
1001 		bname = NULL;
1002 	}
1003 	if (dname != NULL) {
1004 		Free(dname);
1005 		dname = NULL;
1006 	}
1007 	return (rval);
1008 }
1009 
1010 
1011 int
1012 meta_db_delsidenm(
1013 	mdsetname_t	*sp,
1014 	side_t		sideno,
1015 	mdname_t	*np,
1016 	daddr_t		blkno,
1017 	md_error_t	*ep
1018 )
1019 {
1020 	mddb_config_t	c;
1021 	md_set_desc	*sd;
1022 
1023 	if (! metaislocalset(sp)) {
1024 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1025 			return (-1);
1026 	}
1027 	/* Use rpc.mdcommd to delete mddb side from all nodes */
1028 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
1029 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
1030 		md_mn_result_t			*resultp = NULL;
1031 		md_mn_msg_meta_db_delside_t	db_ds;
1032 		int				send_rval;
1033 
1034 		db_ds.msg_l_dev = np->dev;
1035 		db_ds.msg_blkno = blkno;
1036 		db_ds.msg_sideno = sideno;
1037 
1038 		/* Set devid to NULL until devids are supported */
1039 		db_ds.msg_devid[0] = NULL;
1040 
1041 		/*
1042 		 * If reconfig cycle has been started, this node is
1043 		 * stuck in in the return step until this command has
1044 		 * completed.  If mdcommd is suspended, ask
1045 		 * send_message to fail (instead of retrying)
1046 		 * so that metaset can finish allowing the reconfig
1047 		 * cycle to proceed.
1048 		 */
1049 		send_rval = mdmn_send_message(sp->setno,
1050 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
1051 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
1052 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
1053 		if (send_rval != 0) {
1054 			if (resultp == NULL)
1055 				(void) mddserror(ep,
1056 				    MDE_DS_COMMD_SEND_FAIL,
1057 				    sp->setno, NULL, NULL,
1058 				    sp->setname);
1059 			else {
1060 				(void) mdstealerror(ep, &(resultp->mmr_ep));
1061 				if (mdisok(ep)) {
1062 					(void) mddserror(ep,
1063 					    MDE_DS_COMMD_SEND_FAIL,
1064 					    sp->setno, NULL, NULL,
1065 					    sp->setname);
1066 				}
1067 				free_result(resultp);
1068 			}
1069 			return (-1);
1070 		}
1071 		if (resultp)
1072 			free_result(resultp);
1073 
1074 	} else {
1075 		/*
1076 		 * Let this side's  device name, minor # and driver name
1077 		 * be known to the database replica.
1078 		 */
1079 		(void) memset(&c, 0, sizeof (c));
1080 
1081 		/* Fill in device/replica info */
1082 		c.c_locator.l_dev = meta_cmpldev(np->dev);
1083 		c.c_locator.l_blkno = blkno;
1084 
1085 		/* Fill in setno, setname, and sideno */
1086 		c.c_setno = sp->setno;
1087 		(void) strcpy(c.c_setname, sp->setname);
1088 		c.c_sideno = sideno;
1089 
1090 		/*
1091 		 * Don't need device id information from this ioctl
1092 		 * Kernel determines device id from dev_t, which
1093 		 * is just what this code would do.
1094 		 */
1095 		c.c_locator.l_devid = (uint64_t)0;
1096 		c.c_locator.l_devid_flags = 0;
1097 
1098 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
1099 			return (mdstealerror(ep, &c.c_mde));
1100 	}
1101 	return (0);
1102 }
1103 
1104 
1105 static int
1106 mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
1107 {
1108 	mdnamelist_t		*dnp1, *dnp2;
1109 
1110 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
1111 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
1112 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
1113 				return (mderror(ep, MDE_DUPDRIVE,
1114 				    dnp1->namep->cname));
1115 		}
1116 	}
1117 	return (0);
1118 }
1119 
1120 
/*
 * Read exactly len bytes from the start of the file path into buf.
 * Returns 0 if all len bytes were read, -1 otherwise.
 */
static int
filediff_read(char *path, char *buf, size_t len)
{
	size_t	done = 0;
	int	fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		return (-1);

	/* a single read() may return less than was asked for */
	while (done < len) {
		ssize_t	n = read(fd, buf + done, len - done);

		if (n <= 0)
			break;
		done += (size_t)n;
	}
	(void) close(fd);

	return ((done == len) ? 0 : -1);
}

/*
 * Return 1 if files are different, else return 0
 *
 * The files are different if they differ in size or in content.  They
 * are also reported as different when either one can not be stat()ed,
 * opened or read completely, or when memory for the comparison can not
 * be allocated.  Two files of size zero are the same.
 */
static int
filediff(char *tsname, char *sname)
{
	struct stat	tsbuf, sbuf;
	size_t		sz;
	char		*tbuf, *buf;
	int		ret = 1;

	if (stat(tsname, &tsbuf) != 0 || stat(sname, &sbuf) != 0)
		return (1);
	if (tsbuf.st_size != sbuf.st_size)
		return (1);

	/* nothing to compare; do not depend on what malloc(0) returns */
	sz = (size_t)tsbuf.st_size;
	if (sz == 0)
		return (0);

	/* allocate memory and read both files into buffer */
	tbuf = malloc(sz);
	buf = malloc(sz);
	if (tbuf == NULL || buf == NULL)
		goto out;

	if (filediff_read(tsname, tbuf, sz) != 0 ||
	    filediff_read(sname, buf, sz) != 0)
		goto out;

	/* compare content */
	ret = (memcmp(tbuf, buf, sz) != 0) ? 1 : 0;
out:
	free(tbuf);
	free(buf);
	return (ret);
}
1172 
1173 /*
1174  * patch md.conf file with mddb locations
1175  */
1176 int
1177 meta_db_patch(
1178 	char		*sname,		/* system file name */
1179 	char		*cname,		/* mddb.cf file name */
1180 	int		patch,		/* patching locally */
1181 	md_error_t	*ep
1182 )
1183 {
1184 	char		*tsname = NULL;
1185 	char		line[MDDB_BOOTLIST_MAX_LEN];
1186 	FILE		*tsfp = NULL;
1187 	FILE		*mfp = NULL;
1188 	int		rval = -1;
1189 
1190 	/* check names */
1191 	if (sname == NULL) {
1192 		if (patch)
1193 			sname = "md.conf";
1194 		else
1195 			sname = "/kernel/drv/md.conf";
1196 	}
1197 	if (cname == NULL)
1198 		cname = META_DBCONF;
1199 
1200 	/*
1201 	 * edit file
1202 	 */
1203 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
1204 		if (mdissyserror(ep, EROFS)) {
1205 			/*
1206 			 * If we are booted on a read-only root because
1207 			 * of mddb quorum problems we don't want to emit
1208 			 * any scary error messages.
1209 			 */
1210 			mdclrerror(ep);
1211 			rval = 0;
1212 		}
1213 		goto out;
1214 	}
1215 
1216 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0,
1217 	    ep) != 0)
1218 		goto out;
1219 
1220 	/* if file content is identical, skip rename */
1221 	if (filediff(tsname, sname) == 0) {
1222 		rval = 0;
1223 		goto out;
1224 	}
1225 
1226 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
1227 	    (fclose(tsfp) != 0)) {
1228 		(void) mdsyserror(ep, errno, tsname);
1229 		goto out;
1230 	}
1231 
1232 	tsfp = NULL;
1233 
1234 	/*
1235 	 * rename file. If we get a Cross Device error then it
1236 	 * is because we are in the miniroot.
1237 	 */
1238 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
1239 		(void) mdsyserror(ep, errno, sname);
1240 		goto out;
1241 	}
1242 
1243 	if (errno == EXDEV) {
1244 		if ((tsfp = fopen(tsname, "r")) == NULL)
1245 			goto out;
1246 		if ((mfp = fopen(sname, "w+")) == NULL)
1247 			goto out;
1248 		while (fgets(line, sizeof (line), tsfp) != NULL) {
1249 			if (fputs(line, mfp) == NULL)
1250 				goto out;
1251 		}
1252 		(void) fclose(tsfp);
1253 		tsfp = NULL;
1254 		if (fflush(mfp) != 0)
1255 			goto out;
1256 		if (fsync(fileno(mfp)) != 0)
1257 			goto out;
1258 		if (fclose(mfp) != 0) {
1259 			mfp = NULL;
1260 			goto out;
1261 		}
1262 	}
1263 
1264 	Free(tsname);
1265 	tsname = NULL;
1266 	rval = 0;
1267 
1268 	/* cleanup, return error */
1269 out:
1270 	if (tsfp != NULL)
1271 		(void) fclose(tsfp);
1272 	if (tsname != NULL) {
1273 		(void) unlink(tsname);
1274 		Free(tsname);
1275 	}
1276 	return (rval);
1277 }
1278 
1279 /*
1280  * Add replicas to set.  This happens as a result of:
1281  *	- metadb [-s set_name] -a
1282  *	- metaset -s set_name -a disk
1283  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
1284  *	- metaset -s set_name -b
1285  *
1286  * For a local set, this routine is run on the local set host.
1287  *
1288  * For a traditional diskset, this routine is run on the node that
1289  * is running the metaset command.
1290  *
1291  * For a multinode diskset, this routine is run by the node that is
1292  * running the metaset command.  If this is the first mddb added to
1293  * the MN diskset, then no communication is made to other nodes via commd
1294  * since the other nodes will be in-sync with respect to the mddbs when
1295  * those other nodes join the set and snarf in the newly created mddb.
1296  * If this is not the first mddb added to the MN diskset, then this
1297  * attach command is sent to all of the nodes using commd.  This keeps
1298  * the nodes in-sync.
1299  */
1300 int
1301 meta_db_attach(
1302 	mdsetname_t		*sp,
1303 	mdnamelist_t		*db_nlp,
1304 	mdchkopts_t		options,
1305 	md_timeval32_t		*timeval,
1306 	int			dbcnt,
1307 	int			dbsize,
1308 	char			*sysfilename,
1309 	md_error_t		*ep
1310 )
1311 {
1312 	struct mddb_config	c;
1313 	mdnamelist_t		*nlp;
1314 	mdname_t		*np;
1315 	md_drive_desc		*dd = NULL;
1316 	md_drive_desc		*p;
1317 	int			i;
1318 	int			fd;
1319 	side_t			sideno;
1320 	daddr_t			blkno;
1321 	int			replicacount = 0;
1322 	int			start_svmdaemons = 0;
1323 	int			rval = 0;
1324 	md_error_t		status = mdnullerror;
1325 	md_set_desc		*sd;
1326 	int			stale_bool = FALSE;
1327 	int			flags;
1328 	int			firstmddb = 1;
1329 	md_timeval32_t		inittime = {0, 0};
1330 
1331 	/*
1332 	 * Error if we don't get some work to do.
1333 	 */
1334 	if (db_nlp == NULL)
1335 		return (mdsyserror(ep, EINVAL, NULL));
1336 
1337 	if (mdnamesareunique(db_nlp, ep) != 0)
1338 		return (-1);
1339 	(void) memset(&c, 0, sizeof (c));
1340 	c.c_id = 0;
1341 	c.c_setno = sp->setno;
1342 
1343 	/* Don't need device id information from this ioctl */
1344 	c.c_locator.l_devid = (uint64_t)0;
1345 	c.c_locator.l_devid_flags = 0;
1346 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
1347 		if (metaislocalset(sp)) {
1348 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
1349 				mdclrerror(&c.c_mde);
1350 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
1351 			    (! (options & MDCHK_ALLOW_NODBS)))
1352 				return (mdstealerror(ep, &c.c_mde));
1353 		} else {
1354 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
1355 				return (mdstealerror(ep, &c.c_mde));
1356 		}
1357 		mdclrerror(&c.c_mde);
1358 	}
1359 	/*
1360 	 * Is current set STALE?
1361 	 */
1362 	if (c.c_flags & MDDB_C_STALE) {
1363 		stale_bool = TRUE;
1364 	}
1365 
1366 	assert(db_nlp != NULL);
1367 
1368 	/* if these are the first replicas then the SVM daemons need to run */
1369 	if (c.c_dbcnt == 0)
1370 		start_svmdaemons = 1;
1371 
1372 	/*
1373 	 * check to see if we will go over the total possible number
1374 	 * of data bases
1375 	 */
1376 	nlp = db_nlp;
1377 	while (nlp) {
1378 		replicacount += dbcnt;
1379 		nlp = nlp->next;
1380 	}
1381 
1382 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
1383 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
1384 		    sp->setno, c.c_dbcnt + replicacount, NULL));
1385 
1386 	/*
1387 	 * go through and check to make sure all locations specified
1388 	 * are legal also pick out driver name;
1389 	 */
1390 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1391 		diskaddr_t devsize;
1392 
1393 		np = nlp->namep;
1394 
1395 		if (! metaislocalset(sp)) {
1396 			uint_t	partno;
1397 			uint_t	rep_partno;
1398 			mddrivename_t	*dnp = np->drivenamep;
1399 
1400 			/*
1401 			 * make sure that non-local database replicas
1402 			 * are always on the replica slice.
1403 			 */
1404 			if (meta_replicaslice(dnp,
1405 			    &rep_partno, ep) != 0)
1406 				return (-1);
1407 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
1408 				return (-1);
1409 			if (partno != rep_partno)
1410 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
1411 				    np->dev, sp->setname));
1412 		}
1413 
1414 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
1415 		    ep)) {
1416 			return (-1);
1417 		}
1418 
1419 		if ((devsize = metagetsize(np, ep)) == -1)
1420 			return (-1);
1421 
1422 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
1423 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
1424 			    meta_getminor(np->dev), sp->setno, devsize,
1425 			    np->cname));
1426 	}
1427 
1428 	/*
1429 	 * If first disk in set we don't have lb_inittime yet for use as
1430 	 * mb_setcreatetime so don't go looking for it. WE'll come back
1431 	 * later and update after the locator block has been created.
1432 	 * If this isn't the first disk in the set, we have a locator
1433 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
1434 	 * lb_inittime.
1435 	 */
1436 	if (! metaislocalset(sp)) {
1437 		if (c.c_dbcnt != 0) {
1438 			firstmddb = 0;
1439 			inittime = meta_get_lb_inittime(sp, ep);
1440 		}
1441 	}
1442 
1443 	/*
1444 	 * go through and write all master blocks
1445 	 */
1446 
1447 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1448 		np = nlp->namep;
1449 
1450 		if ((fd = open(np->rname, O_RDWR)) < 0)
1451 			return (mdsyserror(ep, errno, np->rname));
1452 
1453 		for (i = 0; i < dbcnt; i++) {
1454 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
1455 			    inittime, ep)) {
1456 				(void) close(fd);
1457 				return (-1);
1458 			}
1459 		}
1460 		(void) close(fd);
1461 	}
1462 
1463 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
1464 		return (-1);
1465 
1466 	if (! metaislocalset(sp)) {
1467 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
1468 		if (! mdisok(ep))
1469 			return (-1);
1470 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1471 			return (-1);
1472 
1473 	}
1474 
1475 	/*
1476 	 * go through and tell kernel to add them
1477 	 */
1478 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1479 		mdcinfo_t	*cinfo;
1480 
1481 		np = nlp->namep;
1482 
1483 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
1484 			rval = -1;
1485 			goto out;
1486 		}
1487 
1488 		/*
1489 		 * If mddb is being added to MN diskset and there already
1490 		 * exists a valid mddb in the set (which equates to this
1491 		 * node being an owner of the set) then use rpc.mdcommd
1492 		 * mechanism to add mddb(s) so that all nodes stay in sync.
1493 		 * If set is stale, don't log the message since rpc.mdcommd
1494 		 * can't write the message to the mddb.
1495 		 *
1496 		 * Otherwise, just add mddb to this node.
1497 		 */
1498 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
1499 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
1500 			md_mn_result_t			*resultp = NULL;
1501 			md_mn_msg_meta_db_attach_t	attach;
1502 			int 				send_rval;
1503 
1504 			/*
1505 			 * In a scenario where new replicas had been added on
1506 			 * the master, and then all of the old replicas failed
1507 			 * before the slaves had knowledge of the new replicas,
1508 			 * the slaves are unable to re-parse in the mddb
1509 			 * from the new replicas since the slaves have no
1510 			 * knowledge of the new replicas.  The following
1511 			 * algorithm solves this problem:
1512 			 * 	- META_DB_ATTACH message generates submsgs
1513 			 * 		- BLOCK parse (master)
1514 			 * 		- MDDB_ATTACH new replicas
1515 			 * 		- UNBLOCK parse (master) causing parse
1516 			 *		information to be sent from master
1517 			 *		to slaves at a higher class than the
1518 			 *		unblock so the parse message will
1519 			 *		reach slaves before unblock message.
1520 			 */
1521 			attach.msg_l_dev = np->dev;
1522 			attach.msg_cnt = dbcnt;
1523 			attach.msg_dbsize = dbsize;
1524 			(void) strncpy(attach.msg_dname, cinfo->dname,
1525 			    sizeof (attach.msg_dname));
1526 			(void) splitname(np->bname, &attach.msg_splitname);
1527 			attach.msg_options = options;
1528 
1529 			/* Set devid to NULL until devids are supported */
1530 			attach.msg_devid[0] = NULL;
1531 
1532 			/*
1533 			 * If reconfig cycle has been started, this node is
1534 			 * stuck in in the return step until this command has
1535 			 * completed.  If mdcommd is suspended, ask
1536 			 * send_message to fail (instead of retrying)
1537 			 * so that metaset can finish allowing the reconfig
1538 			 * cycle to proceed.
1539 			 */
1540 			flags = MD_MSGF_FAIL_ON_SUSPEND;
1541 			if (stale_bool == TRUE)
1542 				flags |= MD_MSGF_NO_LOG;
1543 			send_rval = mdmn_send_message(sp->setno,
1544 			    MD_MN_MSG_META_DB_ATTACH,
1545 			    flags, (char *)&attach,
1546 			    sizeof (md_mn_msg_meta_db_attach_t),
1547 			    &resultp, ep);
1548 			if (send_rval != 0) {
1549 				rval = -1;
1550 				if (resultp == NULL)
1551 					(void) mddserror(ep,
1552 					    MDE_DS_COMMD_SEND_FAIL,
1553 					    sp->setno, NULL, NULL,
1554 					    sp->setname);
1555 				else {
1556 					(void) mdstealerror(ep,
1557 					    &(resultp->mmr_ep));
1558 					if (mdisok(ep)) {
1559 						(void) mddserror(ep,
1560 						    MDE_DS_COMMD_SEND_FAIL,
1561 						    sp->setno, NULL, NULL,
1562 						    sp->setname);
1563 					}
1564 					free_result(resultp);
1565 				}
1566 				goto out;
1567 			}
1568 			if (resultp)
1569 				free_result(resultp);
1570 		} else {
1571 			/* Adding mddb(s) to just this node */
1572 			for (i = 0; i < dbcnt; i++) {
1573 				(void) memset(&c, 0, sizeof (c));
1574 				/* Fill in device/replica info */
1575 				c.c_locator.l_dev = meta_cmpldev(np->dev);
1576 				c.c_locator.l_blkno = i * dbsize + 16;
1577 				blkno = c.c_locator.l_blkno;
1578 				(void) strncpy(c.c_locator.l_driver,
1579 				    cinfo->dname,
1580 				    sizeof (c.c_locator.l_driver));
1581 
1582 				if (splitname(np->bname, &c.c_devname) ==
1583 				    METASPLIT_LONGDISKNAME && devid_in_use ==
1584 				    FALSE) {
1585 					rval = mddeverror(ep,
1586 					    MDE_DISKNAMETOOLONG,
1587 					    NODEV64, np->rname);
1588 					goto out;
1589 				}
1590 
1591 				c.c_locator.l_mnum = meta_getminor(np->dev);
1592 
1593 				/* Fill in setno, setname, and sideno */
1594 				c.c_setno = sp->setno;
1595 				if (! metaislocalset(sp)) {
1596 					if (MD_MNSET_DESC(sd)) {
1597 						c.c_multi_node = 1;
1598 					}
1599 				}
1600 				(void) strcpy(c.c_setname, sp->setname);
1601 				c.c_sideno = sideno;
1602 
1603 				/*
1604 				 * Don't need device id information from this
1605 				 * ioctl Kernel determines device id from
1606 				 * dev_t, which is just what this code would do.
1607 				 */
1608 				c.c_locator.l_devid = (uint64_t)0;
1609 				c.c_locator.l_devid_flags = 0;
1610 
1611 				if (timeval != NULL)
1612 					c.c_timestamp = *timeval;
1613 
1614 				if (setup_med_cfg(sp, &c,
1615 				    (options & MDCHK_SET_FORCE), ep)) {
1616 					rval = -1;
1617 					goto out;
1618 				}
1619 
1620 				if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
1621 				    NULL) != 0) {
1622 					rval = mdstealerror(ep, &c.c_mde);
1623 					goto out;
1624 				}
1625 				/*
1626 				 * This is either a traditional diskset OR this
1627 				 * is the first replica added to a MN diskset.
1628 				 * In either case, set broadcast to NO_BCAST so
1629 				 * that message won't go through rpc.mdcommd.
1630 				 * If this is a traditional diskset, the bcast
1631 				 * flag is ignored since traditional disksets
1632 				 * don't use the rpc.mdcommd.
1633 				 */
1634 				if (meta_db_addsidenms(sp, np, blkno,
1635 				    DB_ADDSIDENMS_NO_BCAST, ep))
1636 					goto out;
1637 			}
1638 		}
1639 		if (! metaislocalset(sp)) {
1640 			/* update the dbcnt and size in dd */
1641 			for (p = dd; p != NULL; p = p->dd_next)
1642 				if (p->dd_dnp == np->drivenamep) {
1643 					p->dd_dbcnt = dbcnt;
1644 					p->dd_dbsize  = dbsize;
1645 					break;
1646 				}
1647 		}
1648 
1649 		/*
1650 		 * If this was the first addition of disks to the
1651 		 * diskset you now need to update the mb_setcreatetime
1652 		 * which needed lb_inittime which wasn't there until now.
1653 		 */
1654 		if (firstmddb) {
1655 			if (meta_update_mb(sp, dd, ep) != 0) {
1656 				return (-1);
1657 			}
1658 		}
1659 		(void) close(fd);
1660 	}
1661 
1662 out:
1663 	if (metaislocalset(sp)) {
1664 
1665 		/* everything looks fine. Start mdmonitord */
1666 		if (rval == 0 && start_svmdaemons == 1) {
1667 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
1668 				mde_perror(&status, "");
1669 				mdclrerror(&status);
1670 			}
1671 		}
1672 
1673 		if (buildconf(sp, &status)) {
1674 			/* Don't mask any previous errors */
1675 			if (rval == 0)
1676 				rval = mdstealerror(ep, &status);
1677 			return (rval);
1678 		}
1679 
1680 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
1681 			/* Don't mask any previous errors */
1682 			if (rval == 0)
1683 				rval = mdstealerror(ep, &status);
1684 		}
1685 	} else {
1686 		if (update_dbinfo_on_drives(sp, dd,
1687 		    (options & MDCHK_SET_LOCKED),
1688 		    (options & MDCHK_SET_FORCE),
1689 		    &status)) {
1690 			/* Don't mask any previous errors */
1691 			if (rval == 0)
1692 				rval = mdstealerror(ep, &status);
1693 			else
1694 				mdclrerror(&status);
1695 		}
1696 		metafreedrivedesc(&dd);
1697 	}
1698 	/*
1699 	 * For MN disksets that already had already had nodes joined
1700 	 * before the attach of this mddb(s), the name invalidation is
1701 	 * done by the commd handler routine.  Otherwise, if this
1702 	 * is the first attach of a MN diskset mddb, the invalidation
1703 	 * must be done here since the first attach cannot be sent
1704 	 * via the commd since there are no nodes joined to the set yet.
1705 	 */
1706 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
1707 	    (MD_MNSET_DESC(sd) &&
1708 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
1709 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
1710 			meta_invalidate_name(nlp->namep);
1711 		}
1712 	}
1713 	return (rval);
1714 }
1715 
1716 /*
1717  * deletelist_length
1718  *
1719  *	return the number of slices that have been specified for deletion
1720  *	on the metadb command line.  This does not calculate the number
1721  *	of replicas because there may be multiple replicas per slice.
1722  */
1723 static int
1724 deletelist_length(mdnamelist_t *db_nlp)
1725 {
1726 
1727 	mdnamelist_t		*nlp;
1728 	int			list_length = 0;
1729 
1730 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1731 		list_length++;
1732 	}
1733 
1734 	return (list_length);
1735 }
1736 
1737 static int
1738 in_deletelist(char *devname, mdnamelist_t *db_nlp)
1739 {
1740 
1741 	mdnamelist_t		*nlp;
1742 	mdname_t		*np;
1743 	int			index = 0;
1744 
1745 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1746 		np = nlp->namep;
1747 
1748 		if (strcmp(devname, np->bname) == 0)
1749 			return (index);
1750 		index++;
1751 	}
1752 
1753 	return (-1);
1754 }
1755 
1756 /*
1757  * Delete replicas from set.  This happens as a result of:
1758  *	- metadb [-s set_name] -d
1759  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
1760  *	- metaset -s set_name -d disk
1761  *	- metaset -s set_name -b
1762  *
1763  * For a local set, this routine is run on the local set host.
1764  *
1765  * For a traditional diskset, this routine is run on the node that
1766  * is running the metaset command.
1767  *
1768  * For a multinode diskset, this routine is run by the node that is
1769  * running the metaset command.  This detach routine is sent to all
1770  * of the joined nodes in the diskset using commd.  This keeps
1771  * the nodes in-sync.
1772  */
1773 int
1774 meta_db_detach(
1775 	mdsetname_t		*sp,
1776 	mdnamelist_t		*db_nlp,
1777 	mdforceopts_t		force_option,
1778 	char			*sysfilename,
1779 	md_error_t		*ep
1780 )
1781 {
1782 	struct mddb_config	c;
1783 	mdnamelist_t		*nlp;
1784 	mdname_t		*np;
1785 	md_drive_desc		*dd = NULL;
1786 	md_drive_desc		*p;
1787 	int			replicacount;
1788 	int			replica_delete_count;
1789 	int			nr_replica_slices;
1790 	int			i;
1791 	int			stop_svmdaemons = 0;
1792 	int			rval = 0;
1793 	int			index;
1794 	int			valid_replicas_nottodelete = 0;
1795 	int			invalid_replicas_nottodelete = 0;
1796 	int			invalid_replicas_todelete = 0;
1797 	int			errored = 0;
1798 	int			*tag_array;
1799 	int			fd = -1;
1800 	md_error_t		status = mdnullerror;
1801 	md_set_desc		*sd;
1802 	int			stale_bool = FALSE;
1803 	int			flags;
1804 
1805 	/*
1806 	 * Error if we don't get some work to do.
1807 	 */
1808 	if (db_nlp == NULL)
1809 		return (mdsyserror(ep, EINVAL, NULL));
1810 
1811 	if (mdnamesareunique(db_nlp, ep) != 0)
1812 		return (-1);
1813 
1814 	(void) memset(&c, 0, sizeof (c));
1815 	c.c_id = 0;
1816 	c.c_setno = sp->setno;
1817 
1818 	/* Don't need device id information from this ioctl */
1819 	c.c_locator.l_devid = (uint64_t)0;
1820 	c.c_locator.l_devid_flags = 0;
1821 
1822 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
1823 		return (mdstealerror(ep, &c.c_mde));
1824 
1825 	/*
1826 	 * Is current set STALE?
1827 	 */
1828 	if (c.c_flags & MDDB_C_STALE) {
1829 		stale_bool = TRUE;
1830 	}
1831 
1832 	replicacount = c.c_dbcnt;
1833 
1834 	assert(db_nlp != NULL);
1835 
1836 	/*
1837 	 * go through and gather how many data bases are on each
1838 	 * device specified.
1839 	 */
1840 
1841 	nr_replica_slices = deletelist_length(db_nlp);
1842 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
1843 
1844 	replica_delete_count = 0;
1845 	for (i = 0; i < replicacount; i++) {
1846 		char	*devname;
1847 		int	found = 0;
1848 
1849 		c.c_id = i;
1850 
1851 		/* Don't need device id information from this ioctl */
1852 		c.c_locator.l_devid = (uint64_t)0;
1853 		c.c_locator.l_devid_flags = 0;
1854 
1855 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
1856 			return (mdstealerror(ep, &c.c_mde));
1857 
1858 		devname = splicename(&c.c_devname);
1859 
1860 		if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
1861 			Free(devname);
1862 			devname = getlongname(&c, ep);
1863 			if (devname == NULL) {
1864 				return (-1);
1865 			}
1866 		}
1867 
1868 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
1869 			found = 1;
1870 			tag_array[index] = 1;
1871 			replica_delete_count++;
1872 		}
1873 
1874 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
1875 		    MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT |
1876 		    MDDB_F_EDATA | MDDB_F_EMASTER);
1877 
1878 		/*
1879 		 * There are four combinations of "errored" and "found"
1880 		 * and they are used to find the number of
1881 		 * (a) valid/invalid replicas that are not in the delete
1882 		 * list and are available in the system.
1883 		 * (b) valid/invalid replicas that are to be deleted.
1884 		 */
1885 
1886 		if (errored && !found)		/* errored and !found */
1887 			invalid_replicas_nottodelete++;
1888 		else if (!found)		/* !errored and !found */
1889 			valid_replicas_nottodelete++;
1890 		else if (errored)		/* errored and found */
1891 			invalid_replicas_todelete++;
1892 		/*
1893 		 * else it is !errored and found. This means
1894 		 * valid_replicas_todelete++; But this variable will not
1895 		 * be used anywhere
1896 		 */
1897 
1898 		Free(devname);
1899 	}
1900 
1901 	index = 0;
1902 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1903 		np = nlp->namep;
1904 		if (tag_array[index++] != 1) {
1905 			Free(tag_array);
1906 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
1907 		}
1908 	}
1909 
1910 	Free(tag_array);
1911 
1912 
1913 	/* if all replicas are deleted stop mdmonitord */
1914 	if ((replicacount - replica_delete_count) == 0)
1915 		stop_svmdaemons = 1;
1916 
1917 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
1918 		if (force_option & MDFORCE_NONE)
1919 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
1920 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
1921 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
1922 	}
1923 
1924 	/*
1925 	 * The following algorithms are followed to check for deletion:
1926 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
1927 	 * replicas, then deletion should be allowed.
1928 	 * (b) Deletion should be allowed only if valid replicas that are "not"
1929 	 * to be deleted is always greater than the invalid replicas that
1930 	 * are "not" to be deleted.
1931 	 * (c) If the user uses -f option, then deletion should be allowed.
1932 	 */
1933 
1934 	if ((invalid_replicas_todelete != replica_delete_count) &&
1935 	    (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
1936 	    (force_option != MDFORCE_LOCAL))
1937 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
1938 
1939 	/*
1940 	 * go through and tell kernel to delete them
1941 	 */
1942 
1943 	/* Don't need device id information from this ioctl */
1944 	c.c_locator.l_devid = (uint64_t)0;
1945 	c.c_locator.l_devid_flags = 0;
1946 
1947 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
1948 		return (mdstealerror(ep, &c.c_mde));
1949 
1950 	if (! metaislocalset(sp)) {
1951 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
1952 		if (! mdisok(ep))
1953 			return (-1);
1954 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1955 			return (-1);
1956 	}
1957 
1958 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1959 		np = nlp->namep;
1960 
1961 		/*
1962 		 * If mddb is being deleted from MN diskset and node is
1963 		 * an owner of the diskset then use rpc.mdcommd
1964 		 * mechanism to add mddb(s) so that all nodes stay in sync.
1965 		 * If set is stale, don't log the message since rpc.mdcommd
1966 		 * can't write the message to the mddb.
1967 		 *
1968 		 * When mddbs are first being added to set, a detach can
1969 		 * be called before any node has joined the diskset, so
1970 		 * must check to see if node is an owner of the diskset.
1971 		 *
1972 		 * Otherwise, just delete mddb from this node.
1973 		 */
1974 
1975 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
1976 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
1977 			md_mn_result_t			*resultp;
1978 			md_mn_msg_meta_db_detach_t	detach;
1979 			int				send_rval;
1980 
1981 			/*
1982 			 * The following algorithm is used to detach replicas.
1983 			 * 	- META_DB_DETACH message generates submsgs
1984 			 * 		- BLOCK parse (master)
1985 			 * 		- MDDB_DETACH replicas
1986 			 * 		- UNBLOCK parse (master) causing parse
1987 			 *		information to be sent from master
1988 			 *		to slaves at a higher class than the
1989 			 *		unblock so the parse message will
1990 			 *		reach slaves before unblock message.
1991 			 */
1992 			(void) splitname(np->bname, &detach.msg_splitname);
1993 
1994 			/* Set devid to NULL until devids are supported */
1995 			detach.msg_devid[0] = NULL;
1996 
1997 			/*
1998 			 * If reconfig cycle has been started, this node is
1999 			 * stuck in in the return step until this command has
2000 			 * completed.  If mdcommd is suspended, ask
2001 			 * send_message to fail (instead of retrying)
2002 			 * so that metaset can finish allowing the reconfig
2003 			 * cycle to proceed.
2004 			 */
2005 			flags = MD_MSGF_FAIL_ON_SUSPEND;
2006 			if (stale_bool == TRUE)
2007 				flags |= MD_MSGF_NO_LOG;
2008 			send_rval = mdmn_send_message(sp->setno,
2009 			    MD_MN_MSG_META_DB_DETACH,
2010 			    flags, (char *)&detach,
2011 			    sizeof (md_mn_msg_meta_db_detach_t),
2012 			    &resultp, ep);
2013 			if (send_rval != 0) {
2014 				rval = -1;
2015 				if (resultp == NULL)
2016 					(void) mddserror(ep,
2017 					    MDE_DS_COMMD_SEND_FAIL,
2018 					    sp->setno, NULL, NULL,
2019 					    sp->setname);
2020 				else {
2021 					(void) mdstealerror(ep,
2022 					    &(resultp->mmr_ep));
2023 					if (mdisok(ep)) {
2024 						(void) mddserror(ep,
2025 						    MDE_DS_COMMD_SEND_FAIL,
2026 						    sp->setno, NULL, NULL,
2027 						    sp->setname);
2028 					}
2029 					free_result(resultp);
2030 				}
2031 				goto out;
2032 			}
2033 			if (resultp)
2034 				free_result(resultp);
2035 		} else {
2036 			i = 0;
2037 			while (i < c.c_dbcnt) {
2038 				char	*devname;
2039 
2040 				c.c_id = i;
2041 
2042 				/* Don't need devid info from this ioctl */
2043 				c.c_locator.l_devid = (uint64_t)0;
2044 				c.c_locator.l_devid_flags = 0;
2045 
2046 				if (metaioctl(MD_DB_GETDEV, &c,
2047 				    &c.c_mde, NULL)) {
2048 					rval = mdstealerror(ep, &c.c_mde);
2049 					goto out;
2050 				}
2051 
2052 				devname = splicename(&c.c_devname);
2053 
2054 				if (strstr(devname, META_LONGDISKNAME_STR)
2055 				    != NULL) {
2056 					Free(devname);
2057 					devname = getlongname(&c, ep);
2058 					if (devname == NULL) {
2059 						return (-1);
2060 					}
2061 				}
2062 
2063 				if (strcmp(devname, np->bname) != 0) {
2064 					Free(devname);
2065 					i++;
2066 					continue;
2067 				}
2068 				Free(devname);
2069 
2070 				/* Don't need devid info from this ioctl */
2071 				c.c_locator.l_devid = (uint64_t)0;
2072 				c.c_locator.l_devid_flags = 0;
2073 
2074 				if (metaioctl(MD_DB_DELDEV, &c,
2075 				    &c.c_mde, NULL) != 0) {
2076 					rval = mdstealerror(ep, &c.c_mde);
2077 					goto out;
2078 				}
2079 
2080 				/* Not incrementing "i" intentionally */
2081 			}
2082 		}
2083 		if (! metaislocalset(sp)) {
2084 			/* update the dbcnt and size in dd */
2085 			for (p = dd; p != NULL; p = p->dd_next) {
2086 				if (p->dd_dnp == np->drivenamep) {
2087 					p->dd_dbcnt = 0;
2088 					p->dd_dbsize  = 0;
2089 					break;
2090 				}
2091 			}
2092 
2093 			/*
2094 			 * Slam a dummy master block and make it self
2095 			 * identifying
2096 			 */
2097 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
2098 				meta_mkdummymaster(sp, fd, 16);
2099 				(void) close(fd);
2100 			}
2101 		}
2102 	}
2103 out:
2104 	if (metaislocalset(sp)) {
2105 		/*
2106 		 * Stop all the daemons if there are
2107 		 * no more replicas so that the module can be
2108 		 * unloaded.
2109 		 */
2110 		if (rval == 0 && stop_svmdaemons == 1) {
2111 			char buf[MAXPATHLEN];
2112 			int i;
2113 
2114 			for (i = 0; i < DAEMON_COUNT; i++) {
2115 				(void) snprintf(buf, MAXPATHLEN,
2116 				    "/usr/bin/pkill -%s -x %s",
2117 				    svmd_kill_list[i].svmd_kill_val,
2118 				    svmd_kill_list[i].svmd_name);
2119 				if (pclose(popen(buf, "w")) == -1)
2120 					md_perror(buf);
2121 			}
2122 
2123 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
2124 				mde_perror(&status, "");
2125 				mdclrerror(&status);
2126 			}
2127 		}
2128 		if (buildconf(sp, &status)) {
2129 			/* Don't mask any previous errors */
2130 			if (rval == 0)
2131 				rval = mdstealerror(ep, &status);
2132 			else
2133 				mdclrerror(&status);
2134 			return (rval);
2135 		}
2136 
2137 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
2138 			/* Don't mask any previous errors */
2139 			if (rval == 0)
2140 				rval = mdstealerror(ep, &status);
2141 			else
2142 				mdclrerror(&status);
2143 		}
2144 	} else {
2145 		if (update_dbinfo_on_drives(sp, dd,
2146 		    (force_option & MDFORCE_SET_LOCKED),
2147 		    ((force_option & MDFORCE_LOCAL) |
2148 		    (force_option & MDFORCE_DS)), &status)) {
2149 			/* Don't mask any previous errors */
2150 			if (rval == 0)
2151 				rval = mdstealerror(ep, &status);
2152 			else
2153 				mdclrerror(&status);
2154 		}
2155 		metafreedrivedesc(&dd);
2156 	}
2157 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
2158 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
2159 			meta_invalidate_name(nlp->namep);
2160 		}
2161 	}
2162 	return (rval);
2163 }
2164 
2165 static md_replica_t *
2166 metareplicaname(
2167 	mdsetname_t		*sp,
2168 	int			flags,
2169 	struct mddb_config	*c,
2170 	md_error_t		*ep
2171 )
2172 {
2173 	md_replica_t	*rp;
2174 	char		*devname;
2175 	size_t		sz;
2176 	devid_nmlist_t	*disklist = NULL;
2177 	char		*devid_str;
2178 
2179 	/* allocate replicaname */
2180 	rp = Zalloc(sizeof (*rp));
2181 
2182 	/* get device name */
2183 	devname = splicename(&c->c_devname);
2184 
2185 	/*
2186 	 * Check if the device has a long name (>40 characters) and
2187 	 * if so then we have to use devids to get the device name.
2188 	 * If this cannot be done then we have to fail the request.
2189 	 */
2190 	if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
2191 		if (c->c_locator.l_devid != NULL) {
2192 			if (meta_deviceid_to_nmlist("/dev/dsk",
2193 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
2194 			    c->c_locator.l_minor_name, &disklist) != 0) {
2195 				devid_str = devid_str_encode(
2196 				    (ddi_devid_t)(uintptr_t)
2197 				    c->c_locator.l_devid, NULL);
2198 				(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
2199 				mderrorextra(ep, devid_str);
2200 				if (devid_str != NULL)
2201 					devid_str_free(devid_str);
2202 				Free(rp);
2203 				Free(devname);
2204 				return (NULL);
2205 			}
2206 		} else {
2207 			(void) mderror(ep, MDE_NODEVID, "");
2208 			Free(rp);
2209 			Free(devname);
2210 			return (NULL);
2211 		}
2212 		Free(devname);
2213 		devname = disklist[0].devname;
2214 	}
2215 
2216 	if (flags & PRINT_FAST) {
2217 		if ((rp->r_namep = metaname_fast(&sp, devname,
2218 		    LOGICAL_DEVICE, ep)) == NULL) {
2219 			Free(devname);
2220 			Free(rp);
2221 			return (NULL);
2222 		}
2223 	} else {
2224 		if ((rp->r_namep = metaname(&sp, devname,
2225 		    LOGICAL_DEVICE, ep)) == NULL) {
2226 			Free(devname);
2227 			Free(rp);
2228 			return (NULL);
2229 		}
2230 	}
2231 	Free(devname);
2232 
2233 	/* make sure it's OK */
2234 	if ((! (flags & MD_BASICNAME_OK)) &&
2235 	    (metachkcomp(rp->r_namep, ep) != 0)) {
2236 		Free(rp);
2237 		return (NULL);
2238 	}
2239 
2240 	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
2241 	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
2242 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
2243 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
2244 		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
2245 		    (c->c_locator.l_devid));
2246 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
2247 		    (ddi_devid_t)NULL) {
2248 			Free(rp);
2249 			return (NULL);
2250 		}
2251 		(void) memcpy((void *)rp->r_devid,
2252 		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
2253 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
2254 		rp->r_flags &= ~MDDB_F_NODEVID;
2255 		/* Overwrite dev derived from name with dev from devid */
2256 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
2257 	}
2258 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
2259 
2260 	rp->r_blkno = c->c_locator.l_blkno;
2261 	if (c->c_dbend != 0)
2262 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
2263 
2264 	/* return replica */
2265 	return (rp);
2266 }
2267 
2268 /*
2269  * free replica list
2270  */
2271 void
2272 metafreereplicalist(
2273 	md_replicalist_t	*rlp
2274 )
2275 {
2276 	md_replicalist_t	*rl = NULL;
2277 
2278 	for (/* void */; (rlp != NULL); rlp = rl) {
2279 		rl = rlp->rl_next;
2280 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
2281 			free(rlp->rl_repp->r_devid);
2282 		}
2283 		Free(rlp->rl_repp);
2284 		Free(rlp);
2285 	}
2286 }
2287 
2288 /*
2289  * return list of all replicas in set
2290  */
2291 int
2292 metareplicalist(
2293 	mdsetname_t		*sp,
2294 	int			flags,
2295 	md_replicalist_t	**rlpp,
2296 	md_error_t		*ep
2297 )
2298 {
2299 	md_replicalist_t	**tail = rlpp;
2300 	int			count = 0;
2301 	struct mddb_config	c;
2302 	int			i;
2303 	char			*devid;
2304 
2305 	/* for each replica */
2306 	i = 0;
2307 	do {
2308 		md_replica_t	*rp;
2309 
2310 		/* get next replica */
2311 		(void) memset(&c, 0, sizeof (c));
2312 		c.c_id = i;
2313 		c.c_setno = sp->setno;
2314 
2315 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
2316 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
2317 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
2318 				mdclrerror(&c.c_mde);
2319 				break;	/* handle none at all */
2320 			}
2321 			(void) mdstealerror(ep, &c.c_mde);
2322 			goto out;
2323 		}
2324 
2325 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
2326 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
2327 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
2328 				goto out;
2329 			}
2330 			c.c_locator.l_devid = (uintptr_t)devid;
2331 			/*
2332 			 * Turn on space and sz flags since 'sz' amount of
2333 			 * space has been alloc'd.
2334 			 */
2335 			c.c_locator.l_devid_flags =
2336 			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
2337 		}
2338 
2339 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
2340 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
2341 				mdclrerror(&c.c_mde);
2342 				break;	/* handle none at all */
2343 			}
2344 			(void) mdstealerror(ep, &c.c_mde);
2345 			goto out;
2346 		}
2347 
2348 		/*
2349 		 * Paranoid check - shouldn't happen, but is left as
2350 		 * a place holder for changes that will be needed after
2351 		 * dynamic reconfiguration changes are added to SVM (to
2352 		 * support movement of disks at any point in time).
2353 		 */
2354 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
2355 			(void) fprintf(stderr,
2356 			    dgettext(TEXT_DOMAIN,
2357 			    "Error: Relocation Information "
2358 			    "(drvnm=%s, mnum=0x%lx) \n"
2359 			    "relocation information size changed - \n"
2360 			    "rerun command\n"),
2361 			    c.c_locator.l_driver, c.c_locator.l_mnum);
2362 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
2363 			goto out;
2364 		}
2365 
2366 		if (c.c_dbcnt == 0)
2367 			break;		/* handle none at all */
2368 
2369 		/* get info */
2370 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
2371 			goto out;
2372 
2373 		/* append to list */
2374 		*tail = Zalloc(sizeof (**tail));
2375 		(*tail)->rl_repp = rp;
2376 		tail = &(*tail)->rl_next;
2377 		++count;
2378 
2379 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
2380 			free(devid);
2381 			c.c_locator.l_devid_flags = 0;
2382 		}
2383 
2384 	} while (++i < c.c_dbcnt);
2385 
2386 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
2387 		free(devid);
2388 	}
2389 
2390 	/* return count */
2391 	return (count);
2392 
2393 	/* cleanup, return error */
2394 out:
2395 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
2396 		free(devid);
2397 	}
2398 	metafreereplicalist(*rlpp);
2399 	*rlpp = NULL;
2400 	return (-1);
2401 }
2402 
2403 /*
2404  * meta_sync_db_locations - get list of replicas from kernel and write
2405  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
2406  * 	the kernel with the replica list in the conf files.
2407  *
2408  */
2409 void
2410 meta_sync_db_locations(
2411 	mdsetname_t	*sp,
2412 	md_error_t	*ep
2413 )
2414 {
2415 	char		*sname = 0;		/* system file name */
2416 	char 		*cname = 0;		/* config file name */
2417 
2418 	if (!metaislocalset(sp))
2419 		return;
2420 
2421 	/* Updates backup of configuration file (aka mddb.cf) */
2422 	if (buildconf(sp, ep) != 0)
2423 		return;
2424 
2425 	/* Updates system configuration file (aka md.conf) */
2426 	(void) meta_db_patch(sname, cname, 0, ep);
2427 }
2428 
2429 /*
2430  * setup_db_locations - parse the mddb.cf file and
2431  *			tells the driver which db locations to use.
2432  */
2433 int
2434 meta_setup_db_locations(
2435 	md_error_t	*ep
2436 )
2437 {
2438 	mddb_config_t	c;
2439 	FILE		*fp;
2440 	char		inbuff[1024];
2441 	char		*buff;
2442 	uint_t		i;
2443 	size_t		sz;
2444 	int		rval = 0;
2445 	char		*devidp;
2446 	uint_t		devid_size;
2447 	char		*minor_name = NULL;
2448 	ddi_devid_t	devid_decode;
2449 	int		checksum;
2450 
2451 	/* do mddb.cf file */
2452 	(void) memset(&c, '\0', sizeof (c));
2453 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
2454 		if (errno != ENOENT)
2455 			return (mdsyserror(ep, errno, META_DBCONF));
2456 	}
2457 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
2458 	    fp)) != NULL)) {
2459 
2460 		/* ignore comments */
2461 		if (*buff == '#')
2462 			continue;
2463 
2464 		/* parse locator */
2465 		(void) memset(&c, 0, sizeof (c));
2466 		c.c_setno = MD_LOCAL_SET;
2467 		i = strcspn(buff, " \t");
2468 		if (i > sizeof (c.c_locator.l_driver))
2469 			i = sizeof (c.c_locator.l_driver);
2470 		(void) strncpy(c.c_locator.l_driver, buff, i);
2471 		buff += i;
2472 		c.c_locator.l_dev =
2473 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
2474 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
2475 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
2476 
2477 		/* parse out devid */
2478 		while (isspace((int)(*buff)))
2479 			buff += 1;
2480 		i = strcspn(buff, " \t");
2481 		if ((devidp = (char *)malloc(i+1)) == NULL)
2482 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
2483 
2484 		(void) strncpy(devidp, buff, i);
2485 		devidp[i] = '\0';
2486 		if (devid_str_decode(devidp, &devid_decode,
2487 		    &minor_name) == -1) {
2488 			free(devidp);
2489 			continue;
2490 		}
2491 
2492 		/* Conf file must have minor name associated with devid */
2493 		if (minor_name == NULL) {
2494 			free(devidp);
2495 			devid_free(devid_decode);
2496 			continue;
2497 		}
2498 
2499 		sz = devid_sizeof(devid_decode);
2500 		/* Copy to devid size buffer that ioctl expects */
2501 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
2502 			devid_free(devid_decode);
2503 			free(minor_name);
2504 			free(devidp);
2505 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
2506 		}
2507 
2508 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
2509 		    (void *)devid_decode, sz);
2510 
2511 		devid_free(devid_decode);
2512 
2513 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
2514 			free(minor_name);
2515 			free(devidp);
2516 			free((void *)(uintptr_t)c.c_locator.l_devid);
2517 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
2518 		}
2519 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
2520 		free(minor_name);
2521 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
2522 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
2523 		c.c_locator.l_devid_sz = sz;
2524 
2525 		devid_size = strlen(devidp);
2526 		buff += devid_size;
2527 
2528 		checksum = strtol(buff, &buff, 10);
2529 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
2530 			checksum += c.c_locator.l_driver[i];
2531 		for (i = 0; i < devid_size; i++) {
2532 			checksum += devidp[i];
2533 		}
2534 		free(devidp);
2535 
2536 		checksum += minor(c.c_locator.l_dev);
2537 		checksum += c.c_locator.l_blkno;
2538 		if (checksum != 42) {
2539 			/* overwritten later for more serious problems */
2540 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
2541 			free((void *)(uintptr_t)c.c_locator.l_devid);
2542 			continue;
2543 		}
2544 		c.c_locator.l_flags = 0;
2545 
2546 		/* use db location */
2547 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
2548 			free((void *)(uintptr_t)c.c_locator.l_devid);
2549 			return (mdstealerror(ep, &c.c_mde));
2550 		}
2551 
2552 		/* free up devid if in use */
2553 		free((void *)(uintptr_t)c.c_locator.l_devid);
2554 		c.c_locator.l_devid = (uint64_t)0;
2555 		c.c_locator.l_devid_flags = 0;
2556 	}
2557 	if ((fp) && (fclose(fp) != 0))
2558 		return (mdsyserror(ep, errno, META_DBCONF));
2559 
2560 	/* check for stale database */
2561 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
2562 	c.c_id = 0;
2563 	c.c_setno = MD_LOCAL_SET;
2564 
2565 	/*
2566 	 * While we do not need the devid here we may need to
2567 	 * know if devid's are being used by the kernel for
2568 	 * the replicas. This is because under some circumstances
2569 	 * we can only manipulate the SVM configuration if the
2570 	 * kernel is using devid's.
2571 	 */
2572 	c.c_locator.l_devid = (uint64_t)0;
2573 	c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
2574 	c.c_locator.l_devid_sz = 0;
2575 
2576 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
2577 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
2578 			return (mdstealerror(ep, &c.c_mde));
2579 		mdclrerror(&c.c_mde);
2580 	}
2581 
2582 	if (c.c_flags & MDDB_C_STALE)
2583 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
2584 		    0, NULL));
2585 
2586 	if (c.c_locator.l_devid_sz != 0) {
2587 		/*
2588 		 * Devid's are being used to track the replicas because
2589 		 * there is space for a devid.
2590 		 */
2591 		devid_in_use = TRUE;
2592 	}
2593 
2594 	/* success */
2595 	return (rval);
2596 }
2597 
2598 /*
2599  * meta_db_minreplica - returns the minimum size replica currently in use.
2600  */
2601 daddr_t
2602 meta_db_minreplica(
2603 	mdsetname_t	*sp,
2604 	md_error_t	*ep
2605 )
2606 {
2607 	md_replica_t		*r;
2608 	md_replicalist_t	*rl, *rlp = NULL;
2609 	daddr_t			nblks = 0;
2610 
2611 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
2612 		return (-1);
2613 
2614 	if (rlp == NULL)
2615 		return (-1);
2616 
2617 	/* find the smallest existing replica */
2618 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
2619 		r = rl->rl_repp;
2620 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
2621 	}
2622 
2623 	metafreereplicalist(rlp);
2624 	return (nblks);
2625 }
2626 
2627 /*
2628  * meta_get_replica_names
2629  *  returns an mdnamelist_t of replica slices
2630  */
2631 /*ARGSUSED*/
2632 int
2633 meta_get_replica_names(
2634 	mdsetname_t	*sp,
2635 	mdnamelist_t	**nlpp,
2636 	int		options,
2637 	md_error_t	*ep
2638 )
2639 {
2640 	md_replicalist_t	*rlp = NULL;
2641 	md_replicalist_t	*rl;
2642 	mdnamelist_t		**tailpp = nlpp;
2643 	int			cnt = 0;
2644 
2645 	assert(nlpp != NULL);
2646 
2647 	if (!metaislocalset(sp))
2648 		goto out;
2649 
2650 	/* get replicas */
2651 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
2652 		cnt = -1;
2653 		goto out;
2654 	}
2655 
2656 	/* build name list */
2657 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
2658 		/*
2659 		 * Add the name struct to the end of the
2660 		 * namelist but keep a pointer to the last
2661 		 * element so that we don't incur the overhead
2662 		 * of traversing the list each time
2663 		 */
2664 		tailpp = meta_namelist_append_wrapper(
2665 		    tailpp, rl->rl_repp->r_namep);
2666 		++cnt;
2667 	}
2668 
2669 	/* cleanup, return count or error */
2670 out:
2671 	metafreereplicalist(rlp);
2672 	return (cnt);
2673 }
2674