xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision 144dfaa9a648eea321858b34d4941d2268130176)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Metadevice diskset utility.
30  */
31 
32 #include <meta.h>
33 #include <sys/lvm/md_mddb.h>
34 #include <sdssc.h>
35 
/*
 * Operation selector for the metaset command.  notspecified is the first
 * enumerator (value 0).  The option letters that select each value are
 * parsed outside this block; the per-enumerator notes below are inferred
 * from the names and the usage() synopsis -- TODO confirm against main().
 */
enum metaset_cmd {
	notspecified,
	add,			/* presumably -a */
	balance,		/* presumably -b */
	delete,			/* presumably -d */
	cluster,		/* presumably the cluster back door */
	isowner,		/* presumably -o */
	purge,			/* presumably -P */
	query,
	release,		/* presumably -r */
	take,			/* presumably -t */
	join,			/* Join a multinode diskset */
	withdraw		/* Withdraw from a multinode diskset */
};
50 
/*
 * Sub-operations of the cluster interface.  ccnotspecified is the first
 * enumerator (value 0).  How each value is selected is not visible in
 * this block.
 */
enum cluster_cmd {
	ccnotspecified,
	clusterversion,		/* Return the version of the cluster I/F */
	clusterdisksin,		/* List disks in a given diskset */
	clustertake,		/* back door for Cluster take */
	clusterrelease,		/* ditto */
	clusterpurge,		/* back door for Cluster purge */
	clusterproxy		/* proxy the args after '--' to primary */
};
60 
61 static void
62 usage(
63 	mdsetname_t	*sp,
64 	char		*string)
65 {
66 	if ((string != NULL) && (*string != '\0'))
67 		md_eprintf("%s\n", string);
68 	(void) fprintf(stderr, gettext(
69 "usage:	%s -s setname -a [-A enable | disable] -h hostname ...\n"
70 "	%s -s setname -a [-M] -h hostname ...\n"
71 "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
72 "	%s -s setname -d [-M] -h hostname ...\n"
73 "	%s -s setname -d [-M] -f -h all-hostnames\n"
74 "	%s -s setname -d [-M] [-f] drivename ...\n"
75 "	%s -s setname -d [-M] [-f] hostname ...\n"
76 "	%s -s setname -A enable | disable\n"
77 "	%s -s setname -t [-f]\n"
78 "	%s -s setname -r\n"
79 "	%s [-s setname] -j [-M]\n"
80 "	%s [-s setname] -w [-M]\n"
81 "	%s -s setname -P [-M]\n"
82 "	%s -s setname -b [-M]\n"
83 "	%s -s setname -o [-M] [-h hostname]\n"
84 "	%s [-s setname]\n"
85 "\n"
86 "		hostname = contents of /etc/nodename\n"
87 "		drivename = cNtNdN no slice\n"
88 "		[-M] for multi-owner set is optional except on set creation\n"),
89 	myname, myname, myname, myname, myname, myname, myname, myname,
90 	myname, myname, myname, myname, myname, myname, myname, myname);
91 	md_exit(sp, (string == NULL) ? 0 : 1);
92 }
93 
94 /*
95  * The svm.sync rc script relies heavily on the metaset output.
96  * Any changes to the metaset output MUST verify that the rc script
97  * does not break. Not doing so may potentially leave the system
98  * unusable. You have been WARNED.
99  */
100 static int
101 printset(mdsetname_t *sp, md_error_t *ep)
102 {
103 	int			i, j;
104 	md_set_desc		*sd;
105 	md_drive_desc		*dd, *p;
106 	int			max_meds;
107 	md_mnnode_desc		*nd;
108 
109 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
110 		return (-1);
111 
112 	/*
113 	 * Only get set owner information for traditional diskset.
114 	 * This set owner information is stored in the node records
115 	 * for a MN diskset.
116 	 */
117 	if (!(MD_MNSET_DESC(sd))) {
118 		if (metaget_setownership(sp, ep) == -1)
119 			return (-1);
120 	}
121 
122 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
123 	    ep)) == NULL) && !mdisok(ep))
124 		return (-1);
125 
126 	if (MD_MNSET_DESC(sd)) {
127 		(void) printf(gettext(
128 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
129 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
130 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
131 		    (dd != NULL)) {
132 			(void) printf(gettext(
133 				"Master and owner information unavailable "
134 				"until joined (metaset -j)\n"));
135 		}
136 	} else {
137 		(void) printf(gettext(
138 		    "\nSet name = %s, Set number = %d\n"),
139 		    sp->setname, sp->setno);
140 	}
141 
142 	if (MD_MNSET_DESC(sd)) {
143 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
144 			gettext("Host"), gettext("Owner"), gettext("Member"));
145 		nd = sd->sd_nodelist;
146 		while (nd) {
147 			/*
148 			 * Don't print nodes that aren't ok since they may be
149 			 * removed from config during a reconfig cycle.  If a
150 			 * node was being added to a diskset and the entire
151 			 * cluster went down but the node being added was unable
152 			 * to reboot, there's no way to know if that node had
153 			 * its own node record set to OK or not.  So, node
154 			 * record is left in ADD state during reconfig cycle.
155 			 * When that node reboots and returns to the cluster,
156 			 * the reconfig cycle will either remove the node
157 			 * record (if not marked OK on that node) or will mark
158 			 * it OK on all nodes.
159 			 * It is very important to only remove a node record
160 			 * from the other nodes when that node record is not
161 			 * marked OK on its own node - otherwise, different
162 			 * nodes would have different nodelists possibly
163 			 * causing different nodes to to choose different
164 			 * masters.
165 			 */
166 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
167 				nd = nd->nd_next;
168 				continue;
169 			}
170 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
171 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
172 				(void) printf(
173 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
174 				    nd->nd_nodename, gettext("multi-owner"),
175 				    gettext("Yes"));
176 			} else /* Should never be able to happen */
177 			    if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
178 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
179 				(void) printf(
180 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
181 				    nd->nd_nodename, gettext("multi-owner"),
182 				    gettext("No"));
183 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
184 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
185 				(void) printf(
186 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
187 				    nd->nd_nodename, gettext(""),
188 				    gettext("Yes"));
189 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
190 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
191 				(void) printf(
192 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
193 				    nd->nd_nodename, gettext(""),
194 				    gettext("No"));
195 			}
196 			nd = nd->nd_next;
197 		}
198 	} else {
199 		(void) printf("\n%-19.19s %-5.5s\n",
200 			gettext("Host"), gettext("Owner"));
201 		for (i = 0; i < MD_MAXSIDES; i++) {
202 			/* Skip empty slots */
203 			if (sd->sd_nodes[i][0] == '\0')
204 				continue;
205 
206 			/*
207 			 * Standard hostname field is 17 bytes but metaset will
208 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
209 			 */
210 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
211 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
212 				(sd->sd_isown[i] ? gettext("Yes (auto)") :
213 				    gettext("No (auto)"))
214 				: (sd->sd_isown[i] ? gettext("Yes") : "")));
215 		}
216 	}
217 
218 	if (sd->sd_med.n_cnt > 0)
219 		(void) printf("\n%-19.19s %-7.7s\n",
220 		    gettext("Mediator Host(s)"), gettext("Aliases"));
221 
222 	if ((max_meds = get_max_meds(ep)) == 0)
223 		return (-1);
224 
225 	for (i = 0; i < max_meds; i++) {
226 		if (sd->sd_med.n_lst[i].a_cnt == 0)
227 			continue;
228 		(void) printf("  %-17.17s   ", sd->sd_med.n_lst[i].a_nm[0]);
229 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
230 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
231 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
232 				(void) printf(gettext(", "));
233 		}
234 		(void) printf("\n");
235 	}
236 
237 	if (dd) {
238 		int	len = 0;
239 
240 
241 		/*
242 		 * Building a format string on the fly that will
243 		 * be used in (f)printf. This allows the length
244 		 * of the ctd to vary from small to large without
245 		 * looking horrible.
246 		 */
247 		for (p = dd; p != NULL; p = p->dd_next)
248 			len = max(len, strlen(p->dd_dnp->cname));
249 
250 		len += 2;
251 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
252 		    gettext("Drive"),
253 		    gettext("Dbase"));
254 		for (p = dd; p != NULL; p = p->dd_next) {
255 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
256 			    p->dd_dnp->cname,
257 			    (p->dd_dbcnt ? gettext("Yes") :
258 			    gettext("No")));
259 		}
260 	}
261 
262 	return (0);
263 }
264 
265 static int
266 printsets(mdsetname_t *sp, md_error_t *ep)
267 {
268 	int			i;
269 	mdsetname_t		*sp1;
270 	set_t			max_sets;
271 
272 	/*
273 	 * print setname given.
274 	 */
275 	if (! metaislocalset(sp)) {
276 		if (printset(sp, ep))
277 			return (-1);
278 		return (0);
279 	}
280 
281 	if ((max_sets = get_max_sets(ep)) == 0)
282 		return (-1);
283 
284 	/*
285 	 * Print all known sets
286 	 */
287 	for (i = 1; i < max_sets; i++) {
288 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
289 			if (! mdiserror(ep, MDE_NO_SET))
290 				break;
291 			mdclrerror(ep);
292 			continue;
293 		}
294 
295 		if (printset(sp1, ep))
296 			break;
297 	}
298 	if (! mdisok(ep))
299 		return (-1);
300 
301 	return (0);
302 }
303 
304 /*
305  * Print the current versionn of the cluster contract private interface.
306  */
307 static void
308 printclusterversion()
309 {
310 	printf("%s\n", METASETIFVERSION);
311 }
312 
313 /*
314  * Print the disks that make up the given disk set. This is used
315  * exclusively by Sun Cluster and is contract private.
316  * Should never be called with sname of a Multinode diskset.
317  */
318 static int
319 printdisksin(char *sname, md_error_t *ep)
320 {
321 	mdsetname_t	*sp;
322 	md_drive_desc	*dd, *p;
323 
324 	if ((sp = metasetname(sname, ep)) == NULL) {
325 
326 		/*
327 		 * During a deletion of a set the associated service is
328 		 * put offline. The SC3.0 reservation code calls disksuite
329 		 * to find a list of disks associated with the set so that
330 		 * it can release the reservation on those disks. In this
331 		 * case there won't be any disks or even a set left. So just
332 		 * return.
333 		 */
334 		return (0);
335 	}
336 
337 	if (metaget_setownership(sp, ep) == -1)
338 		return (-1);
339 
340 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
341 	    ep)) == NULL) && !mdisok(ep))
342 		return (-1);
343 
344 	for (p = dd; p != NULL; p = p->dd_next)
345 		(void) printf("%s\n", p->dd_dnp->rname);
346 
347 	return (0);
348 }
349 
350 static void
351 parse_printset(int argc, char **argv)
352 {
353 	int		c;
354 	mdsetname_t	*sp = NULL;
355 	char		*sname = MD_LOCAL_NAME;
356 	md_error_t	status = mdnullerror;
357 	md_error_t	*ep = &status;
358 
359 	/* reset and parse args */
360 	optind = 1;
361 	opterr = 1;
362 	while ((c = getopt(argc, argv, "s:")) != -1) {
363 		switch (c) {
364 		case 's':
365 			sname = optarg;
366 			break;
367 		default:
368 			usage(sp, gettext("unknown options"));
369 		}
370 	}
371 
372 	argc -= optind;
373 	argv += optind;
374 
375 	if (argc != 0)
376 		usage(sp, gettext("too many args"));
377 
378 	if ((sp = metasetname(sname, ep)) == NULL) {
379 		mde_perror(ep, "");
380 		md_exit(sp, 1);
381 	}
382 
383 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
384 		mde_perror(ep, "");
385 		md_exit(sp, 1);
386 	}
387 
388 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
389 		mde_perror(ep, "");
390 		md_exit(sp, 1);
391 	}
392 
393 	md_exit(sp, 0);
394 }
395 
/*
 * Handle "metaset -a": add hosts (-h), mediator hosts (-m) or, when
 * neither is given, drives to the set named with -s.
 *
 * Options accepted: -M (multi-owner), -A enable|disable (auto-take),
 * -a, -h, -m, -l length, -L, -s setname.  -h and -m are mutually
 * exclusive, and neither may be combined with -l or -L.  -A enable is
 * only accepted together with -h, and only when the set does not yet
 * exist.
 *
 * In every mode the local set lock is taken first; the lock on the
 * target set is taken in addition only for a traditional (non
 * multi-owner) set.  This function never returns: each path ends in
 * md_exit() or usage().
 */
static void
parse_add(int argc, char **argv)
{
	int			c,
				created_set,
				hosts = FALSE,
				meds = FALSE,
				auto_take = FALSE,
				force_label = FALSE,
				default_size = TRUE;
	mdsetname_t		*sp = NULL;
	char			*sname = MD_LOCAL_NAME;
	md_error_t		status = mdnullerror,
				*ep = &status;
	mddrivenamelist_t	*dnlp = NULL;
	mddrivenamelist_t	*p;
	daddr_t			dbsize,
				nblks;
	mdsetname_t		*local_sp = NULL;
	int			multi_node = 0;
	md_set_desc		*sd;
	rval_e			sdssc_rval;

	/* reset and parse args */
	optind = 1;
	opterr = 1;
	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
		switch (c) {
		case 'M':
			multi_node = 1;
			break;
		case 'A':
			/* verified sub-option in main */
			if (strcmp(optarg, "enable") == 0)
				auto_take = TRUE;
			break;
		case 'a':
			/* nothing to record for the mode flag itself */
			break;
		case 'h':
		case 'm':
			if (meds == TRUE || hosts == TRUE)
				usage(sp, gettext(
				    "only one -m or -h option allowed"));

			/* -l or -L was already seen; not valid with -h/-m */
			if (default_size == FALSE || force_label == TRUE)
				usage(sp, gettext(
				    "conflicting options"));

			if (c == 'h')
				hosts = TRUE;
			else
				meds = TRUE;
			break;
		case 'l':
			if (hosts == TRUE || meds == TRUE)
				usage(sp, gettext(
				    "conflicting options"));
			if (sscanf(optarg, "%ld", &dbsize) != 1) {
				md_eprintf(gettext(
				    "%s: bad format\n"), optarg);
				usage(sp, "");
			}

			/* range-checked later, once the set type is known */
			default_size = FALSE;
			break;
		case 'L':
			/* Same criteria as -l */
			if (hosts == TRUE || meds == TRUE)
				usage(sp, gettext(
				    "conflicting options"));
			force_label = TRUE;
			break;
		case 's':
			sname = optarg;
			break;
		default:
			usage(sp, gettext(
			    "unknown options"));
		}
	}

	/* Can only use -A enable when creating the single-node set */
	if (auto_take && hosts != TRUE)
		usage(sp, gettext("conflicting options"));

	/* From here on argc/argv describe only the operands. */
	argc -= optind;
	argv += optind;

	/*
	 * Add hosts
	 */
	if (hosts == TRUE) {

		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		if (meta_lock(local_sp, TRUE, ep) != 0) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		/*
		 * Keep track of Cluster set creation. Need to complete
		 * the transaction no matter if the set was created or not.
		 */
		created_set = 0;

		/*
		 * Have no set, cannot take the lock, so only take the
		 * local lock.
		 */
		if ((sp = metasetname(sname, ep)) == NULL) {
			sdssc_rval = 0;
			if (multi_node) {
				/*
				 * When running on a cluster system that
				 * does not support MN disksets, the routine
				 * sdssc_mo_create_begin will be bound
				 * to the SVM routine not_bound_error
				 * which returns SDSSC_NOT_BOUND_ERROR.
				 *
				 * When running on a cluster system that
				 * does support MN disksets, the routine
				 * sdssc_mo_create_begin will be bound to
				 * the sdssc_mo_create_begin routine in
				 * library libsdssc_so.  A call to
				 * sdssc_mo_create_begin will return with
				 * either SDSSC_ERROR or SDSSC_OKAY. If
				 * an SDSSC_OKAY is returned, then the
				 * cluster framework has allocated a
				 * set number for this new set that is unique
				 * across traditional and MN disksets.
				 * Libmeta will get this unique set number
				 * by calling sdssc_get_index.
				 *
				 * When running on a non-cluster system,
				 * the routine sdssc_mo_create_begin
				 * will be bound to the SVM routine
				 * not_bound which returns SDSSC_NOT_BOUND.
				 * In this case, all sdssc routines will
				 * return SDSSC_NOT_BOUND.  No need to check
				 * for return value of SDSSC_NOT_BOUND since
				 * the libmeta call to get the set number
				 * (sdssc_get_index) will also fail with
				 * SDSSC_NOT_BOUND causing libmeta to
				 * determine its own set number.
				 */
				sdssc_rval = sdssc_mo_create_begin(sname, argc,
					argv, SDSSC_PICK_SETNO);
				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
					mderror(ep, MDE_NOT_MN, NULL);
					mde_perror(ep,
					"Cluster node does not support "
					"multi-owner diskset operations");
					md_exit(local_sp, 1);
				} else if (sdssc_rval == SDSSC_ERROR) {
					mde_perror(ep, "");
					md_exit(local_sp, 1);
				}
			} else {
				sdssc_rval = sdssc_create_begin(sname, argc,
					argv, SDSSC_PICK_SETNO);
				if (sdssc_rval == SDSSC_ERROR) {
					mde_perror(ep, "");
					md_exit(local_sp, 1);
				}
			}
			/*
			 * Created diskset (as opposed to adding a
			 * host to an existing diskset).
			 */
			created_set = 1;

			/*
			 * Build a bare set handle carrying only the name,
			 * with no lock held, and drop the error left behind
			 * by the failed metasetname() lookup above.
			 */
			sp = Zalloc(sizeof (*sp));
			sp->setname = Strdup(sname);
			sp->lockfd = MD_NO_LOCK;
			mdclrerror(ep);
		} else {
			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
				mde_perror(ep, "");
				md_exit(local_sp, 1);
			}
			/* An existing set's own type overrides a missing -M */
			if (MD_MNSET_DESC(sd)) {
				multi_node = 1;
			}

			/*
			 * can't add hosts to an existing set & enable
			 * auto-take
			 */
			if (auto_take)
				usage(sp, gettext("conflicting options"));

			/*
			 * Have a valid set, take the set lock also.
			 *
			 * A MN diskset does not use the set meta_lock but
			 * instead uses the clnt_lock of rpc.metad and the
			 * suspend/resume feature of the rpc.mdcommd.  Can't
			 * use set meta_lock since class 1 messages are
			 * grabbing this lock and if this thread is holding
			 * the set meta_lock then no rpc.mdcommd suspend
			 * can occur.
			 */
			if (!multi_node) {
				if (meta_lock(sp, TRUE, ep) != 0) {
					mde_perror(ep, "");
					md_exit(local_sp, 1);
				}
			}
		}

		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
		    ep)) {
			/* Roll back the cluster-side creation transaction. */
			if (created_set)
				sdssc_create_end(sname, SDSSC_CLEANUP);
			mde_perror(&status, "");
			if (!multi_node)
				(void) meta_unlock(sp, ep);
			md_exit(local_sp, 1);
		}

		if (created_set)
			sdssc_create_end(sname, SDSSC_COMMIT);

		else {
			/*
			 * If adding hosts to existing diskset,
			 * call DCS svcs
			 */
			sdssc_add_hosts(sname, argc, argv);
		}
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 0);
	}

	/*
	 * Add mediators
	 */
	if (meds == TRUE) {

		/* local_sp is still NULL if this first lookup fails */
		if ((sp = metasetname(sname, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}
		if (MD_MNSET_DESC(sd)) {
			multi_node = 1;
		}

		if (meta_lock(local_sp, TRUE, ep) != 0) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}
		/*
		 * A MN diskset does not use the set meta_lock but
		 * instead uses the clnt_lock of rpc.metad and the
		 * suspend/resume feature of the rpc.mdcommd.  Can't
		 * use set meta_lock since class 1 messages are
		 * grabbing this lock and if this thread is holding
		 * the set meta_lock then no rpc.mdcommd suspend
		 * can occur.
		 */
		if (!multi_node) {
			if (meta_lock(sp, TRUE, ep) != 0) {
				mde_perror(ep, "");
				md_exit(local_sp, 1);
			}
		}

		if (meta_set_addmeds(sp, argc, argv, ep)) {
			mde_perror(&status, "");
			if (!multi_node)
				(void) meta_unlock(sp, ep);
			md_exit(local_sp, 1);
		}

		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 0);
	}

	/*
	 * Add drives
	 */
	if ((sp = metasetname(sname, ep)) == NULL) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}

	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}

	/* Determine if diskset is a MN diskset or not */
	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}
	if (MD_MNSET_DESC(sd)) {
		multi_node = 1;
	}

	if (meta_lock(local_sp, TRUE, ep) != 0) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}

	/* Make sure database size is within limits */
	if (default_size == FALSE) {
		/* MN and traditional sets have different size limits */
		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
		    (!multi_node && dbsize < MDDB_MINBLKS))
			usage(sp, gettext(
			    "size (-l) is too small"));

		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
		    (!multi_node && dbsize > MDDB_MAXBLKS))
			usage(sp, gettext(
			    "size (-l) is too big"));
	}

	/*
	 * Have a valid set, take the set lock also.
	 *
	 * A MN diskset does not use the set meta_lock but
	 * instead uses the clnt_lock of rpc.metad and the
	 * suspend/resume feature of the rpc.mdcommd.  Can't
	 * use set meta_lock since class 1 messages are
	 * grabbing this lock and if this thread is holding
	 * the set meta_lock then no rpc.mdcommd suspend
	 * can occur.
	 */
	if (!multi_node) {
		if (meta_lock(sp, TRUE, ep) != 0) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}
	}


	/*
	 * If using the default size,
	 *   then let's adjust the default to the minimum
	 *   size currently in use.
	 */
	if (default_size) {
		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
		/* a failed lookup just keeps the built-in default */
		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
			mdclrerror(ep);
		else
			dbsize = nblks;	/* adjust replica size */
	}

	/*
	 * c is reused here for the result of metadrivenamelist():
	 * negative is treated as an error, zero as "no drives given".
	 */
	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
		mde_perror(ep, "");
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 1);
	}

	if (c == 0) {
		md_perror(gettext(
		    "No drives specified to add.\n"));
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 1);
	}

	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
		metafreedrivenamelist(dnlp);
		mde_perror(ep, "");
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 1);
	}

	/*
	 * MN disksets don't have a device id in the master block
	 * For traditional disksets, check for the drive device
	 * id not fitting in the master block
	 */
	if (!multi_node) {
		for (p = dnlp; p != NULL; p = p->next) {
			int 		fd;
			ddi_devid_t	devid;
			mdname_t	*np;

			/* Drives that cannot be named or opened are skipped */
			np = metaslicename(p->drivenamep, 0, ep);
			if (np == NULL)
				continue;

			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
				continue;

			if (devid_get(fd, &devid) == 0) {
				size_t len;

				/*
				 * The devid must fit in what is left of a
				 * DEV_BSIZE block after the mddb_mb_t.
				 */
				len = devid_sizeof(devid);
				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
					(void) mddserror(ep,
					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
					    np->rname, NULL);
				devid_free(devid);
			} else {
				/* No devid at all is recorded the same way */
				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
				    NULL, NULL, np->rname, NULL);
			}
			(void) close(fd);
		}
	}

	/*
	 * MN disksets don't use DCS clustering services.
	 * For traditional disksets:
	 * There's not really much we can do here if this call fails.
	 * The drives have been added to the set and DiskSuite believes
	 * it owns the drives.
	 * Release the set and hope for the best.
	 */
	if ((!multi_node) &&
	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
		meta_set_release(sp, ep);
		printf(gettext(
		    "Sun Clustering failed to make set primary\n"));
	}

	metafreedrivenamelist(dnlp);
	if (!multi_node)
		(void) meta_unlock(sp, ep);
	md_exit(local_sp, 0);
}
840 
841 static void
842 parse_balance(int argc, char **argv)
843 {
844 	int		c;
845 	mdsetname_t	*sp = NULL;
846 	char		*sname = MD_LOCAL_NAME;
847 	md_error_t	status = mdnullerror;
848 	md_set_desc	*sd;
849 	int		multi_node = 0;
850 
851 	/* reset and parse args */
852 	optind = 1;
853 	opterr = 1;
854 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
855 		switch (c) {
856 		case 'M':
857 			break;
858 		case 'b':
859 			break;
860 		case 's':
861 			sname = optarg;
862 			break;
863 		default:
864 			usage(sp, gettext("unknown options"));
865 		}
866 	}
867 
868 	argc -= optind;
869 	argv += optind;
870 
871 	if (argc != 0)
872 		usage(sp, gettext("too many args"));
873 
874 	if ((sp = metasetname(sname, &status)) == NULL) {
875 		mde_perror(&status, "");
876 		md_exit(sp, 1);
877 	}
878 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
879 		mde_perror(&status, "");
880 		md_exit(sp, 1);
881 	}
882 	if (MD_MNSET_DESC(sd)) {
883 		multi_node = 1;
884 	}
885 	/*
886 	 * Have a valid set, take the set lock also.
887 	 *
888 	 * A MN diskset does not use the set meta_lock but
889 	 * instead uses the clnt_lock of rpc.metad and the
890 	 * suspend/resume feature of the rpc.mdcommd.  Can't
891 	 * use set meta_lock since class 1 messages are
892 	 * grabbing this lock and if this thread is holding
893 	 * the set meta_lock then no rpc.mdcommd suspend
894 	 * can occur.
895 	 */
896 	if (!multi_node) {
897 		if (meta_lock(sp, TRUE, &status) != 0) {
898 			mde_perror(&status, "");
899 			md_exit(sp, 1);
900 		}
901 	}
902 
903 	if (meta_set_balance(sp, &status) != 0) {
904 		mde_perror(&status, "");
905 		md_exit(sp, 1);
906 	}
907 	md_exit(sp, 0);
908 }
909 
910 static void
911 parse_autotake(int argc, char **argv)
912 {
913 	int			c;
914 	int			enable = 0;
915 	mdsetname_t		*sp = NULL;
916 	char			*sname = MD_LOCAL_NAME;
917 	md_error_t		status = mdnullerror;
918 	md_error_t		*ep = &status;
919 
920 	/* reset and parse args */
921 	optind = 1;
922 	opterr = 1;
923 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
924 		switch (c) {
925 		case 'A':
926 			/* verified sub-option in main */
927 			if (strcmp(optarg, "enable") == 0)
928 				enable = 1;
929 			break;
930 		case 's':
931 			/* verified presence of setname in main */
932 			sname = optarg;
933 			break;
934 		default:
935 			usage(sp, gettext("unknown options"));
936 		}
937 	}
938 
939 	if ((sp = metasetname(sname, ep)) == NULL) {
940 		mde_perror(ep, "");
941 		md_exit(sp, 1);
942 	}
943 
944 	if (meta_lock(sp, TRUE, ep) != 0) {
945 		mde_perror(ep, "");
946 		md_exit(sp, 1);
947 	}
948 
949 	if (meta_check_ownership(sp, ep) != 0) {
950 		mde_perror(ep, "");
951 		md_exit(sp, 1);
952 	}
953 
954 	if (meta_set_auto_take(sp, enable, ep) != 0) {
955 		mde_perror(ep, "");
956 		md_exit(sp, 1);
957 	}
958 
959 	md_exit(sp, 0);
960 }
961 
962 static void
963 parse_del(int argc, char **argv)
964 {
965 	int			c;
966 	mdsetname_t		*sp = NULL;
967 	char			*sname = MD_LOCAL_NAME;
968 	int			hosts = FALSE;
969 	int			meds = FALSE;
970 	int			forceflg = FALSE;
971 	md_error_t		status = mdnullerror;
972 	md_error_t		*ep = &status;
973 	mddrivenamelist_t	*dnlp = NULL;
974 	mdsetname_t		*local_sp = NULL;
975 	md_set_desc		*sd;
976 	int			multi_node = 0;
977 
978 	/* reset and parse args */
979 	optind = 1;
980 	opterr = 1;
981 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
982 		switch (c) {
983 		case 'M':
984 			break;
985 		case 'd':
986 			break;
987 		case 'f':
988 			forceflg = TRUE;
989 			break;
990 		case 'h':
991 		case 'm':
992 			if (meds == TRUE || hosts == TRUE)
993 				usage(sp, gettext(
994 				    "only one -m or -h option allowed"));
995 
996 			if (c == 'h')
997 				hosts = TRUE;
998 			else
999 				meds = TRUE;
1000 			break;
1001 		case 's':
1002 			sname = optarg;
1003 			break;
1004 		default:
1005 			usage(sp, gettext("unknown options"));
1006 		}
1007 	}
1008 
1009 	argc -= optind;
1010 	argv += optind;
1011 
1012 	if ((sp = metasetname(sname, ep)) == NULL) {
1013 		mde_perror(ep, "");
1014 		md_exit(local_sp, 1);
1015 	}
1016 
1017 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1018 		mde_perror(ep, "");
1019 		md_exit(local_sp, 1);
1020 	}
1021 
1022 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1023 		mde_perror(ep, "");
1024 		md_exit(local_sp, 1);
1025 	}
1026 	if (MD_MNSET_DESC(sd))
1027 		multi_node = 1;
1028 
1029 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1030 		mde_perror(ep, "");
1031 		md_exit(local_sp, 1);
1032 	}
1033 
1034 	/*
1035 	 * Have a valid set, take the set lock also.
1036 	 *
1037 	 * A MN diskset does not use the set meta_lock but
1038 	 * instead uses the clnt_lock of rpc.metad and the
1039 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1040 	 * use set meta_lock since class 1 messages are
1041 	 * grabbing this lock and if this thread is holding
1042 	 * the set meta_lock then no rpc.mdcommd suspend
1043 	 * can occur.
1044 	 */
1045 	if (!multi_node) {
1046 		if (meta_lock(sp, TRUE, ep) != 0) {
1047 			mde_perror(ep, "");
1048 			md_exit(local_sp, 1);
1049 		}
1050 	}
1051 
1052 	/*
1053 	 * Delete hosts
1054 	 */
1055 	if (hosts == TRUE) {
1056 		if (meta_check_ownership(sp, ep) != 0) {
1057 			/*
1058 			 * If we don't own the set bail out here otherwise
1059 			 * we could delete the node from the DCS service
1060 			 * yet not delete the host from the set.
1061 			 */
1062 			mde_perror(ep, "");
1063 			if (!multi_node)
1064 				(void) meta_unlock(sp, ep);
1065 			md_exit(local_sp, 1);
1066 		}
1067 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1068 		    if (!metad_isautotakebyname(sname)) {
1069 			/*
1070 			 * SC could have been installed after the set was
1071 			 * created.  We still want to be able to delete these
1072 			 * sets.
1073 			 */
1074 			md_perror(gettext(
1075 			    "Failed to delete hosts from DCS service"));
1076 			if (!multi_node)
1077 				(void) meta_unlock(sp, ep);
1078 			md_exit(local_sp, 1);
1079 		    }
1080 		}
1081 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1082 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1083 				(void) printf(gettext(
1084 				    "Failed to restore host(s) in DCS "
1085 				    "database\n"));
1086 			}
1087 			mde_perror(ep, "");
1088 			if (!multi_node)
1089 				(void) meta_unlock(sp, ep);
1090 			md_exit(local_sp, 1);
1091 		}
1092 		if (!multi_node)
1093 			(void) meta_unlock(sp, ep);
1094 		md_exit(local_sp, 0);
1095 	}
1096 
1097 	/*
1098 	 * Delete mediators
1099 	 */
1100 	if (meds == TRUE) {
1101 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1102 			mde_perror(ep, "");
1103 			if (!multi_node)
1104 				(void) meta_unlock(sp, ep);
1105 			md_exit(local_sp, 1);
1106 		}
1107 		if (!multi_node)
1108 			(void) meta_unlock(sp, ep);
1109 		md_exit(local_sp, 0);
1110 	}
1111 
1112 	/*
1113 	 * Delete drives
1114 	 */
1115 
1116 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1117 		mde_perror(ep, "");
1118 		if (!multi_node)
1119 			(void) meta_unlock(sp, ep);
1120 		md_exit(local_sp, 1);
1121 	}
1122 
1123 	if (c == 0) {
1124 		md_perror(gettext(
1125 		    "No drives specified to delete.\n"));
1126 		if (!multi_node)
1127 			(void) meta_unlock(sp, ep);
1128 		md_exit(local_sp, 1);
1129 	}
1130 
1131 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1132 		metafreedrivenamelist(dnlp);
1133 		mde_perror(ep, "");
1134 		if (!multi_node)
1135 			(void) meta_unlock(sp, ep);
1136 		md_exit(local_sp, 1);
1137 	}
1138 
1139 	metafreedrivenamelist(dnlp);
1140 	if (!multi_node)
1141 		(void) meta_unlock(sp, ep);
1142 	md_exit(local_sp, 0);
1143 }
1144 
1145 static void
1146 parse_isowner(int argc, char **argv)
1147 {
1148 	int		c;
1149 	mdsetname_t	*sp = NULL;
1150 	char		*sname = MD_LOCAL_NAME;
1151 	md_error_t	status = mdnullerror;
1152 	md_error_t	*ep = &status;
1153 	char		*host = NULL;
1154 
1155 	/* reset and parse args */
1156 	optind = 1;
1157 	opterr = 1;
1158 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1159 		switch (c) {
1160 		case 'M':
1161 			break;
1162 		case 'o':
1163 			break;
1164 		case 'h':
1165 			if (host != NULL) {
1166 				usage(sp, gettext(
1167 				    "only one -h option allowed"));
1168 			}
1169 			host = optarg;
1170 			break;
1171 		case 's':
1172 			sname = optarg;
1173 			break;
1174 		default:
1175 			usage(sp, gettext("unknown options"));
1176 		}
1177 	}
1178 
1179 	argc -= optind;
1180 	argv += optind;
1181 
1182 	if (argc != 0)
1183 		usage(sp, gettext("too many args"));
1184 
1185 	if ((sp = metasetname(sname, ep)) == NULL) {
1186 		mde_perror(ep, "");
1187 		md_exit(sp, 1);
1188 	}
1189 
1190 	if (host == NULL) {
1191 		if (meta_check_ownership(sp, ep) != 0) {
1192 			mde_perror(ep, "");
1193 			md_exit(sp, 1);
1194 		}
1195 	} else {
1196 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1197 			mde_perror(ep, "");
1198 			md_exit(sp, 1);
1199 		}
1200 	}
1201 	md_exit(sp, 0);
1202 }
1203 
1204 static void
1205 parse_purge(int argc, char **argv)
1206 {
1207 	int		c;
1208 	mdsetname_t	*sp = NULL;
1209 	mdsetname_t	*local_sp = NULL;
1210 	md_drive_desc	*dd;
1211 	char		*sname = MD_LOCAL_NAME;
1212 	char		*thishost = mynode();
1213 	md_error_t	status = mdnullerror;
1214 	md_error_t	*ep = &status;
1215 	int		bypass_cluster_purge = 0;
1216 	int		forceflg = FALSE;
1217 	int		ret = 0;
1218 	int		multi_node = 0;
1219 	md_set_desc		*sd;
1220 
1221 	optind = 1;
1222 	opterr = 1;
1223 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1224 		switch (c) {
1225 		case 'M':
1226 			break;
1227 		case 'C':
1228 			bypass_cluster_purge = 1;
1229 			break;
1230 		case 'f':
1231 			forceflg = TRUE;
1232 			break;
1233 		case 'P':
1234 			break;
1235 		case 's':
1236 			sname = optarg;
1237 			break;
1238 		default:
1239 			usage(sp, gettext("unknown options"));
1240 		}
1241 	}
1242 
1243 	argc -= optind;
1244 	argv += optind;
1245 
1246 	if (argc != 0)
1247 		usage(sp, gettext("too many arguments"));
1248 
1249 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1250 		mde_perror(ep, "");
1251 		md_exit(local_sp, 1);
1252 	}
1253 
1254 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1255 		mde_perror(ep, "");
1256 		md_exit(local_sp, 1);
1257 	}
1258 
1259 	if ((sp = metasetname(sname, ep)) == NULL) {
1260 		mde_perror(ep, "");
1261 		md_exit(sp, 1);
1262 	}
1263 
1264 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1265 		mde_perror(ep, "");
1266 		md_exit(local_sp, 1);
1267 	}
1268 	if (MD_MNSET_DESC(sd))
1269 		multi_node = 1;
1270 
1271 	if (!multi_node) {
1272 		if (meta_lock(sp, TRUE, ep) != 0) {
1273 			mde_perror(ep, "");
1274 			md_exit(local_sp, 1);
1275 		}
1276 	}
1277 
1278 	/* Must not own the set if purging it from this host */
1279 	if (meta_check_ownership(sp, ep) == 0) {
1280 		/*
1281 		 * Need to see if there are disks in the set, if not then
1282 		 * there is no ownership but meta_check_ownership returns 0
1283 		 */
1284 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1285 		if (!mdisok(ep)) {
1286 			mde_perror(ep, "");
1287 			if (!multi_node)
1288 				(void) meta_unlock(sp, ep);
1289 			md_exit(local_sp, 1);
1290 		}
1291 		if (dd != NULL) {
1292 			(void) printf(gettext
1293 			    ("Must not be owner of the set when purging it\n"));
1294 			if (!multi_node)
1295 				(void) meta_unlock(sp, ep);
1296 			md_exit(local_sp, 1);
1297 		}
1298 	}
1299 	/*
1300 	 * Remove the node from the DCS service
1301 	 */
1302 	if (!bypass_cluster_purge) {
1303 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1304 			md_perror(gettext
1305 			    ("Failed to purge hosts from DCS service"));
1306 			if (!multi_node)
1307 				(void) meta_unlock(sp, ep);
1308 			md_exit(local_sp, 1);
1309 		}
1310 	}
1311 
1312 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1313 	    ep)) != 0) {
1314 		if (!bypass_cluster_purge) {
1315 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1316 			    SDSSC_ERROR) {
1317 				(void) printf(gettext(
1318 				    "Failed to restore host in DCS "
1319 				    "database\n"));
1320 			}
1321 		}
1322 		mde_perror(ep, "");
1323 		if (!multi_node)
1324 			(void) meta_unlock(sp, ep);
1325 		md_exit(local_sp, ret);
1326 	}
1327 
1328 	if (!multi_node)
1329 		(void) meta_unlock(sp, ep);
1330 	md_exit(local_sp, 0);
1331 }
1332 
1333 static void
1334 parse_query(int argc, char **argv)
1335 {
1336 	int		c;
1337 	mdsetname_t	*sp = NULL;
1338 	mddb_dtag_lst_t	*dtlp = NULL;
1339 	mddb_dtag_lst_t	*tdtlp;
1340 	char		*sname = MD_LOCAL_NAME;
1341 	md_error_t	status = mdnullerror;
1342 
1343 	/* reset and parse args */
1344 	optind = 1;
1345 	opterr = 1;
1346 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1347 		switch (c) {
1348 		case 'M':
1349 			break;
1350 		case 'q':
1351 			break;
1352 		case 's':
1353 			sname = optarg;
1354 			break;
1355 		default:
1356 			usage(sp, gettext("unknown options"));
1357 		}
1358 	}
1359 
1360 	argc -= optind;
1361 	argv += optind;
1362 
1363 	if (argc != 0)
1364 		usage(sp, gettext("too many args"));
1365 
1366 	if ((sp = metasetname(sname, &status)) == NULL) {
1367 		mde_perror(&status, "");
1368 		md_exit(sp, 1);
1369 	}
1370 
1371 	if (meta_lock(sp, TRUE, &status) != 0) {
1372 		mde_perror(&status, "");
1373 		md_exit(sp, 1);
1374 	}
1375 
1376 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1377 		mde_perror(&status, "");
1378 		md_exit(sp, 1);
1379 	}
1380 
1381 	if (dtlp != NULL)
1382 		(void) printf("The following tag(s) were found:\n");
1383 
1384 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1385 		dtlp = tdtlp->dtl_nx;
1386 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1387 		    tdtlp->dtl_dt.dt_hn,
1388 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1389 		Free(tdtlp);
1390 	}
1391 
1392 	md_exit(sp, 0);
1393 }
1394 
1395 /* Should never be called with sname of a Multinode diskset. */
1396 static void
1397 parse_releaseset(int argc, char **argv)
1398 {
1399 	int		c;
1400 	mdsetname_t	*sp = NULL;
1401 	md_error_t	status = mdnullerror;
1402 	md_error_t	*ep = &status;
1403 	char		*sname = MD_LOCAL_NAME;
1404 	sdssc_boolean_e	cluster_release = SDSSC_False;
1405 	sdssc_version_t	vers;
1406 	rval_e		rval;
1407 	md_set_desc	*sd;
1408 
1409 	/* reset and parse args */
1410 	optind = 1;
1411 	opterr = 1;
1412 	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1413 		switch (c) {
1414 		case 'C':
1415 			cluster_release = SDSSC_True;
1416 			break;
1417 		case 's':
1418 			sname = optarg;
1419 			break;
1420 		case 'r':
1421 			break;
1422 		default:
1423 			usage(sp, gettext("unknown options"));
1424 		}
1425 	}
1426 
1427 	argc -= optind;
1428 	argv += optind;
1429 
1430 	if (argc > 0)
1431 		usage(sp, gettext("too many args"));
1432 
1433 	memset(&vers, 0, sizeof (vers));
1434 
1435 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1436 	    (vers.major == 3) &&
1437 	    (cluster_release == SDSSC_False)) {
1438 
1439 		/*
1440 		 * If the release is being done by the user via the CLI
1441 		 * we need to notify the DCS to release this node as being
1442 		 * the primary. The reason nothing else needs to be done
1443 		 * is due to the fact that the reservation code will exec
1444 		 * metaset -C release to complete the operation.
1445 		 */
1446 		rval = sdssc_notify_service(sname, Release_Primary);
1447 		if (rval == SDSSC_ERROR) {
1448 			printf(gettext(
1449 			    "metaset: failed to notify DCS of release\n"));
1450 		}
1451 		md_exit(NULL, rval == SDSSC_ERROR);
1452 	}
1453 
1454 	if ((sp = metasetname(sname, ep)) == NULL) {
1455 
1456 		/*
1457 		 * It's entirely possible for the SC3.0 reservation code
1458 		 * to call for DiskSet to release a diskset and have that
1459 		 * diskset not exist. During a diskset removal DiskSuite
1460 		 * maybe able to remove all traces of the diskset before
1461 		 * the reservation code execs metaset -C release in which
1462 		 * case the metasetname will fail, but the overall command
1463 		 * shouldn't.
1464 		 */
1465 		if (vers.major == 3)
1466 			md_exit(sp, 0);
1467 		else {
1468 			mde_perror(ep, "");
1469 			md_exit(sp, 1);
1470 		}
1471 	}
1472 
1473 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1474 		mde_perror(ep, "");
1475 		md_exit(sp, 1);
1476 	}
1477 
1478 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1479 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1480 		md_exit(sp, 1);
1481 	}
1482 
1483 	if (meta_lock_nowait(sp, ep) != 0) {
1484 		mde_perror(ep, "");
1485 		md_exit(sp, 10);	/* special errcode */
1486 	}
1487 
1488 	if (meta_set_release(sp, ep)) {
1489 		mde_perror(ep, "");
1490 		md_exit(sp, 1);
1491 	}
1492 	md_exit(sp, 0);
1493 }
1494 
1495 /* Should never be called with sname of a Multinode diskset. */
1496 static void
1497 parse_takeset(int argc, char **argv)
1498 {
1499 	int		c;
1500 	mdsetname_t	*sp = NULL;
1501 	int		flags = 0;
1502 	char		*sname = MD_LOCAL_NAME;
1503 	mhd_mhiargs_t	mhiargs;
1504 	char 		*cp = NULL;
1505 	int		pos = -1;	/* position of timeout value */
1506 	int		usetag = 0;
1507 	static char	*nullopts[] = { NULL };
1508 	md_error_t	status = mdnullerror;
1509 	md_error_t	*ep = &status;
1510 	sdssc_boolean_e	cluster_take = SDSSC_False;
1511 	sdssc_version_t	vers;
1512 	rval_e		rval;
1513 	int		set_take_rval;
1514 
1515 	/* reset and parse args */
1516 	optind = 1;
1517 	opterr = 1;
1518 	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1519 		switch (c) {
1520 		case 'C':
1521 			cluster_take = SDSSC_True;
1522 			break;
1523 		case 'f':
1524 			flags |= TAKE_FORCE;
1525 			break;
1526 		case 's':
1527 			sname = optarg;
1528 			break;
1529 		case 't':
1530 			break;
1531 		case 'u':
1532 			usetag = atoi(optarg);
1533 			flags |= TAKE_USETAG;
1534 			break;
1535 		case 'y':
1536 			flags |= TAKE_USEIT;
1537 			break;
1538 		default:
1539 			usage(sp, gettext("unknown options"));
1540 		}
1541 	}
1542 
1543 	mhiargs = defmhiargs;
1544 
1545 	argc -= optind;
1546 	argv += optind;
1547 
1548 	if (argc > 1)
1549 		usage(sp, gettext("too many args"));
1550 
1551 	/*
1552 	 * If we have a list of timeout value overrides, handle it here
1553 	 */
1554 	while (argv[0] != NULL && *argv[0] != '\0') {
1555 		/*
1556 		 * The use of the nullopts[] "token list" here is to make
1557 		 * getsubopts() simply parse a comma separated list
1558 		 * returning either "" or the contents of the field, the
1559 		 * end condition is exaustion of the initial string, which
1560 		 * is modified in the process.
1561 		 */
1562 		(void) getsubopt(&argv[0], nullopts, &cp);
1563 
1564 		c = 0;			/* re-use c as temp value of timeout */
1565 
1566 		if (*cp != '-')		/* '-' uses default */
1567 			c = atoi(cp);
1568 
1569 		if (c < 0) {
1570 			usage(sp, gettext(
1571 			    "time out values must be > 0"));
1572 		}
1573 
1574 		if (++pos > 3) {
1575 			usage(sp, gettext(
1576 			    "too many timeout values specified."));
1577 		}
1578 
1579 		if (c == 0)		/* 0 or "" field uses default */
1580 			continue;
1581 
1582 		/*
1583 		 * Assign temp value to appropriate structure member based on
1584 		 * its position in the comma separated list.
1585 		 */
1586 		switch (pos) {
1587 		    case 0:
1588 			mhiargs.mh_ff = c;
1589 			break;
1590 
1591 		    case 1:
1592 			mhiargs.mh_tk.reinstate_resv_delay = c;
1593 			break;
1594 
1595 		    case 2:
1596 			mhiargs.mh_tk.min_ownership_delay = c;
1597 			break;
1598 
1599 		    case 3:
1600 			mhiargs.mh_tk.max_ownership_delay = c;
1601 			break;
1602 		}
1603 	}
1604 
1605 	memset(&vers, 0, sizeof (vers));
1606 
1607 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1608 	    (vers.major == 3) &&
1609 	    (cluster_take == SDSSC_False)) {
1610 
1611 		/*
1612 		 * If the take is beging done by the user via the CLI we need
1613 		 * to notify the DCS to make this current node the primary.
1614 		 * The SC3.0 reservation code will in turn exec metaset with
1615 		 * the -C take arg to complete this operation.
1616 		 */
1617 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1618 		    SDSSC_ERROR) {
1619 			printf(gettext(
1620 			    "metaset: failed to notify DCS of take\n"));
1621 		}
1622 		md_exit(NULL, rval == SDSSC_ERROR);
1623 	}
1624 
1625 	if ((sp = metasetname(sname, ep)) == NULL) {
1626 		mde_perror(ep, "");
1627 		md_exit(sp, 1);
1628 	}
1629 
1630 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1631 
1632 		/*
1633 		 * If we're running in a cluster environment and this
1634 		 * node already owns the set. Don't bother trying to
1635 		 * take the set again. There's one case where an adminstrator
1636 		 * is adding disks to a set for the first time. metaset
1637 		 * will take the ownership of the set at that point. During
1638 		 * that add operation SC3.0 notices activity on the device
1639 		 * and also tries to perform a take operation. The SC3.0 take
1640 		 * will fail because the adminstrative add has the set locked
1641 		 */
1642 		md_exit(sp, 0);
1643 	}
1644 
1645 	if (meta_lock_nowait(sp, ep) != 0) {
1646 		mde_perror(ep, "");
1647 		md_exit(sp, 10);	/* special errcode */
1648 	}
1649 
1650 	/*
1651 	 * If a 2 is returned from meta_set_take, this take was able to resolve
1652 	 * an unresolved replicated disk (i.e. a disk is now available that
1653 	 * had been missing during the import of the replicated diskset).
1654 	 * Need to release the diskset and re-take in order to have
1655 	 * the subdrivers re-snarf using the newly resolved (or newly mapped)
1656 	 * devids.  This also allows the namespace to be updated with the
1657 	 * correct major names in the case where the disk being replicated
1658 	 * was handled by a different driver than the replicated disk.
1659 	 */
1660 	set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
1661 	if (set_take_rval == 2) {
1662 		if (meta_set_release(sp, &status)) {
1663 			mde_perror(&status,
1664 			    "Need to release and take set to resolve names.");
1665 			md_exit(sp, 1);
1666 		}
1667 		metaflushdrivenames();
1668 		metaflushsetname(sp);
1669 		set_take_rval = meta_set_take(sp, &mhiargs,
1670 		    (flags | TAKE_RETAKE), usetag, &status);
1671 	}
1672 
1673 	if (set_take_rval == -1) {
1674 		mde_perror(&status, "");
1675 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1676 			md_exit(sp, 2);
1677 		if (mdismddberror(&status, MDE_DB_ACCOK))
1678 			md_exit(sp, 3);
1679 		if (mdismddberror(&status, MDE_DB_STALE))
1680 			md_exit(sp, 66);
1681 		md_exit(sp, 1);
1682 	}
1683 	md_exit(sp, 0);
1684 }
1685 
1686 /*
1687  * Joins a node to a specific set or to all multinode disksets known
1688  * by this node.  If set is specified then caller should have verified
1689  * that the set is a multinode diskset.
1690  *
1691  * If an error occurs, metaset exits with a 1.
1692  * If there is no error, metaset exits with a 0.
1693  */
1694 static void
1695 parse_joinset(int argc, char **argv)
1696 {
1697 	int		c;
1698 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1699 	char		*sname = MD_LOCAL_NAME;
1700 	md_error_t	status = mdnullerror;
1701 	md_error_t	*ep = &status;
1702 	md_set_desc	*sd;
1703 	char		buf[BUFSIZ];
1704 	char		*p = buf;
1705 	set_t		max_sets, setno;
1706 	int		err, cumm_err = 0;
1707 	size_t		bufsz;
1708 
1709 	bufsz = sizeof (buf);
1710 	/* reset and parse args */
1711 	optind = 1;
1712 	opterr = 1;
1713 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1714 		switch (c) {
1715 		case 'M':
1716 			break;
1717 		case 'j':
1718 			break;
1719 		case 's':
1720 			sname = optarg;
1721 			break;
1722 		default:
1723 			usage(sp, gettext("unknown options"));
1724 		}
1725 	}
1726 
1727 	argc -= optind;
1728 	argv += optind;
1729 
1730 	if (argc > 1)
1731 		usage(sp, gettext("too many args"));
1732 
1733 	/*
1734 	 * If no setname option was used, then join all disksets
1735 	 * that this node knows about.   Attempt to join all
1736 	 * disksets that this node knows about.
1737 	 *
1738 	 * Additional text is added to the error messages during
1739 	 * this section of code in order to help the user understand
1740 	 * why the 'join of all sets' failed and which set caused
1741 	 * the failure.
1742 	 */
1743 
1744 	/*
1745 	 * Hold local set lock throughout this call to keep
1746 	 * other actions from interfering (such as creating a new
1747 	 * set, etc.).
1748 	 */
1749 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1750 		mde_perror(ep, "");
1751 		md_exit(sp, 1);
1752 	}
1753 
1754 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1755 		mde_perror(ep, "");
1756 		md_exit(local_sp, 1);
1757 	}
1758 
1759 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1760 		/*
1761 		 * If no set name is given, then walk through all sets
1762 		 * on this node which could include:
1763 		 * 	- MN disksets
1764 		 *	- traditional disksets
1765 		 *	- non-existent disksets
1766 		 * Attempt to join the MN disksets.
1767 		 * If the join of one set fails, print out an error message
1768 		 * about that set and continue the walk.
1769 		 */
1770 		if ((max_sets = get_max_sets(ep)) == 0) {
1771 			mde_perror(ep, "");
1772 			md_exit(local_sp, 1);
1773 		}
1774 
1775 		/* Start walking through all possible disksets */
1776 		for (setno = 1; setno < max_sets; setno++) {
1777 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1778 				if (mdiserror(ep, MDE_NO_SET)) {
1779 					/* No set for this setno - continue */
1780 					mdclrerror(ep);
1781 					continue;
1782 				} else {
1783 					(void) sprintf(p, gettext(
1784 					"Unable to get set %d information"),
1785 					    setno);
1786 					mde_perror(ep, p);
1787 					cumm_err = 1;
1788 					mdclrerror(ep);
1789 					continue;
1790 				}
1791 			}
1792 
1793 			/* If setname is there, set desc should exist. */
1794 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1795 				(void) snprintf(p, bufsz, gettext(
1796 				    "Unable to get set %s desc information"),
1797 				    sp->setname);
1798 				mde_perror(ep, p);
1799 				cumm_err = 1;
1800 				mdclrerror(ep);
1801 				continue;
1802 			}
1803 
1804 			/* Only check MN disksets */
1805 			if (!MD_MNSET_DESC(sd)) {
1806 				continue;
1807 			}
1808 
1809 			/*
1810 			 * Return value of 0 is success.
1811 			 * Return value of -1 means a failure.
1812 			 * Return value of -2 means set could not be
1813 			 * joined, but shouldn't cause an error.
1814 			 * Reasons would be:
1815 			 * 	- no drives in set
1816 			 * 	- node already joined to set
1817 			 * Return value of -3 means joined stale set.
1818 			 * Can't check for all reasons here
1819 			 * since set isn't locked yet across all
1820 			 * nodes in the cluster.  The call
1821 			 * to libmeta routine, meta_set_join, will
1822 			 * lock across the cluster and perform
1823 			 * the checks.
1824 			 */
1825 			if ((err = meta_set_join(sp, ep)) == -1) {
1826 				/* Print error of diskset join failure */
1827 				(void) snprintf(p, bufsz,
1828 				    gettext("Join to diskset %s failed"),
1829 				    sp->setname);
1830 				mde_perror(ep, p);
1831 				cumm_err = 1;
1832 				mdclrerror(ep);
1833 				continue;
1834 			}
1835 
1836 			if (err == -3) {
1837 				/* Print error of diskset join failure */
1838 				(void) snprintf(p, bufsz,
1839 				    gettext("Joined to stale diskset %s"),
1840 				    sp->setname);
1841 				mde_perror(ep, p);
1842 				mdclrerror(ep);
1843 			}
1844 
1845 			mdclrerror(ep);
1846 		}
1847 
1848 		md_exit(local_sp, cumm_err);
1849 	}
1850 
1851 	/*
1852 	 * Code for a specific set is much simpler.
1853 	 * Error messages don't need extra text since specific setname
1854 	 * was used.
1855 	 * Don't need to lock the local set, just the specific set given.
1856 	 */
1857 	if ((sp = metasetname(sname, ep)) == NULL) {
1858 		mde_perror(ep, "");
1859 		md_exit(local_sp, 1);
1860 	}
1861 
1862 	/*
1863 	 * Fail command if meta_set_join returns -1.
1864 	 *
1865 	 * Return of 0 means that node joined set.
1866 	 *
1867 	 * Return of -2 means that node was unable to
1868 	 * join a set since that set had no drives
1869 	 * or that had already joined the set.  No
1870 	 * need to fail the command for these reasons.
1871 	 *
1872 	 * Return of -3 means that set is stale.
1873 	 * Return a value of 66 to historically match traditional disksets.
1874 	 */
1875 	if ((err = meta_set_join(sp, ep)) == -1) {
1876 		mde_perror(&status, "");
1877 		md_exit(local_sp, 1);
1878 	}
1879 
1880 	if (err == -3) {
1881 		/* Print error of diskset join failure */
1882 		(void) snprintf(p, bufsz,
1883 		    gettext("Joined to stale diskset %s"),
1884 		    sp->setname);
1885 		mde_perror(&status, "");
1886 		md_exit(local_sp, 66);
1887 	}
1888 
1889 	md_exit(local_sp, 0);
1890 }
1891 
1892 /*
1893  * Withdraws a node from a specific set or from all multinode disksets known
1894  * by this node.  If set is specified then caller should have verified
1895  * that the set is a multinode diskset.
1896  *
1897  * If an error occurs, metaset exits with a 1.
1898  * If there is no error, metaset exits with a 0.
1899  */
1900 static void
1901 parse_withdrawset(int argc, char **argv)
1902 {
1903 	int		c;
1904 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1905 	char		*sname = MD_LOCAL_NAME;
1906 	md_error_t	status = mdnullerror;
1907 	md_error_t	*ep = &status;
1908 	char		buf[BUFSIZ];
1909 	char		*p = buf;
1910 	md_set_desc	*sd;
1911 	set_t		max_sets, setno;
1912 	int		err, cumm_err = 0;
1913 	size_t		bufsz;
1914 
1915 	bufsz = sizeof (buf);
1916 	/* reset and parse args */
1917 	optind = 1;
1918 	opterr = 1;
1919 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1920 		switch (c) {
1921 		case 'M':
1922 			break;
1923 		case 'w':
1924 			break;
1925 		case 's':
1926 			sname = optarg;
1927 			break;
1928 		default:
1929 			usage(sp, gettext("unknown options"));
1930 		}
1931 	}
1932 
1933 	argc -= optind;
1934 	argv += optind;
1935 
1936 	if (argc > 1)
1937 		usage(sp, gettext("too many args"));
1938 
1939 	/*
1940 	 * If no setname option was used, then withdraw from all disksets
1941 	 * that this node knows about.
1942 	 *
1943 	 * Additional text is added to the error messages during
1944 	 * this section of code in order to help the user understand
1945 	 * why the 'withdraw from all sets' failed and which set caused
1946 	 * the failure.
1947 	 */
1948 
1949 	/*
1950 	 * Hold local set lock throughout this call to keep
1951 	 * other actions from interfering (such as creating a new
1952 	 * set, etc.).
1953 	 */
1954 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1955 		mde_perror(ep, "");
1956 		md_exit(sp, 1);
1957 	}
1958 
1959 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1960 		mde_perror(ep, "");
1961 		md_exit(local_sp, 1);
1962 	}
1963 
1964 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1965 		/*
1966 		 * If no set name is given, then walk through all sets
1967 		 * on this node which could include:
1968 		 * 	- MN disksets
1969 		 *	- traditional disksets
1970 		 *	- non-existent disksets
1971 		 * Attempt to withdraw from the MN disksets.
1972 		 * If the withdraw of one set fails, print out an error
1973 		 * message about that set and continue the walk.
1974 		 */
1975 		if ((max_sets = get_max_sets(ep)) == 0) {
1976 			mde_perror(ep, "");
1977 			md_exit(local_sp, 1);
1978 		}
1979 
1980 		/* Start walking through all possible disksets */
1981 		for (setno = 1; setno < max_sets; setno++) {
1982 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1983 				if (mdiserror(ep, MDE_NO_SET)) {
1984 					/* No set for this setno - continue */
1985 					mdclrerror(ep);
1986 					continue;
1987 				} else {
1988 					(void) sprintf(p, gettext(
1989 					    "Unable to get set %d information"),
1990 					    setno);
1991 					mde_perror(ep, p);
1992 					cumm_err = 1;
1993 					mdclrerror(ep);
1994 					continue;
1995 				}
1996 			}
1997 
1998 			/* If setname is there, set desc should exist. */
1999 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2000 				(void) snprintf(p, bufsz, gettext(
2001 				    "Unable to get set %s desc information"),
2002 				    sp->setname);
2003 				mde_perror(ep, p);
2004 				cumm_err = 1;
2005 				mdclrerror(ep);
2006 				continue;
2007 			}
2008 
2009 			/* Only check MN disksets */
2010 			if (!MD_MNSET_DESC(sd)) {
2011 				continue;
2012 			}
2013 
2014 			/*
2015 			 * Return value of 0 is success.
2016 			 * Return value of -1 means a failure.
2017 			 * Return value of -2 means set could not be
2018 			 * withdrawn from, but this shouldn't cause
2019 			 * an error.  Reasons would be:
2020 			 * 	- no drives in set
2021 			 * 	- node already withdrawn from set
2022 			 * Can't check for all reasons here
2023 			 * since set isn't locked yet across all
2024 			 * nodes in the cluster.  The call
2025 			 * to libmeta routine, meta_set_withdraw, will
2026 			 * lock across the cluster and perform
2027 			 * the checks.
2028 			 */
2029 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2030 				/* Print error of diskset withdraw failure */
2031 				(void) snprintf(p, bufsz,
2032 				    gettext("Withdraw from diskset %s failed"),
2033 				    sp->setname);
2034 				mde_perror(ep, p);
2035 				mdclrerror(ep);
2036 				cumm_err = 1;
2037 				continue;
2038 			}
2039 
2040 			if (err == -2) {
2041 				mdclrerror(ep);
2042 				continue;
2043 			}
2044 
2045 			mdclrerror(ep);
2046 		}
2047 		md_exit(local_sp, cumm_err);
2048 	}
2049 
2050 
2051 	/*
2052 	 * Code for a specific set is much simpler.
2053 	 * Error messages don't need extra text since specific setname
2054 	 * was used.
2055 	 * Don't need to lock the local set, just the specific set given.
2056 	 */
2057 	if ((sp = metasetname(sname, ep)) == NULL) {
2058 		mde_perror(ep, "");
2059 		md_exit(local_sp, 1);
2060 	}
2061 
2062 	/*
2063 	 * Fail command if meta_set_withdraw returns -1.
2064 	 *
2065 	 * Return of 0 means that node withdrew from set.
2066 	 *
2067 	 * Return of -2 means that node was unable to
2068 	 * withdraw from a set since that set had no drives
2069 	 * or node was not joined to set.  No
2070 	 * need to fail the command for these reasons.
2071 	 */
2072 	if (meta_set_withdraw(sp, ep) == -1) {
2073 		mde_perror(&status, "");
2074 		md_exit(local_sp, 1);
2075 	}
2076 
2077 	md_exit(local_sp, 0);
2078 }
2079 
2080 /*
2081  * Should never be called with sname of a Multinode diskset.
2082  */
2083 static void
2084 parse_cluster(int argc, char **argv)
2085 {
2086 	int			c,
2087 				error,
2088 				new_argc,
2089 				x;
2090 	enum cluster_cmd	cmd = ccnotspecified;
2091 	char			*hostname = SDSSC_PROXY_PRIMARY,
2092 				*argument = NULL,
2093 				*sname = MD_LOCAL_NAME,
2094 				primary_node[SDSSC_NODE_NAME_LEN],
2095 				**new_argv = NULL,
2096 				**np = NULL;
2097 	mdsetname_t		*sp = NULL;
2098 	md_error_t		status = mdnullerror;
2099 	md_error_t		*ep = &status;
2100 
2101 	/* reset and parse args */
2102 	optind = 1;
2103 	opterr = 1;
2104 	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2105 		switch (c) {
2106 		case 'C':
2107 			if (cmd != ccnotspecified) {
2108 				md_exit(sp, -1);
2109 			}
2110 			argument = optarg;
2111 
2112 			if (strcmp(argument, "disksin") == 0) {
2113 				cmd = clusterdisksin;
2114 			} else if (strcmp(argument, "version") == 0) {
2115 				cmd = clusterversion;
2116 			} else if (strcmp(argument, "release") == 0) {
2117 				cmd = clusterrelease;
2118 			} else if (strcmp(argument, "take") == 0) {
2119 				cmd = clustertake;
2120 			} else if (strcmp(argument, "proxy") == 0) {
2121 				cmd = clusterproxy;
2122 			} else if (strcmp(argument, "purge") == 0) {
2123 				cmd = clusterpurge;
2124 			} else {
2125 				md_exit(sp, -1);
2126 			}
2127 
2128 			break;
2129 
2130 		case 'h':
2131 			hostname = optarg;
2132 			break;
2133 
2134 		case 's':
2135 			sname = optarg;
2136 			break;
2137 
2138 		case 'f':
2139 		case 't':
2140 		case 'u':
2141 		case 'y':
2142 		case 'r':
2143 			break;
2144 
2145 		default:
2146 			md_exit(sp, -1);
2147 		}
2148 	}
2149 
2150 	/* Now call the appropriate command function. */
2151 	switch (cmd) {
2152 	case clusterversion:
2153 	    printclusterversion();
2154 	    break;
2155 
2156 	case clusterdisksin:
2157 	    if (printdisksin(sname, ep)) {
2158 		md_exit(sp, -1);
2159 	    }
2160 	    break;
2161 
2162 	case clusterrelease:
2163 	    parse_releaseset(argc, argv);
2164 	    break;
2165 
2166 	case clustertake:
2167 	    parse_takeset(argc, argv);
2168 	    break;
2169 
2170 	case clusterproxy:
2171 		/* Should never get here if sname is for MN diskset */
2172 
2173 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2174 			printf(gettext("Out of memory\n"));
2175 			md_exit(sp, 1);
2176 		}
2177 
2178 		np = new_argv;
2179 		new_argc = 0;
2180 		memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2181 
2182 		for (x = 0; x < argc; x++) {
2183 			if (strcmp(argv[x], "-C") == 0) {
2184 
2185 				/*
2186 				 * Need to skip the '-C proxy' args so
2187 				 * just increase x by one and the work is
2188 				 * done.
2189 				 */
2190 				x++;
2191 			} else {
2192 				*np++ = strdup(argv[x]);
2193 				new_argc++;
2194 			}
2195 		}
2196 
2197 		switch (sdssc_get_primary_host(sname, primary_node,
2198 		    SDSSC_NODE_NAME_LEN)) {
2199 		case SDSSC_ERROR:
2200 			md_exit(sp, 1);
2201 			break;
2202 
2203 		case SDSSC_NO_SERVICE:
2204 			if (hostname != SDSSC_PROXY_PRIMARY) {
2205 				(void) strlcpy(primary_node, hostname,
2206 				    SDSSC_NODE_NAME_LEN);
2207 			}
2208 			break;
2209 		}
2210 
2211 		if (sdssc_cmd_proxy(new_argc, new_argv,
2212 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2213 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2214 			md_exit(sp, error);
2215 		} else {
2216 			printf(gettext(
2217 			    "Couldn't proxy command\n"));
2218 			md_exit(sp, 1);
2219 		}
2220 		break;
2221 
2222 	case clusterpurge:
2223 		parse_purge(argc, argv);
2224 		break;
2225 
2226 	default:
2227 	    break;
2228 	}
2229 
2230 	md_exit(sp, 0);
2231 }
2232 
2233 /*
2234  * parse args and do it
2235  */
2236 int
2237 main(int argc, char *argv[])
2238 {
2239 	enum metaset_cmd	cmd = notspecified;
2240 	md_error_t		status = mdnullerror;
2241 	md_error_t		*ep = &status;
2242 	mdsetname_t		*sp = NULL;
2243 	char			*hostname = SDSSC_PROXY_PRIMARY,
2244 				*sname = MD_LOCAL_NAME,
2245 				*auto_take_option = NULL,
2246 				primary_node[SDSSC_NODE_NAME_LEN];
2247 	int			error,
2248 				c,
2249 				auto_take = FALSE,
2250 				stat;
2251 	md_set_desc		*sd;
2252 	int			mflag = 0;
2253 	int			multi_node = 0;
2254 	rval_e			sdssc_res;
2255 
2256 	/*
2257 	 * Get the locale set up before calling any other routines
2258 	 * with messages to ouput.  Just in case we're not in a build
2259 	 * environment, make sure that TEXT_DOMAIN gets set to
2260 	 * something.
2261 	 */
2262 #if !defined(TEXT_DOMAIN)
2263 #define	TEXT_DOMAIN "SYS_TEST"
2264 #endif
2265 	(void) setlocale(LC_ALL, "");
2266 	(void) textdomain(TEXT_DOMAIN);
2267 
2268 	sdssc_res = sdssc_bind_library();
2269 	if (sdssc_res == SDSSC_ERROR) {
2270 		printf(gettext(
2271 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2272 		exit(1);
2273 	}
2274 
2275 	/* initialize */
2276 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2277 		mde_perror(ep, "");
2278 		md_exit(sp, 1);
2279 	}
2280 
2281 	optind = 1;
2282 	opterr = 1;
2283 
2284 	/*
2285 	 * NOTE: The "C" option is strictly for cluster use. it is not
2286 	 * and should not be documented for the customer. - JST
2287 	 */
2288 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2289 	    != -1) {
2290 		switch (c) {
2291 		case 'M':
2292 			mflag = 1;
2293 			break;
2294 		case 'A':
2295 			auto_take = TRUE;
2296 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2297 			    strcmp(optarg, "disable") == 0))
2298 				usage(sp, gettext(
2299 				    "-A: enable or disable must be specified"));
2300 			auto_take_option = optarg;
2301 			break;
2302 		case 'a':
2303 			if (cmd != notspecified) {
2304 				usage(sp, gettext(
2305 				    "conflicting options"));
2306 			}
2307 			cmd = add;
2308 			break;
2309 		case 'b':
2310 			if (cmd != notspecified) {
2311 				usage(sp, gettext(
2312 				    "conflicting options"));
2313 			}
2314 			cmd = balance;
2315 			break;
2316 		case 'd':
2317 			if (cmd != notspecified) {
2318 				usage(sp, gettext(
2319 				    "conflicting options"));
2320 			}
2321 			cmd = delete;
2322 			break;
2323 		case 'C':	/* cluster commands */
2324 			if (cmd != notspecified) {
2325 				md_exit(sp, -1);    /* conflicting options */
2326 			}
2327 			cmd = cluster;
2328 			break;
2329 		case 'f':
2330 			break;
2331 		case 'h':
2332 			hostname = optarg;
2333 			break;
2334 		case 'j':
2335 			if (cmd != notspecified) {
2336 				usage(sp, gettext(
2337 				    "conflicting options"));
2338 			}
2339 			cmd = join;
2340 			break;
2341 		case 'l':
2342 			break;
2343 		case 'L':
2344 			break;
2345 		case 'm':
2346 			break;
2347 		case 'o':
2348 			if (cmd != notspecified) {
2349 				usage(sp, gettext(
2350 				    "conflicting options"));
2351 			}
2352 			cmd = isowner;
2353 			break;
2354 		case 'P':
2355 			if (cmd != notspecified) {
2356 				usage(sp, gettext(
2357 				    "conflicting options"));
2358 			}
2359 			cmd = purge;
2360 			break;
2361 		case 'q':
2362 			if (cmd != notspecified) {
2363 				usage(sp, gettext(
2364 				    "conflicting options"));
2365 			}
2366 			cmd = query;
2367 			break;
2368 		case 'r':
2369 			if (cmd != notspecified) {
2370 				usage(sp, gettext(
2371 				    "conflicting options"));
2372 			}
2373 			cmd = release;
2374 			break;
2375 		case 's':
2376 			sname = optarg;
2377 			break;
2378 		case 't':
2379 			if (cmd != notspecified) {
2380 				usage(sp, gettext(
2381 				    "conflicting options"));
2382 			}
2383 			cmd = take;
2384 			break;
2385 		case 'u':
2386 			break;
2387 		case 'w':
2388 			if (cmd != notspecified) {
2389 				usage(sp, gettext(
2390 				    "conflicting options"));
2391 			}
2392 			cmd = withdraw;
2393 			break;
2394 		case 'y':
2395 			break;
2396 		case '?':
2397 			if (optopt == '?')
2398 				usage(sp, NULL);
2399 			/*FALLTHROUGH*/
2400 		default:
2401 			if (cmd == cluster) {    /* cluster is silent */
2402 				md_exit(sp, -1);
2403 			} else {
2404 				usage(sp, gettext(
2405 				    "unknown command"));
2406 			}
2407 		}
2408 	}
2409 
2410 	/* check if suncluster is installed and -A enable specified */
2411 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2412 	    strcmp(auto_take_option, "enable") == 0) {
2413 	    md_eprintf(gettext(
2414 		"cannot enable auto-take when SunCluster is installed\n"));
2415 	    md_exit(sp, 1);
2416 	}
2417 
2418 	/*
2419 	 * At this point we know that if the -A enable option is specified
2420 	 * for an auto-take diskset that SC is not installed on the machine, so
2421 	 * all of the sdssc calls will just be no-ops.
2422 	 */
2423 
2424 	/* list sets */
2425 	if (cmd == notspecified && auto_take == FALSE) {
2426 		parse_printset(argc, argv);
2427 		/*NOTREACHED*/
2428 	}
2429 
2430 	if (meta_check_root(ep) != 0) {
2431 		mde_perror(ep, "");
2432 		md_exit(sp, 1);
2433 	}
2434 
2435 	/* snarf MDDB */
2436 	if (meta_setup_db_locations(ep) != 0) {
2437 		mde_perror(ep, "");
2438 		md_exit(sp, 1);
2439 	}
2440 
2441 	/*
2442 	 * If sname is a diskset - check for multi_node.
2443 	 * It is possible for sname to not exist.
2444 	 */
2445 	if (strcmp(sname, MD_LOCAL_NAME)) {
2446 		if ((sp = metasetname(sname, ep)) != NULL) {
2447 			/* Set exists - check for MN diskset */
2448 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2449 				mde_perror(ep, "");
2450 				md_exit(sp, 1);
2451 			}
2452 			if (MD_MNSET_DESC(sd)) {
2453 				/*
2454 				 * If a MN diskset always set multi_node
2455 				 * regardless of whether the -M option was
2456 				 * used or not (mflag).
2457 				 */
2458 				multi_node = 1;
2459 			} else {
2460 				/*
2461 				 * If a traditional diskset, mflag must
2462 				 * not be set.
2463 				 */
2464 				if (mflag) {
2465 					usage(sp, gettext(
2466 					    "-M option only allowed "
2467 					    "on multi-owner diskset"));
2468 				}
2469 			}
2470 		} else {
2471 			/*
2472 			 * Set name does not exist, set multi_node
2473 			 * based on -M option.
2474 			 */
2475 			if (mflag) {
2476 				multi_node = 1;
2477 			}
2478 		}
2479 	}
2480 
2481 	if (auto_take && multi_node) {
2482 		/* Can't mix multinode and auto-take on a diskset */
2483 		usage(sp,
2484 		    gettext("-A option not allowed on multi-owner diskset"));
2485 	}
2486 
2487 	/*
2488 	 * MN disksets don't use DCS clustering services, so
2489 	 * do not get primary_node for MN diskset since no command
2490 	 * proxying is done to Primary cluster node.  Do not proxy
2491 	 * MN diskset commands of join and withdraw when issued without
2492 	 * a valid setname.
2493 	 * For traditional disksets: proxy all commands except a take
2494 	 * and release.  Use first host listed as the host to send the
2495 	 * command to if there isn't already a primary
2496 	 */
2497 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2498 	    (cmd != take) && (cmd != release) &&
2499 	    (cmd != cluster) && (cmd != join) &&
2500 	    (cmd != withdraw) && (cmd != purge)) {
2501 		stat = sdssc_get_primary_host(sname, primary_node,
2502 		    SDSSC_NODE_NAME_LEN);
2503 		switch (stat) {
2504 			case SDSSC_ERROR:
2505 				return (0);
2506 
2507 			case SDSSC_NO_SERVICE:
2508 				if (hostname != SDSSC_PROXY_PRIMARY) {
2509 					(void) strlcpy(primary_node, hostname,
2510 					    SDSSC_NODE_NAME_LEN);
2511 				} else {
2512 					memset(primary_node, '\0',
2513 					    SDSSC_NODE_NAME_LEN);
2514 				}
2515 				break;
2516 		}
2517 
2518 		/*
2519 		 * We've got a complicated decision here regarding
2520 		 * the hostname. If we didn't get a primary host
2521 		 * and a host name wasn't supplied on the command line
2522 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2523 		 * use what's been found.
2524 		 */
2525 		if (sdssc_cmd_proxy(argc, argv,
2526 		    primary_node[0] == '\0' ?
2527 			SDSSC_PROXY_PRIMARY : primary_node,
2528 		    &error) == SDSSC_PROXY_DONE) {
2529 			exit(error);
2530 		}
2531 	}
2532 
2533 	/* cluster-specific commands */
2534 	if (cmd == cluster) {
2535 		if (multi_node) {
2536 			/*
2537 			 * If a specific MN diskset is given, immediately
2538 			 * fail -C command.
2539 			 */
2540 			usage(sp, gettext(
2541 			    "-C option not allowed on multi-owner diskset"));
2542 		} else {
2543 			parse_cluster(argc, argv);
2544 			/*NOTREACHED*/
2545 		}
2546 	}
2547 
2548 	/* join MultiNode diskset */
2549 	if (cmd == join) {
2550 		/*
2551 		 * If diskset specified, verify that it exists
2552 		 * and is a multinode diskset.
2553 		 */
2554 		if (strcmp(sname, MD_LOCAL_NAME)) {
2555 			if ((sp = metasetname(sname, ep)) == NULL) {
2556 				mde_perror(ep, "");
2557 				md_exit(sp, 1);
2558 			}
2559 
2560 			if (!multi_node) {
2561 				usage(sp, gettext(
2562 				    "-j option only allowed on "
2563 				    "multi-owner diskset"));
2564 			}
2565 		}
2566 		/*
2567 		 * Start mddoors daemon here.
2568 		 * mddoors itself takes care there will be only one
2569 		 * instance running, so starting it twice won't hurt
2570 		 */
2571 		pclose(popen("/usr/lib/lvm/mddoors", "w"));
2572 		parse_joinset(argc, argv);
2573 		/*NOTREACHED*/
2574 	}
2575 
2576 	/* withdraw from MultiNode diskset */
2577 	if (cmd == withdraw) {
2578 		/*
2579 		 * If diskset specified, verify that it exists
2580 		 * and is a multinode diskset.
2581 		 */
2582 		if (strcmp(sname, MD_LOCAL_NAME)) {
2583 			if ((sp = metasetname(sname, ep)) == NULL) {
2584 				mde_perror(ep, "");
2585 				md_exit(sp, 1);
2586 			}
2587 
2588 			if (!multi_node) {
2589 				usage(sp, gettext(
2590 				    "-w option only allowed on "
2591 				    "multi-owner diskset"));
2592 			}
2593 		}
2594 		parse_withdrawset(argc, argv);
2595 		/*NOTREACHED*/
2596 	}
2597 
2598 	/* must have set for everything else */
2599 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2600 		usage(sp, gettext("setname must be specified"));
2601 
2602 	/* add hosts or drives */
2603 	if (cmd == add) {
2604 		/*
2605 		 * In the multi node case start mddoors daemon.
2606 		 * mddoors itself takes care there will be
2607 		 * only one instance running, so starting it twice won't hurt
2608 		 */
2609 		if (multi_node) {
2610 			pclose(popen("/usr/lib/lvm/mddoors", "w"));
2611 		}
2612 
2613 		parse_add(argc, argv);
2614 		/*NOTREACHED*/
2615 	}
2616 
2617 	/* re-balance the replicas */
2618 	if (cmd == balance) {
2619 		parse_balance(argc, argv);
2620 		/*NOTREACHED*/
2621 	}
2622 
2623 	/* delete hosts or drives */
2624 	if (cmd == delete) {
2625 		parse_del(argc, argv);
2626 		/*NOTREACHED*/
2627 	}
2628 
2629 	/* check ownership */
2630 	if (cmd == isowner) {
2631 		parse_isowner(argc, argv);
2632 		/*NOTREACHED*/
2633 	}
2634 
2635 	/* purge the diskset */
2636 	if (cmd == purge) {
2637 		parse_purge(argc, argv);
2638 		/*NOTREACHED*/
2639 	}
2640 
2641 	/* query for data marks */
2642 	if (cmd == query) {
2643 		parse_query(argc, argv);
2644 		/*NOTREACHED*/
2645 	}
2646 
2647 	/* release ownership */
2648 	if (cmd == release) {
2649 		if (multi_node) {
2650 			/* Can't release multinode diskset */
2651 			usage(sp, gettext(
2652 			    "-r option not allowed on multi-owner diskset"));
2653 		} else {
2654 			parse_releaseset(argc, argv);
2655 			/*NOTREACHED*/
2656 		}
2657 	}
2658 
2659 	/* take ownership */
2660 	if (cmd == take) {
2661 		if (multi_node) {
2662 			/* Can't take multinode diskset */
2663 			usage(sp, gettext(
2664 			    "-t option not allowed on multi-owner diskset"));
2665 		} else {
2666 			parse_takeset(argc, argv);
2667 			/*NOTREACHED*/
2668 		}
2669 	}
2670 
2671 	/* take ownership of auto-take sets */
2672 	if (auto_take) {
2673 		parse_autotake(argc, argv);
2674 		/*NOTREACHED*/
2675 	}
2676 
2677 	/*NOTREACHED*/
2678 	return (0);
2679 }
2680