xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision 0b6016e6ff70af39f99c9cc28e0c2207c8f5413c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Metadevice diskset utility.
30  */
31 
32 #include <meta.h>
33 #include <sys/lvm/md_mddb.h>
34 #include <sdssc.h>
35 
36 enum metaset_cmd {
37 	notspecified,
38 	add,
39 	balance,
40 	delete,
41 	cluster,
42 	isowner,
43 	purge,
44 	query,
45 	release,
46 	take,
47 	join,			/* Join a multinode diskset */
48 	withdraw		/* Withdraw from a multinode diskset */
49 };
50 
51 enum cluster_cmd {
52 	ccnotspecified,
53 	clusterversion,		/* Return the version of the cluster I/F */
54 	clusterdisksin,		/* List disks in a given diskset */
55 	clustertake,		/* back door for Cluster take */
56 	clusterrelease,		/* ditto */
57 	clusterpurge,		/* back door for Cluster purge */
58 	clusterproxy		/* proxy the args after '--' to primary */
59 };
60 
61 static void
62 usage(
63 	mdsetname_t	*sp,
64 	char		*string)
65 {
66 	if ((string != NULL) && (*string != '\0'))
67 		md_eprintf("%s\n", string);
68 	(void) fprintf(stderr, gettext(
69 "usage:	%s -s setname -a [-A enable | disable] -h hostname ...\n"
70 "	%s -s setname -a [-M] -h hostname ...\n"
71 "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
72 "	%s -s setname -d [-M] -h hostname ...\n"
73 "	%s -s setname -d [-M] -f -h all-hostnames\n"
74 "	%s -s setname -d [-M] [-f] drivename ...\n"
75 "	%s -s setname -d [-M] [-f] hostname ...\n"
76 "	%s -s setname -A enable | disable\n"
77 "	%s -s setname -t [-f]\n"
78 "	%s -s setname -r\n"
79 "	%s [-s setname] -j [-M]\n"
80 "	%s [-s setname] -w [-M]\n"
81 "	%s -s setname -P [-M]\n"
82 "	%s -s setname -b [-M]\n"
83 "	%s -s setname -o [-M] [-h hostname]\n"
84 "	%s [-s setname]\n"
85 "\n"
86 "		hostname = contents of /etc/nodename\n"
87 "		drivename = cNtNdN no slice\n"
88 "		[-M] for multi-owner set is optional except on set creation\n"),
89 	myname, myname, myname, myname, myname, myname, myname, myname,
90 	myname, myname, myname, myname, myname, myname, myname, myname);
91 	md_exit(sp, (string == NULL) ? 0 : 1);
92 }
93 
94 /*
95  * The svm.sync rc script relies heavily on the metaset output.
96  * Any changes to the metaset output MUST verify that the rc script
97  * does not break. Not doing so may potentially leave the system
98  * unusable. You have been WARNED.
99  */
100 static int
101 printset(mdsetname_t *sp, md_error_t *ep)
102 {
103 	int			i, j;
104 	md_set_desc		*sd;
105 	md_drive_desc		*dd, *p;
106 	int			max_meds;
107 	md_mnnode_desc		*nd;
108 
109 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
110 		return (-1);
111 
112 	/*
113 	 * Only get set owner information for traditional diskset.
114 	 * This set owner information is stored in the node records
115 	 * for a MN diskset.
116 	 */
117 	if (!(MD_MNSET_DESC(sd))) {
118 		if (metaget_setownership(sp, ep) == -1)
119 			return (-1);
120 	}
121 
122 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
123 	    ep)) == NULL) && !mdisok(ep))
124 		return (-1);
125 
126 	if (MD_MNSET_DESC(sd)) {
127 		(void) printf(gettext(
128 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
129 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
130 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
131 		    (dd != NULL)) {
132 			(void) printf(gettext(
133 				"Master and owner information unavailable "
134 				"until joined (metaset -j)\n"));
135 		}
136 	} else {
137 		(void) printf(gettext(
138 		    "\nSet name = %s, Set number = %d\n"),
139 		    sp->setname, sp->setno);
140 	}
141 
142 	if (MD_MNSET_DESC(sd)) {
143 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
144 			gettext("Host"), gettext("Owner"), gettext("Member"));
145 		nd = sd->sd_nodelist;
146 		while (nd) {
147 			/*
148 			 * Don't print nodes that aren't ok since they may be
149 			 * removed from config during a reconfig cycle.  If a
150 			 * node was being added to a diskset and the entire
151 			 * cluster went down but the node being added was unable
152 			 * to reboot, there's no way to know if that node had
153 			 * its own node record set to OK or not.  So, node
154 			 * record is left in ADD state during reconfig cycle.
155 			 * When that node reboots and returns to the cluster,
156 			 * the reconfig cycle will either remove the node
157 			 * record (if not marked OK on that node) or will mark
158 			 * it OK on all nodes.
159 			 * It is very important to only remove a node record
160 			 * from the other nodes when that node record is not
161 			 * marked OK on its own node - otherwise, different
162 			 * nodes would have different nodelists possibly
163 			 * causing different nodes to to choose different
164 			 * masters.
165 			 */
166 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
167 				nd = nd->nd_next;
168 				continue;
169 			}
170 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
171 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
172 				(void) printf(
173 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
174 				    nd->nd_nodename, gettext("multi-owner"),
175 				    gettext("Yes"));
176 			} else /* Should never be able to happen */
177 			    if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
178 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
179 				(void) printf(
180 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
181 				    nd->nd_nodename, gettext("multi-owner"),
182 				    gettext("No"));
183 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
184 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
185 				(void) printf(
186 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
187 				    nd->nd_nodename, gettext(""),
188 				    gettext("Yes"));
189 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
190 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
191 				(void) printf(
192 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
193 				    nd->nd_nodename, gettext(""),
194 				    gettext("No"));
195 			}
196 			nd = nd->nd_next;
197 		}
198 	} else {
199 		(void) printf("\n%-19.19s %-5.5s\n",
200 			gettext("Host"), gettext("Owner"));
201 		for (i = 0; i < MD_MAXSIDES; i++) {
202 			/* Skip empty slots */
203 			if (sd->sd_nodes[i][0] == '\0')
204 				continue;
205 
206 			/*
207 			 * Standard hostname field is 17 bytes but metaset will
208 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
209 			 */
210 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
211 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
212 				(sd->sd_isown[i] ? gettext("Yes (auto)") :
213 				    gettext("No (auto)"))
214 				: (sd->sd_isown[i] ? gettext("Yes") : "")));
215 		}
216 	}
217 
218 	if (sd->sd_med.n_cnt > 0)
219 		(void) printf("\n%-19.19s %-7.7s\n",
220 		    gettext("Mediator Host(s)"), gettext("Aliases"));
221 
222 	if ((max_meds = get_max_meds(ep)) == 0)
223 		return (-1);
224 
225 	for (i = 0; i < max_meds; i++) {
226 		if (sd->sd_med.n_lst[i].a_cnt == 0)
227 			continue;
228 		(void) printf("  %-17.17s   ", sd->sd_med.n_lst[i].a_nm[0]);
229 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
230 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
231 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
232 				(void) printf(gettext(", "));
233 		}
234 		(void) printf("\n");
235 	}
236 
237 	if (dd) {
238 		int	len = 0;
239 
240 
241 		/*
242 		 * Building a format string on the fly that will
243 		 * be used in (f)printf. This allows the length
244 		 * of the ctd to vary from small to large without
245 		 * looking horrible.
246 		 */
247 		for (p = dd; p != NULL; p = p->dd_next)
248 			len = max(len, strlen(p->dd_dnp->cname));
249 
250 		len += 2;
251 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
252 		    gettext("Drive"),
253 		    gettext("Dbase"));
254 		for (p = dd; p != NULL; p = p->dd_next) {
255 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
256 			    p->dd_dnp->cname,
257 			    (p->dd_dbcnt ? gettext("Yes") :
258 			    gettext("No")));
259 		}
260 	}
261 
262 	return (0);
263 }
264 
265 static int
266 printsets(mdsetname_t *sp, md_error_t *ep)
267 {
268 	int			i;
269 	mdsetname_t		*sp1;
270 	set_t			max_sets;
271 
272 	/*
273 	 * print setname given.
274 	 */
275 	if (! metaislocalset(sp)) {
276 		if (printset(sp, ep))
277 			return (-1);
278 		return (0);
279 	}
280 
281 	if ((max_sets = get_max_sets(ep)) == 0)
282 		return (-1);
283 
284 	/*
285 	 * Print all known sets
286 	 */
287 	for (i = 1; i < max_sets; i++) {
288 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
289 			if (! mdiserror(ep, MDE_NO_SET))
290 				break;
291 			mdclrerror(ep);
292 			continue;
293 		}
294 
295 		if (printset(sp1, ep))
296 			break;
297 	}
298 	if (! mdisok(ep))
299 		return (-1);
300 
301 	return (0);
302 }
303 
304 /*
305  * Print the current versionn of the cluster contract private interface.
306  */
307 static void
308 printclusterversion()
309 {
310 	printf("%s\n", METASETIFVERSION);
311 }
312 
313 /*
314  * Print the disks that make up the given disk set. This is used
315  * exclusively by Sun Cluster and is contract private.
316  * Should never be called with sname of a Multinode diskset.
317  */
318 static int
319 printdisksin(char *sname, md_error_t *ep)
320 {
321 	mdsetname_t	*sp;
322 	md_drive_desc	*dd, *p;
323 
324 	if ((sp = metasetname(sname, ep)) == NULL) {
325 
326 		/*
327 		 * During a deletion of a set the associated service is
328 		 * put offline. The SC3.0 reservation code calls disksuite
329 		 * to find a list of disks associated with the set so that
330 		 * it can release the reservation on those disks. In this
331 		 * case there won't be any disks or even a set left. So just
332 		 * return.
333 		 */
334 		return (0);
335 	}
336 
337 	if (metaget_setownership(sp, ep) == -1)
338 		return (-1);
339 
340 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
341 	    ep)) == NULL) && !mdisok(ep))
342 		return (-1);
343 
344 	for (p = dd; p != NULL; p = p->dd_next)
345 		(void) printf("%s\n", p->dd_dnp->rname);
346 
347 	return (0);
348 }
349 
350 static void
351 parse_printset(int argc, char **argv)
352 {
353 	int		c;
354 	mdsetname_t	*sp = NULL;
355 	char		*sname = MD_LOCAL_NAME;
356 	md_error_t	status = mdnullerror;
357 	md_error_t	*ep = &status;
358 
359 	/* reset and parse args */
360 	optind = 1;
361 	opterr = 1;
362 	while ((c = getopt(argc, argv, "s:")) != -1) {
363 		switch (c) {
364 		case 's':
365 			sname = optarg;
366 			break;
367 		default:
368 			usage(sp, gettext("unknown options"));
369 		}
370 	}
371 
372 	argc -= optind;
373 	argv += optind;
374 
375 	if (argc != 0)
376 		usage(sp, gettext("too many args"));
377 
378 	if ((sp = metasetname(sname, ep)) == NULL) {
379 		mde_perror(ep, "");
380 		md_exit(sp, 1);
381 	}
382 
383 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
384 		mde_perror(ep, "");
385 		md_exit(sp, 1);
386 	}
387 
388 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
389 		mde_perror(ep, "");
390 		md_exit(sp, 1);
391 	}
392 
393 	md_exit(sp, 0);
394 }
395 
396 static void
397 parse_add(int argc, char **argv)
398 {
399 	int			c,
400 				created_set,
401 				hosts = FALSE,
402 				meds = FALSE,
403 				auto_take = FALSE,
404 				force_label = FALSE,
405 				default_size = TRUE;
406 	mdsetname_t		*sp = NULL;
407 	char			*sname = MD_LOCAL_NAME;
408 	md_error_t		status = mdnullerror,
409 				*ep = &status;
410 	mddrivenamelist_t	*dnlp = NULL;
411 	mddrivenamelist_t	*p;
412 	daddr_t			dbsize,
413 				nblks;
414 	mdsetname_t		*local_sp = NULL;
415 	int			multi_node = 0;
416 	md_set_desc		*sd;
417 	rval_e			sdssc_rval;
418 
419 	/* reset and parse args */
420 	optind = 1;
421 	opterr = 1;
422 	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
423 		switch (c) {
424 		case 'M':
425 			multi_node = 1;
426 			break;
427 		case 'A':
428 			/* verified sub-option in main */
429 			if (strcmp(optarg, "enable") == 0)
430 				auto_take = TRUE;
431 			break;
432 		case 'a':
433 			break;
434 		case 'h':
435 		case 'm':
436 			if (meds == TRUE || hosts == TRUE)
437 				usage(sp, gettext(
438 				    "only one -m or -h option allowed"));
439 
440 			if (default_size == FALSE || force_label == TRUE)
441 				usage(sp, gettext(
442 				    "conflicting options"));
443 
444 			if (c == 'h')
445 				hosts = TRUE;
446 			else
447 				meds = TRUE;
448 			break;
449 		case 'l':
450 			if (hosts == TRUE || meds == TRUE)
451 				usage(sp, gettext(
452 				    "conflicting options"));
453 			if (sscanf(optarg, "%ld", &dbsize) != 1) {
454 				md_eprintf(gettext(
455 				    "%s: bad format\n"), optarg);
456 				usage(sp, "");
457 			}
458 
459 			default_size = FALSE;
460 			break;
461 		case 'L':
462 			/* Same criteria as -l */
463 			if (hosts == TRUE || meds == TRUE)
464 				usage(sp, gettext(
465 				    "conflicting options"));
466 			force_label = TRUE;
467 			break;
468 		case 's':
469 			sname = optarg;
470 			break;
471 		default:
472 			usage(sp, gettext(
473 			    "unknown options"));
474 		}
475 	}
476 
477 	/* Can only use -A enable when creating the single-node set */
478 	if (auto_take && hosts != TRUE)
479 		usage(sp, gettext("conflicting options"));
480 
481 	argc -= optind;
482 	argv += optind;
483 
484 	/*
485 	 * Add hosts
486 	 */
487 	if (hosts == TRUE) {
488 
489 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
490 			mde_perror(ep, "");
491 			md_exit(local_sp, 1);
492 		}
493 
494 		if (meta_lock(local_sp, TRUE, ep) != 0) {
495 			mde_perror(ep, "");
496 			md_exit(local_sp, 1);
497 		}
498 
499 		/*
500 		 * Keep track of Cluster set creation. Need to complete
501 		 * the transaction no matter if the set was created or not.
502 		 */
503 		created_set = 0;
504 
505 		/*
506 		 * Have no set, cannot take the lock, so only take the
507 		 * local lock.
508 		 */
509 		if ((sp = metasetname(sname, ep)) == NULL) {
510 			sdssc_rval = 0;
511 			if (multi_node) {
512 				/*
513 				 * When running on a cluster system that
514 				 * does not support MN disksets, the routine
515 				 * sdssc_mo_create_begin will be bound
516 				 * to the SVM routine not_bound_error
517 				 * which returns SDSSC_NOT_BOUND_ERROR.
518 				 *
519 				 * When running on a cluster system that
520 				 * does support MN disksets, the routine
521 				 * sdssc_mo_create_begin will be bound to
522 				 * the sdssc_mo_create_begin routine in
523 				 * library libsdssc_so.  A call to
524 				 * sdssc_mo_create_begin will return with
525 				 * either SDSSC_ERROR or SDSSC_OKAY. If
526 				 * an SDSSC_OKAY is returned, then the
527 				 * cluster framework has allocated a
528 				 * set number for this new set that is unique
529 				 * across traditional and MN disksets.
530 				 * Libmeta will get this unique set number
531 				 * by calling sdssc_get_index.
532 				 *
533 				 * When running on a non-cluster system,
534 				 * the routine sdssc_mo_create_begin
535 				 * will be bound to the SVM routine
536 				 * not_bound which returns SDSSC_NOT_BOUND.
537 				 * In this case, all sdssc routines will
538 				 * return SDSSC_NOT_BOUND.  No need to check
539 				 * for return value of SDSSC_NOT_BOUND since
540 				 * the libmeta call to get the set number
541 				 * (sdssc_get_index) will also fail with
542 				 * SDSSC_NOT_BOUND causing libmeta to
543 				 * determine its own set number.
544 				 */
545 				sdssc_rval = sdssc_mo_create_begin(sname, argc,
546 					argv, SDSSC_PICK_SETNO);
547 				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
548 					mderror(ep, MDE_NOT_MN, NULL);
549 					mde_perror(ep,
550 					"Cluster node does not support "
551 					"multi-owner diskset operations");
552 					md_exit(local_sp, 1);
553 				} else if (sdssc_rval == SDSSC_ERROR) {
554 					mde_perror(ep, "");
555 					md_exit(local_sp, 1);
556 				}
557 			} else {
558 				sdssc_rval = sdssc_create_begin(sname, argc,
559 					argv, SDSSC_PICK_SETNO);
560 				if (sdssc_rval == SDSSC_ERROR) {
561 					mde_perror(ep, "");
562 					md_exit(local_sp, 1);
563 				}
564 			}
565 			/*
566 			 * Created diskset (as opposed to adding a
567 			 * host to an existing diskset).
568 			 */
569 			created_set = 1;
570 
571 			sp = Zalloc(sizeof (*sp));
572 			sp->setname = Strdup(sname);
573 			sp->lockfd = MD_NO_LOCK;
574 			mdclrerror(ep);
575 		} else {
576 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
577 				mde_perror(ep, "");
578 				md_exit(local_sp, 1);
579 			}
580 			if (MD_MNSET_DESC(sd)) {
581 				multi_node = 1;
582 			}
583 
584 			/*
585 			 * can't add hosts to an existing set & enable
586 			 * auto-take
587 			 */
588 			if (auto_take)
589 				usage(sp, gettext("conflicting options"));
590 
591 			/*
592 			 * Have a valid set, take the set lock also.
593 			 *
594 			 * A MN diskset does not use the set meta_lock but
595 			 * instead uses the clnt_lock of rpc.metad and the
596 			 * suspend/resume feature of the rpc.mdcommd.  Can't
597 			 * use set meta_lock since class 1 messages are
598 			 * grabbing this lock and if this thread is holding
599 			 * the set meta_lock then no rpc.mdcommd suspend
600 			 * can occur.
601 			 */
602 			if (!multi_node) {
603 				if (meta_lock(sp, TRUE, ep) != 0) {
604 					mde_perror(ep, "");
605 					md_exit(local_sp, 1);
606 				}
607 			}
608 		}
609 
610 		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
611 		    ep)) {
612 			if (created_set)
613 				sdssc_create_end(sname, SDSSC_CLEANUP);
614 			mde_perror(&status, "");
615 			if (!multi_node)
616 				(void) meta_unlock(sp, ep);
617 			md_exit(local_sp, 1);
618 		}
619 
620 		if (created_set)
621 			sdssc_create_end(sname, SDSSC_COMMIT);
622 
623 		else {
624 			/*
625 			 * If adding hosts to existing diskset,
626 			 * call DCS svcs
627 			 */
628 			sdssc_add_hosts(sname, argc, argv);
629 		}
630 		if (!multi_node)
631 			(void) meta_unlock(sp, ep);
632 		md_exit(local_sp, 0);
633 	}
634 
635 	/*
636 	 * Add mediators
637 	 */
638 	if (meds == TRUE) {
639 
640 		if ((sp = metasetname(sname, ep)) == NULL) {
641 			mde_perror(ep, "");
642 			md_exit(local_sp, 1);
643 		}
644 
645 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
646 			mde_perror(ep, "");
647 			md_exit(local_sp, 1);
648 		}
649 
650 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
651 			mde_perror(ep, "");
652 			md_exit(local_sp, 1);
653 		}
654 		if (MD_MNSET_DESC(sd)) {
655 			multi_node = 1;
656 		}
657 
658 		if (meta_lock(local_sp, TRUE, ep) != 0) {
659 			mde_perror(ep, "");
660 			md_exit(local_sp, 1);
661 		}
662 		/*
663 		 * A MN diskset does not use the set meta_lock but
664 		 * instead uses the clnt_lock of rpc.metad and the
665 		 * suspend/resume feature of the rpc.mdcommd.  Can't
666 		 * use set meta_lock since class 1 messages are
667 		 * grabbing this lock and if this thread is holding
668 		 * the set meta_lock then no rpc.mdcommd suspend
669 		 * can occur.
670 		 */
671 		if (!multi_node) {
672 			if (meta_lock(sp, TRUE, ep) != 0) {
673 				mde_perror(ep, "");
674 				md_exit(local_sp, 1);
675 			}
676 		}
677 
678 		if (meta_set_addmeds(sp, argc, argv, ep)) {
679 			mde_perror(&status, "");
680 			if (!multi_node)
681 				(void) meta_unlock(sp, ep);
682 			md_exit(local_sp, 1);
683 		}
684 
685 		if (!multi_node)
686 			(void) meta_unlock(sp, ep);
687 		md_exit(local_sp, 0);
688 	}
689 
690 	/*
691 	 * Add drives
692 	 */
693 	if ((sp = metasetname(sname, ep)) == NULL) {
694 		mde_perror(ep, "");
695 		md_exit(local_sp, 1);
696 	}
697 
698 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
699 		mde_perror(ep, "");
700 		md_exit(local_sp, 1);
701 	}
702 
703 	/* Determine if diskset is a MN diskset or not */
704 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
705 		mde_perror(ep, "");
706 		md_exit(local_sp, 1);
707 	}
708 	if (MD_MNSET_DESC(sd)) {
709 		multi_node = 1;
710 	}
711 
712 	if (meta_lock(local_sp, TRUE, ep) != 0) {
713 		mde_perror(ep, "");
714 		md_exit(local_sp, 1);
715 	}
716 
717 	/* Make sure database size is within limits */
718 	if (default_size == FALSE) {
719 		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
720 		    (!multi_node && dbsize < MDDB_MINBLKS))
721 			usage(sp, gettext(
722 			    "size (-l) is too small"));
723 
724 		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
725 		    (!multi_node && dbsize > MDDB_MAXBLKS))
726 			usage(sp, gettext(
727 			    "size (-l) is too big"));
728 	}
729 
730 	/*
731 	 * Have a valid set, take the set lock also.
732 	 *
733 	 * A MN diskset does not use the set meta_lock but
734 	 * instead uses the clnt_lock of rpc.metad and the
735 	 * suspend/resume feature of the rpc.mdcommd.  Can't
736 	 * use set meta_lock since class 1 messages are
737 	 * grabbing this lock and if this thread is holding
738 	 * the set meta_lock then no rpc.mdcommd suspend
739 	 * can occur.
740 	 */
741 	if (!multi_node) {
742 		if (meta_lock(sp, TRUE, ep) != 0) {
743 			mde_perror(ep, "");
744 			md_exit(local_sp, 1);
745 		}
746 	}
747 
748 
749 	/*
750 	 * If using the default size,
751 	 *   then let's adjust the default to the minimum
752 	 *   size currently in use.
753 	 */
754 	if (default_size) {
755 		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
756 		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
757 			mdclrerror(ep);
758 		else
759 			dbsize = nblks;	/* adjust replica size */
760 	}
761 
762 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
763 		mde_perror(ep, "");
764 		if (!multi_node)
765 			(void) meta_unlock(sp, ep);
766 		md_exit(local_sp, 1);
767 	}
768 
769 	if (c == 0) {
770 		md_perror(gettext(
771 		    "No drives specified to add.\n"));
772 		if (!multi_node)
773 			(void) meta_unlock(sp, ep);
774 		md_exit(local_sp, 1);
775 	}
776 
777 	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
778 		metafreedrivenamelist(dnlp);
779 		mde_perror(ep, "");
780 		if (!multi_node)
781 			(void) meta_unlock(sp, ep);
782 		md_exit(local_sp, 1);
783 	}
784 
785 	/*
786 	 * MN disksets don't have a device id in the master block
787 	 * For traditional disksets, check for the drive device
788 	 * id not fitting in the master block
789 	 */
790 	if (!multi_node) {
791 		for (p = dnlp; p != NULL; p = p->next) {
792 			int 		fd;
793 			ddi_devid_t	devid;
794 			mdname_t	*np;
795 
796 			np = metaslicename(p->drivenamep, 0, ep);
797 			if (np == NULL)
798 				continue;
799 
800 			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
801 				continue;
802 
803 			if (devid_get(fd, &devid) == 0) {
804 				size_t len;
805 
806 				len = devid_sizeof(devid);
807 				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
808 					(void) mddserror(ep,
809 					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
810 					    np->rname, NULL);
811 				devid_free(devid);
812 			} else {
813 				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
814 				    NULL, NULL, np->rname, NULL);
815 			}
816 			(void) close(fd);
817 		}
818 	}
819 
820 	/*
821 	 * MN disksets don't use DCS clustering services.
822 	 * For traditional disksets:
823 	 * There's not really much we can do here if this call fails.
824 	 * The drives have been added to the set and DiskSuite believes
825 	 * it owns the drives.
826 	 * Relase the set and hope for the best.
827 	 */
828 	if ((!multi_node) &&
829 	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
830 		meta_set_release(sp, ep);
831 		printf(gettext(
832 		    "Sun Clustering failed to make set primary\n"));
833 	}
834 
835 	metafreedrivenamelist(dnlp);
836 	if (!multi_node)
837 		(void) meta_unlock(sp, ep);
838 	md_exit(local_sp, 0);
839 }
840 
841 static void
842 parse_balance(int argc, char **argv)
843 {
844 	int		c;
845 	mdsetname_t	*sp = NULL;
846 	char		*sname = MD_LOCAL_NAME;
847 	md_error_t	status = mdnullerror;
848 	md_set_desc	*sd;
849 	int		multi_node = 0;
850 
851 	/* reset and parse args */
852 	optind = 1;
853 	opterr = 1;
854 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
855 		switch (c) {
856 		case 'M':
857 			break;
858 		case 'b':
859 			break;
860 		case 's':
861 			sname = optarg;
862 			break;
863 		default:
864 			usage(sp, gettext("unknown options"));
865 		}
866 	}
867 
868 	argc -= optind;
869 	argv += optind;
870 
871 	if (argc != 0)
872 		usage(sp, gettext("too many args"));
873 
874 	if ((sp = metasetname(sname, &status)) == NULL) {
875 		mde_perror(&status, "");
876 		md_exit(sp, 1);
877 	}
878 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
879 		mde_perror(&status, "");
880 		md_exit(sp, 1);
881 	}
882 	if (MD_MNSET_DESC(sd)) {
883 		multi_node = 1;
884 	}
885 	/*
886 	 * Have a valid set, take the set lock also.
887 	 *
888 	 * A MN diskset does not use the set meta_lock but
889 	 * instead uses the clnt_lock of rpc.metad and the
890 	 * suspend/resume feature of the rpc.mdcommd.  Can't
891 	 * use set meta_lock since class 1 messages are
892 	 * grabbing this lock and if this thread is holding
893 	 * the set meta_lock then no rpc.mdcommd suspend
894 	 * can occur.
895 	 */
896 	if (!multi_node) {
897 		if (meta_lock(sp, TRUE, &status) != 0) {
898 			mde_perror(&status, "");
899 			md_exit(sp, 1);
900 		}
901 	}
902 
903 	if (meta_set_balance(sp, &status) != 0) {
904 		mde_perror(&status, "");
905 		md_exit(sp, 1);
906 	}
907 	md_exit(sp, 0);
908 }
909 
910 static void
911 parse_autotake(int argc, char **argv)
912 {
913 	int			c;
914 	int			enable = 0;
915 	mdsetname_t		*sp = NULL;
916 	char			*sname = MD_LOCAL_NAME;
917 	md_error_t		status = mdnullerror;
918 	md_error_t		*ep = &status;
919 
920 	/* reset and parse args */
921 	optind = 1;
922 	opterr = 1;
923 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
924 		switch (c) {
925 		case 'A':
926 			/* verified sub-option in main */
927 			if (strcmp(optarg, "enable") == 0)
928 				enable = 1;
929 			break;
930 		case 's':
931 			/* verified presence of setname in main */
932 			sname = optarg;
933 			break;
934 		default:
935 			usage(sp, gettext("unknown options"));
936 		}
937 	}
938 
939 	if ((sp = metasetname(sname, ep)) == NULL) {
940 		mde_perror(ep, "");
941 		md_exit(sp, 1);
942 	}
943 
944 	if (meta_lock(sp, TRUE, ep) != 0) {
945 		mde_perror(ep, "");
946 		md_exit(sp, 1);
947 	}
948 
949 	if (meta_check_ownership(sp, ep) != 0) {
950 		mde_perror(ep, "");
951 		md_exit(sp, 1);
952 	}
953 
954 	if (meta_set_auto_take(sp, enable, ep) != 0) {
955 		mde_perror(ep, "");
956 		md_exit(sp, 1);
957 	}
958 
959 	md_exit(sp, 0);
960 }
961 
962 static void
963 parse_del(int argc, char **argv)
964 {
965 	int			c;
966 	mdsetname_t		*sp = NULL;
967 	char			*sname = MD_LOCAL_NAME;
968 	int			hosts = FALSE;
969 	int			meds = FALSE;
970 	int			forceflg = FALSE;
971 	md_error_t		status = mdnullerror;
972 	md_error_t		*ep = &status;
973 	mddrivenamelist_t	*dnlp = NULL;
974 	mdsetname_t		*local_sp = NULL;
975 	md_set_desc		*sd;
976 	int			multi_node = 0;
977 
978 	/* reset and parse args */
979 	optind = 1;
980 	opterr = 1;
981 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
982 		switch (c) {
983 		case 'M':
984 			break;
985 		case 'd':
986 			break;
987 		case 'f':
988 			forceflg = TRUE;
989 			break;
990 		case 'h':
991 		case 'm':
992 			if (meds == TRUE || hosts == TRUE)
993 				usage(sp, gettext(
994 				    "only one -m or -h option allowed"));
995 
996 			if (c == 'h')
997 				hosts = TRUE;
998 			else
999 				meds = TRUE;
1000 			break;
1001 		case 's':
1002 			sname = optarg;
1003 			break;
1004 		default:
1005 			usage(sp, gettext("unknown options"));
1006 		}
1007 	}
1008 
1009 	argc -= optind;
1010 	argv += optind;
1011 
1012 	if ((sp = metasetname(sname, ep)) == NULL) {
1013 		mde_perror(ep, "");
1014 		md_exit(local_sp, 1);
1015 	}
1016 
1017 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1018 		mde_perror(ep, "");
1019 		md_exit(local_sp, 1);
1020 	}
1021 
1022 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1023 		mde_perror(ep, "");
1024 		md_exit(local_sp, 1);
1025 	}
1026 	if (MD_MNSET_DESC(sd))
1027 		multi_node = 1;
1028 
1029 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1030 		mde_perror(ep, "");
1031 		md_exit(local_sp, 1);
1032 	}
1033 
1034 	/*
1035 	 * Have a valid set, take the set lock also.
1036 	 *
1037 	 * A MN diskset does not use the set meta_lock but
1038 	 * instead uses the clnt_lock of rpc.metad and the
1039 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1040 	 * use set meta_lock since class 1 messages are
1041 	 * grabbing this lock and if this thread is holding
1042 	 * the set meta_lock then no rpc.mdcommd suspend
1043 	 * can occur.
1044 	 */
1045 	if (!multi_node) {
1046 		if (meta_lock(sp, TRUE, ep) != 0) {
1047 			mde_perror(ep, "");
1048 			md_exit(local_sp, 1);
1049 		}
1050 	}
1051 
1052 	/*
1053 	 * Delete hosts
1054 	 */
1055 	if (hosts == TRUE) {
1056 		if (meta_check_ownership(sp, ep) != 0) {
1057 			/*
1058 			 * If we don't own the set bail out here otherwise
1059 			 * we could delete the node from the DCS service
1060 			 * yet not delete the host from the set.
1061 			 */
1062 			mde_perror(ep, "");
1063 			if (!multi_node)
1064 				(void) meta_unlock(sp, ep);
1065 			md_exit(local_sp, 1);
1066 		}
1067 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1068 		    if (!metad_isautotakebyname(sname)) {
1069 			/*
1070 			 * SC could have been installed after the set was
1071 			 * created.  We still want to be able to delete these
1072 			 * sets.
1073 			 */
1074 			md_perror(gettext(
1075 			    "Failed to delete hosts from DCS service"));
1076 			if (!multi_node)
1077 				(void) meta_unlock(sp, ep);
1078 			md_exit(local_sp, 1);
1079 		    }
1080 		}
1081 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1082 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1083 				(void) printf(gettext(
1084 				    "Failed to restore host(s) in DCS "
1085 				    "database\n"));
1086 			}
1087 			mde_perror(ep, "");
1088 			if (!multi_node)
1089 				(void) meta_unlock(sp, ep);
1090 			md_exit(local_sp, 1);
1091 		}
1092 		if (!multi_node)
1093 			(void) meta_unlock(sp, ep);
1094 		md_exit(local_sp, 0);
1095 	}
1096 
1097 	/*
1098 	 * Delete mediators
1099 	 */
1100 	if (meds == TRUE) {
1101 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1102 			mde_perror(ep, "");
1103 			if (!multi_node)
1104 				(void) meta_unlock(sp, ep);
1105 			md_exit(local_sp, 1);
1106 		}
1107 		if (!multi_node)
1108 			(void) meta_unlock(sp, ep);
1109 		md_exit(local_sp, 0);
1110 	}
1111 
1112 	/*
1113 	 * Delete drives
1114 	 */
1115 
1116 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1117 		mde_perror(ep, "");
1118 		if (!multi_node)
1119 			(void) meta_unlock(sp, ep);
1120 		md_exit(local_sp, 1);
1121 	}
1122 
1123 	if (c == 0) {
1124 		md_perror(gettext(
1125 		    "No drives specified to delete.\n"));
1126 		if (!multi_node)
1127 			(void) meta_unlock(sp, ep);
1128 		md_exit(local_sp, 1);
1129 	}
1130 
1131 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1132 		metafreedrivenamelist(dnlp);
1133 		mde_perror(ep, "");
1134 		if (!multi_node)
1135 			(void) meta_unlock(sp, ep);
1136 		md_exit(local_sp, 1);
1137 	}
1138 
1139 	metafreedrivenamelist(dnlp);
1140 	if (!multi_node)
1141 		(void) meta_unlock(sp, ep);
1142 	md_exit(local_sp, 0);
1143 }
1144 
1145 static void
1146 parse_isowner(int argc, char **argv)
1147 {
1148 	int		c;
1149 	mdsetname_t	*sp = NULL;
1150 	char		*sname = MD_LOCAL_NAME;
1151 	md_error_t	status = mdnullerror;
1152 	md_error_t	*ep = &status;
1153 	char		*host = NULL;
1154 
1155 	/* reset and parse args */
1156 	optind = 1;
1157 	opterr = 1;
1158 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1159 		switch (c) {
1160 		case 'M':
1161 			break;
1162 		case 'o':
1163 			break;
1164 		case 'h':
1165 			if (host != NULL) {
1166 				usage(sp, gettext(
1167 				    "only one -h option allowed"));
1168 			}
1169 			host = optarg;
1170 			break;
1171 		case 's':
1172 			sname = optarg;
1173 			break;
1174 		default:
1175 			usage(sp, gettext("unknown options"));
1176 		}
1177 	}
1178 
1179 	argc -= optind;
1180 	argv += optind;
1181 
1182 	if (argc != 0)
1183 		usage(sp, gettext("too many args"));
1184 
1185 	if ((sp = metasetname(sname, ep)) == NULL) {
1186 		mde_perror(ep, "");
1187 		md_exit(sp, 1);
1188 	}
1189 
1190 	if (host == NULL) {
1191 		if (meta_check_ownership(sp, ep) != 0) {
1192 			mde_perror(ep, "");
1193 			md_exit(sp, 1);
1194 		}
1195 	} else {
1196 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1197 			mde_perror(ep, "");
1198 			md_exit(sp, 1);
1199 		}
1200 	}
1201 	md_exit(sp, 0);
1202 }
1203 
1204 static void
1205 parse_purge(int argc, char **argv)
1206 {
1207 	int		c;
1208 	mdsetname_t	*sp = NULL;
1209 	mdsetname_t	*local_sp = NULL;
1210 	md_drive_desc	*dd;
1211 	char		*sname = MD_LOCAL_NAME;
1212 	char		*thishost = mynode();
1213 	md_error_t	status = mdnullerror;
1214 	md_error_t	*ep = &status;
1215 	int		bypass_cluster_purge = 0;
1216 	int		forceflg = FALSE;
1217 	int		ret = 0;
1218 	int		multi_node = 0;
1219 	md_set_desc		*sd;
1220 
1221 	optind = 1;
1222 	opterr = 1;
1223 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1224 		switch (c) {
1225 		case 'M':
1226 			break;
1227 		case 'C':
1228 			bypass_cluster_purge = 1;
1229 			break;
1230 		case 'f':
1231 			forceflg = TRUE;
1232 			break;
1233 		case 'P':
1234 			break;
1235 		case 's':
1236 			sname = optarg;
1237 			break;
1238 		default:
1239 			usage(sp, gettext("unknown options"));
1240 		}
1241 	}
1242 
1243 	argc -= optind;
1244 	argv += optind;
1245 
1246 	if (argc != 0)
1247 		usage(sp, gettext("too many arguments"));
1248 
1249 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1250 		mde_perror(ep, "");
1251 		md_exit(local_sp, 1);
1252 	}
1253 
1254 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1255 		mde_perror(ep, "");
1256 		md_exit(local_sp, 1);
1257 	}
1258 
1259 	if ((sp = metasetname(sname, ep)) == NULL) {
1260 		mde_perror(ep, "");
1261 		md_exit(sp, 1);
1262 	}
1263 
1264 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1265 		mde_perror(ep, "");
1266 		md_exit(local_sp, 1);
1267 	}
1268 	if (MD_MNSET_DESC(sd))
1269 		multi_node = 1;
1270 
1271 	if (!multi_node) {
1272 		if (meta_lock(sp, TRUE, ep) != 0) {
1273 			mde_perror(ep, "");
1274 			md_exit(local_sp, 1);
1275 		}
1276 	}
1277 
1278 	/* Must not own the set if purging it from this host */
1279 	if (meta_check_ownership(sp, ep) == 0) {
1280 		/*
1281 		 * Need to see if there are disks in the set, if not then
1282 		 * there is no ownership but meta_check_ownership returns 0
1283 		 */
1284 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1285 		if (!mdisok(ep)) {
1286 			mde_perror(ep, "");
1287 			if (!multi_node)
1288 				(void) meta_unlock(sp, ep);
1289 			md_exit(local_sp, 1);
1290 		}
1291 		if (dd != NULL) {
1292 			(void) printf(gettext
1293 			    ("Must not be owner of the set when purging it\n"));
1294 			if (!multi_node)
1295 				(void) meta_unlock(sp, ep);
1296 			md_exit(local_sp, 1);
1297 		}
1298 	}
1299 	/*
1300 	 * Remove the node from the DCS service
1301 	 */
1302 	if (!bypass_cluster_purge) {
1303 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1304 			md_perror(gettext
1305 			    ("Failed to purge hosts from DCS service"));
1306 			if (!multi_node)
1307 				(void) meta_unlock(sp, ep);
1308 			md_exit(local_sp, 1);
1309 		}
1310 	}
1311 
1312 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1313 	    ep)) != 0) {
1314 		if (!bypass_cluster_purge) {
1315 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1316 			    SDSSC_ERROR) {
1317 				(void) printf(gettext(
1318 				    "Failed to restore host in DCS "
1319 				    "database\n"));
1320 			}
1321 		}
1322 		mde_perror(ep, "");
1323 		if (!multi_node)
1324 			(void) meta_unlock(sp, ep);
1325 		md_exit(local_sp, ret);
1326 	}
1327 
1328 	if (!multi_node)
1329 		(void) meta_unlock(sp, ep);
1330 	md_exit(local_sp, 0);
1331 }
1332 
1333 static void
1334 parse_query(int argc, char **argv)
1335 {
1336 	int		c;
1337 	mdsetname_t	*sp = NULL;
1338 	mddb_dtag_lst_t	*dtlp = NULL;
1339 	mddb_dtag_lst_t	*tdtlp;
1340 	char		*sname = MD_LOCAL_NAME;
1341 	md_error_t	status = mdnullerror;
1342 
1343 	/* reset and parse args */
1344 	optind = 1;
1345 	opterr = 1;
1346 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1347 		switch (c) {
1348 		case 'M':
1349 			break;
1350 		case 'q':
1351 			break;
1352 		case 's':
1353 			sname = optarg;
1354 			break;
1355 		default:
1356 			usage(sp, gettext("unknown options"));
1357 		}
1358 	}
1359 
1360 	argc -= optind;
1361 	argv += optind;
1362 
1363 	if (argc != 0)
1364 		usage(sp, gettext("too many args"));
1365 
1366 	if ((sp = metasetname(sname, &status)) == NULL) {
1367 		mde_perror(&status, "");
1368 		md_exit(sp, 1);
1369 	}
1370 
1371 	if (meta_lock(sp, TRUE, &status) != 0) {
1372 		mde_perror(&status, "");
1373 		md_exit(sp, 1);
1374 	}
1375 
1376 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1377 		mde_perror(&status, "");
1378 		md_exit(sp, 1);
1379 	}
1380 
1381 	if (dtlp != NULL)
1382 		(void) printf("The following tag(s) were found:\n");
1383 
1384 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1385 		dtlp = tdtlp->dtl_nx;
1386 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1387 		    tdtlp->dtl_dt.dt_hn,
1388 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1389 		Free(tdtlp);
1390 	}
1391 
1392 	md_exit(sp, 0);
1393 }
1394 
1395 /* Should never be called with sname of a Multinode diskset. */
1396 static void
1397 parse_releaseset(int argc, char **argv)
1398 {
1399 	int		c;
1400 	mdsetname_t	*sp = NULL;
1401 	md_error_t	status = mdnullerror;
1402 	md_error_t	*ep = &status;
1403 	char		*sname = MD_LOCAL_NAME;
1404 	sdssc_boolean_e	cluster_release = SDSSC_False;
1405 	sdssc_version_t	vers;
1406 	rval_e		rval;
1407 	md_set_desc	*sd;
1408 
1409 	/* reset and parse args */
1410 	optind = 1;
1411 	opterr = 1;
1412 	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1413 		switch (c) {
1414 		case 'C':
1415 			cluster_release = SDSSC_True;
1416 			break;
1417 		case 's':
1418 			sname = optarg;
1419 			break;
1420 		case 'r':
1421 			break;
1422 		default:
1423 			usage(sp, gettext("unknown options"));
1424 		}
1425 	}
1426 
1427 	argc -= optind;
1428 	argv += optind;
1429 
1430 	if (argc > 0)
1431 		usage(sp, gettext("too many args"));
1432 
1433 	memset(&vers, 0, sizeof (vers));
1434 
1435 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1436 	    (vers.major == 3) &&
1437 	    (cluster_release == SDSSC_False)) {
1438 
1439 		/*
1440 		 * If the release is being done by the user via the CLI
1441 		 * we need to notify the DCS to release this node as being
1442 		 * the primary. The reason nothing else needs to be done
1443 		 * is due to the fact that the reservation code will exec
1444 		 * metaset -C release to complete the operation.
1445 		 */
1446 		rval = sdssc_notify_service(sname, Release_Primary);
1447 		if (rval == SDSSC_ERROR) {
1448 			printf(gettext(
1449 			    "metaset: failed to notify DCS of release\n"));
1450 		}
1451 		md_exit(NULL, rval == SDSSC_ERROR);
1452 	}
1453 
1454 	if ((sp = metasetname(sname, ep)) == NULL) {
1455 
1456 		/*
1457 		 * It's entirely possible for the SC3.0 reservation code
1458 		 * to call for DiskSet to release a diskset and have that
1459 		 * diskset not exist. During a diskset removal DiskSuite
1460 		 * maybe able to remove all traces of the diskset before
1461 		 * the reservation code execs metaset -C release in which
1462 		 * case the metasetname will fail, but the overall command
1463 		 * shouldn't.
1464 		 */
1465 		if (vers.major == 3)
1466 			md_exit(sp, 0);
1467 		else {
1468 			mde_perror(ep, "");
1469 			md_exit(sp, 1);
1470 		}
1471 	}
1472 
1473 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1474 		mde_perror(ep, "");
1475 		md_exit(sp, 1);
1476 	}
1477 
1478 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1479 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1480 		md_exit(sp, 1);
1481 	}
1482 
1483 	if (meta_lock_nowait(sp, ep) != 0) {
1484 		mde_perror(ep, "");
1485 		md_exit(sp, 10);	/* special errcode */
1486 	}
1487 
1488 	if (meta_set_release(sp, ep)) {
1489 		mde_perror(ep, "");
1490 		md_exit(sp, 1);
1491 	}
1492 	md_exit(sp, 0);
1493 }
1494 
1495 /* Should never be called with sname of a Multinode diskset. */
1496 static void
1497 parse_takeset(int argc, char **argv)
1498 {
1499 	int		c;
1500 	mdsetname_t	*sp = NULL;
1501 	int		flags = 0;
1502 	char		*sname = MD_LOCAL_NAME;
1503 	mhd_mhiargs_t	mhiargs;
1504 	char 		*cp = NULL;
1505 	int		pos = -1;	/* position of timeout value */
1506 	int		usetag = 0;
1507 	static char	*nullopts[] = { NULL };
1508 	md_error_t	status = mdnullerror;
1509 	md_error_t	*ep = &status;
1510 	sdssc_boolean_e	cluster_take = SDSSC_False;
1511 	sdssc_version_t	vers;
1512 	rval_e		rval;
1513 
1514 	/* reset and parse args */
1515 	optind = 1;
1516 	opterr = 1;
1517 	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1518 		switch (c) {
1519 		case 'C':
1520 			cluster_take = SDSSC_True;
1521 			break;
1522 		case 'f':
1523 			flags |= TAKE_FORCE;
1524 			break;
1525 		case 's':
1526 			sname = optarg;
1527 			break;
1528 		case 't':
1529 			break;
1530 		case 'u':
1531 			usetag = atoi(optarg);
1532 			flags |= TAKE_USETAG;
1533 			break;
1534 		case 'y':
1535 			flags |= TAKE_USEIT;
1536 			break;
1537 		default:
1538 			usage(sp, gettext("unknown options"));
1539 		}
1540 	}
1541 
1542 	mhiargs = defmhiargs;
1543 
1544 	argc -= optind;
1545 	argv += optind;
1546 
1547 	if (argc > 1)
1548 		usage(sp, gettext("too many args"));
1549 
1550 	/*
1551 	 * If we have a list of timeout value overrides, handle it here
1552 	 */
1553 	while (argv[0] != NULL && *argv[0] != '\0') {
1554 		/*
1555 		 * The use of the nullopts[] "token list" here is to make
1556 		 * getsubopts() simply parse a comma separated list
1557 		 * returning either "" or the contents of the field, the
1558 		 * end condition is exaustion of the initial string, which
1559 		 * is modified in the process.
1560 		 */
1561 		(void) getsubopt(&argv[0], nullopts, &cp);
1562 
1563 		c = 0;			/* re-use c as temp value of timeout */
1564 
1565 		if (*cp != '-')		/* '-' uses default */
1566 			c = atoi(cp);
1567 
1568 		if (c < 0) {
1569 			usage(sp, gettext(
1570 			    "time out values must be > 0"));
1571 		}
1572 
1573 		if (++pos > 3) {
1574 			usage(sp, gettext(
1575 			    "too many timeout values specified."));
1576 		}
1577 
1578 		if (c == 0)		/* 0 or "" field uses default */
1579 			continue;
1580 
1581 		/*
1582 		 * Assign temp value to appropriate structure member based on
1583 		 * its position in the comma separated list.
1584 		 */
1585 		switch (pos) {
1586 		    case 0:
1587 			mhiargs.mh_ff = c;
1588 			break;
1589 
1590 		    case 1:
1591 			mhiargs.mh_tk.reinstate_resv_delay = c;
1592 			break;
1593 
1594 		    case 2:
1595 			mhiargs.mh_tk.min_ownership_delay = c;
1596 			break;
1597 
1598 		    case 3:
1599 			mhiargs.mh_tk.max_ownership_delay = c;
1600 			break;
1601 		}
1602 	}
1603 
1604 	memset(&vers, 0, sizeof (vers));
1605 
1606 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1607 	    (vers.major == 3) &&
1608 	    (cluster_take == SDSSC_False)) {
1609 
1610 		/*
1611 		 * If the take is beging done by the user via the CLI we need
1612 		 * to notify the DCS to make this current node the primary.
1613 		 * The SC3.0 reservation code will in turn exec metaset with
1614 		 * the -C take arg to complete this operation.
1615 		 */
1616 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1617 		    SDSSC_ERROR) {
1618 			printf(gettext(
1619 			    "metaset: failed to notify DCS of take\n"));
1620 		}
1621 		md_exit(NULL, rval == SDSSC_ERROR);
1622 	}
1623 
1624 	if ((sp = metasetname(sname, ep)) == NULL) {
1625 		mde_perror(ep, "");
1626 		md_exit(sp, 1);
1627 	}
1628 
1629 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1630 
1631 		/*
1632 		 * If we're running in a cluster environment and this
1633 		 * node already owns the set. Don't bother trying to
1634 		 * take the set again. There's one case where an adminstrator
1635 		 * is adding disks to a set for the first time. metaset
1636 		 * will take the ownership of the set at that point. During
1637 		 * that add operation SC3.0 notices activity on the device
1638 		 * and also tries to perform a take operation. The SC3.0 take
1639 		 * will fail because the adminstrative add has the set locked
1640 		 */
1641 		md_exit(sp, 0);
1642 	}
1643 
1644 	if (meta_lock_nowait(sp, ep) != 0) {
1645 		mde_perror(ep, "");
1646 		md_exit(sp, 10);	/* special errcode */
1647 	}
1648 
1649 	if (meta_set_take(sp, &mhiargs, flags, usetag, &status)) {
1650 		mde_perror(&status, "");
1651 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1652 			md_exit(sp, 2);
1653 		if (mdismddberror(&status, MDE_DB_ACCOK))
1654 			md_exit(sp, 3);
1655 		if (mdismddberror(&status, MDE_DB_STALE))
1656 			md_exit(sp, 66);
1657 		md_exit(sp, 1);
1658 	}
1659 	md_exit(sp, 0);
1660 }
1661 
1662 /*
1663  * Joins a node to a specific set or to all multinode disksets known
1664  * by this node.  If set is specified then caller should have verified
1665  * that the set is a multinode diskset.
1666  *
1667  * If an error occurs, metaset exits with a 1.
1668  * If there is no error, metaset exits with a 0.
1669  */
1670 static void
1671 parse_joinset(int argc, char **argv)
1672 {
1673 	int		c;
1674 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1675 	char		*sname = MD_LOCAL_NAME;
1676 	md_error_t	status = mdnullerror;
1677 	md_error_t	*ep = &status;
1678 	md_set_desc	*sd;
1679 	char		buf[BUFSIZ];
1680 	char		*p = buf;
1681 	set_t		max_sets, setno;
1682 	int		err, cumm_err = 0;
1683 	size_t		bufsz;
1684 
1685 	bufsz = sizeof (buf);
1686 	/* reset and parse args */
1687 	optind = 1;
1688 	opterr = 1;
1689 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1690 		switch (c) {
1691 		case 'M':
1692 			break;
1693 		case 'j':
1694 			break;
1695 		case 's':
1696 			sname = optarg;
1697 			break;
1698 		default:
1699 			usage(sp, gettext("unknown options"));
1700 		}
1701 	}
1702 
1703 	argc -= optind;
1704 	argv += optind;
1705 
1706 	if (argc > 1)
1707 		usage(sp, gettext("too many args"));
1708 
1709 	/*
1710 	 * If no setname option was used, then join all disksets
1711 	 * that this node knows about.   Attempt to join all
1712 	 * disksets that this node knows about.
1713 	 *
1714 	 * Additional text is added to the error messages during
1715 	 * this section of code in order to help the user understand
1716 	 * why the 'join of all sets' failed and which set caused
1717 	 * the failure.
1718 	 */
1719 
1720 	/*
1721 	 * Hold local set lock throughout this call to keep
1722 	 * other actions from interfering (such as creating a new
1723 	 * set, etc.).
1724 	 */
1725 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1726 		mde_perror(ep, "");
1727 		md_exit(sp, 1);
1728 	}
1729 
1730 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1731 		mde_perror(ep, "");
1732 		md_exit(local_sp, 1);
1733 	}
1734 
1735 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1736 		/*
1737 		 * If no set name is given, then walk through all sets
1738 		 * on this node which could include:
1739 		 * 	- MN disksets
1740 		 *	- traditional disksets
1741 		 *	- non-existent disksets
1742 		 * Attempt to join the MN disksets.
1743 		 * If the join of one set fails, print out an error message
1744 		 * about that set and continue the walk.
1745 		 */
1746 		if ((max_sets = get_max_sets(ep)) == 0) {
1747 			mde_perror(ep, "");
1748 			md_exit(local_sp, 1);
1749 		}
1750 
1751 		/* Start walking through all possible disksets */
1752 		for (setno = 1; setno < max_sets; setno++) {
1753 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1754 				if (mdiserror(ep, MDE_NO_SET)) {
1755 					/* No set for this setno - continue */
1756 					mdclrerror(ep);
1757 					continue;
1758 				} else {
1759 					(void) sprintf(p, gettext(
1760 					"Unable to get set %d information"),
1761 					    setno);
1762 					mde_perror(ep, p);
1763 					cumm_err = 1;
1764 					mdclrerror(ep);
1765 					continue;
1766 				}
1767 			}
1768 
1769 			/* If setname is there, set desc should exist. */
1770 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1771 				(void) snprintf(p, bufsz, gettext(
1772 				    "Unable to get set %s desc information"),
1773 				    sp->setname);
1774 				mde_perror(ep, p);
1775 				cumm_err = 1;
1776 				mdclrerror(ep);
1777 				continue;
1778 			}
1779 
1780 			/* Only check MN disksets */
1781 			if (!MD_MNSET_DESC(sd)) {
1782 				continue;
1783 			}
1784 
1785 			/*
1786 			 * Return value of 0 is success.
1787 			 * Return value of -1 means a failure.
1788 			 * Return value of -2 means set could not be
1789 			 * joined, but shouldn't cause an error.
1790 			 * Reasons would be:
1791 			 * 	- no drives in set
1792 			 * 	- node already joined to set
1793 			 * Return value of -3 means joined stale set.
1794 			 * Can't check for all reasons here
1795 			 * since set isn't locked yet across all
1796 			 * nodes in the cluster.  The call
1797 			 * to libmeta routine, meta_set_join, will
1798 			 * lock across the cluster and perform
1799 			 * the checks.
1800 			 */
1801 			if ((err = meta_set_join(sp, ep)) == -1) {
1802 				/* Print error of diskset join failure */
1803 				(void) snprintf(p, bufsz,
1804 				    gettext("Join to diskset %s failed"),
1805 				    sp->setname);
1806 				mde_perror(ep, p);
1807 				cumm_err = 1;
1808 				mdclrerror(ep);
1809 				continue;
1810 			}
1811 
1812 			if (err == -3) {
1813 				/* Print error of diskset join failure */
1814 				(void) snprintf(p, bufsz,
1815 				    gettext("Joined to stale diskset %s"),
1816 				    sp->setname);
1817 				mde_perror(ep, p);
1818 				mdclrerror(ep);
1819 			}
1820 
1821 			mdclrerror(ep);
1822 		}
1823 
1824 		md_exit(local_sp, cumm_err);
1825 	}
1826 
1827 	/*
1828 	 * Code for a specific set is much simpler.
1829 	 * Error messages don't need extra text since specific setname
1830 	 * was used.
1831 	 * Don't need to lock the local set, just the specific set given.
1832 	 */
1833 	if ((sp = metasetname(sname, ep)) == NULL) {
1834 		mde_perror(ep, "");
1835 		md_exit(local_sp, 1);
1836 	}
1837 
1838 	/*
1839 	 * Fail command if meta_set_join returns -1.
1840 	 *
1841 	 * Return of 0 means that node joined set.
1842 	 *
1843 	 * Return of -2 means that node was unable to
1844 	 * join a set since that set had no drives
1845 	 * or that had already joined the set.  No
1846 	 * need to fail the command for these reasons.
1847 	 *
1848 	 * Return of -3 means that set is stale.
1849 	 * Return a value of 66 to historically match traditional disksets.
1850 	 */
1851 	if ((err = meta_set_join(sp, ep)) == -1) {
1852 		mde_perror(&status, "");
1853 		md_exit(local_sp, 1);
1854 	}
1855 
1856 	if (err == -3) {
1857 		/* Print error of diskset join failure */
1858 		(void) snprintf(p, bufsz,
1859 		    gettext("Joined to stale diskset %s"),
1860 		    sp->setname);
1861 		mde_perror(&status, "");
1862 		md_exit(local_sp, 66);
1863 	}
1864 
1865 	md_exit(local_sp, 0);
1866 }
1867 
1868 /*
1869  * Withdraws a node from a specific set or from all multinode disksets known
1870  * by this node.  If set is specified then caller should have verified
1871  * that the set is a multinode diskset.
1872  *
1873  * If an error occurs, metaset exits with a 1.
1874  * If there is no error, metaset exits with a 0.
1875  */
1876 static void
1877 parse_withdrawset(int argc, char **argv)
1878 {
1879 	int		c;
1880 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1881 	char		*sname = MD_LOCAL_NAME;
1882 	md_error_t	status = mdnullerror;
1883 	md_error_t	*ep = &status;
1884 	char		buf[BUFSIZ];
1885 	char		*p = buf;
1886 	md_set_desc	*sd;
1887 	set_t		max_sets, setno;
1888 	int		err, cumm_err = 0;
1889 	size_t		bufsz;
1890 
1891 	bufsz = sizeof (buf);
1892 	/* reset and parse args */
1893 	optind = 1;
1894 	opterr = 1;
1895 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1896 		switch (c) {
1897 		case 'M':
1898 			break;
1899 		case 'w':
1900 			break;
1901 		case 's':
1902 			sname = optarg;
1903 			break;
1904 		default:
1905 			usage(sp, gettext("unknown options"));
1906 		}
1907 	}
1908 
1909 	argc -= optind;
1910 	argv += optind;
1911 
1912 	if (argc > 1)
1913 		usage(sp, gettext("too many args"));
1914 
1915 	/*
1916 	 * If no setname option was used, then withdraw from all disksets
1917 	 * that this node knows about.
1918 	 *
1919 	 * Additional text is added to the error messages during
1920 	 * this section of code in order to help the user understand
1921 	 * why the 'withdraw from all sets' failed and which set caused
1922 	 * the failure.
1923 	 */
1924 
1925 	/*
1926 	 * Hold local set lock throughout this call to keep
1927 	 * other actions from interfering (such as creating a new
1928 	 * set, etc.).
1929 	 */
1930 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1931 		mde_perror(ep, "");
1932 		md_exit(sp, 1);
1933 	}
1934 
1935 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1936 		mde_perror(ep, "");
1937 		md_exit(local_sp, 1);
1938 	}
1939 
1940 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1941 		/*
1942 		 * If no set name is given, then walk through all sets
1943 		 * on this node which could include:
1944 		 * 	- MN disksets
1945 		 *	- traditional disksets
1946 		 *	- non-existent disksets
1947 		 * Attempt to withdraw from the MN disksets.
1948 		 * If the withdraw of one set fails, print out an error
1949 		 * message about that set and continue the walk.
1950 		 */
1951 		if ((max_sets = get_max_sets(ep)) == 0) {
1952 			mde_perror(ep, "");
1953 			md_exit(local_sp, 1);
1954 		}
1955 
1956 		/* Start walking through all possible disksets */
1957 		for (setno = 1; setno < max_sets; setno++) {
1958 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1959 				if (mdiserror(ep, MDE_NO_SET)) {
1960 					/* No set for this setno - continue */
1961 					mdclrerror(ep);
1962 					continue;
1963 				} else {
1964 					(void) sprintf(p, gettext(
1965 					    "Unable to get set %d information"),
1966 					    setno);
1967 					mde_perror(ep, p);
1968 					cumm_err = 1;
1969 					mdclrerror(ep);
1970 					continue;
1971 				}
1972 			}
1973 
1974 			/* If setname is there, set desc should exist. */
1975 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1976 				(void) snprintf(p, bufsz, gettext(
1977 				    "Unable to get set %s desc information"),
1978 				    sp->setname);
1979 				mde_perror(ep, p);
1980 				cumm_err = 1;
1981 				mdclrerror(ep);
1982 				continue;
1983 			}
1984 
1985 			/* Only check MN disksets */
1986 			if (!MD_MNSET_DESC(sd)) {
1987 				continue;
1988 			}
1989 
1990 			/*
1991 			 * Return value of 0 is success.
1992 			 * Return value of -1 means a failure.
1993 			 * Return value of -2 means set could not be
1994 			 * withdrawn from, but this shouldn't cause
1995 			 * an error.  Reasons would be:
1996 			 * 	- no drives in set
1997 			 * 	- node already withdrawn from set
1998 			 * Can't check for all reasons here
1999 			 * since set isn't locked yet across all
2000 			 * nodes in the cluster.  The call
2001 			 * to libmeta routine, meta_set_withdraw, will
2002 			 * lock across the cluster and perform
2003 			 * the checks.
2004 			 */
2005 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2006 				/* Print error of diskset withdraw failure */
2007 				(void) snprintf(p, bufsz,
2008 				    gettext("Withdraw from diskset %s failed"),
2009 				    sp->setname);
2010 				mde_perror(ep, p);
2011 				mdclrerror(ep);
2012 				cumm_err = 1;
2013 				continue;
2014 			}
2015 
2016 			if (err == -2) {
2017 				mdclrerror(ep);
2018 				continue;
2019 			}
2020 
2021 			mdclrerror(ep);
2022 		}
2023 		md_exit(local_sp, cumm_err);
2024 	}
2025 
2026 
2027 	/*
2028 	 * Code for a specific set is much simpler.
2029 	 * Error messages don't need extra text since specific setname
2030 	 * was used.
2031 	 * Don't need to lock the local set, just the specific set given.
2032 	 */
2033 	if ((sp = metasetname(sname, ep)) == NULL) {
2034 		mde_perror(ep, "");
2035 		md_exit(local_sp, 1);
2036 	}
2037 
2038 	/*
2039 	 * Fail command if meta_set_withdraw returns -1.
2040 	 *
2041 	 * Return of 0 means that node withdrew from set.
2042 	 *
2043 	 * Return of -2 means that node was unable to
2044 	 * withdraw from a set since that set had no drives
2045 	 * or node was not joined to set.  No
2046 	 * need to fail the command for these reasons.
2047 	 */
2048 	if (meta_set_withdraw(sp, ep) == -1) {
2049 		mde_perror(&status, "");
2050 		md_exit(local_sp, 1);
2051 	}
2052 
2053 	md_exit(local_sp, 0);
2054 }
2055 
2056 /*
2057  * Should never be called with sname of a Multinode diskset.
2058  */
2059 static void
2060 parse_cluster(int argc, char **argv)
2061 {
2062 	int			c,
2063 				error,
2064 				new_argc,
2065 				x;
2066 	enum cluster_cmd	cmd = ccnotspecified;
2067 	char			*hostname = SDSSC_PROXY_PRIMARY,
2068 				*argument = NULL,
2069 				*sname = MD_LOCAL_NAME,
2070 				primary_node[SDSSC_NODE_NAME_LEN],
2071 				**new_argv = NULL,
2072 				**np = NULL;
2073 	mdsetname_t		*sp = NULL;
2074 	md_error_t		status = mdnullerror;
2075 	md_error_t		*ep = &status;
2076 
2077 	/* reset and parse args */
2078 	optind = 1;
2079 	opterr = 1;
2080 	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2081 		switch (c) {
2082 		case 'C':
2083 			if (cmd != ccnotspecified) {
2084 				md_exit(sp, -1);
2085 			}
2086 			argument = optarg;
2087 
2088 			if (strcmp(argument, "disksin") == 0) {
2089 				cmd = clusterdisksin;
2090 			} else if (strcmp(argument, "version") == 0) {
2091 				cmd = clusterversion;
2092 			} else if (strcmp(argument, "release") == 0) {
2093 				cmd = clusterrelease;
2094 			} else if (strcmp(argument, "take") == 0) {
2095 				cmd = clustertake;
2096 			} else if (strcmp(argument, "proxy") == 0) {
2097 				cmd = clusterproxy;
2098 			} else if (strcmp(argument, "purge") == 0) {
2099 				cmd = clusterpurge;
2100 			} else {
2101 				md_exit(sp, -1);
2102 			}
2103 
2104 			break;
2105 
2106 		case 'h':
2107 			hostname = optarg;
2108 			break;
2109 
2110 		case 's':
2111 			sname = optarg;
2112 			break;
2113 
2114 		case 'f':
2115 		case 't':
2116 		case 'u':
2117 		case 'y':
2118 		case 'r':
2119 			break;
2120 
2121 		default:
2122 			md_exit(sp, -1);
2123 		}
2124 	}
2125 
2126 	/* Now call the appropriate command function. */
2127 	switch (cmd) {
2128 	case clusterversion:
2129 	    printclusterversion();
2130 	    break;
2131 
2132 	case clusterdisksin:
2133 	    if (printdisksin(sname, ep)) {
2134 		md_exit(sp, -1);
2135 	    }
2136 	    break;
2137 
2138 	case clusterrelease:
2139 	    parse_releaseset(argc, argv);
2140 	    break;
2141 
2142 	case clustertake:
2143 	    parse_takeset(argc, argv);
2144 	    break;
2145 
2146 	case clusterproxy:
2147 		/* Should never get here if sname is for MN diskset */
2148 
2149 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2150 			printf(gettext("Out of memory\n"));
2151 			md_exit(sp, 1);
2152 		}
2153 
2154 		np = new_argv;
2155 		new_argc = 0;
2156 		memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2157 
2158 		for (x = 0; x < argc; x++) {
2159 			if (strcmp(argv[x], "-C") == 0) {
2160 
2161 				/*
2162 				 * Need to skip the '-C proxy' args so
2163 				 * just increase x by one and the work is
2164 				 * done.
2165 				 */
2166 				x++;
2167 			} else {
2168 				*np++ = strdup(argv[x]);
2169 				new_argc++;
2170 			}
2171 		}
2172 
2173 		switch (sdssc_get_primary_host(sname, primary_node,
2174 		    SDSSC_NODE_NAME_LEN)) {
2175 		case SDSSC_ERROR:
2176 			md_exit(sp, 1);
2177 			break;
2178 
2179 		case SDSSC_NO_SERVICE:
2180 			if (hostname != SDSSC_PROXY_PRIMARY) {
2181 				(void) strlcpy(primary_node, hostname,
2182 				    SDSSC_NODE_NAME_LEN);
2183 			}
2184 			break;
2185 		}
2186 
2187 		if (sdssc_cmd_proxy(new_argc, new_argv,
2188 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2189 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2190 			md_exit(sp, error);
2191 		} else {
2192 			printf(gettext(
2193 			    "Couldn't proxy command\n"));
2194 			md_exit(sp, 1);
2195 		}
2196 		break;
2197 
2198 	case clusterpurge:
2199 		parse_purge(argc, argv);
2200 		break;
2201 
2202 	default:
2203 	    break;
2204 	}
2205 
2206 	md_exit(sp, 0);
2207 }
2208 
2209 /*
2210  * parse args and do it
2211  */
2212 int
2213 main(int argc, char *argv[])
2214 {
2215 	enum metaset_cmd	cmd = notspecified;
2216 	md_error_t		status = mdnullerror;
2217 	md_error_t		*ep = &status;
2218 	mdsetname_t		*sp = NULL;
2219 	char			*hostname = SDSSC_PROXY_PRIMARY,
2220 				*sname = MD_LOCAL_NAME,
2221 				*auto_take_option = NULL,
2222 				primary_node[SDSSC_NODE_NAME_LEN];
2223 	int			error,
2224 				c,
2225 				auto_take = FALSE,
2226 				stat;
2227 	md_set_desc		*sd;
2228 	int			mflag = 0;
2229 	int			multi_node = 0;
2230 	rval_e			sdssc_res;
2231 
2232 	/*
2233 	 * Get the locale set up before calling any other routines
2234 	 * with messages to ouput.  Just in case we're not in a build
2235 	 * environment, make sure that TEXT_DOMAIN gets set to
2236 	 * something.
2237 	 */
2238 #if !defined(TEXT_DOMAIN)
2239 #define	TEXT_DOMAIN "SYS_TEST"
2240 #endif
2241 	(void) setlocale(LC_ALL, "");
2242 	(void) textdomain(TEXT_DOMAIN);
2243 
2244 	sdssc_res = sdssc_bind_library();
2245 	if (sdssc_res == SDSSC_ERROR) {
2246 		printf(gettext(
2247 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2248 		exit(1);
2249 	}
2250 
2251 	/* initialize */
2252 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2253 		mde_perror(ep, "");
2254 		md_exit(sp, 1);
2255 	}
2256 
2257 	optind = 1;
2258 	opterr = 1;
2259 
2260 	/*
2261 	 * NOTE: The "C" option is strictly for cluster use. it is not
2262 	 * and should not be documented for the customer. - JST
2263 	 */
2264 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2265 	    != -1) {
2266 		switch (c) {
2267 		case 'M':
2268 			mflag = 1;
2269 			break;
2270 		case 'A':
2271 			auto_take = TRUE;
2272 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2273 			    strcmp(optarg, "disable") == 0))
2274 				usage(sp, gettext(
2275 				    "-A: enable or disable must be specified"));
2276 			auto_take_option = optarg;
2277 			break;
2278 		case 'a':
2279 			if (cmd != notspecified) {
2280 				usage(sp, gettext(
2281 				    "conflicting options"));
2282 			}
2283 			cmd = add;
2284 			break;
2285 		case 'b':
2286 			if (cmd != notspecified) {
2287 				usage(sp, gettext(
2288 				    "conflicting options"));
2289 			}
2290 			cmd = balance;
2291 			break;
2292 		case 'd':
2293 			if (cmd != notspecified) {
2294 				usage(sp, gettext(
2295 				    "conflicting options"));
2296 			}
2297 			cmd = delete;
2298 			break;
2299 		case 'C':	/* cluster commands */
2300 			if (cmd != notspecified) {
2301 				md_exit(sp, -1);    /* conflicting options */
2302 			}
2303 			cmd = cluster;
2304 			break;
2305 		case 'f':
2306 			break;
2307 		case 'h':
2308 			hostname = optarg;
2309 			break;
2310 		case 'j':
2311 			if (cmd != notspecified) {
2312 				usage(sp, gettext(
2313 				    "conflicting options"));
2314 			}
2315 			cmd = join;
2316 			break;
2317 		case 'l':
2318 			break;
2319 		case 'L':
2320 			break;
2321 		case 'm':
2322 			break;
2323 		case 'o':
2324 			if (cmd != notspecified) {
2325 				usage(sp, gettext(
2326 				    "conflicting options"));
2327 			}
2328 			cmd = isowner;
2329 			break;
2330 		case 'P':
2331 			if (cmd != notspecified) {
2332 				usage(sp, gettext(
2333 				    "conflicting options"));
2334 			}
2335 			cmd = purge;
2336 			break;
2337 		case 'q':
2338 			if (cmd != notspecified) {
2339 				usage(sp, gettext(
2340 				    "conflicting options"));
2341 			}
2342 			cmd = query;
2343 			break;
2344 		case 'r':
2345 			if (cmd != notspecified) {
2346 				usage(sp, gettext(
2347 				    "conflicting options"));
2348 			}
2349 			cmd = release;
2350 			break;
2351 		case 's':
2352 			sname = optarg;
2353 			break;
2354 		case 't':
2355 			if (cmd != notspecified) {
2356 				usage(sp, gettext(
2357 				    "conflicting options"));
2358 			}
2359 			cmd = take;
2360 			break;
2361 		case 'u':
2362 			break;
2363 		case 'w':
2364 			if (cmd != notspecified) {
2365 				usage(sp, gettext(
2366 				    "conflicting options"));
2367 			}
2368 			cmd = withdraw;
2369 			break;
2370 		case 'y':
2371 			break;
2372 		case '?':
2373 			if (optopt == '?')
2374 				usage(sp, NULL);
2375 			/*FALLTHROUGH*/
2376 		default:
2377 			if (cmd == cluster) {    /* cluster is silent */
2378 				md_exit(sp, -1);
2379 			} else {
2380 				usage(sp, gettext(
2381 				    "unknown command"));
2382 			}
2383 		}
2384 	}
2385 
2386 	/* check if suncluster is installed and -A enable specified */
2387 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2388 	    strcmp(auto_take_option, "enable") == 0) {
2389 	    md_eprintf(gettext(
2390 		"cannot enable auto-take when SunCluster is installed\n"));
2391 	    md_exit(sp, 1);
2392 	}
2393 
2394 	/*
2395 	 * At this point we know that if the -A enable option is specified
2396 	 * for an auto-take diskset that SC is not installed on the machine, so
2397 	 * all of the sdssc calls will just be no-ops.
2398 	 */
2399 
2400 	/* list sets */
2401 	if (cmd == notspecified && auto_take == FALSE) {
2402 		parse_printset(argc, argv);
2403 		/*NOTREACHED*/
2404 	}
2405 
2406 	if (meta_check_root(ep) != 0) {
2407 		mde_perror(ep, "");
2408 		md_exit(sp, 1);
2409 	}
2410 
2411 	/* snarf MDDB */
2412 	if (meta_setup_db_locations(ep) != 0) {
2413 		mde_perror(ep, "");
2414 		md_exit(sp, 1);
2415 	}
2416 
2417 	/*
2418 	 * If sname is a diskset - check for multi_node.
2419 	 * It is possible for sname to not exist.
2420 	 */
2421 	if (strcmp(sname, MD_LOCAL_NAME)) {
2422 		if ((sp = metasetname(sname, ep)) != NULL) {
2423 			/* Set exists - check for MN diskset */
2424 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2425 				mde_perror(ep, "");
2426 				md_exit(sp, 1);
2427 			}
2428 			if (MD_MNSET_DESC(sd)) {
2429 				/*
2430 				 * If a MN diskset always set multi_node
2431 				 * regardless of whether the -M option was
2432 				 * used or not (mflag).
2433 				 */
2434 				multi_node = 1;
2435 			} else {
2436 				/*
2437 				 * If a traditional diskset, mflag must
2438 				 * not be set.
2439 				 */
2440 				if (mflag) {
2441 					usage(sp, gettext(
2442 					    "-M option only allowed "
2443 					    "on multi-owner diskset"));
2444 				}
2445 			}
2446 		} else {
2447 			/*
2448 			 * Set name does not exist, set multi_node
2449 			 * based on -M option.
2450 			 */
2451 			if (mflag) {
2452 				multi_node = 1;
2453 			}
2454 		}
2455 	}
2456 
2457 	if (auto_take && multi_node) {
2458 		/* Can't mix multinode and auto-take on a diskset */
2459 		usage(sp,
2460 		    gettext("-A option not allowed on multi-owner diskset"));
2461 	}
2462 
2463 	/*
2464 	 * MN disksets don't use DCS clustering services, so
2465 	 * do not get primary_node for MN diskset since no command
2466 	 * proxying is done to Primary cluster node.  Do not proxy
2467 	 * MN diskset commands of join and withdraw when issued without
2468 	 * a valid setname.
2469 	 * For traditional disksets: proxy all commands except a take
2470 	 * and release.  Use first host listed as the host to send the
2471 	 * command to if there isn't already a primary
2472 	 */
2473 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2474 	    (cmd != take) && (cmd != release) &&
2475 	    (cmd != cluster) && (cmd != join) &&
2476 	    (cmd != withdraw) && (cmd != purge)) {
2477 		stat = sdssc_get_primary_host(sname, primary_node,
2478 		    SDSSC_NODE_NAME_LEN);
2479 		switch (stat) {
2480 			case SDSSC_ERROR:
2481 				return (0);
2482 
2483 			case SDSSC_NO_SERVICE:
2484 				if (hostname != SDSSC_PROXY_PRIMARY) {
2485 					(void) strlcpy(primary_node, hostname,
2486 					    SDSSC_NODE_NAME_LEN);
2487 				} else {
2488 					memset(primary_node, '\0',
2489 					    SDSSC_NODE_NAME_LEN);
2490 				}
2491 				break;
2492 		}
2493 
2494 		/*
2495 		 * We've got a complicated decision here regarding
2496 		 * the hostname. If we didn't get a primary host
2497 		 * and a host name wasn't supplied on the command line
2498 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2499 		 * use what's been found.
2500 		 */
2501 		if (sdssc_cmd_proxy(argc, argv,
2502 		    primary_node[0] == '\0' ?
2503 			SDSSC_PROXY_PRIMARY : primary_node,
2504 		    &error) == SDSSC_PROXY_DONE) {
2505 			exit(error);
2506 		}
2507 	}
2508 
2509 	/* cluster-specific commands */
2510 	if (cmd == cluster) {
2511 		if (multi_node) {
2512 			/*
2513 			 * If a specific MN diskset is given, immediately
2514 			 * fail -C command.
2515 			 */
2516 			usage(sp, gettext(
2517 			    "-C option not allowed on multi-owner diskset"));
2518 		} else {
2519 			parse_cluster(argc, argv);
2520 			/*NOTREACHED*/
2521 		}
2522 	}
2523 
2524 	/* join MultiNode diskset */
2525 	if (cmd == join) {
2526 		/*
2527 		 * If diskset specified, verify that it exists
2528 		 * and is a multinode diskset.
2529 		 */
2530 		if (strcmp(sname, MD_LOCAL_NAME)) {
2531 			if ((sp = metasetname(sname, ep)) == NULL) {
2532 				mde_perror(ep, "");
2533 				md_exit(sp, 1);
2534 			}
2535 
2536 			if (!multi_node) {
2537 				usage(sp, gettext(
2538 				    "-j option only allowed on "
2539 				    "multi-owner diskset"));
2540 			}
2541 		}
2542 		/*
2543 		 * Start mddoors daemon here.
2544 		 * mddoors itself takes care there will be only one
2545 		 * instance running, so starting it twice won't hurt
2546 		 */
2547 		pclose(popen("/usr/lib/lvm/mddoors", "w"));
2548 		parse_joinset(argc, argv);
2549 		/*NOTREACHED*/
2550 	}
2551 
2552 	/* withdraw from MultiNode diskset */
2553 	if (cmd == withdraw) {
2554 		/*
2555 		 * If diskset specified, verify that it exists
2556 		 * and is a multinode diskset.
2557 		 */
2558 		if (strcmp(sname, MD_LOCAL_NAME)) {
2559 			if ((sp = metasetname(sname, ep)) == NULL) {
2560 				mde_perror(ep, "");
2561 				md_exit(sp, 1);
2562 			}
2563 
2564 			if (!multi_node) {
2565 				usage(sp, gettext(
2566 				    "-w option only allowed on "
2567 				    "multi-owner diskset"));
2568 			}
2569 		}
2570 		parse_withdrawset(argc, argv);
2571 		/*NOTREACHED*/
2572 	}
2573 
2574 	/* must have set for everything else */
2575 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2576 		usage(sp, gettext("setname must be specified"));
2577 
2578 	/* add hosts or drives */
2579 	if (cmd == add) {
2580 		/*
2581 		 * In the multi node case start mddoors daemon.
2582 		 * mddoors itself takes care there will be
2583 		 * only one instance running, so starting it twice won't hurt
2584 		 */
2585 		if (multi_node) {
2586 			pclose(popen("/usr/lib/lvm/mddoors", "w"));
2587 		}
2588 
2589 		parse_add(argc, argv);
2590 		/*NOTREACHED*/
2591 	}
2592 
2593 	/* re-balance the replicas */
2594 	if (cmd == balance) {
2595 		parse_balance(argc, argv);
2596 		/*NOTREACHED*/
2597 	}
2598 
2599 	/* delete hosts or drives */
2600 	if (cmd == delete) {
2601 		parse_del(argc, argv);
2602 		/*NOTREACHED*/
2603 	}
2604 
2605 	/* check ownership */
2606 	if (cmd == isowner) {
2607 		parse_isowner(argc, argv);
2608 		/*NOTREACHED*/
2609 	}
2610 
2611 	/* purge the diskset */
2612 	if (cmd == purge) {
2613 		parse_purge(argc, argv);
2614 		/*NOTREACHED*/
2615 	}
2616 
2617 	/* query for data marks */
2618 	if (cmd == query) {
2619 		parse_query(argc, argv);
2620 		/*NOTREACHED*/
2621 	}
2622 
2623 	/* release ownership */
2624 	if (cmd == release) {
2625 		if (multi_node) {
2626 			/* Can't release multinode diskset */
2627 			usage(sp, gettext(
2628 			    "-r option not allowed on multi-owner diskset"));
2629 		} else {
2630 			parse_releaseset(argc, argv);
2631 			/*NOTREACHED*/
2632 		}
2633 	}
2634 
2635 	/* take ownership */
2636 	if (cmd == take) {
2637 		if (multi_node) {
2638 			/* Can't take multinode diskset */
2639 			usage(sp, gettext(
2640 			    "-t option not allowed on multi-owner diskset"));
2641 		} else {
2642 			parse_takeset(argc, argv);
2643 			/*NOTREACHED*/
2644 		}
2645 	}
2646 
2647 	/* take ownership of auto-take sets */
2648 	if (auto_take) {
2649 		parse_autotake(argc, argv);
2650 		/*NOTREACHED*/
2651 	}
2652 
2653 	/*NOTREACHED*/
2654 	return (0);
2655 }
2656