xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision 70025d765b044c6d8594bb965a2247a61e991a99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Metadevice diskset utility.
31  */
32 
33 #include <meta.h>
34 #include <sys/lvm/md_mddb.h>
35 #include <sdssc.h>
36 
/*
 * Top-level metaset sub-commands.
 *
 * NOTE(review): the code that selects one of these values and dispatches
 * on it is not part of this section of the file.  The per-value notes
 * below are inferred from the enumerator names, the usage() synopsis and
 * the parse_*() handlers, and should be confirmed against main().
 */
37 enum metaset_cmd {
38 	notspecified,		/* presumably: no sub-command selected */
39 	add,			/* presumably -a; see parse_add() */
40 	balance,		/* presumably -b; see parse_balance() */
41 	delete,			/* presumably -d; see parse_del() */
42 	cluster,		/* presumably the cluster back door; */
				/* see enum cluster_cmd */
43 	isowner,		/* presumably -o; see parse_isowner() */
44 	purge,			/* presumably -P; see parse_purge() */
45 	query,			/* presumably the plain listing; */
				/* see parse_printset() */
46 	release,		/* presumably -r */
47 	take,			/* presumably -t */
48 	join,			/* Join a multinode diskset */
49 	withdraw		/* Withdraw from a multinode diskset */
50 };
51 
/*
 * Sub-commands of the cluster ("back door") interface.
 *
 * NOTE(review): the option parsing that maps arguments onto these values
 * is not visible in this section of the file.
 */
52 enum cluster_cmd {
53 	ccnotspecified,		/* presumably: no cluster sub-command chosen */
54 	clusterversion,		/* Return the version of the cluster I/F */
55 	clusterdisksin,		/* List disks in a given diskset */
56 	clustertake,		/* back door for Cluster take */
57 	clusterrelease,		/* ditto */
58 	clusterpurge,		/* back door for Cluster purge */
59 	clusterproxy		/* proxy the args after '--' to primary */
60 };
61 
62 static void
63 usage(
64 	mdsetname_t	*sp,
65 	char		*string)
66 {
67 	if ((string != NULL) && (*string != '\0'))
68 		md_eprintf("%s\n", string);
69 	(void) fprintf(stderr, gettext(
70 "usage:	%s -s setname -a [-A enable | disable] -h hostname ...\n"
71 "	%s -s setname -a [-M] -h hostname ...\n"
72 "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
73 "	%s -s setname -d [-M] -h hostname ...\n"
74 "	%s -s setname -d [-M] -f -h all-hostnames\n"
75 "	%s -s setname -d [-M] [-f] drivename ...\n"
76 "	%s -s setname -d [-M] [-f] hostname ...\n"
77 "	%s -s setname -A enable | disable\n"
78 "	%s -s setname -t [-f]\n"
79 "	%s -s setname -r\n"
80 "	%s [-s setname] -j [-M]\n"
81 "	%s [-s setname] -w [-M]\n"
82 "	%s -s setname -P [-M]\n"
83 "	%s -s setname -b [-M]\n"
84 "	%s -s setname -o [-M] [-h hostname]\n"
85 "	%s [-s setname]\n"
86 "\n"
87 "		hostname = contents of /etc/nodename\n"
88 "		drivename = cNtNdN no slice\n"
89 "		[-M] for multi-owner set is optional except on set creation\n"),
90 	myname, myname, myname, myname, myname, myname, myname, myname,
91 	myname, myname, myname, myname, myname, myname, myname, myname);
92 	md_exit(sp, (string == NULL) ? 0 : 1);
93 }
94 
95 /*
96  * The svm.sync rc script relies heavily on the metaset output.
97  * Any changes to the metaset output MUST verify that the rc script
98  * does not break. Not doing so may potentially leave the system
99  * unusable. You have been WARNED.
100  */
101 static int
102 printset(mdsetname_t *sp, md_error_t *ep)
103 {
104 	int			i, j;
105 	md_set_desc		*sd;
106 	md_drive_desc		*dd, *p;
107 	int			max_meds;
108 	md_mnnode_desc		*nd;
109 
110 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
111 		return (-1);
112 
113 	/*
114 	 * Only get set owner information for traditional diskset.
115 	 * This set owner information is stored in the node records
116 	 * for a MN diskset.
117 	 */
118 	if (!(MD_MNSET_DESC(sd))) {
119 		if (metaget_setownership(sp, ep) == -1)
120 			return (-1);
121 	}
122 
123 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
124 	    ep)) == NULL) && !mdisok(ep))
125 		return (-1);
126 
127 	if (MD_MNSET_DESC(sd)) {
128 		(void) printf(gettext(
129 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
130 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
131 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
132 		    (dd != NULL)) {
133 			(void) printf(gettext(
134 				"Master and owner information unavailable "
135 				"until joined (metaset -j)\n"));
136 		}
137 	} else {
138 		(void) printf(gettext(
139 		    "\nSet name = %s, Set number = %d\n"),
140 		    sp->setname, sp->setno);
141 	}
142 
143 	if (MD_MNSET_DESC(sd)) {
144 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
145 			gettext("Host"), gettext("Owner"), gettext("Member"));
146 		nd = sd->sd_nodelist;
147 		while (nd) {
148 			/*
149 			 * Don't print nodes that aren't ok since they may be
150 			 * removed from config during a reconfig cycle.  If a
151 			 * node was being added to a diskset and the entire
152 			 * cluster went down but the node being added was unable
153 			 * to reboot, there's no way to know if that node had
154 			 * its own node record set to OK or not.  So, node
155 			 * record is left in ADD state during reconfig cycle.
156 			 * When that node reboots and returns to the cluster,
157 			 * the reconfig cycle will either remove the node
158 			 * record (if not marked OK on that node) or will mark
159 			 * it OK on all nodes.
160 			 * It is very important to only remove a node record
161 			 * from the other nodes when that node record is not
162 			 * marked OK on its own node - otherwise, different
163 			 * nodes would have different nodelists possibly
164 			 * causing different nodes to to choose different
165 			 * masters.
166 			 */
167 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
168 				nd = nd->nd_next;
169 				continue;
170 			}
171 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
172 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
173 				(void) printf(
174 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
175 				    nd->nd_nodename, gettext("multi-owner"),
176 				    gettext("Yes"));
177 			} else /* Should never be able to happen */
178 			    if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
179 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
180 				(void) printf(
181 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
182 				    nd->nd_nodename, gettext("multi-owner"),
183 				    gettext("No"));
184 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
185 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
186 				(void) printf(
187 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
188 				    nd->nd_nodename, gettext(""),
189 				    gettext("Yes"));
190 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
191 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
192 				(void) printf(
193 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
194 				    nd->nd_nodename, gettext(""),
195 				    gettext("No"));
196 			}
197 			nd = nd->nd_next;
198 		}
199 	} else {
200 		(void) printf("\n%-19.19s %-5.5s\n",
201 			gettext("Host"), gettext("Owner"));
202 		for (i = 0; i < MD_MAXSIDES; i++) {
203 			/* Skip empty slots */
204 			if (sd->sd_nodes[i][0] == '\0')
205 				continue;
206 
207 			/*
208 			 * Standard hostname field is 17 bytes but metaset will
209 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
210 			 */
211 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
212 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
213 				(sd->sd_isown[i] ? gettext("Yes (auto)") :
214 				    gettext("No (auto)"))
215 				: (sd->sd_isown[i] ? gettext("Yes") : "")));
216 		}
217 	}
218 
219 	if (sd->sd_med.n_cnt > 0)
220 		(void) printf("\n%-19.19s %-7.7s\n",
221 		    gettext("Mediator Host(s)"), gettext("Aliases"));
222 
223 	if ((max_meds = get_max_meds(ep)) == 0)
224 		return (-1);
225 
226 	for (i = 0; i < max_meds; i++) {
227 		if (sd->sd_med.n_lst[i].a_cnt == 0)
228 			continue;
229 		(void) printf("  %-17.17s   ", sd->sd_med.n_lst[i].a_nm[0]);
230 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
231 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
232 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
233 				(void) printf(gettext(", "));
234 		}
235 		(void) printf("\n");
236 	}
237 
238 	if (dd) {
239 		int	len = 0;
240 
241 
242 		/*
243 		 * Building a format string on the fly that will
244 		 * be used in (f)printf. This allows the length
245 		 * of the ctd to vary from small to large without
246 		 * looking horrible.
247 		 */
248 		for (p = dd; p != NULL; p = p->dd_next)
249 			len = max(len, strlen(p->dd_dnp->cname));
250 
251 		len += 2;
252 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
253 		    gettext("Drive"),
254 		    gettext("Dbase"));
255 		for (p = dd; p != NULL; p = p->dd_next) {
256 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
257 			    p->dd_dnp->cname,
258 			    (p->dd_dbcnt ? gettext("Yes") :
259 			    gettext("No")));
260 		}
261 	}
262 
263 	return (0);
264 }
265 
266 static int
267 printsets(mdsetname_t *sp, md_error_t *ep)
268 {
269 	int			i;
270 	mdsetname_t		*sp1;
271 	set_t			max_sets;
272 
273 	/*
274 	 * print setname given.
275 	 */
276 	if (! metaislocalset(sp)) {
277 		if (printset(sp, ep))
278 			return (-1);
279 		return (0);
280 	}
281 
282 	if ((max_sets = get_max_sets(ep)) == 0)
283 		return (-1);
284 
285 	/*
286 	 * Print all known sets
287 	 */
288 	for (i = 1; i < max_sets; i++) {
289 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
290 			if (! mdiserror(ep, MDE_NO_SET))
291 				break;
292 			mdclrerror(ep);
293 			continue;
294 		}
295 
296 		if (printset(sp1, ep))
297 			break;
298 	}
299 	if (! mdisok(ep))
300 		return (-1);
301 
302 	return (0);
303 }
304 
305 /*
306  * Print the current versionn of the cluster contract private interface.
307  */
308 static void
309 printclusterversion()
310 {
311 	printf("%s\n", METASETIFVERSION);
312 }
313 
314 /*
315  * Print the disks that make up the given disk set. This is used
316  * exclusively by Sun Cluster and is contract private.
317  * Should never be called with sname of a Multinode diskset.
318  */
319 static int
320 printdisksin(char *sname, md_error_t *ep)
321 {
322 	mdsetname_t	*sp;
323 	md_drive_desc	*dd, *p;
324 
325 	if ((sp = metasetname(sname, ep)) == NULL) {
326 
327 		/*
328 		 * During a deletion of a set the associated service is
329 		 * put offline. The SC3.0 reservation code calls disksuite
330 		 * to find a list of disks associated with the set so that
331 		 * it can release the reservation on those disks. In this
332 		 * case there won't be any disks or even a set left. So just
333 		 * return.
334 		 */
335 		return (0);
336 	}
337 
338 	if (metaget_setownership(sp, ep) == -1)
339 		return (-1);
340 
341 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
342 	    ep)) == NULL) && !mdisok(ep))
343 		return (-1);
344 
345 	for (p = dd; p != NULL; p = p->dd_next)
346 		(void) printf("%s\n", p->dd_dnp->rname);
347 
348 	return (0);
349 }
350 
351 static void
352 parse_printset(int argc, char **argv)
353 {
354 	int		c;
355 	mdsetname_t	*sp = NULL;
356 	char		*sname = MD_LOCAL_NAME;
357 	md_error_t	status = mdnullerror;
358 	md_error_t	*ep = &status;
359 
360 	/* reset and parse args */
361 	optind = 1;
362 	opterr = 1;
363 	while ((c = getopt(argc, argv, "s:")) != -1) {
364 		switch (c) {
365 		case 's':
366 			sname = optarg;
367 			break;
368 		default:
369 			usage(sp, gettext("unknown options"));
370 		}
371 	}
372 
373 	argc -= optind;
374 	argv += optind;
375 
376 	if (argc != 0)
377 		usage(sp, gettext("too many args"));
378 
379 	if ((sp = metasetname(sname, ep)) == NULL) {
380 		mde_perror(ep, "");
381 		md_exit(sp, 1);
382 	}
383 
384 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
385 		mde_perror(ep, "");
386 		md_exit(sp, 1);
387 	}
388 
389 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
390 		mde_perror(ep, "");
391 		md_exit(sp, 1);
392 	}
393 
394 	md_exit(sp, 0);
395 }
396 
/*
 * parse_add - handle "metaset -a": add hosts (-h), mediators (-m) or,
 * when neither is given, drives to a diskset.
 *
 * argv is re-scanned from the start with the option string "MaA:hl:Lms:".
 * -h and -m take no option argument; the host, mediator or drive names
 * are the operands left after option parsing.  -l (replica size) and -L
 * (force label) only apply to drives and conflict with -h/-m.
 * -A enable is only accepted together with -h, and only when the set
 * does not exist yet.
 *
 * Locking, as done below: the local set lock is always taken; the lock
 * of the named set is taken only for a traditional (non multi-owner)
 * set that already exists.
 *
 * Every path ends in md_exit(): status 0 on success, 1 on failure.
 * NOTE(review): the code relies on usage()/md_exit() not returning;
 * md_exit() is defined outside this file - confirm.
 */
397 static void
398 parse_add(int argc, char **argv)
399 {
400 	int			c,
401 				created_set,
402 				hosts = FALSE,
403 				meds = FALSE,
404 				auto_take = FALSE,
405 				force_label = FALSE,
406 				default_size = TRUE;
407 	mdsetname_t		*sp = NULL;
408 	char			*sname = MD_LOCAL_NAME;
409 	md_error_t		status = mdnullerror,
410 				*ep = &status;
411 	mddrivenamelist_t	*dnlp = NULL;
412 	mddrivenamelist_t	*p;
413 	daddr_t			dbsize,
414 				nblks;
415 	mdsetname_t		*local_sp = NULL;
416 	int			multi_node = 0;
417 	md_set_desc		*sd;
418 	rval_e			sdssc_rval;
419 
420 	/* reset and parse args */
421 	optind = 1;
422 	opterr = 1;
423 	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
424 		switch (c) {
425 		case 'M':
426 			multi_node = 1;
427 			break;
428 		case 'A':
429 			/* verified sub-option in main */
430 			if (strcmp(optarg, "enable") == 0)
431 				auto_take = TRUE;
432 			break;
433 		case 'a':
434 			break;
435 		case 'h':
436 		case 'm':
437 			if (meds == TRUE || hosts == TRUE)
438 				usage(sp, gettext(
439 				    "only one -m or -h option allowed"));
440 
			/* -l / -L were already seen: drive-only options */
441 			if (default_size == FALSE || force_label == TRUE)
442 				usage(sp, gettext(
443 				    "conflicting options"));
444 
445 			if (c == 'h')
446 				hosts = TRUE;
447 			else
448 				meds = TRUE;
449 			break;
450 		case 'l':
451 			if (hosts == TRUE || meds == TRUE)
452 				usage(sp, gettext(
453 				    "conflicting options"));
			/* range of dbsize is checked later, once the */
			/* set type is known */
454 			if (sscanf(optarg, "%ld", &dbsize) != 1) {
455 				md_eprintf(gettext(
456 				    "%s: bad format\n"), optarg);
457 				usage(sp, "");
458 			}
459 
460 			default_size = FALSE;
461 			break;
462 		case 'L':
463 			/* Same criteria as -l */
464 			if (hosts == TRUE || meds == TRUE)
465 				usage(sp, gettext(
466 				    "conflicting options"));
467 			force_label = TRUE;
468 			break;
469 		case 's':
470 			sname = optarg;
471 			break;
472 		default:
473 			usage(sp, gettext(
474 			    "unknown options"));
475 		}
476 	}
477 
478 	/* Can only use -A enable when creating the single-node set */
479 	if (auto_take && hosts != TRUE)
480 		usage(sp, gettext("conflicting options"));
481 
482 	argc -= optind;
483 	argv += optind;
484 
485 	/*
486 	 * Add hosts
487 	 */
488 	if (hosts == TRUE) {
489 
490 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
491 			mde_perror(ep, "");
492 			md_exit(local_sp, 1);
493 		}
494 
495 		if (meta_lock(local_sp, TRUE, ep) != 0) {
496 			mde_perror(ep, "");
497 			md_exit(local_sp, 1);
498 		}
499 
500 		/*
501 		 * Keep track of Cluster set creation. Need to complete
502 		 * the transaction no matter if the set was created or not.
503 		 */
504 		created_set = 0;
505 
506 		/*
507 		 * Have no set, cannot take the lock, so only take the
508 		 * local lock.
509 		 */
510 		if ((sp = metasetname(sname, ep)) == NULL) {
511 			sdssc_rval = 0;
512 			if (multi_node) {
513 				/*
514 				 * When running on a cluster system that
515 				 * does not support MN disksets, the routine
516 				 * sdssc_mo_create_begin will be bound
517 				 * to the SVM routine not_bound_error
518 				 * which returns SDSSC_NOT_BOUND_ERROR.
519 				 *
520 				 * When running on a cluster system that
521 				 * does support MN disksets, the routine
522 				 * sdssc_mo_create_begin will be bound to
523 				 * the sdssc_mo_create_begin routine in
524 				 * library libsdssc_so.  A call to
525 				 * sdssc_mo_create_begin will return with
526 				 * either SDSSC_ERROR or SDSSC_OKAY. If
527 				 * an SDSSC_OKAY is returned, then the
528 				 * cluster framework has allocated a
529 				 * set number for this new set that is unique
530 				 * across traditional and MN disksets.
531 				 * Libmeta will get this unique set number
532 				 * by calling sdssc_get_index.
533 				 *
534 				 * When running on a non-cluster system,
535 				 * the routine sdssc_mo_create_begin
536 				 * will be bound to the SVM routine
537 				 * not_bound which returns SDSSC_NOT_BOUND.
538 				 * In this case, all sdssc routines will
539 				 * return SDSSC_NOT_BOUND.  No need to check
540 				 * for return value of SDSSC_NOT_BOUND since
541 				 * the libmeta call to get the set number
542 				 * (sdssc_get_index) will also fail with
543 				 * SDSSC_NOT_BOUND causing libmeta to
544 				 * determine its own set number.
545 				 */
546 				sdssc_rval = sdssc_mo_create_begin(sname, argc,
547 					argv, SDSSC_PICK_SETNO);
548 				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
549 					mderror(ep, MDE_NOT_MN, NULL);
550 					mde_perror(ep,
551 					"Cluster node does not support "
552 					"multi-owner diskset operations");
553 					md_exit(local_sp, 1);
554 				} else if (sdssc_rval == SDSSC_ERROR) {
555 					mde_perror(ep, "");
556 					md_exit(local_sp, 1);
557 				}
558 			} else {
559 				sdssc_rval = sdssc_create_begin(sname, argc,
560 					argv, SDSSC_PICK_SETNO);
561 				if (sdssc_rval == SDSSC_ERROR) {
562 					mde_perror(ep, "");
563 					md_exit(local_sp, 1);
564 				}
565 			}
566 			/*
567 			 * Created diskset (as opposed to adding a
568 			 * host to an existing diskset).
569 			 */
570 			created_set = 1;
571 
			/*
			 * Build a set handle by hand for the not yet
			 * existing set and discard the lookup error left
			 * in ep by the failed metasetname() above.
			 */
572 			sp = Zalloc(sizeof (*sp));
573 			sp->setname = Strdup(sname);
574 			sp->lockfd = MD_NO_LOCK;
575 			mdclrerror(ep);
576 		} else {
577 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
578 				mde_perror(ep, "");
579 				md_exit(local_sp, 1);
580 			}
			/* An existing MN set is treated as such without -M */
581 			if (MD_MNSET_DESC(sd)) {
582 				multi_node = 1;
583 			}
584 
585 			/*
586 			 * can't add hosts to an existing set & enable
587 			 * auto-take
588 			 */
589 			if (auto_take)
590 				usage(sp, gettext("conflicting options"));
591 
592 			/*
593 			 * Have a valid set, take the set lock also.
594 			 *
595 			 * A MN diskset does not use the set meta_lock but
596 			 * instead uses the clnt_lock of rpc.metad and the
597 			 * suspend/resume feature of the rpc.mdcommd.  Can't
598 			 * use set meta_lock since class 1 messages are
599 			 * grabbing this lock and if this thread is holding
600 			 * the set meta_lock then no rpc.mdcommd suspend
601 			 * can occur.
602 			 */
603 			if (!multi_node) {
604 				if (meta_lock(sp, TRUE, ep) != 0) {
605 					mde_perror(ep, "");
606 					md_exit(local_sp, 1);
607 				}
608 			}
609 		}
610 
		/*
		 * NOTE(review): both outcomes below end the creation with
		 * sdssc_create_end(), also when it was started with
		 * sdssc_mo_create_begin(); and for a freshly created
		 * traditional set meta_unlock(sp) is called although no
		 * set lock was taken on sp - confirm both are intended.
		 */
611 		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
612 		    ep)) {
613 			if (created_set)
614 				sdssc_create_end(sname, SDSSC_CLEANUP);
615 			mde_perror(&status, "");
616 			if (!multi_node)
617 				(void) meta_unlock(sp, ep);
618 			md_exit(local_sp, 1);
619 		}
620 
621 		if (created_set)
622 			sdssc_create_end(sname, SDSSC_COMMIT);
623 
624 		else {
625 			/*
626 			 * If adding hosts to existing diskset,
627 			 * call DCS svcs
628 			 */
629 			sdssc_add_hosts(sname, argc, argv);
630 		}
631 		if (!multi_node)
632 			(void) meta_unlock(sp, ep);
633 		md_exit(local_sp, 0);
634 	}
635 
636 	/*
637 	 * Add mediators
638 	 */
639 	if (meds == TRUE) {
640 
641 		if ((sp = metasetname(sname, ep)) == NULL) {
642 			mde_perror(ep, "");
643 			md_exit(local_sp, 1);
644 		}
645 
646 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
647 			mde_perror(ep, "");
648 			md_exit(local_sp, 1);
649 		}
650 
651 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
652 			mde_perror(ep, "");
653 			md_exit(local_sp, 1);
654 		}
655 		if (MD_MNSET_DESC(sd)) {
656 			multi_node = 1;
657 		}
658 
659 		if (meta_lock(local_sp, TRUE, ep) != 0) {
660 			mde_perror(ep, "");
661 			md_exit(local_sp, 1);
662 		}
663 		/*
664 		 * A MN diskset does not use the set meta_lock but
665 		 * instead uses the clnt_lock of rpc.metad and the
666 		 * suspend/resume feature of the rpc.mdcommd.  Can't
667 		 * use set meta_lock since class 1 messages are
668 		 * grabbing this lock and if this thread is holding
669 		 * the set meta_lock then no rpc.mdcommd suspend
670 		 * can occur.
671 		 */
672 		if (!multi_node) {
673 			if (meta_lock(sp, TRUE, ep) != 0) {
674 				mde_perror(ep, "");
675 				md_exit(local_sp, 1);
676 			}
677 		}
678 
679 		if (meta_set_addmeds(sp, argc, argv, ep)) {
680 			mde_perror(&status, "");
681 			if (!multi_node)
682 				(void) meta_unlock(sp, ep);
683 			md_exit(local_sp, 1);
684 		}
685 
686 		if (!multi_node)
687 			(void) meta_unlock(sp, ep);
688 		md_exit(local_sp, 0);
689 	}
690 
691 	/*
692 	 * Add drives
693 	 */
694 	if ((sp = metasetname(sname, ep)) == NULL) {
695 		mde_perror(ep, "");
696 		md_exit(local_sp, 1);
697 	}
698 
699 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
700 		mde_perror(ep, "");
701 		md_exit(local_sp, 1);
702 	}
703 
704 	/* Determine if diskset is a MN diskset or not */
705 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
706 		mde_perror(ep, "");
707 		md_exit(local_sp, 1);
708 	}
709 	if (MD_MNSET_DESC(sd)) {
710 		multi_node = 1;
711 	}
712 
713 	if (meta_lock(local_sp, TRUE, ep) != 0) {
714 		mde_perror(ep, "");
715 		md_exit(local_sp, 1);
716 	}
717 
718 	/* Make sure database size is within limits */
	/* (MN and traditional sets have separate min/max limits) */
719 	if (default_size == FALSE) {
720 		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
721 		    (!multi_node && dbsize < MDDB_MINBLKS))
722 			usage(sp, gettext(
723 			    "size (-l) is too small"));
724 
725 		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
726 		    (!multi_node && dbsize > MDDB_MAXBLKS))
727 			usage(sp, gettext(
728 			    "size (-l) is too big"));
729 	}
730 
731 	/*
732 	 * Have a valid set, take the set lock also.
733 	 *
734 	 * A MN diskset does not use the set meta_lock but
735 	 * instead uses the clnt_lock of rpc.metad and the
736 	 * suspend/resume feature of the rpc.mdcommd.  Can't
737 	 * use set meta_lock since class 1 messages are
738 	 * grabbing this lock and if this thread is holding
739 	 * the set meta_lock then no rpc.mdcommd suspend
740 	 * can occur.
741 	 */
742 	if (!multi_node) {
743 		if (meta_lock(sp, TRUE, ep) != 0) {
744 			mde_perror(ep, "");
745 			md_exit(local_sp, 1);
746 		}
747 	}
748 
749 
750 	/*
751 	 * If using the default size,
752 	 *   then let's adjust the default to the minimum
753 	 *   size currently in use.
754 	 */
755 	if (default_size) {
756 		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
		/* on failure keep the built-in default and drop the error */
757 		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
758 			mdclrerror(ep);
759 		else
760 			dbsize = nblks;	/* adjust replica size */
761 	}
762 
	/* c is reused here as the number of drive names parsed */
763 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
764 		mde_perror(ep, "");
765 		if (!multi_node)
766 			(void) meta_unlock(sp, ep);
767 		md_exit(local_sp, 1);
768 	}
769 
770 	if (c == 0) {
771 		md_perror(gettext(
772 		    "No drives specified to add.\n"));
773 		if (!multi_node)
774 			(void) meta_unlock(sp, ep);
775 		md_exit(local_sp, 1);
776 	}
777 
778 	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
779 		metafreedrivenamelist(dnlp);
780 		mde_perror(ep, "");
781 		if (!multi_node)
782 			(void) meta_unlock(sp, ep);
783 		md_exit(local_sp, 1);
784 	}
785 
786 	/*
787 	 * MN disksets don't have a device id in the master block
788 	 * For traditional disksets, check for the drive device
789 	 * id not fitting in the master block
790 	 */
	/*
	 * NOTE(review): the MDE_DS_NOTSELFIDENTIFY errors recorded in ep
	 * by this loop are never printed; the function still exits with
	 * status 0 afterwards.  Drives whose slice 0 name cannot be built
	 * or opened are silently skipped.
	 */
791 	if (!multi_node) {
792 		for (p = dnlp; p != NULL; p = p->next) {
793 			int 		fd;
794 			ddi_devid_t	devid;
795 			mdname_t	*np;
796 
797 			np = metaslicename(p->drivenamep, 0, ep);
798 			if (np == NULL)
799 				continue;
800 
801 			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
802 				continue;
803 
804 			if (devid_get(fd, &devid) == 0) {
805 				size_t len;
806 
807 				len = devid_sizeof(devid);
808 				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
809 					(void) mddserror(ep,
810 					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
811 					    np->rname, NULL);
812 				devid_free(devid);
813 			} else {
814 				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
815 				    NULL, NULL, np->rname, NULL);
816 			}
817 			(void) close(fd);
818 		}
819 	}
820 
821 	/*
822 	 * MN disksets don't use DCS clustering services.
823 	 * For traditional disksets:
824 	 * There's not really much we can do here if this call fails.
825 	 * The drives have been added to the set and DiskSuite believes
826 	 * it owns the drives.
827 	 * Release the set and hope for the best.
828 	 */
829 	if ((!multi_node) &&
830 	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
831 		meta_set_release(sp, ep);
832 		printf(gettext(
833 		    "Sun Clustering failed to make set primary\n"));
834 	}
835 
836 	metafreedrivenamelist(dnlp);
837 	if (!multi_node)
838 		(void) meta_unlock(sp, ep);
839 	md_exit(local_sp, 0);
840 }
841 
842 static void
843 parse_balance(int argc, char **argv)
844 {
845 	int		c;
846 	mdsetname_t	*sp = NULL;
847 	char		*sname = MD_LOCAL_NAME;
848 	md_error_t	status = mdnullerror;
849 	md_set_desc	*sd;
850 	int		multi_node = 0;
851 
852 	/* reset and parse args */
853 	optind = 1;
854 	opterr = 1;
855 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
856 		switch (c) {
857 		case 'M':
858 			break;
859 		case 'b':
860 			break;
861 		case 's':
862 			sname = optarg;
863 			break;
864 		default:
865 			usage(sp, gettext("unknown options"));
866 		}
867 	}
868 
869 	argc -= optind;
870 	argv += optind;
871 
872 	if (argc != 0)
873 		usage(sp, gettext("too many args"));
874 
875 	if ((sp = metasetname(sname, &status)) == NULL) {
876 		mde_perror(&status, "");
877 		md_exit(sp, 1);
878 	}
879 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
880 		mde_perror(&status, "");
881 		md_exit(sp, 1);
882 	}
883 	if (MD_MNSET_DESC(sd)) {
884 		multi_node = 1;
885 	}
886 	/*
887 	 * Have a valid set, take the set lock also.
888 	 *
889 	 * A MN diskset does not use the set meta_lock but
890 	 * instead uses the clnt_lock of rpc.metad and the
891 	 * suspend/resume feature of the rpc.mdcommd.  Can't
892 	 * use set meta_lock since class 1 messages are
893 	 * grabbing this lock and if this thread is holding
894 	 * the set meta_lock then no rpc.mdcommd suspend
895 	 * can occur.
896 	 */
897 	if (!multi_node) {
898 		if (meta_lock(sp, TRUE, &status) != 0) {
899 			mde_perror(&status, "");
900 			md_exit(sp, 1);
901 		}
902 	}
903 
904 	if (meta_set_balance(sp, &status) != 0) {
905 		mde_perror(&status, "");
906 		md_exit(sp, 1);
907 	}
908 	md_exit(sp, 0);
909 }
910 
911 static void
912 parse_autotake(int argc, char **argv)
913 {
914 	int			c;
915 	int			enable = 0;
916 	mdsetname_t		*sp = NULL;
917 	char			*sname = MD_LOCAL_NAME;
918 	md_error_t		status = mdnullerror;
919 	md_error_t		*ep = &status;
920 
921 	/* reset and parse args */
922 	optind = 1;
923 	opterr = 1;
924 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
925 		switch (c) {
926 		case 'A':
927 			/* verified sub-option in main */
928 			if (strcmp(optarg, "enable") == 0)
929 				enable = 1;
930 			break;
931 		case 's':
932 			/* verified presence of setname in main */
933 			sname = optarg;
934 			break;
935 		default:
936 			usage(sp, gettext("unknown options"));
937 		}
938 	}
939 
940 	if ((sp = metasetname(sname, ep)) == NULL) {
941 		mde_perror(ep, "");
942 		md_exit(sp, 1);
943 	}
944 
945 	if (meta_lock(sp, TRUE, ep) != 0) {
946 		mde_perror(ep, "");
947 		md_exit(sp, 1);
948 	}
949 
950 	if (meta_check_ownership(sp, ep) != 0) {
951 		mde_perror(ep, "");
952 		md_exit(sp, 1);
953 	}
954 
955 	if (meta_set_auto_take(sp, enable, ep) != 0) {
956 		mde_perror(ep, "");
957 		md_exit(sp, 1);
958 	}
959 
960 	md_exit(sp, 0);
961 }
962 
/*
 * parse_del - handle "metaset -d": delete hosts (-h), mediators (-m) or,
 * when neither is given, drives from a diskset.
 *
 * argv is re-scanned from the start with the option string "Mdfhms:".
 * -h and -m take no option argument; the host, mediator or drive names
 * are the operands left after option parsing.  -f is passed on as the
 * force flag to the libmeta delete routines.
 *
 * Locking, as done below: the local set lock is always taken; the lock
 * of the named set is taken only for a traditional (non multi-owner)
 * set.
 *
 * Every path ends in md_exit(): status 0 on success, 1 on failure.
 * NOTE(review): the code relies on usage()/md_exit() not returning;
 * md_exit() is defined outside this file - confirm.
 */
963 static void
964 parse_del(int argc, char **argv)
965 {
966 	int			c;
967 	mdsetname_t		*sp = NULL;
968 	char			*sname = MD_LOCAL_NAME;
969 	int			hosts = FALSE;
970 	int			meds = FALSE;
971 	int			forceflg = FALSE;
972 	md_error_t		status = mdnullerror;
973 	md_error_t		*ep = &status;
974 	mddrivenamelist_t	*dnlp = NULL;
975 	mdsetname_t		*local_sp = NULL;
976 	md_set_desc		*sd;
977 	int			multi_node = 0;
978 
979 	/* reset and parse args */
980 	optind = 1;
981 	opterr = 1;
982 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
983 		switch (c) {
		/* -M and -d are accepted but carry no information here */
984 		case 'M':
985 			break;
986 		case 'd':
987 			break;
988 		case 'f':
989 			forceflg = TRUE;
990 			break;
991 		case 'h':
992 		case 'm':
993 			if (meds == TRUE || hosts == TRUE)
994 				usage(sp, gettext(
995 				    "only one -m or -h option allowed"));
996 
997 			if (c == 'h')
998 				hosts = TRUE;
999 			else
1000 				meds = TRUE;
1001 			break;
1002 		case 's':
1003 			sname = optarg;
1004 			break;
1005 		default:
1006 			usage(sp, gettext("unknown options"));
1007 		}
1008 	}
1009 
1010 	argc -= optind;
1011 	argv += optind;
1012 
1013 	if ((sp = metasetname(sname, ep)) == NULL) {
1014 		mde_perror(ep, "");
1015 		md_exit(local_sp, 1);
1016 	}
1017 
1018 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1019 		mde_perror(ep, "");
1020 		md_exit(local_sp, 1);
1021 	}
1022 
	/* The set type comes from the set descriptor, not from -M */
1023 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1024 		mde_perror(ep, "");
1025 		md_exit(local_sp, 1);
1026 	}
1027 	if (MD_MNSET_DESC(sd))
1028 		multi_node = 1;
1029 
1030 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1031 		mde_perror(ep, "");
1032 		md_exit(local_sp, 1);
1033 	}
1034 
1035 	/*
1036 	 * Have a valid set, take the set lock also.
1037 	 *
1038 	 * A MN diskset does not use the set meta_lock but
1039 	 * instead uses the clnt_lock of rpc.metad and the
1040 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1041 	 * use set meta_lock since class 1 messages are
1042 	 * grabbing this lock and if this thread is holding
1043 	 * the set meta_lock then no rpc.mdcommd suspend
1044 	 * can occur.
1045 	 */
1046 	if (!multi_node) {
1047 		if (meta_lock(sp, TRUE, ep) != 0) {
1048 			mde_perror(ep, "");
1049 			md_exit(local_sp, 1);
1050 		}
1051 	}
1052 
1053 	/*
1054 	 * Delete hosts
1055 	 */
1056 	if (hosts == TRUE) {
1057 		if (meta_check_ownership(sp, ep) != 0) {
1058 			/*
1059 			 * If we don't own the set bail out here otherwise
1060 			 * we could delete the node from the DCS service
1061 			 * yet not delete the host from the set.
1062 			 */
1063 			mde_perror(ep, "");
1064 			if (!multi_node)
1065 				(void) meta_unlock(sp, ep);
1066 			md_exit(local_sp, 1);
1067 		}
		/*
		 * The hosts are removed from the DCS service first.  A DCS
		 * failure is fatal only when metad_isautotakebyname()
		 * reports false for the set.
		 */
1068 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1069 		    if (!metad_isautotakebyname(sname)) {
1070 			/*
1071 			 * SC could have been installed after the set was
1072 			 * created.  We still want to be able to delete these
1073 			 * sets.
1074 			 */
1075 			md_perror(gettext(
1076 			    "Failed to delete hosts from DCS service"));
1077 			if (!multi_node)
1078 				(void) meta_unlock(sp, ep);
1079 			md_exit(local_sp, 1);
1080 		    }
1081 		}
		/*
		 * If the libmeta delete fails, try to put the hosts back
		 * into the DCS service so both views stay consistent.
		 */
1082 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1083 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1084 				(void) printf(gettext(
1085 				    "Failed to restore host(s) in DCS "
1086 				    "database\n"));
1087 			}
1088 			mde_perror(ep, "");
1089 			if (!multi_node)
1090 				(void) meta_unlock(sp, ep);
1091 			md_exit(local_sp, 1);
1092 		}
1093 		if (!multi_node)
1094 			(void) meta_unlock(sp, ep);
1095 		md_exit(local_sp, 0);
1096 	}
1097 
1098 	/*
1099 	 * Delete mediators
1100 	 */
1101 	if (meds == TRUE) {
1102 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1103 			mde_perror(ep, "");
1104 			if (!multi_node)
1105 				(void) meta_unlock(sp, ep);
1106 			md_exit(local_sp, 1);
1107 		}
1108 		if (!multi_node)
1109 			(void) meta_unlock(sp, ep);
1110 		md_exit(local_sp, 0);
1111 	}
1112 
1113 	/*
1114 	 * Delete drives
1115 	 */
1116 
	/* c is reused here as the number of drive names parsed */
1117 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1118 		mde_perror(ep, "");
1119 		if (!multi_node)
1120 			(void) meta_unlock(sp, ep);
1121 		md_exit(local_sp, 1);
1122 	}
1123 
1124 	if (c == 0) {
1125 		md_perror(gettext(
1126 		    "No drives specified to delete.\n"));
1127 		if (!multi_node)
1128 			(void) meta_unlock(sp, ep);
1129 		md_exit(local_sp, 1);
1130 	}
1131 
1132 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1133 		metafreedrivenamelist(dnlp);
1134 		mde_perror(ep, "");
1135 		if (!multi_node)
1136 			(void) meta_unlock(sp, ep);
1137 		md_exit(local_sp, 1);
1138 	}
1139 
1140 	metafreedrivenamelist(dnlp);
1141 	if (!multi_node)
1142 		(void) meta_unlock(sp, ep);
1143 	md_exit(local_sp, 0);
1144 }
1145 
1146 static void
1147 parse_isowner(int argc, char **argv)
1148 {
1149 	int		c;
1150 	mdsetname_t	*sp = NULL;
1151 	char		*sname = MD_LOCAL_NAME;
1152 	md_error_t	status = mdnullerror;
1153 	md_error_t	*ep = &status;
1154 	char		*host = NULL;
1155 
1156 	/* reset and parse args */
1157 	optind = 1;
1158 	opterr = 1;
1159 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1160 		switch (c) {
1161 		case 'M':
1162 			break;
1163 		case 'o':
1164 			break;
1165 		case 'h':
1166 			if (host != NULL) {
1167 				usage(sp, gettext(
1168 				    "only one -h option allowed"));
1169 			}
1170 			host = optarg;
1171 			break;
1172 		case 's':
1173 			sname = optarg;
1174 			break;
1175 		default:
1176 			usage(sp, gettext("unknown options"));
1177 		}
1178 	}
1179 
1180 	argc -= optind;
1181 	argv += optind;
1182 
1183 	if (argc != 0)
1184 		usage(sp, gettext("too many args"));
1185 
1186 	if ((sp = metasetname(sname, ep)) == NULL) {
1187 		mde_perror(ep, "");
1188 		md_exit(sp, 1);
1189 	}
1190 
1191 	if (host == NULL) {
1192 		if (meta_check_ownership(sp, ep) != 0) {
1193 			mde_perror(ep, "");
1194 			md_exit(sp, 1);
1195 		}
1196 	} else {
1197 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1198 			mde_perror(ep, "");
1199 			md_exit(sp, 1);
1200 		}
1201 	}
1202 	md_exit(sp, 0);
1203 }
1204 
1205 static void
1206 parse_purge(int argc, char **argv)
1207 {
1208 	int		c;
1209 	mdsetname_t	*sp = NULL;
1210 	mdsetname_t	*local_sp = NULL;
1211 	md_drive_desc	*dd;
1212 	char		*sname = MD_LOCAL_NAME;
1213 	char		*thishost = mynode();
1214 	md_error_t	status = mdnullerror;
1215 	md_error_t	*ep = &status;
1216 	int		bypass_cluster_purge = 0;
1217 	int		forceflg = FALSE;
1218 	int		ret = 0;
1219 	int		multi_node = 0;
1220 	md_set_desc		*sd;
1221 
1222 	optind = 1;
1223 	opterr = 1;
1224 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1225 		switch (c) {
1226 		case 'M':
1227 			break;
1228 		case 'C':
1229 			bypass_cluster_purge = 1;
1230 			break;
1231 		case 'f':
1232 			forceflg = TRUE;
1233 			break;
1234 		case 'P':
1235 			break;
1236 		case 's':
1237 			sname = optarg;
1238 			break;
1239 		default:
1240 			usage(sp, gettext("unknown options"));
1241 		}
1242 	}
1243 
1244 	argc -= optind;
1245 	argv += optind;
1246 
1247 	if (argc != 0)
1248 		usage(sp, gettext("too many arguments"));
1249 
1250 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1251 		mde_perror(ep, "");
1252 		md_exit(local_sp, 1);
1253 	}
1254 
1255 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1256 		mde_perror(ep, "");
1257 		md_exit(local_sp, 1);
1258 	}
1259 
1260 	if ((sp = metasetname(sname, ep)) == NULL) {
1261 		mde_perror(ep, "");
1262 		md_exit(sp, 1);
1263 	}
1264 
1265 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1266 		mde_perror(ep, "");
1267 		md_exit(local_sp, 1);
1268 	}
1269 	if (MD_MNSET_DESC(sd))
1270 		multi_node = 1;
1271 
1272 	if (!multi_node) {
1273 		if (meta_lock(sp, TRUE, ep) != 0) {
1274 			mde_perror(ep, "");
1275 			md_exit(local_sp, 1);
1276 		}
1277 	}
1278 
1279 	/* Must not own the set if purging it from this host */
1280 	if (meta_check_ownership(sp, ep) == 0) {
1281 		/*
1282 		 * Need to see if there are disks in the set, if not then
1283 		 * there is no ownership but meta_check_ownership returns 0
1284 		 */
1285 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1286 		if (!mdisok(ep)) {
1287 			mde_perror(ep, "");
1288 			if (!multi_node)
1289 				(void) meta_unlock(sp, ep);
1290 			md_exit(local_sp, 1);
1291 		}
1292 		if (dd != NULL) {
1293 			(void) printf(gettext
1294 			    ("Must not be owner of the set when purging it\n"));
1295 			if (!multi_node)
1296 				(void) meta_unlock(sp, ep);
1297 			md_exit(local_sp, 1);
1298 		}
1299 	}
1300 	/*
1301 	 * Remove the node from the DCS service
1302 	 */
1303 	if (!bypass_cluster_purge) {
1304 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1305 			md_perror(gettext
1306 			    ("Failed to purge hosts from DCS service"));
1307 			if (!multi_node)
1308 				(void) meta_unlock(sp, ep);
1309 			md_exit(local_sp, 1);
1310 		}
1311 	}
1312 
1313 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1314 	    ep)) != 0) {
1315 		if (!bypass_cluster_purge) {
1316 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1317 			    SDSSC_ERROR) {
1318 				(void) printf(gettext(
1319 				    "Failed to restore host in DCS "
1320 				    "database\n"));
1321 			}
1322 		}
1323 		mde_perror(ep, "");
1324 		if (!multi_node)
1325 			(void) meta_unlock(sp, ep);
1326 		md_exit(local_sp, ret);
1327 	}
1328 
1329 	if (!multi_node)
1330 		(void) meta_unlock(sp, ep);
1331 	md_exit(local_sp, 0);
1332 }
1333 
1334 static void
1335 parse_query(int argc, char **argv)
1336 {
1337 	int		c;
1338 	mdsetname_t	*sp = NULL;
1339 	mddb_dtag_lst_t	*dtlp = NULL;
1340 	mddb_dtag_lst_t	*tdtlp;
1341 	char		*sname = MD_LOCAL_NAME;
1342 	md_error_t	status = mdnullerror;
1343 
1344 	/* reset and parse args */
1345 	optind = 1;
1346 	opterr = 1;
1347 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1348 		switch (c) {
1349 		case 'M':
1350 			break;
1351 		case 'q':
1352 			break;
1353 		case 's':
1354 			sname = optarg;
1355 			break;
1356 		default:
1357 			usage(sp, gettext("unknown options"));
1358 		}
1359 	}
1360 
1361 	argc -= optind;
1362 	argv += optind;
1363 
1364 	if (argc != 0)
1365 		usage(sp, gettext("too many args"));
1366 
1367 	if ((sp = metasetname(sname, &status)) == NULL) {
1368 		mde_perror(&status, "");
1369 		md_exit(sp, 1);
1370 	}
1371 
1372 	if (meta_lock(sp, TRUE, &status) != 0) {
1373 		mde_perror(&status, "");
1374 		md_exit(sp, 1);
1375 	}
1376 
1377 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1378 		mde_perror(&status, "");
1379 		md_exit(sp, 1);
1380 	}
1381 
1382 	if (dtlp != NULL)
1383 		(void) printf("The following tag(s) were found:\n");
1384 
1385 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1386 		dtlp = tdtlp->dtl_nx;
1387 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1388 		    tdtlp->dtl_dt.dt_hn,
1389 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1390 		Free(tdtlp);
1391 	}
1392 
1393 	md_exit(sp, 0);
1394 }
1395 
1396 /* Should never be called with sname of a Multinode diskset. */
1397 static void
1398 parse_releaseset(int argc, char **argv)
1399 {
1400 	int		c;
1401 	mdsetname_t	*sp = NULL;
1402 	md_error_t	status = mdnullerror;
1403 	md_error_t	*ep = &status;
1404 	char		*sname = MD_LOCAL_NAME;
1405 	int		no_lock = 0;
1406 	sdssc_boolean_e	cluster_release = SDSSC_False;
1407 	sdssc_version_t	vers;
1408 	rval_e		rval;
1409 	md_set_desc	*sd;
1410 
1411 	/* reset and parse args */
1412 	optind = 1;
1413 	opterr = 1;
1414 	while ((c = getopt(argc, argv, "C:ns:r")) != -1) {
1415 		switch (c) {
1416 		case 'C':
1417 			cluster_release = SDSSC_True;
1418 			break;
1419 		case 'n':
1420 			no_lock = 1;
1421 			break;
1422 		case 's':
1423 			sname = optarg;
1424 			break;
1425 		case 'r':
1426 			break;
1427 		default:
1428 			usage(sp, gettext("unknown options"));
1429 		}
1430 	}
1431 
1432 	argc -= optind;
1433 	argv += optind;
1434 
1435 	if (argc > 0)
1436 		usage(sp, gettext("too many args"));
1437 
1438 	memset(&vers, 0, sizeof (vers));
1439 
1440 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1441 	    (vers.major == 3) &&
1442 	    (cluster_release == SDSSC_False)) {
1443 
1444 		/*
1445 		 * If the release is being done by the user via the CLI
1446 		 * we need to notify the DCS to release this node as being
1447 		 * the primary. The reason nothing else needs to be done
1448 		 * is due to the fact that the reservation code will exec
1449 		 * metaset -C release to complete the operation.
1450 		 */
1451 		rval = sdssc_notify_service(sname, Release_Primary);
1452 		if (rval == SDSSC_ERROR) {
1453 			printf(gettext(
1454 			    "metaset: failed to notify DCS of release\n"));
1455 		}
1456 		md_exit(NULL, rval == SDSSC_ERROR);
1457 	}
1458 
1459 	if ((sp = metasetname(sname, ep)) == NULL) {
1460 
1461 		/*
1462 		 * It's entirely possible for the SC3.0 reservation code
1463 		 * to call for DiskSet to release a diskset and have that
1464 		 * diskset not exist. During a diskset removal DiskSuite
1465 		 * maybe able to remove all traces of the diskset before
1466 		 * the reservation code execs metaset -C release in which
1467 		 * case the metasetname will fail, but the overall command
1468 		 * shouldn't.
1469 		 */
1470 		if (vers.major == 3)
1471 			md_exit(sp, 0);
1472 		else {
1473 			mde_perror(ep, "");
1474 			md_exit(sp, 1);
1475 		}
1476 	}
1477 
1478 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1479 		mde_perror(ep, "");
1480 		md_exit(sp, 1);
1481 	}
1482 
1483 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1484 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1485 		md_exit(sp, 1);
1486 	}
1487 
1488 	if (meta_lock_nowait(sp, ep) != 0) {
1489 		if (no_lock) {
1490 			mdclrerror(ep);		/* continue */
1491 		} else {
1492 			mde_perror(ep, "");
1493 			md_exit(sp, 10);	/* special errcode */
1494 		}
1495 	}
1496 
1497 	if (meta_set_release(sp, ep)) {
1498 		mde_perror(ep, "");
1499 		md_exit(sp, 1);
1500 	}
1501 	md_exit(sp, 0);
1502 }
1503 
1504 /* Should never be called with sname of a Multinode diskset. */
1505 static void
1506 parse_takeset(int argc, char **argv)
1507 {
1508 	int		c;
1509 	mdsetname_t	*sp = NULL;
1510 	int		flags = 0;
1511 	char		*sname = MD_LOCAL_NAME;
1512 	mhd_mhiargs_t	mhiargs;
1513 	char 		*cp = NULL;
1514 	int		pos = -1;	/* position of timeout value */
1515 	int		usetag = 0;
1516 	static char	*nullopts[] = { NULL };
1517 	md_error_t	status = mdnullerror;
1518 	md_error_t	*ep = &status;
1519 	int		no_lock = 0;
1520 	sdssc_boolean_e	cluster_take = SDSSC_False;
1521 	sdssc_version_t	vers;
1522 	rval_e		rval;
1523 
1524 	/* reset and parse args */
1525 	optind = 1;
1526 	opterr = 1;
1527 	while ((c = getopt(argc, argv, "C:fns:tu:y")) != -1) {
1528 		switch (c) {
1529 		case 'C':
1530 			cluster_take = SDSSC_True;
1531 			break;
1532 		case 'f':
1533 			flags |= TAKE_FORCE;
1534 			break;
1535 		case 'n':
1536 			no_lock = 1;
1537 			break;
1538 		case 's':
1539 			sname = optarg;
1540 			break;
1541 		case 't':
1542 			break;
1543 		case 'u':
1544 			usetag = atoi(optarg);
1545 			flags |= TAKE_USETAG;
1546 			break;
1547 		case 'y':
1548 			flags |= TAKE_USEIT;
1549 			break;
1550 		default:
1551 			usage(sp, gettext("unknown options"));
1552 		}
1553 	}
1554 
1555 	mhiargs = defmhiargs;
1556 
1557 	argc -= optind;
1558 	argv += optind;
1559 
1560 	if (argc > 1)
1561 		usage(sp, gettext("too many args"));
1562 
1563 	/*
1564 	 * If we have a list of timeout value overrides, handle it here
1565 	 */
1566 	while (argv[0] != NULL && *argv[0] != '\0') {
1567 		/*
1568 		 * The use of the nullopts[] "token list" here is to make
1569 		 * getsubopts() simply parse a comma separated list
1570 		 * returning either "" or the contents of the field, the
1571 		 * end condition is exaustion of the initial string, which
1572 		 * is modified in the process.
1573 		 */
1574 		(void) getsubopt(&argv[0], nullopts, &cp);
1575 
1576 		c = 0;			/* re-use c as temp value of timeout */
1577 
1578 		if (*cp != '-')		/* '-' uses default */
1579 			c = atoi(cp);
1580 
1581 		if (c < 0) {
1582 			usage(sp, gettext(
1583 			    "time out values must be > 0"));
1584 		}
1585 
1586 		if (++pos > 3) {
1587 			usage(sp, gettext(
1588 			    "too many timeout values specified."));
1589 		}
1590 
1591 		if (c == 0)		/* 0 or "" field uses default */
1592 			continue;
1593 
1594 		/*
1595 		 * Assign temp value to appropriate structure member based on
1596 		 * its position in the comma separated list.
1597 		 */
1598 		switch (pos) {
1599 		    case 0:
1600 			mhiargs.mh_ff = c;
1601 			break;
1602 
1603 		    case 1:
1604 			mhiargs.mh_tk.reinstate_resv_delay = c;
1605 			break;
1606 
1607 		    case 2:
1608 			mhiargs.mh_tk.min_ownership_delay = c;
1609 			break;
1610 
1611 		    case 3:
1612 			mhiargs.mh_tk.max_ownership_delay = c;
1613 			break;
1614 		}
1615 	}
1616 
1617 	memset(&vers, 0, sizeof (vers));
1618 
1619 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1620 	    (vers.major == 3) &&
1621 	    (cluster_take == SDSSC_False)) {
1622 
1623 		/*
1624 		 * If the take is beging done by the user via the CLI we need
1625 		 * to notify the DCS to make this current node the primary.
1626 		 * The SC3.0 reservation code will in turn exec metaset with
1627 		 * the -C take arg to complete this operation.
1628 		 */
1629 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1630 		    SDSSC_ERROR) {
1631 			printf(gettext(
1632 			    "metaset: failed to notify DCS of take\n"));
1633 		}
1634 		md_exit(NULL, rval == SDSSC_ERROR);
1635 	}
1636 
1637 	if ((sp = metasetname(sname, ep)) == NULL) {
1638 		mde_perror(ep, "");
1639 		md_exit(sp, 1);
1640 	}
1641 
1642 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1643 
1644 		/*
1645 		 * If we're running in a cluster environment and this
1646 		 * node already owns the set. Don't bother trying to
1647 		 * take the set again. There's one case where an adminstrator
1648 		 * is adding disks to a set for the first time. metaset
1649 		 * will take the ownership of the set at that point. During
1650 		 * that add operation SC3.0 notices activity on the device
1651 		 * and also tries to perform a take operation. The SC3.0 take
1652 		 * will fail because the adminstrative add has the set locked
1653 		 */
1654 		md_exit(sp, 0);
1655 	}
1656 
1657 	if (meta_lock_nowait(sp, ep) != 0) {
1658 		if (no_lock) {
1659 			mdclrerror(ep);
1660 		} else {
1661 			mde_perror(ep, "");
1662 			md_exit(sp, 10);	/* special errcode */
1663 		}
1664 	}
1665 
1666 	if (meta_set_take(sp, &mhiargs, flags, usetag, &status)) {
1667 		mde_perror(&status, "");
1668 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1669 			md_exit(sp, 2);
1670 		if (mdismddberror(&status, MDE_DB_ACCOK))
1671 			md_exit(sp, 3);
1672 		if (mdismddberror(&status, MDE_DB_STALE))
1673 			md_exit(sp, 66);
1674 		md_exit(sp, 1);
1675 	}
1676 	md_exit(sp, 0);
1677 }
1678 
1679 /*
1680  * Joins a node to a specific set or to all multinode disksets known
1681  * by this node.  If set is specified then caller should have verified
1682  * that the set is a multinode diskset.
1683  *
1684  * If an error occurs, metaset exits with a 1.
1685  * If there is no error, metaset exits with a 0.
1686  */
1687 static void
1688 parse_joinset(int argc, char **argv)
1689 {
1690 	int		c;
1691 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1692 	char		*sname = MD_LOCAL_NAME;
1693 	md_error_t	status = mdnullerror;
1694 	md_error_t	*ep = &status;
1695 	md_set_desc	*sd;
1696 	char		buf[BUFSIZ];
1697 	char		*p = buf;
1698 	set_t		max_sets, setno;
1699 	int		err, cumm_err = 0;
1700 	size_t		bufsz;
1701 
1702 	bufsz = sizeof (buf);
1703 	/* reset and parse args */
1704 	optind = 1;
1705 	opterr = 1;
1706 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1707 		switch (c) {
1708 		case 'M':
1709 			break;
1710 		case 'j':
1711 			break;
1712 		case 's':
1713 			sname = optarg;
1714 			break;
1715 		default:
1716 			usage(sp, gettext("unknown options"));
1717 		}
1718 	}
1719 
1720 	argc -= optind;
1721 	argv += optind;
1722 
1723 	if (argc > 1)
1724 		usage(sp, gettext("too many args"));
1725 
1726 	/*
1727 	 * If no setname option was used, then join all disksets
1728 	 * that this node knows about.   Attempt to join all
1729 	 * disksets that this node knows about.
1730 	 *
1731 	 * Additional text is added to the error messages during
1732 	 * this section of code in order to help the user understand
1733 	 * why the 'join of all sets' failed and which set caused
1734 	 * the failure.
1735 	 */
1736 
1737 	/*
1738 	 * Hold local set lock throughout this call to keep
1739 	 * other actions from interfering (such as creating a new
1740 	 * set, etc.).
1741 	 */
1742 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1743 		mde_perror(ep, "");
1744 		md_exit(sp, 1);
1745 	}
1746 
1747 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1748 		mde_perror(ep, "");
1749 		md_exit(local_sp, 1);
1750 	}
1751 
1752 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1753 		/*
1754 		 * If no set name is given, then walk through all sets
1755 		 * on this node which could include:
1756 		 * 	- MN disksets
1757 		 *	- traditional disksets
1758 		 *	- non-existent disksets
1759 		 * Attempt to join the MN disksets.
1760 		 * If the join of one set fails, print out an error message
1761 		 * about that set and continue the walk.
1762 		 */
1763 		if ((max_sets = get_max_sets(ep)) == 0) {
1764 			mde_perror(ep, "");
1765 			md_exit(local_sp, 1);
1766 		}
1767 
1768 		/* Start walking through all possible disksets */
1769 		for (setno = 1; setno < max_sets; setno++) {
1770 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1771 				if (mdiserror(ep, MDE_NO_SET)) {
1772 					/* No set for this setno - continue */
1773 					mdclrerror(ep);
1774 					continue;
1775 				} else {
1776 					(void) sprintf(p, gettext(
1777 					"Unable to get set %d information"),
1778 					    setno);
1779 					mde_perror(ep, p);
1780 					cumm_err = 1;
1781 					mdclrerror(ep);
1782 					continue;
1783 				}
1784 			}
1785 
1786 			/* If setname is there, set desc should exist. */
1787 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1788 				(void) snprintf(p, bufsz, gettext(
1789 				    "Unable to get set %s desc information"),
1790 				    sp->setname);
1791 				mde_perror(ep, p);
1792 				cumm_err = 1;
1793 				mdclrerror(ep);
1794 				continue;
1795 			}
1796 
1797 			/* Only check MN disksets */
1798 			if (!MD_MNSET_DESC(sd)) {
1799 				continue;
1800 			}
1801 
1802 			/*
1803 			 * Return value of 0 is success.
1804 			 * Return value of -1 means a failure.
1805 			 * Return value of -2 means set could not be
1806 			 * joined, but shouldn't cause an error.
1807 			 * Reasons would be:
1808 			 * 	- no drives in set
1809 			 * 	- node already joined to set
1810 			 * Return value of -3 means joined stale set.
1811 			 * Can't check for all reasons here
1812 			 * since set isn't locked yet across all
1813 			 * nodes in the cluster.  The call
1814 			 * to libmeta routine, meta_set_join, will
1815 			 * lock across the cluster and perform
1816 			 * the checks.
1817 			 */
1818 			if ((err = meta_set_join(sp, ep)) == -1) {
1819 				/* Print error of diskset join failure */
1820 				(void) snprintf(p, bufsz,
1821 				    gettext("Join to diskset %s failed"),
1822 				    sp->setname);
1823 				mde_perror(ep, p);
1824 				cumm_err = 1;
1825 				mdclrerror(ep);
1826 				continue;
1827 			}
1828 
1829 			if (err == -3) {
1830 				/* Print error of diskset join failure */
1831 				(void) snprintf(p, bufsz,
1832 				    gettext("Joined to stale diskset %s"),
1833 				    sp->setname);
1834 				mde_perror(ep, p);
1835 				mdclrerror(ep);
1836 			}
1837 
1838 			mdclrerror(ep);
1839 		}
1840 
1841 		md_exit(local_sp, cumm_err);
1842 	}
1843 
1844 	/*
1845 	 * Code for a specific set is much simpler.
1846 	 * Error messages don't need extra text since specific setname
1847 	 * was used.
1848 	 * Don't need to lock the local set, just the specific set given.
1849 	 */
1850 	if ((sp = metasetname(sname, ep)) == NULL) {
1851 		mde_perror(ep, "");
1852 		md_exit(local_sp, 1);
1853 	}
1854 
1855 	/*
1856 	 * Fail command if meta_set_join returns -1.
1857 	 *
1858 	 * Return of 0 means that node joined set.
1859 	 *
1860 	 * Return of -2 means that node was unable to
1861 	 * join a set since that set had no drives
1862 	 * or that had already joined the set.  No
1863 	 * need to fail the command for these reasons.
1864 	 *
1865 	 * Return of -3 means that set is stale.
1866 	 * Return a value of 66 to historically match traditional disksets.
1867 	 */
1868 	if ((err = meta_set_join(sp, ep)) == -1) {
1869 		mde_perror(&status, "");
1870 		md_exit(local_sp, 1);
1871 	}
1872 
1873 	if (err == -3) {
1874 		/* Print error of diskset join failure */
1875 		(void) snprintf(p, bufsz,
1876 		    gettext("Joined to stale diskset %s"),
1877 		    sp->setname);
1878 		mde_perror(&status, "");
1879 		md_exit(local_sp, 66);
1880 	}
1881 
1882 	md_exit(local_sp, 0);
1883 }
1884 
1885 /*
1886  * Withdraws a node from a specific set or from all multinode disksets known
1887  * by this node.  If set is specified then caller should have verified
1888  * that the set is a multinode diskset.
1889  *
1890  * If an error occurs, metaset exits with a 1.
1891  * If there is no error, metaset exits with a 0.
1892  */
1893 static void
1894 parse_withdrawset(int argc, char **argv)
1895 {
1896 	int		c;
1897 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1898 	char		*sname = MD_LOCAL_NAME;
1899 	md_error_t	status = mdnullerror;
1900 	md_error_t	*ep = &status;
1901 	char		buf[BUFSIZ];
1902 	char		*p = buf;
1903 	md_set_desc	*sd;
1904 	set_t		max_sets, setno;
1905 	int		err, cumm_err = 0;
1906 	size_t		bufsz;
1907 
1908 	bufsz = sizeof (buf);
1909 	/* reset and parse args */
1910 	optind = 1;
1911 	opterr = 1;
1912 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1913 		switch (c) {
1914 		case 'M':
1915 			break;
1916 		case 'w':
1917 			break;
1918 		case 's':
1919 			sname = optarg;
1920 			break;
1921 		default:
1922 			usage(sp, gettext("unknown options"));
1923 		}
1924 	}
1925 
1926 	argc -= optind;
1927 	argv += optind;
1928 
1929 	if (argc > 1)
1930 		usage(sp, gettext("too many args"));
1931 
1932 	/*
1933 	 * If no setname option was used, then withdraw from all disksets
1934 	 * that this node knows about.
1935 	 *
1936 	 * Additional text is added to the error messages during
1937 	 * this section of code in order to help the user understand
1938 	 * why the 'withdraw from all sets' failed and which set caused
1939 	 * the failure.
1940 	 */
1941 
1942 	/*
1943 	 * Hold local set lock throughout this call to keep
1944 	 * other actions from interfering (such as creating a new
1945 	 * set, etc.).
1946 	 */
1947 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1948 		mde_perror(ep, "");
1949 		md_exit(sp, 1);
1950 	}
1951 
1952 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1953 		mde_perror(ep, "");
1954 		md_exit(local_sp, 1);
1955 	}
1956 
1957 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1958 		/*
1959 		 * If no set name is given, then walk through all sets
1960 		 * on this node which could include:
1961 		 * 	- MN disksets
1962 		 *	- traditional disksets
1963 		 *	- non-existent disksets
1964 		 * Attempt to withdraw from the MN disksets.
1965 		 * If the withdraw of one set fails, print out an error
1966 		 * message about that set and continue the walk.
1967 		 */
1968 		if ((max_sets = get_max_sets(ep)) == 0) {
1969 			mde_perror(ep, "");
1970 			md_exit(local_sp, 1);
1971 		}
1972 
1973 		/* Start walking through all possible disksets */
1974 		for (setno = 1; setno < max_sets; setno++) {
1975 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1976 				if (mdiserror(ep, MDE_NO_SET)) {
1977 					/* No set for this setno - continue */
1978 					mdclrerror(ep);
1979 					continue;
1980 				} else {
1981 					(void) sprintf(p, gettext(
1982 					    "Unable to get set %d information"),
1983 					    setno);
1984 					mde_perror(ep, p);
1985 					cumm_err = 1;
1986 					mdclrerror(ep);
1987 					continue;
1988 				}
1989 			}
1990 
1991 			/* If setname is there, set desc should exist. */
1992 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1993 				(void) snprintf(p, bufsz, gettext(
1994 				    "Unable to get set %s desc information"),
1995 				    sp->setname);
1996 				mde_perror(ep, p);
1997 				cumm_err = 1;
1998 				mdclrerror(ep);
1999 				continue;
2000 			}
2001 
2002 			/* Only check MN disksets */
2003 			if (!MD_MNSET_DESC(sd)) {
2004 				continue;
2005 			}
2006 
2007 			/*
2008 			 * Return value of 0 is success.
2009 			 * Return value of -1 means a failure.
2010 			 * Return value of -2 means set could not be
2011 			 * withdrawn from, but this shouldn't cause
2012 			 * an error.  Reasons would be:
2013 			 * 	- no drives in set
2014 			 * 	- node already withdrawn from set
2015 			 * Can't check for all reasons here
2016 			 * since set isn't locked yet across all
2017 			 * nodes in the cluster.  The call
2018 			 * to libmeta routine, meta_set_withdraw, will
2019 			 * lock across the cluster and perform
2020 			 * the checks.
2021 			 */
2022 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2023 				/* Print error of diskset withdraw failure */
2024 				(void) snprintf(p, bufsz,
2025 				    gettext("Withdraw from diskset %s failed"),
2026 				    sp->setname);
2027 				mde_perror(ep, p);
2028 				mdclrerror(ep);
2029 				cumm_err = 1;
2030 				continue;
2031 			}
2032 
2033 			if (err == -2) {
2034 				mdclrerror(ep);
2035 				continue;
2036 			}
2037 
2038 			mdclrerror(ep);
2039 		}
2040 		md_exit(local_sp, cumm_err);
2041 	}
2042 
2043 
2044 	/*
2045 	 * Code for a specific set is much simpler.
2046 	 * Error messages don't need extra text since specific setname
2047 	 * was used.
2048 	 * Don't need to lock the local set, just the specific set given.
2049 	 */
2050 	if ((sp = metasetname(sname, ep)) == NULL) {
2051 		mde_perror(ep, "");
2052 		md_exit(local_sp, 1);
2053 	}
2054 
2055 	/*
2056 	 * Fail command if meta_set_withdraw returns -1.
2057 	 *
2058 	 * Return of 0 means that node withdrew from set.
2059 	 *
2060 	 * Return of -2 means that node was unable to
2061 	 * withdraw from a set since that set had no drives
2062 	 * or node was not joined to set.  No
2063 	 * need to fail the command for these reasons.
2064 	 */
2065 	if (meta_set_withdraw(sp, ep) == -1) {
2066 		mde_perror(&status, "");
2067 		md_exit(local_sp, 1);
2068 	}
2069 
2070 	md_exit(local_sp, 0);
2071 }
2072 
2073 /*
2074  * Should never be called with sname of a Multinode diskset.
2075  */
2076 static void
2077 parse_cluster(int argc, char **argv)
2078 {
2079 	int			c,
2080 				error,
2081 				new_argc,
2082 				x;
2083 	enum cluster_cmd	cmd = ccnotspecified;
2084 	char			*hostname = SDSSC_PROXY_PRIMARY,
2085 				*argument = NULL,
2086 				*sname = MD_LOCAL_NAME,
2087 				primary_node[SDSSC_NODE_NAME_LEN],
2088 				**new_argv = NULL,
2089 				**np = NULL;
2090 	mdsetname_t		*sp = NULL;
2091 	md_error_t		status = mdnullerror;
2092 	md_error_t		*ep = &status;
2093 
2094 	/* reset and parse args */
2095 	optind = 1;
2096 	opterr = 1;
2097 	while ((c = getopt(argc, argv, "C:s:h:fntu:yr")) != -1) {
2098 		switch (c) {
2099 		case 'C':
2100 			if (cmd != ccnotspecified) {
2101 				md_exit(sp, -1);
2102 			}
2103 			argument = optarg;
2104 
2105 			if (strcmp(argument, "disksin") == 0) {
2106 				cmd = clusterdisksin;
2107 			} else if (strcmp(argument, "version") == 0) {
2108 				cmd = clusterversion;
2109 			} else if (strcmp(argument, "release") == 0) {
2110 				cmd = clusterrelease;
2111 			} else if (strcmp(argument, "take") == 0) {
2112 				cmd = clustertake;
2113 			} else if (strcmp(argument, "proxy") == 0) {
2114 				cmd = clusterproxy;
2115 			} else if (strcmp(argument, "purge") == 0) {
2116 				cmd = clusterpurge;
2117 			} else {
2118 				md_exit(sp, -1);
2119 			}
2120 
2121 			break;
2122 
2123 		case 'h':
2124 			hostname = optarg;
2125 			break;
2126 
2127 		case 's':
2128 			sname = optarg;
2129 			break;
2130 
2131 		case 'f':
2132 		case 'n':
2133 		case 't':
2134 		case 'u':
2135 		case 'y':
2136 		case 'r':
2137 			break;
2138 
2139 		default:
2140 			md_exit(sp, -1);
2141 		}
2142 	}
2143 
2144 	/* Now call the appropriate command function. */
2145 	switch (cmd) {
2146 	case clusterversion:
2147 	    printclusterversion();
2148 	    break;
2149 
2150 	case clusterdisksin:
2151 	    if (printdisksin(sname, ep)) {
2152 		md_exit(sp, -1);
2153 	    }
2154 	    break;
2155 
2156 	case clusterrelease:
2157 	    parse_releaseset(argc, argv);
2158 	    break;
2159 
2160 	case clustertake:
2161 	    parse_takeset(argc, argv);
2162 	    break;
2163 
2164 	case clusterproxy:
2165 		/* Should never get here if sname is for MN diskset */
2166 
2167 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2168 			printf(gettext("Out of memory\n"));
2169 			md_exit(sp, 1);
2170 		}
2171 
2172 		np = new_argv;
2173 		new_argc = 0;
2174 		memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2175 
2176 		for (x = 0; x < argc; x++) {
2177 			if (strcmp(argv[x], "-C") == 0) {
2178 
2179 				/*
2180 				 * Need to skip the '-C proxy' args so
2181 				 * just increase x by one and the work is
2182 				 * done.
2183 				 */
2184 				x++;
2185 			} else {
2186 				*np++ = strdup(argv[x]);
2187 				new_argc++;
2188 			}
2189 		}
2190 
2191 		switch (sdssc_get_primary_host(sname, primary_node,
2192 		    SDSSC_NODE_NAME_LEN)) {
2193 		case SDSSC_ERROR:
2194 			md_exit(sp, 1);
2195 			break;
2196 
2197 		case SDSSC_NO_SERVICE:
2198 			if (hostname != SDSSC_PROXY_PRIMARY) {
2199 				(void) strlcpy(primary_node, hostname,
2200 				    SDSSC_NODE_NAME_LEN);
2201 			}
2202 			break;
2203 		}
2204 
2205 		if (sdssc_cmd_proxy(new_argc, new_argv,
2206 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2207 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2208 			md_exit(sp, error);
2209 		} else {
2210 			printf(gettext(
2211 			    "Couldn't proxy command\n"));
2212 			md_exit(sp, 1);
2213 		}
2214 		break;
2215 
2216 	case clusterpurge:
2217 		parse_purge(argc, argv);
2218 		break;
2219 
2220 	default:
2221 	    break;
2222 	}
2223 
2224 	md_exit(sp, 0);
2225 }
2226 
2227 /*
2228  * parse args and do it
2229  */
2230 int
2231 main(int argc, char *argv[])
2232 {
2233 	enum metaset_cmd	cmd = notspecified;
2234 	md_error_t		status = mdnullerror;
2235 	md_error_t		*ep = &status;
2236 	mdsetname_t		*sp = NULL;
2237 	char			*hostname = SDSSC_PROXY_PRIMARY,
2238 				*sname = MD_LOCAL_NAME,
2239 				*auto_take_option = NULL,
2240 				primary_node[SDSSC_NODE_NAME_LEN];
2241 	int			error,
2242 				c,
2243 				auto_take = FALSE,
2244 				stat;
2245 	md_set_desc		*sd;
2246 	int			mflag = 0;
2247 	int			multi_node = 0;
2248 	rval_e			sdssc_res;
2249 
2250 	/*
2251 	 * Get the locale set up before calling any other routines
2252 	 * with messages to ouput.  Just in case we're not in a build
2253 	 * environment, make sure that TEXT_DOMAIN gets set to
2254 	 * something.
2255 	 */
2256 #if !defined(TEXT_DOMAIN)
2257 #define	TEXT_DOMAIN "SYS_TEST"
2258 #endif
2259 	(void) setlocale(LC_ALL, "");
2260 	(void) textdomain(TEXT_DOMAIN);
2261 
2262 	sdssc_res = sdssc_bind_library();
2263 	if (sdssc_res == SDSSC_ERROR) {
2264 		printf(gettext(
2265 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2266 		exit(1);
2267 	}
2268 
2269 	/* initialize */
2270 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2271 		mde_perror(ep, "");
2272 		md_exit(sp, 1);
2273 	}
2274 
2275 	optind = 1;
2276 	opterr = 1;
2277 
2278 	/*
2279 	 * NOTE: The "C" option is strictly for cluster use. it is not
2280 	 * and should not be documented for the customer. - JST
2281 	 */
2282 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:noPqrs:tu:wy?"))
2283 	    != -1) {
2284 		switch (c) {
2285 		case 'M':
2286 			mflag = 1;
2287 			break;
2288 		case 'A':
2289 			auto_take = TRUE;
2290 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2291 			    strcmp(optarg, "disable") == 0))
2292 				usage(sp, gettext(
2293 				    "-A: enable or disable must be specified"));
2294 			auto_take_option = optarg;
2295 			break;
2296 		case 'a':
2297 			if (cmd != notspecified) {
2298 				usage(sp, gettext(
2299 				    "conflicting options"));
2300 			}
2301 			cmd = add;
2302 			break;
2303 		case 'b':
2304 			if (cmd != notspecified) {
2305 				usage(sp, gettext(
2306 				    "conflicting options"));
2307 			}
2308 			cmd = balance;
2309 			break;
2310 		case 'd':
2311 			if (cmd != notspecified) {
2312 				usage(sp, gettext(
2313 				    "conflicting options"));
2314 			}
2315 			cmd = delete;
2316 			break;
2317 		case 'C':	/* cluster commands */
2318 			if (cmd != notspecified) {
2319 				md_exit(sp, -1);    /* conflicting options */
2320 			}
2321 			cmd = cluster;
2322 			break;
2323 		case 'f':
2324 			break;
2325 		case 'h':
2326 			hostname = optarg;
2327 			break;
2328 		case 'j':
2329 			if (cmd != notspecified) {
2330 				usage(sp, gettext(
2331 				    "conflicting options"));
2332 			}
2333 			cmd = join;
2334 			break;
2335 		case 'l':
2336 			break;
2337 		case 'L':
2338 			break;
2339 		case 'm':
2340 			break;
2341 		case 'n':
2342 			break;
2343 		case 'o':
2344 			if (cmd != notspecified) {
2345 				usage(sp, gettext(
2346 				    "conflicting options"));
2347 			}
2348 			cmd = isowner;
2349 			break;
2350 		case 'P':
2351 			if (cmd != notspecified) {
2352 				usage(sp, gettext(
2353 				    "conflicting options"));
2354 			}
2355 			cmd = purge;
2356 			break;
2357 		case 'q':
2358 			if (cmd != notspecified) {
2359 				usage(sp, gettext(
2360 				    "conflicting options"));
2361 			}
2362 			cmd = query;
2363 			break;
2364 		case 'r':
2365 			if (cmd != notspecified) {
2366 				usage(sp, gettext(
2367 				    "conflicting options"));
2368 			}
2369 			cmd = release;
2370 			break;
2371 		case 's':
2372 			sname = optarg;
2373 			break;
2374 		case 't':
2375 			if (cmd != notspecified) {
2376 				usage(sp, gettext(
2377 				    "conflicting options"));
2378 			}
2379 			cmd = take;
2380 			break;
2381 		case 'u':
2382 			break;
2383 		case 'w':
2384 			if (cmd != notspecified) {
2385 				usage(sp, gettext(
2386 				    "conflicting options"));
2387 			}
2388 			cmd = withdraw;
2389 			break;
2390 		case 'y':
2391 			break;
2392 		case '?':
2393 			if (optopt == '?')
2394 				usage(sp, NULL);
2395 			/*FALLTHROUGH*/
2396 		default:
2397 			if (cmd == cluster) {    /* cluster is silent */
2398 				md_exit(sp, -1);
2399 			} else {
2400 				usage(sp, gettext(
2401 				    "unknown command"));
2402 			}
2403 		}
2404 	}
2405 
2406 	/* check if suncluster is installed and -A enable specified */
2407 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2408 	    strcmp(auto_take_option, "enable") == 0) {
2409 	    md_eprintf(gettext(
2410 		"cannot enable auto-take when SunCluster is installed\n"));
2411 	    md_exit(sp, 1);
2412 	}
2413 
2414 	/*
2415 	 * At this point we know that if the -A enable option is specified
2416 	 * for an auto-take diskset that SC is not installed on the machine, so
2417 	 * all of the sdssc calls will just be no-ops.
2418 	 */
2419 
2420 	/* list sets */
2421 	if (cmd == notspecified && auto_take == FALSE) {
2422 		parse_printset(argc, argv);
2423 		/*NOTREACHED*/
2424 	}
2425 
2426 	if (meta_check_root(ep) != 0) {
2427 		mde_perror(ep, "");
2428 		md_exit(sp, 1);
2429 	}
2430 
2431 	/* snarf MDDB */
2432 	if (meta_setup_db_locations(ep) != 0) {
2433 		mde_perror(ep, "");
2434 		md_exit(sp, 1);
2435 	}
2436 
2437 	/*
2438 	 * If sname is a diskset - check for multi_node.
2439 	 * It is possible for sname to not exist.
2440 	 */
2441 	if (strcmp(sname, MD_LOCAL_NAME)) {
2442 		if ((sp = metasetname(sname, ep)) != NULL) {
2443 			/* Set exists - check for MN diskset */
2444 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2445 				mde_perror(ep, "");
2446 				md_exit(sp, 1);
2447 			}
2448 			if (MD_MNSET_DESC(sd)) {
2449 				/*
2450 				 * If a MN diskset always set multi_node
2451 				 * regardless of whether the -M option was
2452 				 * used or not (mflag).
2453 				 */
2454 				multi_node = 1;
2455 			} else {
2456 				/*
2457 				 * If a traditional diskset, mflag must
2458 				 * not be set.
2459 				 */
2460 				if (mflag) {
2461 					usage(sp, gettext(
2462 					    "-M option only allowed "
2463 					    "on multi-owner diskset"));
2464 				}
2465 			}
2466 		} else {
2467 			/*
2468 			 * Set name does not exist, set multi_node
2469 			 * based on -M option.
2470 			 */
2471 			if (mflag) {
2472 				multi_node = 1;
2473 			}
2474 		}
2475 	}
2476 
2477 	if (auto_take && multi_node) {
2478 		/* Can't mix multinode and auto-take on a diskset */
2479 		usage(sp,
2480 		    gettext("-A option not allowed on multi-owner diskset"));
2481 	}
2482 
2483 	/*
2484 	 * MN disksets don't use DCS clustering services, so
2485 	 * do not get primary_node for MN diskset since no command
2486 	 * proxying is done to Primary cluster node.  Do not proxy
2487 	 * MN diskset commands of join and withdraw when issued without
2488 	 * a valid setname.
2489 	 * For traditional disksets: proxy all commands except a take
2490 	 * and release.  Use first host listed as the host to send the
2491 	 * command to if there isn't already a primary
2492 	 */
2493 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2494 	    (cmd != take) && (cmd != release) &&
2495 	    (cmd != cluster) && (cmd != join) &&
2496 	    (cmd != withdraw) && (cmd != purge)) {
2497 		stat = sdssc_get_primary_host(sname, primary_node,
2498 		    SDSSC_NODE_NAME_LEN);
2499 		switch (stat) {
2500 			case SDSSC_ERROR:
2501 				return (0);
2502 
2503 			case SDSSC_NO_SERVICE:
2504 				if (hostname != SDSSC_PROXY_PRIMARY) {
2505 					(void) strlcpy(primary_node, hostname,
2506 					    SDSSC_NODE_NAME_LEN);
2507 				} else {
2508 					memset(primary_node, '\0',
2509 					    SDSSC_NODE_NAME_LEN);
2510 				}
2511 				break;
2512 		}
2513 
2514 		/*
2515 		 * We've got a complicated decision here regarding
2516 		 * the hostname. If we didn't get a primary host
2517 		 * and a host name wasn't supplied on the command line
2518 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2519 		 * use what's been found.
2520 		 */
2521 		if (sdssc_cmd_proxy(argc, argv,
2522 		    primary_node[0] == '\0' ?
2523 			SDSSC_PROXY_PRIMARY : primary_node,
2524 		    &error) == SDSSC_PROXY_DONE) {
2525 			exit(error);
2526 		}
2527 	}
2528 
2529 	/* cluster-specific commands */
2530 	if (cmd == cluster) {
2531 		if (multi_node) {
2532 			/*
2533 			 * If a specific MN diskset is given, immediately
2534 			 * fail -C command.
2535 			 */
2536 			usage(sp, gettext(
2537 			    "-C option not allowed on multi-owner diskset"));
2538 		} else {
2539 			parse_cluster(argc, argv);
2540 			/*NOTREACHED*/
2541 		}
2542 	}
2543 
2544 	/* join MultiNode diskset */
2545 	if (cmd == join) {
2546 		/*
2547 		 * If diskset specified, verify that it exists
2548 		 * and is a multinode diskset.
2549 		 */
2550 		if (strcmp(sname, MD_LOCAL_NAME)) {
2551 			if ((sp = metasetname(sname, ep)) == NULL) {
2552 				mde_perror(ep, "");
2553 				md_exit(sp, 1);
2554 			}
2555 
2556 			if (!multi_node) {
2557 				usage(sp, gettext(
2558 				    "-j option only allowed on "
2559 				    "multi-owner diskset"));
2560 			}
2561 		}
2562 		/*
2563 		 * Start mddoors daemon here.
2564 		 * mddoors itself takes care there will be only one
2565 		 * instance running, so starting it twice won't hurt
2566 		 */
2567 		pclose(popen("/usr/lib/lvm/mddoors", "w"));
2568 		parse_joinset(argc, argv);
2569 		/*NOTREACHED*/
2570 	}
2571 
2572 	/* withdraw from MultiNode diskset */
2573 	if (cmd == withdraw) {
2574 		/*
2575 		 * If diskset specified, verify that it exists
2576 		 * and is a multinode diskset.
2577 		 */
2578 		if (strcmp(sname, MD_LOCAL_NAME)) {
2579 			if ((sp = metasetname(sname, ep)) == NULL) {
2580 				mde_perror(ep, "");
2581 				md_exit(sp, 1);
2582 			}
2583 
2584 			if (!multi_node) {
2585 				usage(sp, gettext(
2586 				    "-w option only allowed on "
2587 				    "multi-owner diskset"));
2588 			}
2589 		}
2590 		parse_withdrawset(argc, argv);
2591 		/*NOTREACHED*/
2592 	}
2593 
2594 	/* must have set for everything else */
2595 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2596 		usage(sp, gettext("setname must be specified"));
2597 
2598 	/* add hosts or drives */
2599 	if (cmd == add) {
2600 		/*
2601 		 * In the multi node case start mddoors daemon.
2602 		 * mddoors itself takes care there will be
2603 		 * only one instance running, so starting it twice won't hurt
2604 		 */
2605 		if (multi_node) {
2606 			pclose(popen("/usr/lib/lvm/mddoors", "w"));
2607 		}
2608 
2609 		parse_add(argc, argv);
2610 		/*NOTREACHED*/
2611 	}
2612 
2613 	/* re-balance the replicas */
2614 	if (cmd == balance) {
2615 		parse_balance(argc, argv);
2616 		/*NOTREACHED*/
2617 	}
2618 
2619 	/* delete hosts or drives */
2620 	if (cmd == delete) {
2621 		parse_del(argc, argv);
2622 		/*NOTREACHED*/
2623 	}
2624 
2625 	/* check ownership */
2626 	if (cmd == isowner) {
2627 		parse_isowner(argc, argv);
2628 		/*NOTREACHED*/
2629 	}
2630 
2631 	/* purge the diskset */
2632 	if (cmd == purge) {
2633 		parse_purge(argc, argv);
2634 		/*NOTREACHED*/
2635 	}
2636 
2637 	/* query for data marks */
2638 	if (cmd == query) {
2639 		parse_query(argc, argv);
2640 		/*NOTREACHED*/
2641 	}
2642 
2643 	/* release ownership */
2644 	if (cmd == release) {
2645 		if (multi_node) {
2646 			/* Can't release multinode diskset */
2647 			usage(sp, gettext(
2648 			    "-r option not allowed on multi-owner diskset"));
2649 		} else {
2650 			parse_releaseset(argc, argv);
2651 			/*NOTREACHED*/
2652 		}
2653 	}
2654 
2655 	/* take ownership */
2656 	if (cmd == take) {
2657 		if (multi_node) {
2658 			/* Can't take multinode diskset */
2659 			usage(sp, gettext(
2660 			    "-t option not allowed on multi-owner diskset"));
2661 		} else {
2662 			parse_takeset(argc, argv);
2663 			/*NOTREACHED*/
2664 		}
2665 	}
2666 
2667 	/* take ownership of auto-take sets */
2668 	if (auto_take) {
2669 		parse_autotake(argc, argv);
2670 		/*NOTREACHED*/
2671 	}
2672 
2673 	/*NOTREACHED*/
2674 	return (0);
2675 }
2676