xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision c892ebf1bef94f4f922f282c11516677c134dbe0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Metadevice diskset utility.
30  */
31 
32 #include <meta.h>
33 #include <sys/lvm/md_mddb.h>
34 #include <sdssc.h>
35 
/*
 * Top-level operation requested on the metaset command line.
 * NOTE(review): the mapping from command-line option to enumerator is
 * made outside this block; the per-member notes below follow the member
 * names and the usage text and should be confirmed against that code.
 */
enum metaset_cmd {
	notspecified = 0,	/* no operation selected */
	add,			/* add hosts, mediators or drives */
	balance,		/* balance the set */
	delete,			/* delete hosts, mediators or drives */
	cluster,		/* cluster back-door sub-command */
	isowner,		/* ownership check */
	purge,			/* purge the set */
	query,			/* query */
	release,		/* release the set */
	take,			/* take the set */
	join,			/* Join a multinode diskset */
	withdraw		/* Withdraw from a multinode diskset */
};
50 
/*
 * Sub-commands of the cluster (contract private) interface.
 */
enum cluster_cmd {
	ccnotspecified = 0,	/* no cluster sub-command selected */
	clusterversion,		/* Return the version of the cluster I/F */
	clusterdisksin,		/* List disks in a given diskset */
	clustertake,		/* back door for Cluster take */
	clusterrelease,		/* back door for Cluster release */
	clusterpurge,		/* back door for Cluster purge */
	clusterproxy		/* proxy the args after '--' to primary */
};
60 
61 static void
62 usage(
63 	mdsetname_t	*sp,
64 	char		*string)
65 {
66 	if ((string != NULL) && (*string != '\0'))
67 		md_eprintf("%s\n", string);
68 	(void) fprintf(stderr, gettext(
69 "usage:	%s -s setname -a [-A enable | disable] -h hostname ...\n"
70 "	%s -s setname -a [-M] -h hostname ...\n"
71 "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
72 "	%s -s setname -d [-M] -h hostname ...\n"
73 "	%s -s setname -d [-M] -f -h all-hostnames\n"
74 "	%s -s setname -d [-M] [-f] drivename ...\n"
75 "	%s -s setname -d [-M] [-f] hostname ...\n"
76 "	%s -s setname -A enable | disable\n"
77 "	%s -s setname -t [-f]\n"
78 "	%s -s setname -r\n"
79 "	%s [-s setname] -j [-M]\n"
80 "	%s [-s setname] -w [-M]\n"
81 "	%s -s setname -P [-M]\n"
82 "	%s -s setname -b [-M]\n"
83 "	%s -s setname -o [-M] [-h hostname]\n"
84 "	%s [-s setname]\n"
85 "\n"
86 "		hostname = contents of /etc/nodename\n"
87 "		drivename = cNtNdN no slice\n"
88 "		[-M] for multi-owner set is optional except on set creation\n"),
89 	myname, myname, myname, myname, myname, myname, myname, myname,
90 	myname, myname, myname, myname, myname, myname, myname, myname);
91 	md_exit(sp, (string == NULL) ? 0 : 1);
92 }
93 
94 /*
95  * The svm.sync rc script relies heavily on the metaset output.
96  * Any changes to the metaset output MUST verify that the rc script
97  * does not break. Not doing so may potentially leave the system
98  * unusable. You have been WARNED.
99  */
100 static int
101 printset(mdsetname_t *sp, md_error_t *ep)
102 {
103 	int			i, j;
104 	md_set_desc		*sd;
105 	md_drive_desc		*dd, *p;
106 	int			max_meds;
107 	md_mnnode_desc		*nd;
108 
109 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
110 		return (-1);
111 
112 	/*
113 	 * Only get set owner information for traditional diskset.
114 	 * This set owner information is stored in the node records
115 	 * for a MN diskset.
116 	 */
117 	if (!(MD_MNSET_DESC(sd))) {
118 		if (metaget_setownership(sp, ep) == -1)
119 			return (-1);
120 	}
121 
122 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
123 	    ep)) == NULL) && !mdisok(ep))
124 		return (-1);
125 
126 	if (MD_MNSET_DESC(sd)) {
127 		(void) printf(gettext(
128 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
129 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
130 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
131 		    (dd != NULL)) {
132 			(void) printf(gettext(
133 				"Master and owner information unavailable "
134 				"until joined (metaset -j)\n"));
135 		}
136 	} else {
137 		(void) printf(gettext(
138 		    "\nSet name = %s, Set number = %d\n"),
139 		    sp->setname, sp->setno);
140 	}
141 
142 	if (MD_MNSET_DESC(sd)) {
143 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
144 			gettext("Host"), gettext("Owner"), gettext("Member"));
145 		nd = sd->sd_nodelist;
146 		while (nd) {
147 			/*
148 			 * Don't print nodes that aren't ok since they may be
149 			 * removed from config during a reconfig cycle.  If a
150 			 * node was being added to a diskset and the entire
151 			 * cluster went down but the node being added was unable
152 			 * to reboot, there's no way to know if that node had
153 			 * its own node record set to OK or not.  So, node
154 			 * record is left in ADD state during reconfig cycle.
155 			 * When that node reboots and returns to the cluster,
156 			 * the reconfig cycle will either remove the node
157 			 * record (if not marked OK on that node) or will mark
158 			 * it OK on all nodes.
159 			 * It is very important to only remove a node record
160 			 * from the other nodes when that node record is not
161 			 * marked OK on its own node - otherwise, different
162 			 * nodes would have different nodelists possibly
163 			 * causing different nodes to to choose different
164 			 * masters.
165 			 */
166 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
167 				nd = nd->nd_next;
168 				continue;
169 			}
170 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
171 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
172 				(void) printf(
173 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
174 				    nd->nd_nodename, gettext("multi-owner"),
175 				    gettext("Yes"));
176 			} else /* Should never be able to happen */
177 			    if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
178 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
179 				(void) printf(
180 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
181 				    nd->nd_nodename, gettext("multi-owner"),
182 				    gettext("No"));
183 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
184 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
185 				(void) printf(
186 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
187 				    nd->nd_nodename, gettext(""),
188 				    gettext("Yes"));
189 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
190 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
191 				(void) printf(
192 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
193 				    nd->nd_nodename, gettext(""),
194 				    gettext("No"));
195 			}
196 			nd = nd->nd_next;
197 		}
198 	} else {
199 		(void) printf("\n%-19.19s %-5.5s\n",
200 			gettext("Host"), gettext("Owner"));
201 		for (i = 0; i < MD_MAXSIDES; i++) {
202 			/* Skip empty slots */
203 			if (sd->sd_nodes[i][0] == '\0')
204 				continue;
205 
206 			/*
207 			 * Standard hostname field is 17 bytes but metaset will
208 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
209 			 */
210 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
211 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
212 				(sd->sd_isown[i] ? gettext("Yes (auto)") :
213 				    gettext("No (auto)"))
214 				: (sd->sd_isown[i] ? gettext("Yes") : "")));
215 		}
216 	}
217 
218 	if (sd->sd_med.n_cnt > 0)
219 		(void) printf("\n%-19.19s %-7.7s\n",
220 		    gettext("Mediator Host(s)"), gettext("Aliases"));
221 
222 	if ((max_meds = get_max_meds(ep)) == 0)
223 		return (-1);
224 
225 	for (i = 0; i < max_meds; i++) {
226 		if (sd->sd_med.n_lst[i].a_cnt == 0)
227 			continue;
228 		/*
229 		 * Standard hostname field is 17 bytes but metaset will
230 		 * display up to MD_MAX_NODENAME, def in meta_basic.h
231 		 */
232 		(void) printf("  %-17.*s   ", MD_MAX_NODENAME,
233 			sd->sd_med.n_lst[i].a_nm[0]);
234 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
235 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
236 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
237 				(void) printf(gettext(", "));
238 		}
239 		(void) printf("\n");
240 	}
241 
242 	if (dd) {
243 		int	len = 0;
244 
245 
246 		/*
247 		 * Building a format string on the fly that will
248 		 * be used in (f)printf. This allows the length
249 		 * of the ctd to vary from small to large without
250 		 * looking horrible.
251 		 */
252 		for (p = dd; p != NULL; p = p->dd_next)
253 			len = max(len, strlen(p->dd_dnp->cname));
254 
255 		len += 2;
256 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
257 		    gettext("Drive"),
258 		    gettext("Dbase"));
259 		for (p = dd; p != NULL; p = p->dd_next) {
260 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
261 			    p->dd_dnp->cname,
262 			    (p->dd_dbcnt ? gettext("Yes") :
263 			    gettext("No")));
264 		}
265 	}
266 
267 	return (0);
268 }
269 
270 static int
271 printsets(mdsetname_t *sp, md_error_t *ep)
272 {
273 	int			i;
274 	mdsetname_t		*sp1;
275 	set_t			max_sets;
276 
277 	/*
278 	 * print setname given.
279 	 */
280 	if (! metaislocalset(sp)) {
281 		if (printset(sp, ep))
282 			return (-1);
283 		return (0);
284 	}
285 
286 	if ((max_sets = get_max_sets(ep)) == 0)
287 		return (-1);
288 
289 	/*
290 	 * Print all known sets
291 	 */
292 	for (i = 1; i < max_sets; i++) {
293 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
294 			if (! mdiserror(ep, MDE_NO_SET))
295 				break;
296 			mdclrerror(ep);
297 			continue;
298 		}
299 
300 		if (printset(sp1, ep))
301 			break;
302 	}
303 	if (! mdisok(ep))
304 		return (-1);
305 
306 	return (0);
307 }
308 
309 /*
310  * Print the current versionn of the cluster contract private interface.
311  */
312 static void
313 printclusterversion()
314 {
315 	printf("%s\n", METASETIFVERSION);
316 }
317 
318 /*
319  * Print the disks that make up the given disk set. This is used
320  * exclusively by Sun Cluster and is contract private.
321  * Should never be called with sname of a Multinode diskset.
322  */
323 static int
324 printdisksin(char *sname, md_error_t *ep)
325 {
326 	mdsetname_t	*sp;
327 	md_drive_desc	*dd, *p;
328 
329 	if ((sp = metasetname(sname, ep)) == NULL) {
330 
331 		/*
332 		 * During a deletion of a set the associated service is
333 		 * put offline. The SC3.0 reservation code calls disksuite
334 		 * to find a list of disks associated with the set so that
335 		 * it can release the reservation on those disks. In this
336 		 * case there won't be any disks or even a set left. So just
337 		 * return.
338 		 */
339 		return (0);
340 	}
341 
342 	if (metaget_setownership(sp, ep) == -1)
343 		return (-1);
344 
345 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
346 	    ep)) == NULL) && !mdisok(ep))
347 		return (-1);
348 
349 	for (p = dd; p != NULL; p = p->dd_next)
350 		(void) printf("%s\n", p->dd_dnp->rname);
351 
352 	return (0);
353 }
354 
355 static void
356 parse_printset(int argc, char **argv)
357 {
358 	int		c;
359 	mdsetname_t	*sp = NULL;
360 	char		*sname = MD_LOCAL_NAME;
361 	md_error_t	status = mdnullerror;
362 	md_error_t	*ep = &status;
363 
364 	/* reset and parse args */
365 	optind = 1;
366 	opterr = 1;
367 	while ((c = getopt(argc, argv, "s:")) != -1) {
368 		switch (c) {
369 		case 's':
370 			sname = optarg;
371 			break;
372 		default:
373 			usage(sp, gettext("unknown options"));
374 		}
375 	}
376 
377 	argc -= optind;
378 	argv += optind;
379 
380 	if (argc != 0)
381 		usage(sp, gettext("too many args"));
382 
383 	if ((sp = metasetname(sname, ep)) == NULL) {
384 		mde_perror(ep, "");
385 		md_exit(sp, 1);
386 	}
387 
388 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
389 		mde_perror(ep, "");
390 		md_exit(sp, 1);
391 	}
392 
393 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
394 		mde_perror(ep, "");
395 		md_exit(sp, 1);
396 	}
397 
398 	md_exit(sp, 0);
399 }
400 
401 static void
402 parse_add(int argc, char **argv)
403 {
404 	int			c,
405 				created_set,
406 				hosts = FALSE,
407 				meds = FALSE,
408 				auto_take = FALSE,
409 				force_label = FALSE,
410 				default_size = TRUE;
411 	mdsetname_t		*sp = NULL;
412 	char			*sname = MD_LOCAL_NAME;
413 	md_error_t		status = mdnullerror,
414 				*ep = &status;
415 	mddrivenamelist_t	*dnlp = NULL;
416 	mddrivenamelist_t	*p;
417 	daddr_t			dbsize,
418 				nblks;
419 	mdsetname_t		*local_sp = NULL;
420 	int			multi_node = 0;
421 	md_set_desc		*sd;
422 	rval_e			sdssc_rval;
423 
424 	/* reset and parse args */
425 	optind = 1;
426 	opterr = 1;
427 	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
428 		switch (c) {
429 		case 'M':
430 			multi_node = 1;
431 			break;
432 		case 'A':
433 			/* verified sub-option in main */
434 			if (strcmp(optarg, "enable") == 0)
435 				auto_take = TRUE;
436 			break;
437 		case 'a':
438 			break;
439 		case 'h':
440 		case 'm':
441 			if (meds == TRUE || hosts == TRUE)
442 				usage(sp, gettext(
443 				    "only one -m or -h option allowed"));
444 
445 			if (default_size == FALSE || force_label == TRUE)
446 				usage(sp, gettext(
447 				    "conflicting options"));
448 
449 			if (c == 'h')
450 				hosts = TRUE;
451 			else
452 				meds = TRUE;
453 			break;
454 		case 'l':
455 			if (hosts == TRUE || meds == TRUE)
456 				usage(sp, gettext(
457 				    "conflicting options"));
458 			if (sscanf(optarg, "%ld", &dbsize) != 1) {
459 				md_eprintf(gettext(
460 				    "%s: bad format\n"), optarg);
461 				usage(sp, "");
462 			}
463 
464 			default_size = FALSE;
465 			break;
466 		case 'L':
467 			/* Same criteria as -l */
468 			if (hosts == TRUE || meds == TRUE)
469 				usage(sp, gettext(
470 				    "conflicting options"));
471 			force_label = TRUE;
472 			break;
473 		case 's':
474 			sname = optarg;
475 			break;
476 		default:
477 			usage(sp, gettext(
478 			    "unknown options"));
479 		}
480 	}
481 
482 	/* Can only use -A enable when creating the single-node set */
483 	if (auto_take && hosts != TRUE)
484 		usage(sp, gettext("conflicting options"));
485 
486 	argc -= optind;
487 	argv += optind;
488 
489 	/*
490 	 * Add hosts
491 	 */
492 	if (hosts == TRUE) {
493 
494 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
495 			mde_perror(ep, "");
496 			md_exit(local_sp, 1);
497 		}
498 
499 		if (meta_lock(local_sp, TRUE, ep) != 0) {
500 			mde_perror(ep, "");
501 			md_exit(local_sp, 1);
502 		}
503 
504 		/*
505 		 * Keep track of Cluster set creation. Need to complete
506 		 * the transaction no matter if the set was created or not.
507 		 */
508 		created_set = 0;
509 
510 		/*
511 		 * Have no set, cannot take the lock, so only take the
512 		 * local lock.
513 		 */
514 		if ((sp = metasetname(sname, ep)) == NULL) {
515 			sdssc_rval = 0;
516 			if (multi_node) {
517 				/*
518 				 * When running on a cluster system that
519 				 * does not support MN disksets, the routine
520 				 * sdssc_mo_create_begin will be bound
521 				 * to the SVM routine not_bound_error
522 				 * which returns SDSSC_NOT_BOUND_ERROR.
523 				 *
524 				 * When running on a cluster system that
525 				 * does support MN disksets, the routine
526 				 * sdssc_mo_create_begin will be bound to
527 				 * the sdssc_mo_create_begin routine in
528 				 * library libsdssc_so.  A call to
529 				 * sdssc_mo_create_begin will return with
530 				 * either SDSSC_ERROR or SDSSC_OKAY. If
531 				 * an SDSSC_OKAY is returned, then the
532 				 * cluster framework has allocated a
533 				 * set number for this new set that is unique
534 				 * across traditional and MN disksets.
535 				 * Libmeta will get this unique set number
536 				 * by calling sdssc_get_index.
537 				 *
538 				 * When running on a non-cluster system,
539 				 * the routine sdssc_mo_create_begin
540 				 * will be bound to the SVM routine
541 				 * not_bound which returns SDSSC_NOT_BOUND.
542 				 * In this case, all sdssc routines will
543 				 * return SDSSC_NOT_BOUND.  No need to check
544 				 * for return value of SDSSC_NOT_BOUND since
545 				 * the libmeta call to get the set number
546 				 * (sdssc_get_index) will also fail with
547 				 * SDSSC_NOT_BOUND causing libmeta to
548 				 * determine its own set number.
549 				 */
550 				sdssc_rval = sdssc_mo_create_begin(sname, argc,
551 					argv, SDSSC_PICK_SETNO);
552 				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
553 					mderror(ep, MDE_NOT_MN, NULL);
554 					mde_perror(ep,
555 					"Cluster node does not support "
556 					"multi-owner diskset operations");
557 					md_exit(local_sp, 1);
558 				} else if (sdssc_rval == SDSSC_ERROR) {
559 					mde_perror(ep, "");
560 					md_exit(local_sp, 1);
561 				}
562 			} else {
563 				sdssc_rval = sdssc_create_begin(sname, argc,
564 					argv, SDSSC_PICK_SETNO);
565 				if (sdssc_rval == SDSSC_ERROR) {
566 					mde_perror(ep, "");
567 					md_exit(local_sp, 1);
568 				}
569 			}
570 			/*
571 			 * Created diskset (as opposed to adding a
572 			 * host to an existing diskset).
573 			 */
574 			created_set = 1;
575 
576 			sp = Zalloc(sizeof (*sp));
577 			sp->setname = Strdup(sname);
578 			sp->lockfd = MD_NO_LOCK;
579 			mdclrerror(ep);
580 		} else {
581 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
582 				mde_perror(ep, "");
583 				md_exit(local_sp, 1);
584 			}
585 			if (MD_MNSET_DESC(sd)) {
586 				multi_node = 1;
587 			}
588 
589 			/*
590 			 * can't add hosts to an existing set & enable
591 			 * auto-take
592 			 */
593 			if (auto_take)
594 				usage(sp, gettext("conflicting options"));
595 
596 			/*
597 			 * Have a valid set, take the set lock also.
598 			 *
599 			 * A MN diskset does not use the set meta_lock but
600 			 * instead uses the clnt_lock of rpc.metad and the
601 			 * suspend/resume feature of the rpc.mdcommd.  Can't
602 			 * use set meta_lock since class 1 messages are
603 			 * grabbing this lock and if this thread is holding
604 			 * the set meta_lock then no rpc.mdcommd suspend
605 			 * can occur.
606 			 */
607 			if (!multi_node) {
608 				if (meta_lock(sp, TRUE, ep) != 0) {
609 					mde_perror(ep, "");
610 					md_exit(local_sp, 1);
611 				}
612 			}
613 		}
614 
615 		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
616 		    ep)) {
617 			if (created_set)
618 				sdssc_create_end(sname, SDSSC_CLEANUP);
619 			mde_perror(&status, "");
620 			if (!multi_node)
621 				(void) meta_unlock(sp, ep);
622 			md_exit(local_sp, 1);
623 		}
624 
625 		if (created_set)
626 			sdssc_create_end(sname, SDSSC_COMMIT);
627 
628 		else {
629 			/*
630 			 * If adding hosts to existing diskset,
631 			 * call DCS svcs
632 			 */
633 			sdssc_add_hosts(sname, argc, argv);
634 		}
635 		if (!multi_node)
636 			(void) meta_unlock(sp, ep);
637 		md_exit(local_sp, 0);
638 	}
639 
640 	/*
641 	 * Add mediators
642 	 */
643 	if (meds == TRUE) {
644 
645 		if ((sp = metasetname(sname, ep)) == NULL) {
646 			mde_perror(ep, "");
647 			md_exit(local_sp, 1);
648 		}
649 
650 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
651 			mde_perror(ep, "");
652 			md_exit(local_sp, 1);
653 		}
654 
655 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
656 			mde_perror(ep, "");
657 			md_exit(local_sp, 1);
658 		}
659 		if (MD_MNSET_DESC(sd)) {
660 			multi_node = 1;
661 		}
662 
663 		if (meta_lock(local_sp, TRUE, ep) != 0) {
664 			mde_perror(ep, "");
665 			md_exit(local_sp, 1);
666 		}
667 		/*
668 		 * A MN diskset does not use the set meta_lock but
669 		 * instead uses the clnt_lock of rpc.metad and the
670 		 * suspend/resume feature of the rpc.mdcommd.  Can't
671 		 * use set meta_lock since class 1 messages are
672 		 * grabbing this lock and if this thread is holding
673 		 * the set meta_lock then no rpc.mdcommd suspend
674 		 * can occur.
675 		 */
676 		if (!multi_node) {
677 			if (meta_lock(sp, TRUE, ep) != 0) {
678 				mde_perror(ep, "");
679 				md_exit(local_sp, 1);
680 			}
681 		}
682 
683 		if (meta_set_addmeds(sp, argc, argv, ep)) {
684 			mde_perror(&status, "");
685 			if (!multi_node)
686 				(void) meta_unlock(sp, ep);
687 			md_exit(local_sp, 1);
688 		}
689 
690 		if (!multi_node)
691 			(void) meta_unlock(sp, ep);
692 		md_exit(local_sp, 0);
693 	}
694 
695 	/*
696 	 * Add drives
697 	 */
698 	if ((sp = metasetname(sname, ep)) == NULL) {
699 		mde_perror(ep, "");
700 		md_exit(local_sp, 1);
701 	}
702 
703 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
704 		mde_perror(ep, "");
705 		md_exit(local_sp, 1);
706 	}
707 
708 	/* Determine if diskset is a MN diskset or not */
709 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
710 		mde_perror(ep, "");
711 		md_exit(local_sp, 1);
712 	}
713 	if (MD_MNSET_DESC(sd)) {
714 		multi_node = 1;
715 	}
716 
717 	if (meta_lock(local_sp, TRUE, ep) != 0) {
718 		mde_perror(ep, "");
719 		md_exit(local_sp, 1);
720 	}
721 
722 	/* Make sure database size is within limits */
723 	if (default_size == FALSE) {
724 		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
725 		    (!multi_node && dbsize < MDDB_MINBLKS))
726 			usage(sp, gettext(
727 			    "size (-l) is too small"));
728 
729 		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
730 		    (!multi_node && dbsize > MDDB_MAXBLKS))
731 			usage(sp, gettext(
732 			    "size (-l) is too big"));
733 	}
734 
735 	/*
736 	 * Have a valid set, take the set lock also.
737 	 *
738 	 * A MN diskset does not use the set meta_lock but
739 	 * instead uses the clnt_lock of rpc.metad and the
740 	 * suspend/resume feature of the rpc.mdcommd.  Can't
741 	 * use set meta_lock since class 1 messages are
742 	 * grabbing this lock and if this thread is holding
743 	 * the set meta_lock then no rpc.mdcommd suspend
744 	 * can occur.
745 	 */
746 	if (!multi_node) {
747 		if (meta_lock(sp, TRUE, ep) != 0) {
748 			mde_perror(ep, "");
749 			md_exit(local_sp, 1);
750 		}
751 	}
752 
753 
754 	/*
755 	 * If using the default size,
756 	 *   then let's adjust the default to the minimum
757 	 *   size currently in use.
758 	 */
759 	if (default_size) {
760 		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
761 		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
762 			mdclrerror(ep);
763 		else
764 			dbsize = nblks;	/* adjust replica size */
765 	}
766 
767 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
768 		mde_perror(ep, "");
769 		if (!multi_node)
770 			(void) meta_unlock(sp, ep);
771 		md_exit(local_sp, 1);
772 	}
773 
774 	if (c == 0) {
775 		md_perror(gettext(
776 		    "No drives specified to add.\n"));
777 		if (!multi_node)
778 			(void) meta_unlock(sp, ep);
779 		md_exit(local_sp, 1);
780 	}
781 
782 	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
783 		metafreedrivenamelist(dnlp);
784 		mde_perror(ep, "");
785 		if (!multi_node)
786 			(void) meta_unlock(sp, ep);
787 		md_exit(local_sp, 1);
788 	}
789 
790 	/*
791 	 * MN disksets don't have a device id in the master block
792 	 * For traditional disksets, check for the drive device
793 	 * id not fitting in the master block
794 	 */
795 	if (!multi_node) {
796 		for (p = dnlp; p != NULL; p = p->next) {
797 			int 		fd;
798 			ddi_devid_t	devid;
799 			mdname_t	*np;
800 
801 			np = metaslicename(p->drivenamep, 0, ep);
802 			if (np == NULL)
803 				continue;
804 
805 			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
806 				continue;
807 
808 			if (devid_get(fd, &devid) == 0) {
809 				size_t len;
810 
811 				len = devid_sizeof(devid);
812 				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
813 					(void) mddserror(ep,
814 					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
815 					    np->rname, NULL);
816 				devid_free(devid);
817 			} else {
818 				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
819 				    NULL, NULL, np->rname, NULL);
820 			}
821 			(void) close(fd);
822 		}
823 	}
824 
825 	/*
826 	 * MN disksets don't use DCS clustering services.
827 	 * For traditional disksets:
828 	 * There's not really much we can do here if this call fails.
829 	 * The drives have been added to the set and DiskSuite believes
830 	 * it owns the drives.
831 	 * Relase the set and hope for the best.
832 	 */
833 	if ((!multi_node) &&
834 	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
835 		meta_set_release(sp, ep);
836 		printf(gettext(
837 		    "Sun Clustering failed to make set primary\n"));
838 	}
839 
840 	metafreedrivenamelist(dnlp);
841 	if (!multi_node)
842 		(void) meta_unlock(sp, ep);
843 	md_exit(local_sp, 0);
844 }
845 
846 static void
847 parse_balance(int argc, char **argv)
848 {
849 	int		c;
850 	mdsetname_t	*sp = NULL;
851 	char		*sname = MD_LOCAL_NAME;
852 	md_error_t	status = mdnullerror;
853 	md_set_desc	*sd;
854 	int		multi_node = 0;
855 
856 	/* reset and parse args */
857 	optind = 1;
858 	opterr = 1;
859 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
860 		switch (c) {
861 		case 'M':
862 			break;
863 		case 'b':
864 			break;
865 		case 's':
866 			sname = optarg;
867 			break;
868 		default:
869 			usage(sp, gettext("unknown options"));
870 		}
871 	}
872 
873 	argc -= optind;
874 	argv += optind;
875 
876 	if (argc != 0)
877 		usage(sp, gettext("too many args"));
878 
879 	if ((sp = metasetname(sname, &status)) == NULL) {
880 		mde_perror(&status, "");
881 		md_exit(sp, 1);
882 	}
883 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
884 		mde_perror(&status, "");
885 		md_exit(sp, 1);
886 	}
887 	if (MD_MNSET_DESC(sd)) {
888 		multi_node = 1;
889 	}
890 	/*
891 	 * Have a valid set, take the set lock also.
892 	 *
893 	 * A MN diskset does not use the set meta_lock but
894 	 * instead uses the clnt_lock of rpc.metad and the
895 	 * suspend/resume feature of the rpc.mdcommd.  Can't
896 	 * use set meta_lock since class 1 messages are
897 	 * grabbing this lock and if this thread is holding
898 	 * the set meta_lock then no rpc.mdcommd suspend
899 	 * can occur.
900 	 */
901 	if (!multi_node) {
902 		if (meta_lock(sp, TRUE, &status) != 0) {
903 			mde_perror(&status, "");
904 			md_exit(sp, 1);
905 		}
906 	}
907 
908 	if (meta_set_balance(sp, &status) != 0) {
909 		mde_perror(&status, "");
910 		md_exit(sp, 1);
911 	}
912 	md_exit(sp, 0);
913 }
914 
915 static void
916 parse_autotake(int argc, char **argv)
917 {
918 	int			c;
919 	int			enable = 0;
920 	mdsetname_t		*sp = NULL;
921 	char			*sname = MD_LOCAL_NAME;
922 	md_error_t		status = mdnullerror;
923 	md_error_t		*ep = &status;
924 
925 	/* reset and parse args */
926 	optind = 1;
927 	opterr = 1;
928 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
929 		switch (c) {
930 		case 'A':
931 			/* verified sub-option in main */
932 			if (strcmp(optarg, "enable") == 0)
933 				enable = 1;
934 			break;
935 		case 's':
936 			/* verified presence of setname in main */
937 			sname = optarg;
938 			break;
939 		default:
940 			usage(sp, gettext("unknown options"));
941 		}
942 	}
943 
944 	if ((sp = metasetname(sname, ep)) == NULL) {
945 		mde_perror(ep, "");
946 		md_exit(sp, 1);
947 	}
948 
949 	if (meta_lock(sp, TRUE, ep) != 0) {
950 		mde_perror(ep, "");
951 		md_exit(sp, 1);
952 	}
953 
954 	if (meta_check_ownership(sp, ep) != 0) {
955 		mde_perror(ep, "");
956 		md_exit(sp, 1);
957 	}
958 
959 	if (meta_set_auto_take(sp, enable, ep) != 0) {
960 		mde_perror(ep, "");
961 		md_exit(sp, 1);
962 	}
963 
964 	md_exit(sp, 0);
965 }
966 
967 static void
968 parse_del(int argc, char **argv)
969 {
970 	int			c;
971 	mdsetname_t		*sp = NULL;
972 	char			*sname = MD_LOCAL_NAME;
973 	int			hosts = FALSE;
974 	int			meds = FALSE;
975 	int			forceflg = FALSE;
976 	md_error_t		status = mdnullerror;
977 	md_error_t		*ep = &status;
978 	mddrivenamelist_t	*dnlp = NULL;
979 	mdsetname_t		*local_sp = NULL;
980 	md_set_desc		*sd;
981 	int			multi_node = 0;
982 
983 	/* reset and parse args */
984 	optind = 1;
985 	opterr = 1;
986 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
987 		switch (c) {
988 		case 'M':
989 			break;
990 		case 'd':
991 			break;
992 		case 'f':
993 			forceflg = TRUE;
994 			break;
995 		case 'h':
996 		case 'm':
997 			if (meds == TRUE || hosts == TRUE)
998 				usage(sp, gettext(
999 				    "only one -m or -h option allowed"));
1000 
1001 			if (c == 'h')
1002 				hosts = TRUE;
1003 			else
1004 				meds = TRUE;
1005 			break;
1006 		case 's':
1007 			sname = optarg;
1008 			break;
1009 		default:
1010 			usage(sp, gettext("unknown options"));
1011 		}
1012 	}
1013 
1014 	argc -= optind;
1015 	argv += optind;
1016 
1017 	if ((sp = metasetname(sname, ep)) == NULL) {
1018 		mde_perror(ep, "");
1019 		md_exit(local_sp, 1);
1020 	}
1021 
1022 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1023 		mde_perror(ep, "");
1024 		md_exit(local_sp, 1);
1025 	}
1026 
1027 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1028 		mde_perror(ep, "");
1029 		md_exit(local_sp, 1);
1030 	}
1031 	if (MD_MNSET_DESC(sd))
1032 		multi_node = 1;
1033 
1034 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1035 		mde_perror(ep, "");
1036 		md_exit(local_sp, 1);
1037 	}
1038 
1039 	/*
1040 	 * Have a valid set, take the set lock also.
1041 	 *
1042 	 * A MN diskset does not use the set meta_lock but
1043 	 * instead uses the clnt_lock of rpc.metad and the
1044 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1045 	 * use set meta_lock since class 1 messages are
1046 	 * grabbing this lock and if this thread is holding
1047 	 * the set meta_lock then no rpc.mdcommd suspend
1048 	 * can occur.
1049 	 */
1050 	if (!multi_node) {
1051 		if (meta_lock(sp, TRUE, ep) != 0) {
1052 			mde_perror(ep, "");
1053 			md_exit(local_sp, 1);
1054 		}
1055 	}
1056 
1057 	/*
1058 	 * Delete hosts
1059 	 */
1060 	if (hosts == TRUE) {
1061 		if (meta_check_ownership(sp, ep) != 0) {
1062 			/*
1063 			 * If we don't own the set bail out here otherwise
1064 			 * we could delete the node from the DCS service
1065 			 * yet not delete the host from the set.
1066 			 */
1067 			mde_perror(ep, "");
1068 			if (!multi_node)
1069 				(void) meta_unlock(sp, ep);
1070 			md_exit(local_sp, 1);
1071 		}
1072 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1073 		    if (!metad_isautotakebyname(sname)) {
1074 			/*
1075 			 * SC could have been installed after the set was
1076 			 * created.  We still want to be able to delete these
1077 			 * sets.
1078 			 */
1079 			md_perror(gettext(
1080 			    "Failed to delete hosts from DCS service"));
1081 			if (!multi_node)
1082 				(void) meta_unlock(sp, ep);
1083 			md_exit(local_sp, 1);
1084 		    }
1085 		}
1086 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1087 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1088 				(void) printf(gettext(
1089 				    "Failed to restore host(s) in DCS "
1090 				    "database\n"));
1091 			}
1092 			mde_perror(ep, "");
1093 			if (!multi_node)
1094 				(void) meta_unlock(sp, ep);
1095 			md_exit(local_sp, 1);
1096 		}
1097 		if (!multi_node)
1098 			(void) meta_unlock(sp, ep);
1099 		md_exit(local_sp, 0);
1100 	}
1101 
1102 	/*
1103 	 * Delete mediators
1104 	 */
1105 	if (meds == TRUE) {
1106 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1107 			mde_perror(ep, "");
1108 			if (!multi_node)
1109 				(void) meta_unlock(sp, ep);
1110 			md_exit(local_sp, 1);
1111 		}
1112 		if (!multi_node)
1113 			(void) meta_unlock(sp, ep);
1114 		md_exit(local_sp, 0);
1115 	}
1116 
1117 	/*
1118 	 * Delete drives
1119 	 */
1120 
1121 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1122 		mde_perror(ep, "");
1123 		if (!multi_node)
1124 			(void) meta_unlock(sp, ep);
1125 		md_exit(local_sp, 1);
1126 	}
1127 
1128 	if (c == 0) {
1129 		md_perror(gettext(
1130 		    "No drives specified to delete.\n"));
1131 		if (!multi_node)
1132 			(void) meta_unlock(sp, ep);
1133 		md_exit(local_sp, 1);
1134 	}
1135 
1136 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1137 		metafreedrivenamelist(dnlp);
1138 		mde_perror(ep, "");
1139 		if (!multi_node)
1140 			(void) meta_unlock(sp, ep);
1141 		md_exit(local_sp, 1);
1142 	}
1143 
1144 	metafreedrivenamelist(dnlp);
1145 	if (!multi_node)
1146 		(void) meta_unlock(sp, ep);
1147 	md_exit(local_sp, 0);
1148 }
1149 
1150 static void
1151 parse_isowner(int argc, char **argv)
1152 {
1153 	int		c;
1154 	mdsetname_t	*sp = NULL;
1155 	char		*sname = MD_LOCAL_NAME;
1156 	md_error_t	status = mdnullerror;
1157 	md_error_t	*ep = &status;
1158 	char		*host = NULL;
1159 
1160 	/* reset and parse args */
1161 	optind = 1;
1162 	opterr = 1;
1163 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1164 		switch (c) {
1165 		case 'M':
1166 			break;
1167 		case 'o':
1168 			break;
1169 		case 'h':
1170 			if (host != NULL) {
1171 				usage(sp, gettext(
1172 				    "only one -h option allowed"));
1173 			}
1174 			host = optarg;
1175 			break;
1176 		case 's':
1177 			sname = optarg;
1178 			break;
1179 		default:
1180 			usage(sp, gettext("unknown options"));
1181 		}
1182 	}
1183 
1184 	argc -= optind;
1185 	argv += optind;
1186 
1187 	if (argc != 0)
1188 		usage(sp, gettext("too many args"));
1189 
1190 	if ((sp = metasetname(sname, ep)) == NULL) {
1191 		mde_perror(ep, "");
1192 		md_exit(sp, 1);
1193 	}
1194 
1195 	if (host == NULL) {
1196 		if (meta_check_ownership(sp, ep) != 0) {
1197 			mde_perror(ep, "");
1198 			md_exit(sp, 1);
1199 		}
1200 	} else {
1201 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1202 			mde_perror(ep, "");
1203 			md_exit(sp, 1);
1204 		}
1205 	}
1206 	md_exit(sp, 0);
1207 }
1208 
1209 static void
1210 parse_purge(int argc, char **argv)
1211 {
1212 	int		c;
1213 	mdsetname_t	*sp = NULL;
1214 	mdsetname_t	*local_sp = NULL;
1215 	md_drive_desc	*dd;
1216 	char		*sname = MD_LOCAL_NAME;
1217 	char		*thishost = mynode();
1218 	md_error_t	status = mdnullerror;
1219 	md_error_t	*ep = &status;
1220 	int		bypass_cluster_purge = 0;
1221 	int		forceflg = FALSE;
1222 	int		ret = 0;
1223 	int		multi_node = 0;
1224 	md_set_desc		*sd;
1225 
1226 	optind = 1;
1227 	opterr = 1;
1228 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1229 		switch (c) {
1230 		case 'M':
1231 			break;
1232 		case 'C':
1233 			bypass_cluster_purge = 1;
1234 			break;
1235 		case 'f':
1236 			forceflg = TRUE;
1237 			break;
1238 		case 'P':
1239 			break;
1240 		case 's':
1241 			sname = optarg;
1242 			break;
1243 		default:
1244 			usage(sp, gettext("unknown options"));
1245 		}
1246 	}
1247 
1248 	argc -= optind;
1249 	argv += optind;
1250 
1251 	if (argc != 0)
1252 		usage(sp, gettext("too many arguments"));
1253 
1254 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1255 		mde_perror(ep, "");
1256 		md_exit(local_sp, 1);
1257 	}
1258 
1259 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1260 		mde_perror(ep, "");
1261 		md_exit(local_sp, 1);
1262 	}
1263 
1264 	if ((sp = metasetname(sname, ep)) == NULL) {
1265 		mde_perror(ep, "");
1266 		md_exit(sp, 1);
1267 	}
1268 
1269 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1270 		mde_perror(ep, "");
1271 		md_exit(local_sp, 1);
1272 	}
1273 	if (MD_MNSET_DESC(sd))
1274 		multi_node = 1;
1275 
1276 	if (!multi_node) {
1277 		if (meta_lock(sp, TRUE, ep) != 0) {
1278 			mde_perror(ep, "");
1279 			md_exit(local_sp, 1);
1280 		}
1281 	}
1282 
1283 	/* Must not own the set if purging it from this host */
1284 	if (meta_check_ownership(sp, ep) == 0) {
1285 		/*
1286 		 * Need to see if there are disks in the set, if not then
1287 		 * there is no ownership but meta_check_ownership returns 0
1288 		 */
1289 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1290 		if (!mdisok(ep)) {
1291 			mde_perror(ep, "");
1292 			if (!multi_node)
1293 				(void) meta_unlock(sp, ep);
1294 			md_exit(local_sp, 1);
1295 		}
1296 		if (dd != NULL) {
1297 			(void) printf(gettext
1298 			    ("Must not be owner of the set when purging it\n"));
1299 			if (!multi_node)
1300 				(void) meta_unlock(sp, ep);
1301 			md_exit(local_sp, 1);
1302 		}
1303 	}
1304 	/*
1305 	 * Remove the node from the DCS service
1306 	 */
1307 	if (!bypass_cluster_purge) {
1308 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1309 			md_perror(gettext
1310 			    ("Failed to purge hosts from DCS service"));
1311 			if (!multi_node)
1312 				(void) meta_unlock(sp, ep);
1313 			md_exit(local_sp, 1);
1314 		}
1315 	}
1316 
1317 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1318 	    ep)) != 0) {
1319 		if (!bypass_cluster_purge) {
1320 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1321 			    SDSSC_ERROR) {
1322 				(void) printf(gettext(
1323 				    "Failed to restore host in DCS "
1324 				    "database\n"));
1325 			}
1326 		}
1327 		mde_perror(ep, "");
1328 		if (!multi_node)
1329 			(void) meta_unlock(sp, ep);
1330 		md_exit(local_sp, ret);
1331 	}
1332 
1333 	if (!multi_node)
1334 		(void) meta_unlock(sp, ep);
1335 	md_exit(local_sp, 0);
1336 }
1337 
1338 static void
1339 parse_query(int argc, char **argv)
1340 {
1341 	int		c;
1342 	mdsetname_t	*sp = NULL;
1343 	mddb_dtag_lst_t	*dtlp = NULL;
1344 	mddb_dtag_lst_t	*tdtlp;
1345 	char		*sname = MD_LOCAL_NAME;
1346 	md_error_t	status = mdnullerror;
1347 
1348 	/* reset and parse args */
1349 	optind = 1;
1350 	opterr = 1;
1351 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1352 		switch (c) {
1353 		case 'M':
1354 			break;
1355 		case 'q':
1356 			break;
1357 		case 's':
1358 			sname = optarg;
1359 			break;
1360 		default:
1361 			usage(sp, gettext("unknown options"));
1362 		}
1363 	}
1364 
1365 	argc -= optind;
1366 	argv += optind;
1367 
1368 	if (argc != 0)
1369 		usage(sp, gettext("too many args"));
1370 
1371 	if ((sp = metasetname(sname, &status)) == NULL) {
1372 		mde_perror(&status, "");
1373 		md_exit(sp, 1);
1374 	}
1375 
1376 	if (meta_lock(sp, TRUE, &status) != 0) {
1377 		mde_perror(&status, "");
1378 		md_exit(sp, 1);
1379 	}
1380 
1381 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1382 		mde_perror(&status, "");
1383 		md_exit(sp, 1);
1384 	}
1385 
1386 	if (dtlp != NULL)
1387 		(void) printf("The following tag(s) were found:\n");
1388 
1389 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1390 		dtlp = tdtlp->dtl_nx;
1391 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1392 		    tdtlp->dtl_dt.dt_hn,
1393 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1394 		Free(tdtlp);
1395 	}
1396 
1397 	md_exit(sp, 0);
1398 }
1399 
1400 /* Should never be called with sname of a Multinode diskset. */
1401 static void
1402 parse_releaseset(int argc, char **argv)
1403 {
1404 	int		c;
1405 	mdsetname_t	*sp = NULL;
1406 	md_error_t	status = mdnullerror;
1407 	md_error_t	*ep = &status;
1408 	char		*sname = MD_LOCAL_NAME;
1409 	sdssc_boolean_e	cluster_release = SDSSC_False;
1410 	sdssc_version_t	vers;
1411 	rval_e		rval;
1412 	md_set_desc	*sd;
1413 
1414 	/* reset and parse args */
1415 	optind = 1;
1416 	opterr = 1;
1417 	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1418 		switch (c) {
1419 		case 'C':
1420 			cluster_release = SDSSC_True;
1421 			break;
1422 		case 's':
1423 			sname = optarg;
1424 			break;
1425 		case 'r':
1426 			break;
1427 		default:
1428 			usage(sp, gettext("unknown options"));
1429 		}
1430 	}
1431 
1432 	argc -= optind;
1433 	argv += optind;
1434 
1435 	if (argc > 0)
1436 		usage(sp, gettext("too many args"));
1437 
1438 	memset(&vers, 0, sizeof (vers));
1439 
1440 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1441 	    (vers.major == 3) &&
1442 	    (cluster_release == SDSSC_False)) {
1443 
1444 		/*
1445 		 * If the release is being done by the user via the CLI
1446 		 * we need to notify the DCS to release this node as being
1447 		 * the primary. The reason nothing else needs to be done
1448 		 * is due to the fact that the reservation code will exec
1449 		 * metaset -C release to complete the operation.
1450 		 */
1451 		rval = sdssc_notify_service(sname, Release_Primary);
1452 		if (rval == SDSSC_ERROR) {
1453 			printf(gettext(
1454 			    "metaset: failed to notify DCS of release\n"));
1455 		}
1456 		md_exit(NULL, rval == SDSSC_ERROR);
1457 	}
1458 
1459 	if ((sp = metasetname(sname, ep)) == NULL) {
1460 
1461 		/*
1462 		 * It's entirely possible for the SC3.0 reservation code
1463 		 * to call for DiskSet to release a diskset and have that
1464 		 * diskset not exist. During a diskset removal DiskSuite
1465 		 * maybe able to remove all traces of the diskset before
1466 		 * the reservation code execs metaset -C release in which
1467 		 * case the metasetname will fail, but the overall command
1468 		 * shouldn't.
1469 		 */
1470 		if (vers.major == 3)
1471 			md_exit(sp, 0);
1472 		else {
1473 			mde_perror(ep, "");
1474 			md_exit(sp, 1);
1475 		}
1476 	}
1477 
1478 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1479 		mde_perror(ep, "");
1480 		md_exit(sp, 1);
1481 	}
1482 
1483 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1484 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1485 		md_exit(sp, 1);
1486 	}
1487 
1488 	if (meta_lock_nowait(sp, ep) != 0) {
1489 		mde_perror(ep, "");
1490 		md_exit(sp, 10);	/* special errcode */
1491 	}
1492 
1493 	if (meta_set_release(sp, ep)) {
1494 		mde_perror(ep, "");
1495 		md_exit(sp, 1);
1496 	}
1497 	md_exit(sp, 0);
1498 }
1499 
1500 /* Should never be called with sname of a Multinode diskset. */
1501 static void
1502 parse_takeset(int argc, char **argv)
1503 {
1504 	int		c;
1505 	mdsetname_t	*sp = NULL;
1506 	int		flags = 0;
1507 	char		*sname = MD_LOCAL_NAME;
1508 	mhd_mhiargs_t	mhiargs;
1509 	char 		*cp = NULL;
1510 	int		pos = -1;	/* position of timeout value */
1511 	int		usetag = 0;
1512 	static char	*nullopts[] = { NULL };
1513 	md_error_t	status = mdnullerror;
1514 	md_error_t	*ep = &status;
1515 	sdssc_boolean_e	cluster_take = SDSSC_False;
1516 	sdssc_version_t	vers;
1517 	rval_e		rval;
1518 	int		set_take_rval;
1519 
1520 	/* reset and parse args */
1521 	optind = 1;
1522 	opterr = 1;
1523 	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1524 		switch (c) {
1525 		case 'C':
1526 			cluster_take = SDSSC_True;
1527 			break;
1528 		case 'f':
1529 			flags |= TAKE_FORCE;
1530 			break;
1531 		case 's':
1532 			sname = optarg;
1533 			break;
1534 		case 't':
1535 			break;
1536 		case 'u':
1537 			usetag = atoi(optarg);
1538 			flags |= TAKE_USETAG;
1539 			break;
1540 		case 'y':
1541 			flags |= TAKE_USEIT;
1542 			break;
1543 		default:
1544 			usage(sp, gettext("unknown options"));
1545 		}
1546 	}
1547 
1548 	mhiargs = defmhiargs;
1549 
1550 	argc -= optind;
1551 	argv += optind;
1552 
1553 	if (argc > 1)
1554 		usage(sp, gettext("too many args"));
1555 
1556 	/*
1557 	 * If we have a list of timeout value overrides, handle it here
1558 	 */
1559 	while (argv[0] != NULL && *argv[0] != '\0') {
1560 		/*
1561 		 * The use of the nullopts[] "token list" here is to make
1562 		 * getsubopts() simply parse a comma separated list
1563 		 * returning either "" or the contents of the field, the
1564 		 * end condition is exaustion of the initial string, which
1565 		 * is modified in the process.
1566 		 */
1567 		(void) getsubopt(&argv[0], nullopts, &cp);
1568 
1569 		c = 0;			/* re-use c as temp value of timeout */
1570 
1571 		if (*cp != '-')		/* '-' uses default */
1572 			c = atoi(cp);
1573 
1574 		if (c < 0) {
1575 			usage(sp, gettext(
1576 			    "time out values must be > 0"));
1577 		}
1578 
1579 		if (++pos > 3) {
1580 			usage(sp, gettext(
1581 			    "too many timeout values specified."));
1582 		}
1583 
1584 		if (c == 0)		/* 0 or "" field uses default */
1585 			continue;
1586 
1587 		/*
1588 		 * Assign temp value to appropriate structure member based on
1589 		 * its position in the comma separated list.
1590 		 */
1591 		switch (pos) {
1592 		    case 0:
1593 			mhiargs.mh_ff = c;
1594 			break;
1595 
1596 		    case 1:
1597 			mhiargs.mh_tk.reinstate_resv_delay = c;
1598 			break;
1599 
1600 		    case 2:
1601 			mhiargs.mh_tk.min_ownership_delay = c;
1602 			break;
1603 
1604 		    case 3:
1605 			mhiargs.mh_tk.max_ownership_delay = c;
1606 			break;
1607 		}
1608 	}
1609 
1610 	memset(&vers, 0, sizeof (vers));
1611 
1612 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1613 	    (vers.major == 3) &&
1614 	    (cluster_take == SDSSC_False)) {
1615 
1616 		/*
1617 		 * If the take is beging done by the user via the CLI we need
1618 		 * to notify the DCS to make this current node the primary.
1619 		 * The SC3.0 reservation code will in turn exec metaset with
1620 		 * the -C take arg to complete this operation.
1621 		 */
1622 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1623 		    SDSSC_ERROR) {
1624 			printf(gettext(
1625 			    "metaset: failed to notify DCS of take\n"));
1626 		}
1627 		md_exit(NULL, rval == SDSSC_ERROR);
1628 	}
1629 
1630 	if ((sp = metasetname(sname, ep)) == NULL) {
1631 		mde_perror(ep, "");
1632 		md_exit(sp, 1);
1633 	}
1634 
1635 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1636 
1637 		/*
1638 		 * If we're running in a cluster environment and this
1639 		 * node already owns the set. Don't bother trying to
1640 		 * take the set again. There's one case where an adminstrator
1641 		 * is adding disks to a set for the first time. metaset
1642 		 * will take the ownership of the set at that point. During
1643 		 * that add operation SC3.0 notices activity on the device
1644 		 * and also tries to perform a take operation. The SC3.0 take
1645 		 * will fail because the adminstrative add has the set locked
1646 		 */
1647 		md_exit(sp, 0);
1648 	}
1649 
1650 	if (meta_lock_nowait(sp, ep) != 0) {
1651 		mde_perror(ep, "");
1652 		md_exit(sp, 10);	/* special errcode */
1653 	}
1654 
1655 	/*
1656 	 * If a 2 is returned from meta_set_take, this take was able to resolve
1657 	 * an unresolved replicated disk (i.e. a disk is now available that
1658 	 * had been missing during the import of the replicated diskset).
1659 	 * Need to release the diskset and re-take in order to have
1660 	 * the subdrivers re-snarf using the newly resolved (or newly mapped)
1661 	 * devids.  This also allows the namespace to be updated with the
1662 	 * correct major names in the case where the disk being replicated
1663 	 * was handled by a different driver than the replicated disk.
1664 	 */
1665 	set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
1666 	if (set_take_rval == 2) {
1667 		if (meta_set_release(sp, &status)) {
1668 			mde_perror(&status,
1669 			    "Need to release and take set to resolve names.");
1670 			md_exit(sp, 1);
1671 		}
1672 		metaflushdrivenames();
1673 		metaflushsetname(sp);
1674 		set_take_rval = meta_set_take(sp, &mhiargs,
1675 		    (flags | TAKE_RETAKE), usetag, &status);
1676 	}
1677 
1678 	if (set_take_rval == -1) {
1679 		mde_perror(&status, "");
1680 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1681 			md_exit(sp, 2);
1682 		if (mdismddberror(&status, MDE_DB_ACCOK))
1683 			md_exit(sp, 3);
1684 		if (mdismddberror(&status, MDE_DB_STALE))
1685 			md_exit(sp, 66);
1686 		md_exit(sp, 1);
1687 	}
1688 	md_exit(sp, 0);
1689 }
1690 
1691 /*
1692  * Joins a node to a specific set or to all multinode disksets known
1693  * by this node.  If set is specified then caller should have verified
1694  * that the set is a multinode diskset.
1695  *
1696  * If an error occurs, metaset exits with a 1.
1697  * If there is no error, metaset exits with a 0.
1698  */
1699 static void
1700 parse_joinset(int argc, char **argv)
1701 {
1702 	int		c;
1703 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1704 	char		*sname = MD_LOCAL_NAME;
1705 	md_error_t	status = mdnullerror;
1706 	md_error_t	*ep = &status;
1707 	md_set_desc	*sd;
1708 	char		buf[BUFSIZ];
1709 	char		*p = buf;
1710 	set_t		max_sets, setno;
1711 	int		err, cumm_err = 0;
1712 	size_t		bufsz;
1713 
1714 	bufsz = sizeof (buf);
1715 	/* reset and parse args */
1716 	optind = 1;
1717 	opterr = 1;
1718 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1719 		switch (c) {
1720 		case 'M':
1721 			break;
1722 		case 'j':
1723 			break;
1724 		case 's':
1725 			sname = optarg;
1726 			break;
1727 		default:
1728 			usage(sp, gettext("unknown options"));
1729 		}
1730 	}
1731 
1732 	argc -= optind;
1733 	argv += optind;
1734 
1735 	if (argc > 1)
1736 		usage(sp, gettext("too many args"));
1737 
1738 	/*
1739 	 * If no setname option was used, then join all disksets
1740 	 * that this node knows about.   Attempt to join all
1741 	 * disksets that this node knows about.
1742 	 *
1743 	 * Additional text is added to the error messages during
1744 	 * this section of code in order to help the user understand
1745 	 * why the 'join of all sets' failed and which set caused
1746 	 * the failure.
1747 	 */
1748 
1749 	/*
1750 	 * Hold local set lock throughout this call to keep
1751 	 * other actions from interfering (such as creating a new
1752 	 * set, etc.).
1753 	 */
1754 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1755 		mde_perror(ep, "");
1756 		md_exit(sp, 1);
1757 	}
1758 
1759 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1760 		mde_perror(ep, "");
1761 		md_exit(local_sp, 1);
1762 	}
1763 
1764 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1765 		/*
1766 		 * If no set name is given, then walk through all sets
1767 		 * on this node which could include:
1768 		 * 	- MN disksets
1769 		 *	- traditional disksets
1770 		 *	- non-existent disksets
1771 		 * Attempt to join the MN disksets.
1772 		 * If the join of one set fails, print out an error message
1773 		 * about that set and continue the walk.
1774 		 */
1775 		if ((max_sets = get_max_sets(ep)) == 0) {
1776 			mde_perror(ep, "");
1777 			md_exit(local_sp, 1);
1778 		}
1779 
1780 		/* Start walking through all possible disksets */
1781 		for (setno = 1; setno < max_sets; setno++) {
1782 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1783 				if (mdiserror(ep, MDE_NO_SET)) {
1784 					/* No set for this setno - continue */
1785 					mdclrerror(ep);
1786 					continue;
1787 				} else {
1788 					(void) sprintf(p, gettext(
1789 					"Unable to get set %d information"),
1790 					    setno);
1791 					mde_perror(ep, p);
1792 					cumm_err = 1;
1793 					mdclrerror(ep);
1794 					continue;
1795 				}
1796 			}
1797 
1798 			/* If setname is there, set desc should exist. */
1799 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1800 				(void) snprintf(p, bufsz, gettext(
1801 				    "Unable to get set %s desc information"),
1802 				    sp->setname);
1803 				mde_perror(ep, p);
1804 				cumm_err = 1;
1805 				mdclrerror(ep);
1806 				continue;
1807 			}
1808 
1809 			/* Only check MN disksets */
1810 			if (!MD_MNSET_DESC(sd)) {
1811 				continue;
1812 			}
1813 
1814 			/*
1815 			 * Return value of 0 is success.
1816 			 * Return value of -1 means a failure.
1817 			 * Return value of -2 means set could not be
1818 			 * joined, but shouldn't cause an error.
1819 			 * Reasons would be:
1820 			 * 	- no drives in set
1821 			 * 	- node already joined to set
1822 			 * Return value of -3 means joined stale set.
1823 			 * Can't check for all reasons here
1824 			 * since set isn't locked yet across all
1825 			 * nodes in the cluster.  The call
1826 			 * to libmeta routine, meta_set_join, will
1827 			 * lock across the cluster and perform
1828 			 * the checks.
1829 			 */
1830 			if ((err = meta_set_join(sp, ep)) == -1) {
1831 				/* Print error of diskset join failure */
1832 				(void) snprintf(p, bufsz,
1833 				    gettext("Join to diskset %s failed"),
1834 				    sp->setname);
1835 				mde_perror(ep, p);
1836 				cumm_err = 1;
1837 				mdclrerror(ep);
1838 				continue;
1839 			}
1840 
1841 			if (err == -3) {
1842 				/* Print error of diskset join failure */
1843 				(void) snprintf(p, bufsz,
1844 				    gettext("Joined to stale diskset %s"),
1845 				    sp->setname);
1846 				mde_perror(ep, p);
1847 				mdclrerror(ep);
1848 			}
1849 
1850 			mdclrerror(ep);
1851 		}
1852 
1853 		md_exit(local_sp, cumm_err);
1854 	}
1855 
1856 	/*
1857 	 * Code for a specific set is much simpler.
1858 	 * Error messages don't need extra text since specific setname
1859 	 * was used.
1860 	 * Don't need to lock the local set, just the specific set given.
1861 	 */
1862 	if ((sp = metasetname(sname, ep)) == NULL) {
1863 		mde_perror(ep, "");
1864 		md_exit(local_sp, 1);
1865 	}
1866 
1867 	/*
1868 	 * Fail command if meta_set_join returns -1.
1869 	 *
1870 	 * Return of 0 means that node joined set.
1871 	 *
1872 	 * Return of -2 means that node was unable to
1873 	 * join a set since that set had no drives
1874 	 * or that had already joined the set.  No
1875 	 * need to fail the command for these reasons.
1876 	 *
1877 	 * Return of -3 means that set is stale.
1878 	 * Return a value of 66 to historically match traditional disksets.
1879 	 */
1880 	if ((err = meta_set_join(sp, ep)) == -1) {
1881 		mde_perror(&status, "");
1882 		md_exit(local_sp, 1);
1883 	}
1884 
1885 	if (err == -3) {
1886 		/* Print error of diskset join failure */
1887 		(void) snprintf(p, bufsz,
1888 		    gettext("Joined to stale diskset %s"),
1889 		    sp->setname);
1890 		mde_perror(&status, "");
1891 		md_exit(local_sp, 66);
1892 	}
1893 
1894 	md_exit(local_sp, 0);
1895 }
1896 
1897 /*
1898  * Withdraws a node from a specific set or from all multinode disksets known
1899  * by this node.  If set is specified then caller should have verified
1900  * that the set is a multinode diskset.
1901  *
1902  * If an error occurs, metaset exits with a 1.
1903  * If there is no error, metaset exits with a 0.
1904  */
1905 static void
1906 parse_withdrawset(int argc, char **argv)
1907 {
1908 	int		c;
1909 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1910 	char		*sname = MD_LOCAL_NAME;
1911 	md_error_t	status = mdnullerror;
1912 	md_error_t	*ep = &status;
1913 	char		buf[BUFSIZ];
1914 	char		*p = buf;
1915 	md_set_desc	*sd;
1916 	set_t		max_sets, setno;
1917 	int		err, cumm_err = 0;
1918 	size_t		bufsz;
1919 
1920 	bufsz = sizeof (buf);
1921 	/* reset and parse args */
1922 	optind = 1;
1923 	opterr = 1;
1924 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1925 		switch (c) {
1926 		case 'M':
1927 			break;
1928 		case 'w':
1929 			break;
1930 		case 's':
1931 			sname = optarg;
1932 			break;
1933 		default:
1934 			usage(sp, gettext("unknown options"));
1935 		}
1936 	}
1937 
1938 	argc -= optind;
1939 	argv += optind;
1940 
1941 	if (argc > 1)
1942 		usage(sp, gettext("too many args"));
1943 
1944 	/*
1945 	 * If no setname option was used, then withdraw from all disksets
1946 	 * that this node knows about.
1947 	 *
1948 	 * Additional text is added to the error messages during
1949 	 * this section of code in order to help the user understand
1950 	 * why the 'withdraw from all sets' failed and which set caused
1951 	 * the failure.
1952 	 */
1953 
1954 	/*
1955 	 * Hold local set lock throughout this call to keep
1956 	 * other actions from interfering (such as creating a new
1957 	 * set, etc.).
1958 	 */
1959 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1960 		mde_perror(ep, "");
1961 		md_exit(sp, 1);
1962 	}
1963 
1964 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1965 		mde_perror(ep, "");
1966 		md_exit(local_sp, 1);
1967 	}
1968 
1969 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1970 		/*
1971 		 * If no set name is given, then walk through all sets
1972 		 * on this node which could include:
1973 		 * 	- MN disksets
1974 		 *	- traditional disksets
1975 		 *	- non-existent disksets
1976 		 * Attempt to withdraw from the MN disksets.
1977 		 * If the withdraw of one set fails, print out an error
1978 		 * message about that set and continue the walk.
1979 		 */
1980 		if ((max_sets = get_max_sets(ep)) == 0) {
1981 			mde_perror(ep, "");
1982 			md_exit(local_sp, 1);
1983 		}
1984 
1985 		/* Start walking through all possible disksets */
1986 		for (setno = 1; setno < max_sets; setno++) {
1987 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1988 				if (mdiserror(ep, MDE_NO_SET)) {
1989 					/* No set for this setno - continue */
1990 					mdclrerror(ep);
1991 					continue;
1992 				} else {
1993 					(void) sprintf(p, gettext(
1994 					    "Unable to get set %d information"),
1995 					    setno);
1996 					mde_perror(ep, p);
1997 					cumm_err = 1;
1998 					mdclrerror(ep);
1999 					continue;
2000 				}
2001 			}
2002 
2003 			/* If setname is there, set desc should exist. */
2004 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2005 				(void) snprintf(p, bufsz, gettext(
2006 				    "Unable to get set %s desc information"),
2007 				    sp->setname);
2008 				mde_perror(ep, p);
2009 				cumm_err = 1;
2010 				mdclrerror(ep);
2011 				continue;
2012 			}
2013 
2014 			/* Only check MN disksets */
2015 			if (!MD_MNSET_DESC(sd)) {
2016 				continue;
2017 			}
2018 
2019 			/*
2020 			 * Return value of 0 is success.
2021 			 * Return value of -1 means a failure.
2022 			 * Return value of -2 means set could not be
2023 			 * withdrawn from, but this shouldn't cause
2024 			 * an error.  Reasons would be:
2025 			 * 	- no drives in set
2026 			 * 	- node already withdrawn from set
2027 			 * Can't check for all reasons here
2028 			 * since set isn't locked yet across all
2029 			 * nodes in the cluster.  The call
2030 			 * to libmeta routine, meta_set_withdraw, will
2031 			 * lock across the cluster and perform
2032 			 * the checks.
2033 			 */
2034 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2035 				/* Print error of diskset withdraw failure */
2036 				(void) snprintf(p, bufsz,
2037 				    gettext("Withdraw from diskset %s failed"),
2038 				    sp->setname);
2039 				mde_perror(ep, p);
2040 				mdclrerror(ep);
2041 				cumm_err = 1;
2042 				continue;
2043 			}
2044 
2045 			if (err == -2) {
2046 				mdclrerror(ep);
2047 				continue;
2048 			}
2049 
2050 			mdclrerror(ep);
2051 		}
2052 		md_exit(local_sp, cumm_err);
2053 	}
2054 
2055 
2056 	/*
2057 	 * Code for a specific set is much simpler.
2058 	 * Error messages don't need extra text since specific setname
2059 	 * was used.
2060 	 * Don't need to lock the local set, just the specific set given.
2061 	 */
2062 	if ((sp = metasetname(sname, ep)) == NULL) {
2063 		mde_perror(ep, "");
2064 		md_exit(local_sp, 1);
2065 	}
2066 
2067 	/*
2068 	 * Fail command if meta_set_withdraw returns -1.
2069 	 *
2070 	 * Return of 0 means that node withdrew from set.
2071 	 *
2072 	 * Return of -2 means that node was unable to
2073 	 * withdraw from a set since that set had no drives
2074 	 * or node was not joined to set.  No
2075 	 * need to fail the command for these reasons.
2076 	 */
2077 	if (meta_set_withdraw(sp, ep) == -1) {
2078 		mde_perror(&status, "");
2079 		md_exit(local_sp, 1);
2080 	}
2081 
2082 	md_exit(local_sp, 0);
2083 }
2084 
2085 /*
2086  * Should never be called with sname of a Multinode diskset.
2087  */
2088 static void
2089 parse_cluster(int argc, char **argv)
2090 {
2091 	int			c,
2092 				error,
2093 				new_argc,
2094 				x;
2095 	enum cluster_cmd	cmd = ccnotspecified;
2096 	char			*hostname = SDSSC_PROXY_PRIMARY,
2097 				*argument = NULL,
2098 				*sname = MD_LOCAL_NAME,
2099 				primary_node[SDSSC_NODE_NAME_LEN],
2100 				**new_argv = NULL,
2101 				**np = NULL;
2102 	mdsetname_t		*sp = NULL;
2103 	md_error_t		status = mdnullerror;
2104 	md_error_t		*ep = &status;
2105 
2106 	/* reset and parse args */
2107 	optind = 1;
2108 	opterr = 1;
2109 	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2110 		switch (c) {
2111 		case 'C':
2112 			if (cmd != ccnotspecified) {
2113 				md_exit(sp, -1);
2114 			}
2115 			argument = optarg;
2116 
2117 			if (strcmp(argument, "disksin") == 0) {
2118 				cmd = clusterdisksin;
2119 			} else if (strcmp(argument, "version") == 0) {
2120 				cmd = clusterversion;
2121 			} else if (strcmp(argument, "release") == 0) {
2122 				cmd = clusterrelease;
2123 			} else if (strcmp(argument, "take") == 0) {
2124 				cmd = clustertake;
2125 			} else if (strcmp(argument, "proxy") == 0) {
2126 				cmd = clusterproxy;
2127 			} else if (strcmp(argument, "purge") == 0) {
2128 				cmd = clusterpurge;
2129 			} else {
2130 				md_exit(sp, -1);
2131 			}
2132 
2133 			break;
2134 
2135 		case 'h':
2136 			hostname = optarg;
2137 			break;
2138 
2139 		case 's':
2140 			sname = optarg;
2141 			break;
2142 
2143 		case 'f':
2144 		case 't':
2145 		case 'u':
2146 		case 'y':
2147 		case 'r':
2148 			break;
2149 
2150 		default:
2151 			md_exit(sp, -1);
2152 		}
2153 	}
2154 
2155 	/* Now call the appropriate command function. */
2156 	switch (cmd) {
2157 	case clusterversion:
2158 	    printclusterversion();
2159 	    break;
2160 
2161 	case clusterdisksin:
2162 	    if (printdisksin(sname, ep)) {
2163 		md_exit(sp, -1);
2164 	    }
2165 	    break;
2166 
2167 	case clusterrelease:
2168 	    parse_releaseset(argc, argv);
2169 	    break;
2170 
2171 	case clustertake:
2172 	    parse_takeset(argc, argv);
2173 	    break;
2174 
2175 	case clusterproxy:
2176 		/* Should never get here if sname is for MN diskset */
2177 
2178 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2179 			printf(gettext("Out of memory\n"));
2180 			md_exit(sp, 1);
2181 		}
2182 
2183 		np = new_argv;
2184 		new_argc = 0;
2185 		memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2186 
2187 		for (x = 0; x < argc; x++) {
2188 			if (strcmp(argv[x], "-C") == 0) {
2189 
2190 				/*
2191 				 * Need to skip the '-C proxy' args so
2192 				 * just increase x by one and the work is
2193 				 * done.
2194 				 */
2195 				x++;
2196 			} else {
2197 				*np++ = strdup(argv[x]);
2198 				new_argc++;
2199 			}
2200 		}
2201 
2202 		switch (sdssc_get_primary_host(sname, primary_node,
2203 		    SDSSC_NODE_NAME_LEN)) {
2204 		case SDSSC_ERROR:
2205 			md_exit(sp, 1);
2206 			break;
2207 
2208 		case SDSSC_NO_SERVICE:
2209 			if (hostname != SDSSC_PROXY_PRIMARY) {
2210 				(void) strlcpy(primary_node, hostname,
2211 				    SDSSC_NODE_NAME_LEN);
2212 			}
2213 			break;
2214 		}
2215 
2216 		if (sdssc_cmd_proxy(new_argc, new_argv,
2217 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2218 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2219 			md_exit(sp, error);
2220 		} else {
2221 			printf(gettext(
2222 			    "Couldn't proxy command\n"));
2223 			md_exit(sp, 1);
2224 		}
2225 		break;
2226 
2227 	case clusterpurge:
2228 		parse_purge(argc, argv);
2229 		break;
2230 
2231 	default:
2232 	    break;
2233 	}
2234 
2235 	md_exit(sp, 0);
2236 }
2237 
2238 /*
2239  * parse args and do it
2240  */
2241 int
2242 main(int argc, char *argv[])
2243 {
2244 	enum metaset_cmd	cmd = notspecified;
2245 	md_error_t		status = mdnullerror;
2246 	md_error_t		*ep = &status;
2247 	mdsetname_t		*sp = NULL;
2248 	char			*hostname = SDSSC_PROXY_PRIMARY,
2249 				*sname = MD_LOCAL_NAME,
2250 				*auto_take_option = NULL,
2251 				primary_node[SDSSC_NODE_NAME_LEN];
2252 	int			error,
2253 				c,
2254 				auto_take = FALSE,
2255 				stat;
2256 	md_set_desc		*sd;
2257 	int			mflag = 0;
2258 	int			multi_node = 0;
2259 	rval_e			sdssc_res;
2260 
2261 	/*
2262 	 * Get the locale set up before calling any other routines
2263 	 * with messages to ouput.  Just in case we're not in a build
2264 	 * environment, make sure that TEXT_DOMAIN gets set to
2265 	 * something.
2266 	 */
2267 #if !defined(TEXT_DOMAIN)
2268 #define	TEXT_DOMAIN "SYS_TEST"
2269 #endif
2270 	(void) setlocale(LC_ALL, "");
2271 	(void) textdomain(TEXT_DOMAIN);
2272 
2273 	sdssc_res = sdssc_bind_library();
2274 	if (sdssc_res == SDSSC_ERROR) {
2275 		printf(gettext(
2276 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2277 		exit(1);
2278 	}
2279 
2280 	/* initialize */
2281 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2282 		mde_perror(ep, "");
2283 		md_exit(sp, 1);
2284 	}
2285 
2286 	optind = 1;
2287 	opterr = 1;
2288 
2289 	/*
2290 	 * NOTE: The "C" option is strictly for cluster use. it is not
2291 	 * and should not be documented for the customer. - JST
2292 	 */
2293 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2294 	    != -1) {
2295 		switch (c) {
2296 		case 'M':
2297 			mflag = 1;
2298 			break;
2299 		case 'A':
2300 			auto_take = TRUE;
2301 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2302 			    strcmp(optarg, "disable") == 0))
2303 				usage(sp, gettext(
2304 				    "-A: enable or disable must be specified"));
2305 			auto_take_option = optarg;
2306 			break;
2307 		case 'a':
2308 			if (cmd != notspecified) {
2309 				usage(sp, gettext(
2310 				    "conflicting options"));
2311 			}
2312 			cmd = add;
2313 			break;
2314 		case 'b':
2315 			if (cmd != notspecified) {
2316 				usage(sp, gettext(
2317 				    "conflicting options"));
2318 			}
2319 			cmd = balance;
2320 			break;
2321 		case 'd':
2322 			if (cmd != notspecified) {
2323 				usage(sp, gettext(
2324 				    "conflicting options"));
2325 			}
2326 			cmd = delete;
2327 			break;
2328 		case 'C':	/* cluster commands */
2329 			if (cmd != notspecified) {
2330 				md_exit(sp, -1);    /* conflicting options */
2331 			}
2332 			cmd = cluster;
2333 			break;
2334 		case 'f':
2335 			break;
2336 		case 'h':
2337 			hostname = optarg;
2338 			break;
2339 		case 'j':
2340 			if (cmd != notspecified) {
2341 				usage(sp, gettext(
2342 				    "conflicting options"));
2343 			}
2344 			cmd = join;
2345 			break;
2346 		case 'l':
2347 			break;
2348 		case 'L':
2349 			break;
2350 		case 'm':
2351 			break;
2352 		case 'o':
2353 			if (cmd != notspecified) {
2354 				usage(sp, gettext(
2355 				    "conflicting options"));
2356 			}
2357 			cmd = isowner;
2358 			break;
2359 		case 'P':
2360 			if (cmd != notspecified) {
2361 				usage(sp, gettext(
2362 				    "conflicting options"));
2363 			}
2364 			cmd = purge;
2365 			break;
2366 		case 'q':
2367 			if (cmd != notspecified) {
2368 				usage(sp, gettext(
2369 				    "conflicting options"));
2370 			}
2371 			cmd = query;
2372 			break;
2373 		case 'r':
2374 			if (cmd != notspecified) {
2375 				usage(sp, gettext(
2376 				    "conflicting options"));
2377 			}
2378 			cmd = release;
2379 			break;
2380 		case 's':
2381 			sname = optarg;
2382 			break;
2383 		case 't':
2384 			if (cmd != notspecified) {
2385 				usage(sp, gettext(
2386 				    "conflicting options"));
2387 			}
2388 			cmd = take;
2389 			break;
2390 		case 'u':
2391 			break;
2392 		case 'w':
2393 			if (cmd != notspecified) {
2394 				usage(sp, gettext(
2395 				    "conflicting options"));
2396 			}
2397 			cmd = withdraw;
2398 			break;
2399 		case 'y':
2400 			break;
2401 		case '?':
2402 			if (optopt == '?')
2403 				usage(sp, NULL);
2404 			/*FALLTHROUGH*/
2405 		default:
2406 			if (cmd == cluster) {    /* cluster is silent */
2407 				md_exit(sp, -1);
2408 			} else {
2409 				usage(sp, gettext(
2410 				    "unknown command"));
2411 			}
2412 		}
2413 	}
2414 
2415 	/* check if suncluster is installed and -A enable specified */
2416 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2417 	    strcmp(auto_take_option, "enable") == 0) {
2418 	    md_eprintf(gettext(
2419 		"cannot enable auto-take when SunCluster is installed\n"));
2420 	    md_exit(sp, 1);
2421 	}
2422 
2423 	/*
2424 	 * At this point we know that if the -A enable option is specified
2425 	 * for an auto-take diskset that SC is not installed on the machine, so
2426 	 * all of the sdssc calls will just be no-ops.
2427 	 */
2428 
2429 	/* list sets */
2430 	if (cmd == notspecified && auto_take == FALSE) {
2431 		parse_printset(argc, argv);
2432 		/*NOTREACHED*/
2433 	}
2434 
2435 	if (meta_check_root(ep) != 0) {
2436 		mde_perror(ep, "");
2437 		md_exit(sp, 1);
2438 	}
2439 
2440 	/* snarf MDDB */
2441 	if (meta_setup_db_locations(ep) != 0) {
2442 		mde_perror(ep, "");
2443 		md_exit(sp, 1);
2444 	}
2445 
2446 	/*
2447 	 * If sname is a diskset - check for multi_node.
2448 	 * It is possible for sname to not exist.
2449 	 */
2450 	if (strcmp(sname, MD_LOCAL_NAME)) {
2451 		if ((sp = metasetname(sname, ep)) != NULL) {
2452 			/* Set exists - check for MN diskset */
2453 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2454 				mde_perror(ep, "");
2455 				md_exit(sp, 1);
2456 			}
2457 			if (MD_MNSET_DESC(sd)) {
2458 				/*
2459 				 * If a MN diskset always set multi_node
2460 				 * regardless of whether the -M option was
2461 				 * used or not (mflag).
2462 				 */
2463 				multi_node = 1;
2464 			} else {
2465 				/*
2466 				 * If a traditional diskset, mflag must
2467 				 * not be set.
2468 				 */
2469 				if (mflag) {
2470 					usage(sp, gettext(
2471 					    "-M option only allowed "
2472 					    "on multi-owner diskset"));
2473 				}
2474 			}
2475 		} else {
2476 			/*
2477 			 * Set name does not exist, set multi_node
2478 			 * based on -M option.
2479 			 */
2480 			if (mflag) {
2481 				multi_node = 1;
2482 			}
2483 		}
2484 	}
2485 
2486 	if (auto_take && multi_node) {
2487 		/* Can't mix multinode and auto-take on a diskset */
2488 		usage(sp,
2489 		    gettext("-A option not allowed on multi-owner diskset"));
2490 	}
2491 
2492 	/*
2493 	 * MN disksets don't use DCS clustering services, so
2494 	 * do not get primary_node for MN diskset since no command
2495 	 * proxying is done to Primary cluster node.  Do not proxy
2496 	 * MN diskset commands of join and withdraw when issued without
2497 	 * a valid setname.
2498 	 * For traditional disksets: proxy all commands except a take
2499 	 * and release.  Use first host listed as the host to send the
2500 	 * command to if there isn't already a primary
2501 	 */
2502 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2503 	    (cmd != take) && (cmd != release) &&
2504 	    (cmd != cluster) && (cmd != join) &&
2505 	    (cmd != withdraw) && (cmd != purge)) {
2506 		stat = sdssc_get_primary_host(sname, primary_node,
2507 		    SDSSC_NODE_NAME_LEN);
2508 		switch (stat) {
2509 			case SDSSC_ERROR:
2510 				return (0);
2511 
2512 			case SDSSC_NO_SERVICE:
2513 				if (hostname != SDSSC_PROXY_PRIMARY) {
2514 					(void) strlcpy(primary_node, hostname,
2515 					    SDSSC_NODE_NAME_LEN);
2516 				} else {
2517 					memset(primary_node, '\0',
2518 					    SDSSC_NODE_NAME_LEN);
2519 				}
2520 				break;
2521 		}
2522 
2523 		/*
2524 		 * We've got a complicated decision here regarding
2525 		 * the hostname. If we didn't get a primary host
2526 		 * and a host name wasn't supplied on the command line
2527 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2528 		 * use what's been found.
2529 		 */
2530 		if (sdssc_cmd_proxy(argc, argv,
2531 		    primary_node[0] == '\0' ?
2532 			SDSSC_PROXY_PRIMARY : primary_node,
2533 		    &error) == SDSSC_PROXY_DONE) {
2534 			exit(error);
2535 		}
2536 	}
2537 
2538 	/* cluster-specific commands */
2539 	if (cmd == cluster) {
2540 		if (multi_node) {
2541 			/*
2542 			 * If a specific MN diskset is given, immediately
2543 			 * fail -C command.
2544 			 */
2545 			usage(sp, gettext(
2546 			    "-C option not allowed on multi-owner diskset"));
2547 		} else {
2548 			parse_cluster(argc, argv);
2549 			/*NOTREACHED*/
2550 		}
2551 	}
2552 
2553 	/* join MultiNode diskset */
2554 	if (cmd == join) {
2555 		/*
2556 		 * If diskset specified, verify that it exists
2557 		 * and is a multinode diskset.
2558 		 */
2559 		if (strcmp(sname, MD_LOCAL_NAME)) {
2560 			if ((sp = metasetname(sname, ep)) == NULL) {
2561 				mde_perror(ep, "");
2562 				md_exit(sp, 1);
2563 			}
2564 
2565 			if (!multi_node) {
2566 				usage(sp, gettext(
2567 				    "-j option only allowed on "
2568 				    "multi-owner diskset"));
2569 			}
2570 		}
2571 		/*
2572 		 * Start mddoors daemon here.
2573 		 * mddoors itself takes care there will be only one
2574 		 * instance running, so starting it twice won't hurt
2575 		 */
2576 		pclose(popen("/usr/lib/lvm/mddoors", "w"));
2577 		parse_joinset(argc, argv);
2578 		/*NOTREACHED*/
2579 	}
2580 
2581 	/* withdraw from MultiNode diskset */
2582 	if (cmd == withdraw) {
2583 		/*
2584 		 * If diskset specified, verify that it exists
2585 		 * and is a multinode diskset.
2586 		 */
2587 		if (strcmp(sname, MD_LOCAL_NAME)) {
2588 			if ((sp = metasetname(sname, ep)) == NULL) {
2589 				mde_perror(ep, "");
2590 				md_exit(sp, 1);
2591 			}
2592 
2593 			if (!multi_node) {
2594 				usage(sp, gettext(
2595 				    "-w option only allowed on "
2596 				    "multi-owner diskset"));
2597 			}
2598 		}
2599 		parse_withdrawset(argc, argv);
2600 		/*NOTREACHED*/
2601 	}
2602 
2603 	/* must have set for everything else */
2604 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2605 		usage(sp, gettext("setname must be specified"));
2606 
2607 	/* add hosts or drives */
2608 	if (cmd == add) {
2609 		/*
2610 		 * In the multi node case start mddoors daemon.
2611 		 * mddoors itself takes care there will be
2612 		 * only one instance running, so starting it twice won't hurt
2613 		 */
2614 		if (multi_node) {
2615 			pclose(popen("/usr/lib/lvm/mddoors", "w"));
2616 		}
2617 
2618 		parse_add(argc, argv);
2619 		/*NOTREACHED*/
2620 	}
2621 
2622 	/* re-balance the replicas */
2623 	if (cmd == balance) {
2624 		parse_balance(argc, argv);
2625 		/*NOTREACHED*/
2626 	}
2627 
2628 	/* delete hosts or drives */
2629 	if (cmd == delete) {
2630 		parse_del(argc, argv);
2631 		/*NOTREACHED*/
2632 	}
2633 
2634 	/* check ownership */
2635 	if (cmd == isowner) {
2636 		parse_isowner(argc, argv);
2637 		/*NOTREACHED*/
2638 	}
2639 
2640 	/* purge the diskset */
2641 	if (cmd == purge) {
2642 		parse_purge(argc, argv);
2643 		/*NOTREACHED*/
2644 	}
2645 
2646 	/* query for data marks */
2647 	if (cmd == query) {
2648 		parse_query(argc, argv);
2649 		/*NOTREACHED*/
2650 	}
2651 
2652 	/* release ownership */
2653 	if (cmd == release) {
2654 		if (multi_node) {
2655 			/* Can't release multinode diskset */
2656 			usage(sp, gettext(
2657 			    "-r option not allowed on multi-owner diskset"));
2658 		} else {
2659 			parse_releaseset(argc, argv);
2660 			/*NOTREACHED*/
2661 		}
2662 	}
2663 
2664 	/* take ownership */
2665 	if (cmd == take) {
2666 		if (multi_node) {
2667 			/* Can't take multinode diskset */
2668 			usage(sp, gettext(
2669 			    "-t option not allowed on multi-owner diskset"));
2670 		} else {
2671 			parse_takeset(argc, argv);
2672 			/*NOTREACHED*/
2673 		}
2674 	}
2675 
2676 	/* take ownership of auto-take sets */
2677 	if (auto_take) {
2678 		parse_autotake(argc, argv);
2679 		/*NOTREACHED*/
2680 	}
2681 
2682 	/*NOTREACHED*/
2683 	return (0);
2684 }
2685