xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision b54157c1b1bf9673e4da8b526477d59202cd08a6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Metadevice diskset utility.
30  */
31 
#include <errno.h>
#include <stdlib.h>
#include <meta.h>
#include <sys/lvm/md_mddb.h>
#include <sdssc.h>
35 
/*
 * Identifies a metaset operation.  Values are consecutive, starting at
 * zero with notspecified, in the order listed here.
 */
enum metaset_cmd {
	notspecified = 0,
	add,
	balance,
	delete,
	cluster,
	isowner,
	purge,
	query,
	release,
	take,
	join,		/* Join a multinode diskset */
	withdraw	/* Withdraw from a multinode diskset */
};
50 
/*
 * Identifies a cluster sub-command.  Values are consecutive, starting at
 * zero with ccnotspecified, in the order listed here.
 */
enum cluster_cmd {
	ccnotspecified = 0,
	clusterversion,	/* Return the version of the cluster I/F */
	clusterdisksin,	/* List disks in a given diskset */
	clustertake,	/* back door for Cluster take */
	clusterrelease,	/* back door for Cluster release */
	clusterpurge,	/* back door for Cluster purge */
	clusterproxy	/* proxy the args after '--' to primary */
};
60 
61 static void
62 usage(
63 	mdsetname_t	*sp,
64 	char		*string)
65 {
66 	if ((string != NULL) && (*string != '\0'))
67 		md_eprintf("%s\n", string);
68 	(void) fprintf(stderr, gettext(
69 	    "usage:\t%s -s setname -a [-A enable | disable] -h hostname ...\n"
70 	    "	%s -s setname -a [-M] -h hostname ...\n"
71 	    "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
72 	    "	%s -s setname -d [-M] -h hostname ...\n"
73 	    "	%s -s setname -d [-M] -f -h all-hostnames\n"
74 	    "	%s -s setname -d [-M] [-f] drivename ...\n"
75 	    "	%s -s setname -d [-M] [-f] hostname ...\n"
76 	    "	%s -s setname -A enable | disable\n"
77 	    "	%s -s setname -t [-f]\n"
78 	    "	%s -s setname -r\n"
79 	    "	%s [-s setname] -j [-M]\n"
80 	    "	%s [-s setname] -w [-M]\n"
81 	    "	%s -s setname -P [-M]\n"
82 	    "	%s -s setname -b [-M]\n"
83 	    "	%s -s setname -o [-M] [-h hostname]\n"
84 	    "	%s [-s setname]\n"
85 	    "\n"
86 	    "		hostname = contents of /etc/nodename\n"
87 	    "		drivename = cNtNdN no slice\n"
88 	    "		[-M] for multi-owner set is optional except"
89 	    " on set creation\n"),
90 	    myname, myname, myname, myname, myname, myname, myname, myname,
91 	    myname, myname, myname, myname, myname, myname, myname, myname);
92 	md_exit(sp, (string == NULL) ? 0 : 1);
93 }
94 
95 /*
96  * The svm.sync rc script relies heavily on the metaset output.
97  * Any changes to the metaset output MUST verify that the rc script
98  * does not break. Not doing so may potentially leave the system
99  * unusable. You have been WARNED.
100  */
101 static int
102 printset(mdsetname_t *sp, md_error_t *ep)
103 {
104 	int			i, j;
105 	md_set_desc		*sd;
106 	md_drive_desc		*dd, *p;
107 	int			max_meds;
108 	md_mnnode_desc		*nd;
109 
110 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
111 		return (-1);
112 
113 	/*
114 	 * Only get set owner information for traditional diskset.
115 	 * This set owner information is stored in the node records
116 	 * for a MN diskset.
117 	 */
118 	if (!(MD_MNSET_DESC(sd))) {
119 		if (metaget_setownership(sp, ep) == -1)
120 			return (-1);
121 	}
122 
123 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
124 	    ep)) == NULL) && !mdisok(ep))
125 		return (-1);
126 
127 	if (MD_MNSET_DESC(sd)) {
128 		(void) printf(gettext(
129 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
130 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
131 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
132 		    (dd != NULL)) {
133 			(void) printf(gettext(
134 			    "Master and owner information unavailable "
135 			    "until joined (metaset -j)\n"));
136 		}
137 	} else {
138 		(void) printf(gettext(
139 		    "\nSet name = %s, Set number = %d\n"),
140 		    sp->setname, sp->setno);
141 	}
142 
143 	if (MD_MNSET_DESC(sd)) {
144 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
145 		    gettext("Host"), gettext("Owner"), gettext("Member"));
146 		nd = sd->sd_nodelist;
147 		while (nd) {
148 			/*
149 			 * Don't print nodes that aren't ok since they may be
150 			 * removed from config during a reconfig cycle.  If a
151 			 * node was being added to a diskset and the entire
152 			 * cluster went down but the node being added was unable
153 			 * to reboot, there's no way to know if that node had
154 			 * its own node record set to OK or not.  So, node
155 			 * record is left in ADD state during reconfig cycle.
156 			 * When that node reboots and returns to the cluster,
157 			 * the reconfig cycle will either remove the node
158 			 * record (if not marked OK on that node) or will mark
159 			 * it OK on all nodes.
160 			 * It is very important to only remove a node record
161 			 * from the other nodes when that node record is not
162 			 * marked OK on its own node - otherwise, different
163 			 * nodes would have different nodelists possibly
164 			 * causing different nodes to to choose different
165 			 * masters.
166 			 */
167 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
168 				nd = nd->nd_next;
169 				continue;
170 			}
171 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
172 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
173 				(void) printf(
174 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
175 				    nd->nd_nodename, gettext("multi-owner"),
176 				    gettext("Yes"));
177 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
178 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
179 				/* Should never be able to happen */
180 				(void) printf(
181 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
182 				    nd->nd_nodename, gettext("multi-owner"),
183 				    gettext("No"));
184 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
185 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
186 				(void) printf(
187 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
188 				    nd->nd_nodename, gettext(""),
189 				    gettext("Yes"));
190 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
191 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
192 				(void) printf(
193 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
194 				    nd->nd_nodename, gettext(""),
195 				    gettext("No"));
196 			}
197 			nd = nd->nd_next;
198 		}
199 	} else {
200 		(void) printf("\n%-19.19s %-5.5s\n",
201 		    gettext("Host"), gettext("Owner"));
202 		for (i = 0; i < MD_MAXSIDES; i++) {
203 			/* Skip empty slots */
204 			if (sd->sd_nodes[i][0] == '\0')
205 				continue;
206 
207 			/*
208 			 * Standard hostname field is 17 bytes but metaset will
209 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
210 			 */
211 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
212 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
213 			    (sd->sd_isown[i] ? gettext("Yes (auto)") :
214 			    gettext("No (auto)"))
215 			    : (sd->sd_isown[i] ? gettext("Yes") : "")));
216 		}
217 	}
218 
219 	if (sd->sd_med.n_cnt > 0)
220 		(void) printf("\n%-19.19s %-7.7s\n",
221 		    gettext("Mediator Host(s)"), gettext("Aliases"));
222 
223 	if ((max_meds = get_max_meds(ep)) == 0)
224 		return (-1);
225 
226 	for (i = 0; i < max_meds; i++) {
227 		if (sd->sd_med.n_lst[i].a_cnt == 0)
228 			continue;
229 		/*
230 		 * Standard hostname field is 17 bytes but metaset will
231 		 * display up to MD_MAX_NODENAME, def in meta_basic.h
232 		 */
233 		(void) printf("  %-17.*s   ", MD_MAX_NODENAME,
234 		    sd->sd_med.n_lst[i].a_nm[0]);
235 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
236 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
237 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
238 				(void) printf(gettext(", "));
239 		}
240 		(void) printf("\n");
241 	}
242 
243 	if (dd) {
244 		int	len = 0;
245 
246 
247 		/*
248 		 * Building a format string on the fly that will
249 		 * be used in (f)printf. This allows the length
250 		 * of the ctd to vary from small to large without
251 		 * looking horrible.
252 		 */
253 		for (p = dd; p != NULL; p = p->dd_next)
254 			len = max(len, strlen(p->dd_dnp->cname));
255 
256 		len += 2;
257 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
258 		    gettext("Drive"),
259 		    gettext("Dbase"));
260 		for (p = dd; p != NULL; p = p->dd_next) {
261 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
262 			    p->dd_dnp->cname,
263 			    (p->dd_dbcnt ? gettext("Yes") :
264 			    gettext("No")));
265 		}
266 	}
267 
268 	return (0);
269 }
270 
271 static int
272 printsets(mdsetname_t *sp, md_error_t *ep)
273 {
274 	int			i;
275 	mdsetname_t		*sp1;
276 	set_t			max_sets;
277 
278 	/*
279 	 * print setname given.
280 	 */
281 	if (! metaislocalset(sp)) {
282 		if (printset(sp, ep))
283 			return (-1);
284 		return (0);
285 	}
286 
287 	if ((max_sets = get_max_sets(ep)) == 0)
288 		return (-1);
289 
290 	/*
291 	 * Print all known sets
292 	 */
293 	for (i = 1; i < max_sets; i++) {
294 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
295 			if (! mdiserror(ep, MDE_NO_SET))
296 				break;
297 			mdclrerror(ep);
298 			continue;
299 		}
300 
301 		if (printset(sp1, ep))
302 			break;
303 	}
304 	if (! mdisok(ep))
305 		return (-1);
306 
307 	return (0);
308 }
309 
310 /*
311  * Print the current versionn of the cluster contract private interface.
312  */
313 static void
314 printclusterversion()
315 {
316 	printf("%s\n", METASETIFVERSION);
317 }
318 
319 /*
320  * Print the disks that make up the given disk set. This is used
321  * exclusively by Sun Cluster and is contract private.
322  * Should never be called with sname of a Multinode diskset.
323  */
324 static int
325 printdisksin(char *sname, md_error_t *ep)
326 {
327 	mdsetname_t	*sp;
328 	md_drive_desc	*dd, *p;
329 
330 	if ((sp = metasetname(sname, ep)) == NULL) {
331 
332 		/*
333 		 * During a deletion of a set the associated service is
334 		 * put offline. The SC3.0 reservation code calls disksuite
335 		 * to find a list of disks associated with the set so that
336 		 * it can release the reservation on those disks. In this
337 		 * case there won't be any disks or even a set left. So just
338 		 * return.
339 		 */
340 		return (0);
341 	}
342 
343 	if (metaget_setownership(sp, ep) == -1)
344 		return (-1);
345 
346 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
347 	    ep)) == NULL) && !mdisok(ep))
348 		return (-1);
349 
350 	for (p = dd; p != NULL; p = p->dd_next)
351 		(void) printf("%s\n", p->dd_dnp->rname);
352 
353 	return (0);
354 }
355 
356 static void
357 parse_printset(int argc, char **argv)
358 {
359 	int		c;
360 	mdsetname_t	*sp = NULL;
361 	char		*sname = MD_LOCAL_NAME;
362 	md_error_t	status = mdnullerror;
363 	md_error_t	*ep = &status;
364 
365 	/* reset and parse args */
366 	optind = 1;
367 	opterr = 1;
368 	while ((c = getopt(argc, argv, "s:")) != -1) {
369 		switch (c) {
370 		case 's':
371 			sname = optarg;
372 			break;
373 		default:
374 			usage(sp, gettext("unknown options"));
375 		}
376 	}
377 
378 	argc -= optind;
379 	argv += optind;
380 
381 	if (argc != 0)
382 		usage(sp, gettext("too many args"));
383 
384 	if ((sp = metasetname(sname, ep)) == NULL) {
385 		mde_perror(ep, "");
386 		md_exit(sp, 1);
387 	}
388 
389 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
390 		mde_perror(ep, "");
391 		md_exit(sp, 1);
392 	}
393 
394 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
395 		mde_perror(ep, "");
396 		md_exit(sp, 1);
397 	}
398 
399 	md_exit(sp, 0);
400 }
401 
402 static void
403 parse_add(int argc, char **argv)
404 {
405 	int			c, created_set;
406 	int			hosts = FALSE;
407 	int			meds = FALSE;
408 	int			auto_take = FALSE;
409 	int			force_label = FALSE;
410 	int			default_size = TRUE;
411 	mdsetname_t		*sp = NULL;
412 	char			*sname = MD_LOCAL_NAME;
413 	md_error_t		status = mdnullerror;
414 	md_error_t		 *ep = &status;
415 	mddrivenamelist_t	*dnlp = NULL;
416 	mddrivenamelist_t	*p;
417 	daddr_t			dbsize, nblks;
418 	mdsetname_t		*local_sp = NULL;
419 	int			multi_node = 0;
420 	md_set_desc		*sd;
421 	rval_e			sdssc_rval;
422 
423 	/* reset and parse args */
424 	optind = 1;
425 	opterr = 1;
426 	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
427 		switch (c) {
428 		case 'M':
429 			multi_node = 1;
430 			break;
431 		case 'A':
432 			/* verified sub-option in main */
433 			if (strcmp(optarg, "enable") == 0)
434 				auto_take = TRUE;
435 			break;
436 		case 'a':
437 			break;
438 		case 'h':
439 		case 'm':
440 			if (meds == TRUE || hosts == TRUE)
441 				usage(sp, gettext(
442 				    "only one -m or -h option allowed"));
443 
444 			if (default_size == FALSE || force_label == TRUE)
445 				usage(sp, gettext(
446 				    "conflicting options"));
447 
448 			if (c == 'h')
449 				hosts = TRUE;
450 			else
451 				meds = TRUE;
452 			break;
453 		case 'l':
454 			if (hosts == TRUE || meds == TRUE)
455 				usage(sp, gettext(
456 				    "conflicting options"));
457 			if (sscanf(optarg, "%ld", &dbsize) != 1) {
458 				md_eprintf(gettext(
459 				    "%s: bad format\n"), optarg);
460 				usage(sp, "");
461 			}
462 
463 			default_size = FALSE;
464 			break;
465 		case 'L':
466 			/* Same criteria as -l */
467 			if (hosts == TRUE || meds == TRUE)
468 				usage(sp, gettext(
469 				    "conflicting options"));
470 			force_label = TRUE;
471 			break;
472 		case 's':
473 			sname = optarg;
474 			break;
475 		default:
476 			usage(sp, gettext(
477 			    "unknown options"));
478 		}
479 	}
480 
481 	/* Can only use -A enable when creating the single-node set */
482 	if (auto_take && hosts != TRUE)
483 		usage(sp, gettext("conflicting options"));
484 
485 	argc -= optind;
486 	argv += optind;
487 
488 	/*
489 	 * Add hosts
490 	 */
491 	if (hosts == TRUE) {
492 
493 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
494 			mde_perror(ep, "");
495 			md_exit(local_sp, 1);
496 		}
497 
498 		if (meta_lock(local_sp, TRUE, ep) != 0) {
499 			mde_perror(ep, "");
500 			md_exit(local_sp, 1);
501 		}
502 
503 		/*
504 		 * Keep track of Cluster set creation. Need to complete
505 		 * the transaction no matter if the set was created or not.
506 		 */
507 		created_set = 0;
508 
509 		/*
510 		 * Have no set, cannot take the lock, so only take the
511 		 * local lock.
512 		 */
513 		if ((sp = metasetname(sname, ep)) == NULL) {
514 			sdssc_rval = 0;
515 			if (multi_node) {
516 				/*
517 				 * When running on a cluster system that
518 				 * does not support MN disksets, the routine
519 				 * sdssc_mo_create_begin will be bound
520 				 * to the SVM routine not_bound_error
521 				 * which returns SDSSC_NOT_BOUND_ERROR.
522 				 *
523 				 * When running on a cluster system that
524 				 * does support MN disksets, the routine
525 				 * sdssc_mo_create_begin will be bound to
526 				 * the sdssc_mo_create_begin routine in
527 				 * library libsdssc_so.  A call to
528 				 * sdssc_mo_create_begin will return with
529 				 * either SDSSC_ERROR or SDSSC_OKAY. If
530 				 * an SDSSC_OKAY is returned, then the
531 				 * cluster framework has allocated a
532 				 * set number for this new set that is unique
533 				 * across traditional and MN disksets.
534 				 * Libmeta will get this unique set number
535 				 * by calling sdssc_get_index.
536 				 *
537 				 * When running on a non-cluster system,
538 				 * the routine sdssc_mo_create_begin
539 				 * will be bound to the SVM routine
540 				 * not_bound which returns SDSSC_NOT_BOUND.
541 				 * In this case, all sdssc routines will
542 				 * return SDSSC_NOT_BOUND.  No need to check
543 				 * for return value of SDSSC_NOT_BOUND since
544 				 * the libmeta call to get the set number
545 				 * (sdssc_get_index) will also fail with
546 				 * SDSSC_NOT_BOUND causing libmeta to
547 				 * determine its own set number.
548 				 */
549 				sdssc_rval = sdssc_mo_create_begin(sname, argc,
550 				    argv, SDSSC_PICK_SETNO);
551 				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
552 					mderror(ep, MDE_NOT_MN, NULL);
553 					mde_perror(ep,
554 					"Cluster node does not support "
555 					"multi-owner diskset operations");
556 					md_exit(local_sp, 1);
557 				} else if (sdssc_rval == SDSSC_ERROR) {
558 					mde_perror(ep, "");
559 					md_exit(local_sp, 1);
560 				}
561 			} else {
562 				sdssc_rval = sdssc_create_begin(sname, argc,
563 				    argv, SDSSC_PICK_SETNO);
564 				if (sdssc_rval == SDSSC_ERROR) {
565 					mde_perror(ep, "");
566 					md_exit(local_sp, 1);
567 				}
568 			}
569 			/*
570 			 * Created diskset (as opposed to adding a
571 			 * host to an existing diskset).
572 			 */
573 			created_set = 1;
574 
575 			sp = Zalloc(sizeof (*sp));
576 			sp->setname = Strdup(sname);
577 			sp->lockfd = MD_NO_LOCK;
578 			mdclrerror(ep);
579 		} else {
580 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
581 				mde_perror(ep, "");
582 				md_exit(local_sp, 1);
583 			}
584 			if (MD_MNSET_DESC(sd)) {
585 				multi_node = 1;
586 			}
587 
588 			/*
589 			 * can't add hosts to an existing set & enable
590 			 * auto-take
591 			 */
592 			if (auto_take)
593 				usage(sp, gettext("conflicting options"));
594 
595 			/*
596 			 * Have a valid set, take the set lock also.
597 			 *
598 			 * A MN diskset does not use the set meta_lock but
599 			 * instead uses the clnt_lock of rpc.metad and the
600 			 * suspend/resume feature of the rpc.mdcommd.  Can't
601 			 * use set meta_lock since class 1 messages are
602 			 * grabbing this lock and if this thread is holding
603 			 * the set meta_lock then no rpc.mdcommd suspend
604 			 * can occur.
605 			 */
606 			if (!multi_node) {
607 				if (meta_lock(sp, TRUE, ep) != 0) {
608 					mde_perror(ep, "");
609 					md_exit(local_sp, 1);
610 				}
611 			}
612 		}
613 
614 		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
615 		    ep)) {
616 			if (created_set)
617 				sdssc_create_end(sname, SDSSC_CLEANUP);
618 			mde_perror(&status, "");
619 			if (!multi_node)
620 				(void) meta_unlock(sp, ep);
621 			md_exit(local_sp, 1);
622 		}
623 
624 		if (created_set)
625 			sdssc_create_end(sname, SDSSC_COMMIT);
626 
627 		else {
628 			/*
629 			 * If adding hosts to existing diskset,
630 			 * call DCS svcs
631 			 */
632 			sdssc_add_hosts(sname, argc, argv);
633 		}
634 		if (!multi_node)
635 			(void) meta_unlock(sp, ep);
636 		md_exit(local_sp, 0);
637 	}
638 
639 	/*
640 	 * Add mediators
641 	 */
642 	if (meds == TRUE) {
643 
644 		if ((sp = metasetname(sname, ep)) == NULL) {
645 			mde_perror(ep, "");
646 			md_exit(local_sp, 1);
647 		}
648 
649 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
650 			mde_perror(ep, "");
651 			md_exit(local_sp, 1);
652 		}
653 
654 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
655 			mde_perror(ep, "");
656 			md_exit(local_sp, 1);
657 		}
658 		if (MD_MNSET_DESC(sd)) {
659 			multi_node = 1;
660 		}
661 
662 		if (meta_lock(local_sp, TRUE, ep) != 0) {
663 			mde_perror(ep, "");
664 			md_exit(local_sp, 1);
665 		}
666 		/*
667 		 * A MN diskset does not use the set meta_lock but
668 		 * instead uses the clnt_lock of rpc.metad and the
669 		 * suspend/resume feature of the rpc.mdcommd.  Can't
670 		 * use set meta_lock since class 1 messages are
671 		 * grabbing this lock and if this thread is holding
672 		 * the set meta_lock then no rpc.mdcommd suspend
673 		 * can occur.
674 		 */
675 		if (!multi_node) {
676 			if (meta_lock(sp, TRUE, ep) != 0) {
677 				mde_perror(ep, "");
678 				md_exit(local_sp, 1);
679 			}
680 		}
681 
682 		if (meta_set_addmeds(sp, argc, argv, ep)) {
683 			mde_perror(&status, "");
684 			if (!multi_node)
685 				(void) meta_unlock(sp, ep);
686 			md_exit(local_sp, 1);
687 		}
688 
689 		if (!multi_node)
690 			(void) meta_unlock(sp, ep);
691 		md_exit(local_sp, 0);
692 	}
693 
694 	/*
695 	 * Add drives
696 	 */
697 	if ((sp = metasetname(sname, ep)) == NULL) {
698 		mde_perror(ep, "");
699 		md_exit(local_sp, 1);
700 	}
701 
702 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
703 		mde_perror(ep, "");
704 		md_exit(local_sp, 1);
705 	}
706 
707 	/* Determine if diskset is a MN diskset or not */
708 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
709 		mde_perror(ep, "");
710 		md_exit(local_sp, 1);
711 	}
712 	if (MD_MNSET_DESC(sd)) {
713 		multi_node = 1;
714 	}
715 
716 	if (meta_lock(local_sp, TRUE, ep) != 0) {
717 		mde_perror(ep, "");
718 		md_exit(local_sp, 1);
719 	}
720 
721 	/* Make sure database size is within limits */
722 	if (default_size == FALSE) {
723 		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
724 		    (!multi_node && dbsize < MDDB_MINBLKS))
725 			usage(sp, gettext(
726 			    "size (-l) is too small"));
727 
728 		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
729 		    (!multi_node && dbsize > MDDB_MAXBLKS))
730 			usage(sp, gettext(
731 			    "size (-l) is too big"));
732 	}
733 
734 	/*
735 	 * Have a valid set, take the set lock also.
736 	 *
737 	 * A MN diskset does not use the set meta_lock but
738 	 * instead uses the clnt_lock of rpc.metad and the
739 	 * suspend/resume feature of the rpc.mdcommd.  Can't
740 	 * use set meta_lock since class 1 messages are
741 	 * grabbing this lock and if this thread is holding
742 	 * the set meta_lock then no rpc.mdcommd suspend
743 	 * can occur.
744 	 */
745 	if (!multi_node) {
746 		if (meta_lock(sp, TRUE, ep) != 0) {
747 			mde_perror(ep, "");
748 			md_exit(local_sp, 1);
749 		}
750 	}
751 
752 
753 	/*
754 	 * If using the default size,
755 	 *   then let's adjust the default to the minimum
756 	 *   size currently in use.
757 	 */
758 	if (default_size) {
759 		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
760 		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
761 			mdclrerror(ep);
762 		else
763 			dbsize = nblks;	/* adjust replica size */
764 	}
765 
766 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
767 		mde_perror(ep, "");
768 		if (!multi_node)
769 			(void) meta_unlock(sp, ep);
770 		md_exit(local_sp, 1);
771 	}
772 
773 	if (c == 0) {
774 		md_perror(gettext(
775 		    "No drives specified to add.\n"));
776 		if (!multi_node)
777 			(void) meta_unlock(sp, ep);
778 		md_exit(local_sp, 1);
779 	}
780 
781 	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
782 		metafreedrivenamelist(dnlp);
783 		mde_perror(ep, "");
784 		if (!multi_node)
785 			(void) meta_unlock(sp, ep);
786 		md_exit(local_sp, 1);
787 	}
788 
789 	/*
790 	 * MN disksets don't have a device id in the master block
791 	 * For traditional disksets, check for the drive device
792 	 * id not fitting in the master block
793 	 */
794 	if (!multi_node) {
795 		for (p = dnlp; p != NULL; p = p->next) {
796 			int 		fd;
797 			ddi_devid_t	devid;
798 			mdname_t	*np;
799 
800 			np = metaslicename(p->drivenamep, 0, ep);
801 			if (np == NULL)
802 				continue;
803 
804 			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
805 				continue;
806 
807 			if (devid_get(fd, &devid) == 0) {
808 				size_t len;
809 
810 				len = devid_sizeof(devid);
811 				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
812 					(void) mddserror(ep,
813 					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
814 					    np->rname, NULL);
815 				devid_free(devid);
816 			} else {
817 				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
818 				    NULL, NULL, np->rname, NULL);
819 			}
820 			(void) close(fd);
821 		}
822 	}
823 
824 	/*
825 	 * MN disksets don't use DCS clustering services.
826 	 * For traditional disksets:
827 	 * There's not really much we can do here if this call fails.
828 	 * The drives have been added to the set and DiskSuite believes
829 	 * it owns the drives.
830 	 * Relase the set and hope for the best.
831 	 */
832 	if ((!multi_node) &&
833 	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
834 		meta_set_release(sp, ep);
835 		printf(gettext(
836 		    "Sun Clustering failed to make set primary\n"));
837 	}
838 
839 	metafreedrivenamelist(dnlp);
840 	if (!multi_node)
841 		(void) meta_unlock(sp, ep);
842 	md_exit(local_sp, 0);
843 }
844 
845 static void
846 parse_balance(int argc, char **argv)
847 {
848 	int		c;
849 	mdsetname_t	*sp = NULL;
850 	char		*sname = MD_LOCAL_NAME;
851 	md_error_t	status = mdnullerror;
852 	md_set_desc	*sd;
853 	int		multi_node = 0;
854 
855 	/* reset and parse args */
856 	optind = 1;
857 	opterr = 1;
858 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
859 		switch (c) {
860 		case 'M':
861 			break;
862 		case 'b':
863 			break;
864 		case 's':
865 			sname = optarg;
866 			break;
867 		default:
868 			usage(sp, gettext("unknown options"));
869 		}
870 	}
871 
872 	argc -= optind;
873 	argv += optind;
874 
875 	if (argc != 0)
876 		usage(sp, gettext("too many args"));
877 
878 	if ((sp = metasetname(sname, &status)) == NULL) {
879 		mde_perror(&status, "");
880 		md_exit(sp, 1);
881 	}
882 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
883 		mde_perror(&status, "");
884 		md_exit(sp, 1);
885 	}
886 	if (MD_MNSET_DESC(sd)) {
887 		multi_node = 1;
888 	}
889 	/*
890 	 * Have a valid set, take the set lock also.
891 	 *
892 	 * A MN diskset does not use the set meta_lock but
893 	 * instead uses the clnt_lock of rpc.metad and the
894 	 * suspend/resume feature of the rpc.mdcommd.  Can't
895 	 * use set meta_lock since class 1 messages are
896 	 * grabbing this lock and if this thread is holding
897 	 * the set meta_lock then no rpc.mdcommd suspend
898 	 * can occur.
899 	 */
900 	if (!multi_node) {
901 		if (meta_lock(sp, TRUE, &status) != 0) {
902 			mde_perror(&status, "");
903 			md_exit(sp, 1);
904 		}
905 	}
906 
907 	if (meta_set_balance(sp, &status) != 0) {
908 		mde_perror(&status, "");
909 		md_exit(sp, 1);
910 	}
911 	md_exit(sp, 0);
912 }
913 
914 static void
915 parse_autotake(int argc, char **argv)
916 {
917 	int			c;
918 	int			enable = 0;
919 	mdsetname_t		*sp = NULL;
920 	char			*sname = MD_LOCAL_NAME;
921 	md_error_t		status = mdnullerror;
922 	md_error_t		*ep = &status;
923 
924 	/* reset and parse args */
925 	optind = 1;
926 	opterr = 1;
927 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
928 		switch (c) {
929 		case 'A':
930 			/* verified sub-option in main */
931 			if (strcmp(optarg, "enable") == 0)
932 				enable = 1;
933 			break;
934 		case 's':
935 			/* verified presence of setname in main */
936 			sname = optarg;
937 			break;
938 		default:
939 			usage(sp, gettext("unknown options"));
940 		}
941 	}
942 
943 	if ((sp = metasetname(sname, ep)) == NULL) {
944 		mde_perror(ep, "");
945 		md_exit(sp, 1);
946 	}
947 
948 	if (meta_lock(sp, TRUE, ep) != 0) {
949 		mde_perror(ep, "");
950 		md_exit(sp, 1);
951 	}
952 
953 	if (meta_check_ownership(sp, ep) != 0) {
954 		mde_perror(ep, "");
955 		md_exit(sp, 1);
956 	}
957 
958 	if (meta_set_auto_take(sp, enable, ep) != 0) {
959 		mde_perror(ep, "");
960 		md_exit(sp, 1);
961 	}
962 
963 	md_exit(sp, 0);
964 }
965 
966 static void
967 parse_del(int argc, char **argv)
968 {
969 	int			c;
970 	mdsetname_t		*sp = NULL;
971 	char			*sname = MD_LOCAL_NAME;
972 	int			hosts = FALSE;
973 	int			meds = FALSE;
974 	int			forceflg = FALSE;
975 	md_error_t		status = mdnullerror;
976 	md_error_t		*ep = &status;
977 	mddrivenamelist_t	*dnlp = NULL;
978 	mdsetname_t		*local_sp = NULL;
979 	md_set_desc		*sd;
980 	int			multi_node = 0;
981 
982 	/* reset and parse args */
983 	optind = 1;
984 	opterr = 1;
985 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
986 		switch (c) {
987 		case 'M':
988 			break;
989 		case 'd':
990 			break;
991 		case 'f':
992 			forceflg = TRUE;
993 			break;
994 		case 'h':
995 		case 'm':
996 			if (meds == TRUE || hosts == TRUE)
997 				usage(sp, gettext(
998 				    "only one -m or -h option allowed"));
999 
1000 			if (c == 'h')
1001 				hosts = TRUE;
1002 			else
1003 				meds = TRUE;
1004 			break;
1005 		case 's':
1006 			sname = optarg;
1007 			break;
1008 		default:
1009 			usage(sp, gettext("unknown options"));
1010 		}
1011 	}
1012 
1013 	argc -= optind;
1014 	argv += optind;
1015 
1016 	if ((sp = metasetname(sname, ep)) == NULL) {
1017 		mde_perror(ep, "");
1018 		md_exit(local_sp, 1);
1019 	}
1020 
1021 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1022 		mde_perror(ep, "");
1023 		md_exit(local_sp, 1);
1024 	}
1025 
1026 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1027 		mde_perror(ep, "");
1028 		md_exit(local_sp, 1);
1029 	}
1030 	if (MD_MNSET_DESC(sd))
1031 		multi_node = 1;
1032 
1033 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1034 		mde_perror(ep, "");
1035 		md_exit(local_sp, 1);
1036 	}
1037 
1038 	/*
1039 	 * Have a valid set, take the set lock also.
1040 	 *
1041 	 * A MN diskset does not use the set meta_lock but
1042 	 * instead uses the clnt_lock of rpc.metad and the
1043 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1044 	 * use set meta_lock since class 1 messages are
1045 	 * grabbing this lock and if this thread is holding
1046 	 * the set meta_lock then no rpc.mdcommd suspend
1047 	 * can occur.
1048 	 */
1049 	if (!multi_node) {
1050 		if (meta_lock(sp, TRUE, ep) != 0) {
1051 			mde_perror(ep, "");
1052 			md_exit(local_sp, 1);
1053 		}
1054 	}
1055 
1056 	/*
1057 	 * Delete hosts
1058 	 */
1059 	if (hosts == TRUE) {
1060 		if (meta_check_ownership(sp, ep) != 0) {
1061 			/*
1062 			 * If we don't own the set bail out here otherwise
1063 			 * we could delete the node from the DCS service
1064 			 * yet not delete the host from the set.
1065 			 */
1066 			mde_perror(ep, "");
1067 			if (!multi_node)
1068 				(void) meta_unlock(sp, ep);
1069 			md_exit(local_sp, 1);
1070 		}
1071 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1072 			if (!metad_isautotakebyname(sname)) {
1073 				/*
1074 				 * SC could have been installed after the set
1075 				 * was created. We still want to be able to
1076 				 * delete these sets.
1077 				 */
1078 				md_perror(gettext(
1079 				    "Failed to delete hosts from DCS service"));
1080 				if (!multi_node)
1081 					(void) meta_unlock(sp, ep);
1082 				md_exit(local_sp, 1);
1083 			}
1084 		}
1085 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1086 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1087 				(void) printf(gettext(
1088 				    "Failed to restore host(s) in DCS "
1089 				    "database\n"));
1090 			}
1091 			mde_perror(ep, "");
1092 			if (!multi_node)
1093 				(void) meta_unlock(sp, ep);
1094 			md_exit(local_sp, 1);
1095 		}
1096 		if (!multi_node)
1097 			(void) meta_unlock(sp, ep);
1098 		md_exit(local_sp, 0);
1099 	}
1100 
1101 	/*
1102 	 * Delete mediators
1103 	 */
1104 	if (meds == TRUE) {
1105 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1106 			mde_perror(ep, "");
1107 			if (!multi_node)
1108 				(void) meta_unlock(sp, ep);
1109 			md_exit(local_sp, 1);
1110 		}
1111 		if (!multi_node)
1112 			(void) meta_unlock(sp, ep);
1113 		md_exit(local_sp, 0);
1114 	}
1115 
1116 	/*
1117 	 * Delete drives
1118 	 */
1119 
1120 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1121 		mde_perror(ep, "");
1122 		if (!multi_node)
1123 			(void) meta_unlock(sp, ep);
1124 		md_exit(local_sp, 1);
1125 	}
1126 
1127 	if (c == 0) {
1128 		md_perror(gettext(
1129 		    "No drives specified to delete.\n"));
1130 		if (!multi_node)
1131 			(void) meta_unlock(sp, ep);
1132 		md_exit(local_sp, 1);
1133 	}
1134 
1135 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1136 		metafreedrivenamelist(dnlp);
1137 		mde_perror(ep, "");
1138 		if (!multi_node)
1139 			(void) meta_unlock(sp, ep);
1140 		md_exit(local_sp, 1);
1141 	}
1142 
1143 	metafreedrivenamelist(dnlp);
1144 	if (!multi_node)
1145 		(void) meta_unlock(sp, ep);
1146 	md_exit(local_sp, 0);
1147 }
1148 
1149 static void
1150 parse_isowner(int argc, char **argv)
1151 {
1152 	int		c;
1153 	mdsetname_t	*sp = NULL;
1154 	char		*sname = MD_LOCAL_NAME;
1155 	md_error_t	status = mdnullerror;
1156 	md_error_t	*ep = &status;
1157 	char		*host = NULL;
1158 
1159 	/* reset and parse args */
1160 	optind = 1;
1161 	opterr = 1;
1162 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1163 		switch (c) {
1164 		case 'M':
1165 			break;
1166 		case 'o':
1167 			break;
1168 		case 'h':
1169 			if (host != NULL) {
1170 				usage(sp, gettext(
1171 				    "only one -h option allowed"));
1172 			}
1173 			host = optarg;
1174 			break;
1175 		case 's':
1176 			sname = optarg;
1177 			break;
1178 		default:
1179 			usage(sp, gettext("unknown options"));
1180 		}
1181 	}
1182 
1183 	argc -= optind;
1184 	argv += optind;
1185 
1186 	if (argc != 0)
1187 		usage(sp, gettext("too many args"));
1188 
1189 	if ((sp = metasetname(sname, ep)) == NULL) {
1190 		mde_perror(ep, "");
1191 		md_exit(sp, 1);
1192 	}
1193 
1194 	if (host == NULL) {
1195 		if (meta_check_ownership(sp, ep) != 0) {
1196 			mde_perror(ep, "");
1197 			md_exit(sp, 1);
1198 		}
1199 	} else {
1200 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1201 			mde_perror(ep, "");
1202 			md_exit(sp, 1);
1203 		}
1204 	}
1205 	md_exit(sp, 0);
1206 }
1207 
1208 static void
1209 parse_purge(int argc, char **argv)
1210 {
1211 	int		c;
1212 	mdsetname_t	*sp = NULL;
1213 	mdsetname_t	*local_sp = NULL;
1214 	md_drive_desc	*dd;
1215 	char		*sname = MD_LOCAL_NAME;
1216 	char		*thishost = mynode();
1217 	md_error_t	status = mdnullerror;
1218 	md_error_t	*ep = &status;
1219 	int		bypass_cluster_purge = 0;
1220 	int		forceflg = FALSE;
1221 	int		ret = 0;
1222 	int		multi_node = 0;
1223 	md_set_desc		*sd;
1224 
1225 	optind = 1;
1226 	opterr = 1;
1227 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1228 		switch (c) {
1229 		case 'M':
1230 			break;
1231 		case 'C':
1232 			bypass_cluster_purge = 1;
1233 			break;
1234 		case 'f':
1235 			forceflg = TRUE;
1236 			break;
1237 		case 'P':
1238 			break;
1239 		case 's':
1240 			sname = optarg;
1241 			break;
1242 		default:
1243 			usage(sp, gettext("unknown options"));
1244 		}
1245 	}
1246 
1247 	argc -= optind;
1248 	argv += optind;
1249 
1250 	if (argc != 0)
1251 		usage(sp, gettext("too many arguments"));
1252 
1253 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1254 		mde_perror(ep, "");
1255 		md_exit(local_sp, 1);
1256 	}
1257 
1258 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1259 		mde_perror(ep, "");
1260 		md_exit(local_sp, 1);
1261 	}
1262 
1263 	if ((sp = metasetname(sname, ep)) == NULL) {
1264 		mde_perror(ep, "");
1265 		md_exit(sp, 1);
1266 	}
1267 
1268 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1269 		mde_perror(ep, "");
1270 		md_exit(local_sp, 1);
1271 	}
1272 	if (MD_MNSET_DESC(sd))
1273 		multi_node = 1;
1274 
1275 	if (!multi_node) {
1276 		if (meta_lock(sp, TRUE, ep) != 0) {
1277 			mde_perror(ep, "");
1278 			md_exit(local_sp, 1);
1279 		}
1280 	}
1281 
1282 	/* Must not own the set if purging it from this host */
1283 	if (meta_check_ownership(sp, ep) == 0) {
1284 		/*
1285 		 * Need to see if there are disks in the set, if not then
1286 		 * there is no ownership but meta_check_ownership returns 0
1287 		 */
1288 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1289 		if (!mdisok(ep)) {
1290 			mde_perror(ep, "");
1291 			if (!multi_node)
1292 				(void) meta_unlock(sp, ep);
1293 			md_exit(local_sp, 1);
1294 		}
1295 		if (dd != NULL) {
1296 			(void) printf(gettext
1297 			    ("Must not be owner of the set when purging it\n"));
1298 			if (!multi_node)
1299 				(void) meta_unlock(sp, ep);
1300 			md_exit(local_sp, 1);
1301 		}
1302 	}
1303 	/*
1304 	 * Remove the node from the DCS service
1305 	 */
1306 	if (!bypass_cluster_purge) {
1307 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1308 			md_perror(gettext
1309 			    ("Failed to purge hosts from DCS service"));
1310 			if (!multi_node)
1311 				(void) meta_unlock(sp, ep);
1312 			md_exit(local_sp, 1);
1313 		}
1314 	}
1315 
1316 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1317 	    ep)) != 0) {
1318 		if (!bypass_cluster_purge) {
1319 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1320 			    SDSSC_ERROR) {
1321 				(void) printf(gettext(
1322 				    "Failed to restore host in DCS "
1323 				    "database\n"));
1324 			}
1325 		}
1326 		mde_perror(ep, "");
1327 		if (!multi_node)
1328 			(void) meta_unlock(sp, ep);
1329 		md_exit(local_sp, ret);
1330 	}
1331 
1332 	if (!multi_node)
1333 		(void) meta_unlock(sp, ep);
1334 	md_exit(local_sp, 0);
1335 }
1336 
1337 static void
1338 parse_query(int argc, char **argv)
1339 {
1340 	int		c;
1341 	mdsetname_t	*sp = NULL;
1342 	mddb_dtag_lst_t	*dtlp = NULL;
1343 	mddb_dtag_lst_t	*tdtlp;
1344 	char		*sname = MD_LOCAL_NAME;
1345 	md_error_t	status = mdnullerror;
1346 
1347 	/* reset and parse args */
1348 	optind = 1;
1349 	opterr = 1;
1350 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1351 		switch (c) {
1352 		case 'M':
1353 			break;
1354 		case 'q':
1355 			break;
1356 		case 's':
1357 			sname = optarg;
1358 			break;
1359 		default:
1360 			usage(sp, gettext("unknown options"));
1361 		}
1362 	}
1363 
1364 	argc -= optind;
1365 	argv += optind;
1366 
1367 	if (argc != 0)
1368 		usage(sp, gettext("too many args"));
1369 
1370 	if ((sp = metasetname(sname, &status)) == NULL) {
1371 		mde_perror(&status, "");
1372 		md_exit(sp, 1);
1373 	}
1374 
1375 	if (meta_lock(sp, TRUE, &status) != 0) {
1376 		mde_perror(&status, "");
1377 		md_exit(sp, 1);
1378 	}
1379 
1380 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1381 		mde_perror(&status, "");
1382 		md_exit(sp, 1);
1383 	}
1384 
1385 	if (dtlp != NULL)
1386 		(void) printf("The following tag(s) were found:\n");
1387 
1388 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1389 		dtlp = tdtlp->dtl_nx;
1390 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1391 		    tdtlp->dtl_dt.dt_hn,
1392 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1393 		Free(tdtlp);
1394 	}
1395 
1396 	md_exit(sp, 0);
1397 }
1398 
1399 /* Should never be called with sname of a Multinode diskset. */
1400 static void
1401 parse_releaseset(int argc, char **argv)
1402 {
1403 	int		c;
1404 	mdsetname_t	*sp = NULL;
1405 	md_error_t	status = mdnullerror;
1406 	md_error_t	*ep = &status;
1407 	char		*sname = MD_LOCAL_NAME;
1408 	sdssc_boolean_e	cluster_release = SDSSC_False;
1409 	sdssc_version_t	vers;
1410 	rval_e		rval;
1411 	md_set_desc	*sd;
1412 
1413 	/* reset and parse args */
1414 	optind = 1;
1415 	opterr = 1;
1416 	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1417 		switch (c) {
1418 		case 'C':
1419 			cluster_release = SDSSC_True;
1420 			break;
1421 		case 's':
1422 			sname = optarg;
1423 			break;
1424 		case 'r':
1425 			break;
1426 		default:
1427 			usage(sp, gettext("unknown options"));
1428 		}
1429 	}
1430 
1431 	argc -= optind;
1432 	argv += optind;
1433 
1434 	if (argc > 0)
1435 		usage(sp, gettext("too many args"));
1436 
1437 	memset(&vers, 0, sizeof (vers));
1438 
1439 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1440 	    (vers.major == 3) &&
1441 	    (cluster_release == SDSSC_False)) {
1442 
1443 		/*
1444 		 * If the release is being done by the user via the CLI
1445 		 * we need to notify the DCS to release this node as being
1446 		 * the primary. The reason nothing else needs to be done
1447 		 * is due to the fact that the reservation code will exec
1448 		 * metaset -C release to complete the operation.
1449 		 */
1450 		rval = sdssc_notify_service(sname, Release_Primary);
1451 		if (rval == SDSSC_ERROR) {
1452 			printf(gettext(
1453 			    "metaset: failed to notify DCS of release\n"));
1454 		}
1455 		md_exit(NULL, rval == SDSSC_ERROR);
1456 	}
1457 
1458 	if ((sp = metasetname(sname, ep)) == NULL) {
1459 
1460 		/*
1461 		 * It's entirely possible for the SC3.0 reservation code
1462 		 * to call for DiskSet to release a diskset and have that
1463 		 * diskset not exist. During a diskset removal DiskSuite
1464 		 * maybe able to remove all traces of the diskset before
1465 		 * the reservation code execs metaset -C release in which
1466 		 * case the metasetname will fail, but the overall command
1467 		 * shouldn't.
1468 		 */
1469 		if (vers.major == 3)
1470 			md_exit(sp, 0);
1471 		else {
1472 			mde_perror(ep, "");
1473 			md_exit(sp, 1);
1474 		}
1475 	}
1476 
1477 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1478 		mde_perror(ep, "");
1479 		md_exit(sp, 1);
1480 	}
1481 
1482 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1483 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1484 		md_exit(sp, 1);
1485 	}
1486 
1487 	if (meta_lock_nowait(sp, ep) != 0) {
1488 		mde_perror(ep, "");
1489 		md_exit(sp, 10);	/* special errcode */
1490 	}
1491 
1492 	if (meta_set_release(sp, ep)) {
1493 		mde_perror(ep, "");
1494 		md_exit(sp, 1);
1495 	}
1496 	md_exit(sp, 0);
1497 }
1498 
1499 /* Should never be called with sname of a Multinode diskset. */
1500 static void
1501 parse_takeset(int argc, char **argv)
1502 {
1503 	int		c;
1504 	mdsetname_t	*sp = NULL;
1505 	int		flags = 0;
1506 	char		*sname = MD_LOCAL_NAME;
1507 	mhd_mhiargs_t	mhiargs;
1508 	char 		*cp = NULL;
1509 	int		pos = -1;	/* position of timeout value */
1510 	int		usetag = 0;
1511 	static char	*nullopts[] = { NULL };
1512 	md_error_t	status = mdnullerror;
1513 	md_error_t	*ep = &status;
1514 	sdssc_boolean_e	cluster_take = SDSSC_False;
1515 	sdssc_version_t	vers;
1516 	rval_e		rval;
1517 	int		set_take_rval;
1518 
1519 	/* reset and parse args */
1520 	optind = 1;
1521 	opterr = 1;
1522 	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1523 		switch (c) {
1524 		case 'C':
1525 			cluster_take = SDSSC_True;
1526 			break;
1527 		case 'f':
1528 			flags |= TAKE_FORCE;
1529 			break;
1530 		case 's':
1531 			sname = optarg;
1532 			break;
1533 		case 't':
1534 			break;
1535 		case 'u':
1536 			usetag = atoi(optarg);
1537 			flags |= TAKE_USETAG;
1538 			break;
1539 		case 'y':
1540 			flags |= TAKE_USEIT;
1541 			break;
1542 		default:
1543 			usage(sp, gettext("unknown options"));
1544 		}
1545 	}
1546 
1547 	mhiargs = defmhiargs;
1548 
1549 	argc -= optind;
1550 	argv += optind;
1551 
1552 	if (argc > 1)
1553 		usage(sp, gettext("too many args"));
1554 
1555 	/*
1556 	 * If we have a list of timeout value overrides, handle it here
1557 	 */
1558 	while (argv[0] != NULL && *argv[0] != '\0') {
1559 		/*
1560 		 * The use of the nullopts[] "token list" here is to make
1561 		 * getsubopts() simply parse a comma separated list
1562 		 * returning either "" or the contents of the field, the
1563 		 * end condition is exaustion of the initial string, which
1564 		 * is modified in the process.
1565 		 */
1566 		(void) getsubopt(&argv[0], nullopts, &cp);
1567 
1568 		c = 0;			/* re-use c as temp value of timeout */
1569 
1570 		if (*cp != '-')		/* '-' uses default */
1571 			c = atoi(cp);
1572 
1573 		if (c < 0) {
1574 			usage(sp, gettext(
1575 			    "time out values must be > 0"));
1576 		}
1577 
1578 		if (++pos > 3) {
1579 			usage(sp, gettext(
1580 			    "too many timeout values specified."));
1581 		}
1582 
1583 		if (c == 0)		/* 0 or "" field uses default */
1584 			continue;
1585 
1586 		/*
1587 		 * Assign temp value to appropriate structure member based on
1588 		 * its position in the comma separated list.
1589 		 */
1590 		switch (pos) {
1591 			case 0:
1592 				mhiargs.mh_ff = c;
1593 				break;
1594 
1595 			case 1:
1596 				mhiargs.mh_tk.reinstate_resv_delay = c;
1597 				break;
1598 
1599 			case 2:
1600 				mhiargs.mh_tk.min_ownership_delay = c;
1601 				break;
1602 
1603 			case 3:
1604 				mhiargs.mh_tk.max_ownership_delay = c;
1605 				break;
1606 		}
1607 	}
1608 
1609 	memset(&vers, 0, sizeof (vers));
1610 
1611 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1612 	    (vers.major == 3) &&
1613 	    (cluster_take == SDSSC_False)) {
1614 
1615 		/*
1616 		 * If the take is beging done by the user via the CLI we need
1617 		 * to notify the DCS to make this current node the primary.
1618 		 * The SC3.0 reservation code will in turn exec metaset with
1619 		 * the -C take arg to complete this operation.
1620 		 */
1621 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1622 		    SDSSC_ERROR) {
1623 			printf(gettext(
1624 			    "metaset: failed to notify DCS of take\n"));
1625 		}
1626 		md_exit(NULL, rval == SDSSC_ERROR);
1627 	}
1628 
1629 	if ((sp = metasetname(sname, ep)) == NULL) {
1630 		mde_perror(ep, "");
1631 		md_exit(sp, 1);
1632 	}
1633 
1634 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1635 
1636 		/*
1637 		 * If we're running in a cluster environment and this
1638 		 * node already owns the set. Don't bother trying to
1639 		 * take the set again. There's one case where an adminstrator
1640 		 * is adding disks to a set for the first time. metaset
1641 		 * will take the ownership of the set at that point. During
1642 		 * that add operation SC3.0 notices activity on the device
1643 		 * and also tries to perform a take operation. The SC3.0 take
1644 		 * will fail because the adminstrative add has the set locked
1645 		 */
1646 		md_exit(sp, 0);
1647 	}
1648 
1649 	if (meta_lock_nowait(sp, ep) != 0) {
1650 		mde_perror(ep, "");
1651 		md_exit(sp, 10);	/* special errcode */
1652 	}
1653 
1654 	/*
1655 	 * If a 2 is returned from meta_set_take, this take was able to resolve
1656 	 * an unresolved replicated disk (i.e. a disk is now available that
1657 	 * had been missing during the import of the replicated diskset).
1658 	 * Need to release the diskset and re-take in order to have
1659 	 * the subdrivers re-snarf using the newly resolved (or newly mapped)
1660 	 * devids.  This also allows the namespace to be updated with the
1661 	 * correct major names in the case where the disk being replicated
1662 	 * was handled by a different driver than the replicated disk.
1663 	 */
1664 	set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
1665 	if (set_take_rval == 2) {
1666 		if (meta_set_release(sp, &status)) {
1667 			mde_perror(&status,
1668 			    "Need to release and take set to resolve names.");
1669 			md_exit(sp, 1);
1670 		}
1671 		metaflushdrivenames();
1672 		metaflushsetname(sp);
1673 		set_take_rval = meta_set_take(sp, &mhiargs,
1674 		    (flags | TAKE_RETAKE), usetag, &status);
1675 	}
1676 
1677 	if (set_take_rval == -1) {
1678 		mde_perror(&status, "");
1679 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1680 			md_exit(sp, 2);
1681 		if (mdismddberror(&status, MDE_DB_ACCOK))
1682 			md_exit(sp, 3);
1683 		if (mdismddberror(&status, MDE_DB_STALE))
1684 			md_exit(sp, 66);
1685 		md_exit(sp, 1);
1686 	}
1687 	md_exit(sp, 0);
1688 }
1689 
1690 /*
1691  * Joins a node to a specific set or to all multinode disksets known
1692  * by this node.  If set is specified then caller should have verified
1693  * that the set is a multinode diskset.
1694  *
1695  * If an error occurs, metaset exits with a 1.
1696  * If there is no error, metaset exits with a 0.
1697  */
1698 static void
1699 parse_joinset(int argc, char **argv)
1700 {
1701 	int		c;
1702 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1703 	char		*sname = MD_LOCAL_NAME;
1704 	md_error_t	status = mdnullerror;
1705 	md_error_t	*ep = &status;
1706 	md_set_desc	*sd;
1707 	char		buf[BUFSIZ];
1708 	char		*p = buf;
1709 	set_t		max_sets, setno;
1710 	int		err, cumm_err = 0;
1711 	size_t		bufsz;
1712 
1713 	bufsz = sizeof (buf);
1714 	/* reset and parse args */
1715 	optind = 1;
1716 	opterr = 1;
1717 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1718 		switch (c) {
1719 		case 'M':
1720 			break;
1721 		case 'j':
1722 			break;
1723 		case 's':
1724 			sname = optarg;
1725 			break;
1726 		default:
1727 			usage(sp, gettext("unknown options"));
1728 		}
1729 	}
1730 
1731 	argc -= optind;
1732 	argv += optind;
1733 
1734 	if (argc > 1)
1735 		usage(sp, gettext("too many args"));
1736 
1737 	/*
1738 	 * If no setname option was used, then join all disksets
1739 	 * that this node knows about.   Attempt to join all
1740 	 * disksets that this node knows about.
1741 	 *
1742 	 * Additional text is added to the error messages during
1743 	 * this section of code in order to help the user understand
1744 	 * why the 'join of all sets' failed and which set caused
1745 	 * the failure.
1746 	 */
1747 
1748 	/*
1749 	 * Hold local set lock throughout this call to keep
1750 	 * other actions from interfering (such as creating a new
1751 	 * set, etc.).
1752 	 */
1753 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1754 		mde_perror(ep, "");
1755 		md_exit(sp, 1);
1756 	}
1757 
1758 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1759 		mde_perror(ep, "");
1760 		md_exit(local_sp, 1);
1761 	}
1762 
1763 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1764 		/*
1765 		 * If no set name is given, then walk through all sets
1766 		 * on this node which could include:
1767 		 * 	- MN disksets
1768 		 *	- traditional disksets
1769 		 *	- non-existent disksets
1770 		 * Attempt to join the MN disksets.
1771 		 * If the join of one set fails, print out an error message
1772 		 * about that set and continue the walk.
1773 		 */
1774 		if ((max_sets = get_max_sets(ep)) == 0) {
1775 			mde_perror(ep, "");
1776 			md_exit(local_sp, 1);
1777 		}
1778 
1779 		/* Start walking through all possible disksets */
1780 		for (setno = 1; setno < max_sets; setno++) {
1781 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1782 				if (mdiserror(ep, MDE_NO_SET)) {
1783 					/* No set for this setno - continue */
1784 					mdclrerror(ep);
1785 					continue;
1786 				} else {
1787 					(void) sprintf(p, gettext(
1788 					"Unable to get set %d information"),
1789 					    setno);
1790 					mde_perror(ep, p);
1791 					cumm_err = 1;
1792 					mdclrerror(ep);
1793 					continue;
1794 				}
1795 			}
1796 
1797 			/* If setname is there, set desc should exist. */
1798 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1799 				(void) snprintf(p, bufsz, gettext(
1800 				    "Unable to get set %s desc information"),
1801 				    sp->setname);
1802 				mde_perror(ep, p);
1803 				cumm_err = 1;
1804 				mdclrerror(ep);
1805 				continue;
1806 			}
1807 
1808 			/* Only check MN disksets */
1809 			if (!MD_MNSET_DESC(sd)) {
1810 				continue;
1811 			}
1812 
1813 			/*
1814 			 * Return value of 0 is success.
1815 			 * Return value of -1 means a failure.
1816 			 * Return value of -2 means set could not be
1817 			 * joined, but shouldn't cause an error.
1818 			 * Reasons would be:
1819 			 * 	- no drives in set
1820 			 * 	- node already joined to set
1821 			 * Return value of -3 means joined stale set.
1822 			 * Can't check for all reasons here
1823 			 * since set isn't locked yet across all
1824 			 * nodes in the cluster.  The call
1825 			 * to libmeta routine, meta_set_join, will
1826 			 * lock across the cluster and perform
1827 			 * the checks.
1828 			 */
1829 			if ((err = meta_set_join(sp, ep)) == -1) {
1830 				/* Print error of diskset join failure */
1831 				(void) snprintf(p, bufsz,
1832 				    gettext("Join to diskset %s failed"),
1833 				    sp->setname);
1834 				mde_perror(ep, p);
1835 				cumm_err = 1;
1836 				mdclrerror(ep);
1837 				continue;
1838 			}
1839 
1840 			if (err == -3) {
1841 				/* Print error of diskset join failure */
1842 				(void) snprintf(p, bufsz,
1843 				    gettext("Joined to stale diskset %s"),
1844 				    sp->setname);
1845 				mde_perror(ep, p);
1846 				mdclrerror(ep);
1847 			}
1848 
1849 			mdclrerror(ep);
1850 		}
1851 
1852 		md_exit(local_sp, cumm_err);
1853 	}
1854 
1855 	/*
1856 	 * Code for a specific set is much simpler.
1857 	 * Error messages don't need extra text since specific setname
1858 	 * was used.
1859 	 * Don't need to lock the local set, just the specific set given.
1860 	 */
1861 	if ((sp = metasetname(sname, ep)) == NULL) {
1862 		mde_perror(ep, "");
1863 		md_exit(local_sp, 1);
1864 	}
1865 
1866 	/*
1867 	 * Fail command if meta_set_join returns -1.
1868 	 *
1869 	 * Return of 0 means that node joined set.
1870 	 *
1871 	 * Return of -2 means that node was unable to
1872 	 * join a set since that set had no drives
1873 	 * or that had already joined the set.  No
1874 	 * need to fail the command for these reasons.
1875 	 *
1876 	 * Return of -3 means that set is stale.
1877 	 * Return a value of 66 to historically match traditional disksets.
1878 	 */
1879 	if ((err = meta_set_join(sp, ep)) == -1) {
1880 		mde_perror(&status, "");
1881 		md_exit(local_sp, 1);
1882 	}
1883 
1884 	if (err == -3) {
1885 		/* Print error of diskset join failure */
1886 		(void) snprintf(p, bufsz,
1887 		    gettext("Joined to stale diskset %s"),
1888 		    sp->setname);
1889 		mde_perror(&status, "");
1890 		md_exit(local_sp, 66);
1891 	}
1892 
1893 	md_exit(local_sp, 0);
1894 }
1895 
1896 /*
1897  * Withdraws a node from a specific set or from all multinode disksets known
1898  * by this node.  If set is specified then caller should have verified
1899  * that the set is a multinode diskset.
1900  *
1901  * If an error occurs, metaset exits with a 1.
1902  * If there is no error, metaset exits with a 0.
1903  */
1904 static void
1905 parse_withdrawset(int argc, char **argv)
1906 {
1907 	int		c;
1908 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1909 	char		*sname = MD_LOCAL_NAME;
1910 	md_error_t	status = mdnullerror;
1911 	md_error_t	*ep = &status;
1912 	char		buf[BUFSIZ];
1913 	char		*p = buf;
1914 	md_set_desc	*sd;
1915 	set_t		max_sets, setno;
1916 	int		err, cumm_err = 0;
1917 	size_t		bufsz;
1918 
1919 	bufsz = sizeof (buf);
1920 	/* reset and parse args */
1921 	optind = 1;
1922 	opterr = 1;
1923 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1924 		switch (c) {
1925 		case 'M':
1926 			break;
1927 		case 'w':
1928 			break;
1929 		case 's':
1930 			sname = optarg;
1931 			break;
1932 		default:
1933 			usage(sp, gettext("unknown options"));
1934 		}
1935 	}
1936 
1937 	argc -= optind;
1938 	argv += optind;
1939 
1940 	if (argc > 1)
1941 		usage(sp, gettext("too many args"));
1942 
1943 	/*
1944 	 * If no setname option was used, then withdraw from all disksets
1945 	 * that this node knows about.
1946 	 *
1947 	 * Additional text is added to the error messages during
1948 	 * this section of code in order to help the user understand
1949 	 * why the 'withdraw from all sets' failed and which set caused
1950 	 * the failure.
1951 	 */
1952 
1953 	/*
1954 	 * Hold local set lock throughout this call to keep
1955 	 * other actions from interfering (such as creating a new
1956 	 * set, etc.).
1957 	 */
1958 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1959 		mde_perror(ep, "");
1960 		md_exit(sp, 1);
1961 	}
1962 
1963 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1964 		mde_perror(ep, "");
1965 		md_exit(local_sp, 1);
1966 	}
1967 
1968 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1969 		/*
1970 		 * If no set name is given, then walk through all sets
1971 		 * on this node which could include:
1972 		 * 	- MN disksets
1973 		 *	- traditional disksets
1974 		 *	- non-existent disksets
1975 		 * Attempt to withdraw from the MN disksets.
1976 		 * If the withdraw of one set fails, print out an error
1977 		 * message about that set and continue the walk.
1978 		 */
1979 		if ((max_sets = get_max_sets(ep)) == 0) {
1980 			mde_perror(ep, "");
1981 			md_exit(local_sp, 1);
1982 		}
1983 
1984 		/* Start walking through all possible disksets */
1985 		for (setno = 1; setno < max_sets; setno++) {
1986 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1987 				if (mdiserror(ep, MDE_NO_SET)) {
1988 					/* No set for this setno - continue */
1989 					mdclrerror(ep);
1990 					continue;
1991 				} else {
1992 					(void) sprintf(p, gettext(
1993 					    "Unable to get set %d information"),
1994 					    setno);
1995 					mde_perror(ep, p);
1996 					cumm_err = 1;
1997 					mdclrerror(ep);
1998 					continue;
1999 				}
2000 			}
2001 
2002 			/* If setname is there, set desc should exist. */
2003 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2004 				(void) snprintf(p, bufsz, gettext(
2005 				    "Unable to get set %s desc information"),
2006 				    sp->setname);
2007 				mde_perror(ep, p);
2008 				cumm_err = 1;
2009 				mdclrerror(ep);
2010 				continue;
2011 			}
2012 
2013 			/* Only check MN disksets */
2014 			if (!MD_MNSET_DESC(sd)) {
2015 				continue;
2016 			}
2017 
2018 			/*
2019 			 * Return value of 0 is success.
2020 			 * Return value of -1 means a failure.
2021 			 * Return value of -2 means set could not be
2022 			 * withdrawn from, but this shouldn't cause
2023 			 * an error.  Reasons would be:
2024 			 * 	- no drives in set
2025 			 * 	- node already withdrawn from set
2026 			 * Can't check for all reasons here
2027 			 * since set isn't locked yet across all
2028 			 * nodes in the cluster.  The call
2029 			 * to libmeta routine, meta_set_withdraw, will
2030 			 * lock across the cluster and perform
2031 			 * the checks.
2032 			 */
2033 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2034 				/* Print error of diskset withdraw failure */
2035 				(void) snprintf(p, bufsz,
2036 				    gettext("Withdraw from diskset %s failed"),
2037 				    sp->setname);
2038 				mde_perror(ep, p);
2039 				mdclrerror(ep);
2040 				cumm_err = 1;
2041 				continue;
2042 			}
2043 
2044 			if (err == -2) {
2045 				mdclrerror(ep);
2046 				continue;
2047 			}
2048 
2049 			mdclrerror(ep);
2050 		}
2051 		md_exit(local_sp, cumm_err);
2052 	}
2053 
2054 
2055 	/*
2056 	 * Code for a specific set is much simpler.
2057 	 * Error messages don't need extra text since specific setname
2058 	 * was used.
2059 	 * Don't need to lock the local set, just the specific set given.
2060 	 */
2061 	if ((sp = metasetname(sname, ep)) == NULL) {
2062 		mde_perror(ep, "");
2063 		md_exit(local_sp, 1);
2064 	}
2065 
2066 	/*
2067 	 * Fail command if meta_set_withdraw returns -1.
2068 	 *
2069 	 * Return of 0 means that node withdrew from set.
2070 	 *
2071 	 * Return of -2 means that node was unable to
2072 	 * withdraw from a set since that set had no drives
2073 	 * or node was not joined to set.  No
2074 	 * need to fail the command for these reasons.
2075 	 */
2076 	if (meta_set_withdraw(sp, ep) == -1) {
2077 		mde_perror(&status, "");
2078 		md_exit(local_sp, 1);
2079 	}
2080 
2081 	md_exit(local_sp, 0);
2082 }
2083 
2084 static void
2085 parse_cluster(int argc, char **argv, int multi_node)
2086 {
2087 	int			c, error, new_argc, x;
2088 	enum cluster_cmd	cmd = ccnotspecified;
2089 	char			*hostname = SDSSC_PROXY_PRIMARY;
2090 	char			*argument = NULL;
2091 	char			*sname = MD_LOCAL_NAME;
2092 	char			primary_node[SDSSC_NODE_NAME_LEN];
2093 	char			**new_argv = NULL;
2094 	char			**np = NULL;
2095 	mdsetname_t		*sp = NULL;
2096 	md_error_t		status = mdnullerror;
2097 	md_error_t		*ep = &status;
2098 
2099 	/* reset and parse args */
2100 	optind = 1;
2101 	opterr = 1;
2102 	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2103 		switch (c) {
2104 		case 'C':
2105 			if (cmd != ccnotspecified) {
2106 				md_exit(sp, -1);
2107 			}
2108 			argument = optarg;
2109 
2110 			if (strcmp(argument, "disksin") == 0) {
2111 				cmd = clusterdisksin;
2112 			} else if (strcmp(argument, "version") == 0) {
2113 				cmd = clusterversion;
2114 			} else if (strcmp(argument, "release") == 0) {
2115 				cmd = clusterrelease;
2116 			} else if (strcmp(argument, "take") == 0) {
2117 				cmd = clustertake;
2118 			} else if (strcmp(argument, "proxy") == 0) {
2119 				cmd = clusterproxy;
2120 			} else if (strcmp(argument, "purge") == 0) {
2121 				cmd = clusterpurge;
2122 			} else {
2123 				md_exit(sp, -1);
2124 			}
2125 
2126 			break;
2127 
2128 		case 'h':
2129 			hostname = optarg;
2130 			break;
2131 
2132 		case 's':
2133 			sname = optarg;
2134 			break;
2135 
2136 		case 'f':
2137 		case 't':
2138 		case 'u':
2139 		case 'y':
2140 		case 'r':
2141 			break;
2142 
2143 		default:
2144 			md_exit(sp, -1);
2145 		}
2146 	}
2147 
2148 	/* Now call the appropriate command function. */
2149 	switch (cmd) {
2150 	case clusterversion:
2151 		printclusterversion();
2152 		break;
2153 
2154 	case clusterdisksin:
2155 		if (printdisksin(sname, ep)) {
2156 			md_exit(sp, -1);
2157 		}
2158 		break;
2159 
2160 	case clusterrelease:
2161 		if (multi_node) {
2162 			usage(sp, gettext(
2163 			    "-C release is not allowed on multi-owner"
2164 			    " disksets"));
2165 		}
2166 		parse_releaseset(argc, argv);
2167 		break;
2168 
2169 	case clustertake:
2170 		if (multi_node) {
2171 			usage(sp, gettext(
2172 			    "-C take is not allowed on multi-owner disksets"));
2173 		}
2174 		parse_takeset(argc, argv);
2175 		break;
2176 
2177 	case clusterproxy:
2178 		if (multi_node) {
2179 			usage(sp, gettext(
2180 			    "-C proxy is not allowed on multi-owner disksets"));
2181 		}
2182 
2183 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2184 			printf(gettext("Out of memory\n"));
2185 			md_exit(sp, 1);
2186 		}
2187 
2188 		np = new_argv;
2189 		new_argc = 0;
2190 		memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2191 
2192 		for (x = 0; x < argc; x++) {
2193 			if (strcmp(argv[x], "-C") == 0) {
2194 
2195 				/*
2196 				 * Need to skip the '-C proxy' args so
2197 				 * just increase x by one and the work is
2198 				 * done.
2199 				 */
2200 				x++;
2201 			} else {
2202 				*np++ = strdup(argv[x]);
2203 				new_argc++;
2204 			}
2205 		}
2206 
2207 		switch (sdssc_get_primary_host(sname, primary_node,
2208 		    SDSSC_NODE_NAME_LEN)) {
2209 		case SDSSC_ERROR:
2210 			md_exit(sp, 1);
2211 			break;
2212 
2213 		case SDSSC_NO_SERVICE:
2214 			if (hostname != SDSSC_PROXY_PRIMARY) {
2215 				(void) strlcpy(primary_node, hostname,
2216 				    SDSSC_NODE_NAME_LEN);
2217 			}
2218 			break;
2219 		}
2220 
2221 		if (sdssc_cmd_proxy(new_argc, new_argv,
2222 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2223 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2224 			md_exit(sp, error);
2225 		} else {
2226 			printf(gettext(
2227 			    "Couldn't proxy command\n"));
2228 			md_exit(sp, 1);
2229 		}
2230 		break;
2231 
2232 	case clusterpurge:
2233 		parse_purge(argc, argv);
2234 		break;
2235 
2236 	default:
2237 		break;
2238 	}
2239 
2240 	md_exit(sp, 0);
2241 }
2242 
2243 /*
2244  * parse args and do it
2245  */
2246 int
2247 main(int argc, char *argv[])
2248 {
2249 	enum metaset_cmd	cmd = notspecified;
2250 	md_error_t		status = mdnullerror;
2251 	md_error_t		*ep = &status;
2252 	mdsetname_t		*sp = NULL;
2253 	char			*hostname = SDSSC_PROXY_PRIMARY;
2254 	char			*sname = MD_LOCAL_NAME;
2255 	char			*auto_take_option = NULL;
2256 	char			primary_node[SDSSC_NODE_NAME_LEN];
2257 	int			error, c, stat;
2258 	int			auto_take = FALSE;
2259 	md_set_desc		*sd;
2260 	int			mflag = 0;
2261 	int			multi_node = 0;
2262 	rval_e			sdssc_res;
2263 
2264 	/*
2265 	 * Get the locale set up before calling any other routines
2266 	 * with messages to ouput.  Just in case we're not in a build
2267 	 * environment, make sure that TEXT_DOMAIN gets set to
2268 	 * something.
2269 	 */
2270 #if !defined(TEXT_DOMAIN)
2271 #define	TEXT_DOMAIN "SYS_TEST"
2272 #endif
2273 	(void) setlocale(LC_ALL, "");
2274 	(void) textdomain(TEXT_DOMAIN);
2275 
2276 	sdssc_res = sdssc_bind_library();
2277 	if (sdssc_res == SDSSC_ERROR) {
2278 		printf(gettext(
2279 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2280 		exit(1);
2281 	}
2282 
2283 	/* initialize */
2284 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2285 		mde_perror(ep, "");
2286 		md_exit(sp, 1);
2287 	}
2288 
2289 	optind = 1;
2290 	opterr = 1;
2291 
2292 	/*
2293 	 * NOTE: The "C" option is strictly for cluster use. it is not
2294 	 * and should not be documented for the customer. - JST
2295 	 */
2296 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2297 	    != -1) {
2298 		switch (c) {
2299 		case 'M':
2300 			mflag = 1;
2301 			break;
2302 		case 'A':
2303 			auto_take = TRUE;
2304 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2305 			    strcmp(optarg, "disable") == 0))
2306 				usage(sp, gettext(
2307 				    "-A: enable or disable must be specified"));
2308 			auto_take_option = optarg;
2309 			break;
2310 		case 'a':
2311 			if (cmd != notspecified) {
2312 				usage(sp, gettext(
2313 				    "conflicting options"));
2314 			}
2315 			cmd = add;
2316 			break;
2317 		case 'b':
2318 			if (cmd != notspecified) {
2319 				usage(sp, gettext(
2320 				    "conflicting options"));
2321 			}
2322 			cmd = balance;
2323 			break;
2324 		case 'd':
2325 			if (cmd != notspecified) {
2326 				usage(sp, gettext(
2327 				    "conflicting options"));
2328 			}
2329 			cmd = delete;
2330 			break;
2331 		case 'C':	/* cluster commands */
2332 			if (cmd != notspecified) {
2333 				md_exit(sp, -1);    /* conflicting options */
2334 			}
2335 			cmd = cluster;
2336 			break;
2337 		case 'f':
2338 			break;
2339 		case 'h':
2340 			hostname = optarg;
2341 			break;
2342 		case 'j':
2343 			if (cmd != notspecified) {
2344 				usage(sp, gettext(
2345 				    "conflicting options"));
2346 			}
2347 			cmd = join;
2348 			break;
2349 		case 'l':
2350 			break;
2351 		case 'L':
2352 			break;
2353 		case 'm':
2354 			break;
2355 		case 'o':
2356 			if (cmd != notspecified) {
2357 				usage(sp, gettext(
2358 				    "conflicting options"));
2359 			}
2360 			cmd = isowner;
2361 			break;
2362 		case 'P':
2363 			if (cmd != notspecified) {
2364 				usage(sp, gettext(
2365 				    "conflicting options"));
2366 			}
2367 			cmd = purge;
2368 			break;
2369 		case 'q':
2370 			if (cmd != notspecified) {
2371 				usage(sp, gettext(
2372 				    "conflicting options"));
2373 			}
2374 			cmd = query;
2375 			break;
2376 		case 'r':
2377 			if (cmd != notspecified) {
2378 				usage(sp, gettext(
2379 				    "conflicting options"));
2380 			}
2381 			cmd = release;
2382 			break;
2383 		case 's':
2384 			sname = optarg;
2385 			break;
2386 		case 't':
2387 			if (cmd != notspecified) {
2388 				usage(sp, gettext(
2389 				    "conflicting options"));
2390 			}
2391 			cmd = take;
2392 			break;
2393 		case 'u':
2394 			break;
2395 		case 'w':
2396 			if (cmd != notspecified) {
2397 				usage(sp, gettext(
2398 				    "conflicting options"));
2399 			}
2400 			cmd = withdraw;
2401 			break;
2402 		case 'y':
2403 			break;
2404 		case '?':
2405 			if (optopt == '?')
2406 				usage(sp, NULL);
2407 			/*FALLTHROUGH*/
2408 		default:
2409 			if (cmd == cluster) {    /* cluster is silent */
2410 				md_exit(sp, -1);
2411 			} else {
2412 				usage(sp, gettext(
2413 				    "unknown command"));
2414 			}
2415 		}
2416 	}
2417 
2418 	/* check if suncluster is installed and -A enable specified */
2419 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2420 	    strcmp(auto_take_option, "enable") == 0) {
2421 		md_eprintf(gettext(
2422 		    "cannot enable auto-take when SunCluster is installed\n"));
2423 		md_exit(sp, 1);
2424 	}
2425 
2426 	/*
2427 	 * At this point we know that if the -A enable option is specified
2428 	 * for an auto-take diskset that SC is not installed on the machine, so
2429 	 * all of the sdssc calls will just be no-ops.
2430 	 */
2431 
2432 	/* list sets */
2433 	if (cmd == notspecified && auto_take == FALSE) {
2434 		parse_printset(argc, argv);
2435 		/*NOTREACHED*/
2436 	}
2437 
2438 	if (meta_check_root(ep) != 0) {
2439 		mde_perror(ep, "");
2440 		md_exit(sp, 1);
2441 	}
2442 
2443 	/* snarf MDDB */
2444 	if (meta_setup_db_locations(ep) != 0) {
2445 		mde_perror(ep, "");
2446 		md_exit(sp, 1);
2447 	}
2448 
2449 	/*
2450 	 * If sname is a diskset - check for multi_node.
2451 	 * It is possible for sname to not exist.
2452 	 */
2453 	if (strcmp(sname, MD_LOCAL_NAME)) {
2454 		if ((sp = metasetname(sname, ep)) != NULL) {
2455 			/* Set exists - check for MN diskset */
2456 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2457 				mde_perror(ep, "");
2458 				md_exit(sp, 1);
2459 			}
2460 			if (MD_MNSET_DESC(sd)) {
2461 				/*
2462 				 * If a MN diskset always set multi_node
2463 				 * regardless of whether the -M option was
2464 				 * used or not (mflag).
2465 				 */
2466 				multi_node = 1;
2467 			} else {
2468 				/*
2469 				 * If a traditional diskset, mflag must
2470 				 * not be set.
2471 				 */
2472 				if (mflag) {
2473 					usage(sp, gettext(
2474 					    "-M option only allowed "
2475 					    "on multi-owner diskset"));
2476 				}
2477 			}
2478 		} else {
2479 			/*
2480 			 * Set name does not exist, set multi_node
2481 			 * based on -M option.
2482 			 */
2483 			if (mflag) {
2484 				multi_node = 1;
2485 			}
2486 		}
2487 	}
2488 
2489 	if (auto_take && multi_node) {
2490 		/* Can't mix multinode and auto-take on a diskset */
2491 		usage(sp,
2492 		    gettext("-A option not allowed on multi-owner diskset"));
2493 	}
2494 
2495 	/*
2496 	 * MN disksets don't use DCS clustering services, so
2497 	 * do not get primary_node for MN diskset since no command
2498 	 * proxying is done to Primary cluster node.  Do not proxy
2499 	 * MN diskset commands of join and withdraw when issued without
2500 	 * a valid setname.
2501 	 * For traditional disksets: proxy all commands except a take
2502 	 * and release.  Use first host listed as the host to send the
2503 	 * command to if there isn't already a primary
2504 	 */
2505 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2506 	    (cmd != take) && (cmd != release) &&
2507 	    (cmd != cluster) && (cmd != join) &&
2508 	    (cmd != withdraw) && (cmd != purge)) {
2509 		stat = sdssc_get_primary_host(sname, primary_node,
2510 		    SDSSC_NODE_NAME_LEN);
2511 		switch (stat) {
2512 			case SDSSC_ERROR:
2513 				return (0);
2514 
2515 			case SDSSC_NO_SERVICE:
2516 				if (hostname != SDSSC_PROXY_PRIMARY) {
2517 					(void) strlcpy(primary_node, hostname,
2518 					    SDSSC_NODE_NAME_LEN);
2519 				} else {
2520 					memset(primary_node, '\0',
2521 					    SDSSC_NODE_NAME_LEN);
2522 				}
2523 				break;
2524 		}
2525 
2526 		/*
2527 		 * We've got a complicated decision here regarding
2528 		 * the hostname. If we didn't get a primary host
2529 		 * and a host name wasn't supplied on the command line
2530 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2531 		 * use what's been found.
2532 		 */
2533 		if (sdssc_cmd_proxy(argc, argv,
2534 		    primary_node[0] == '\0' ?
2535 		    SDSSC_PROXY_PRIMARY : primary_node,
2536 		    &error) == SDSSC_PROXY_DONE) {
2537 			exit(error);
2538 		}
2539 	}
2540 
2541 	/* cluster-specific commands */
2542 	if (cmd == cluster) {
2543 		parse_cluster(argc, argv, multi_node);
2544 		/*NOTREACHED*/
2545 	}
2546 
2547 	/* join MultiNode diskset */
2548 	if (cmd == join) {
2549 		/*
2550 		 * If diskset specified, verify that it exists
2551 		 * and is a multinode diskset.
2552 		 */
2553 		if (strcmp(sname, MD_LOCAL_NAME)) {
2554 			if ((sp = metasetname(sname, ep)) == NULL) {
2555 				mde_perror(ep, "");
2556 				md_exit(sp, 1);
2557 			}
2558 
2559 			if (!multi_node) {
2560 				usage(sp, gettext(
2561 				    "-j option only allowed on "
2562 				    "multi-owner diskset"));
2563 			}
2564 		}
2565 		/*
2566 		 * Start mddoors daemon here.
2567 		 * mddoors itself takes care there will be only one
2568 		 * instance running, so starting it twice won't hurt
2569 		 */
2570 		pclose(popen("/usr/lib/lvm/mddoors", "w"));
2571 		parse_joinset(argc, argv);
2572 		/*NOTREACHED*/
2573 	}
2574 
2575 	/* withdraw from MultiNode diskset */
2576 	if (cmd == withdraw) {
2577 		/*
2578 		 * If diskset specified, verify that it exists
2579 		 * and is a multinode diskset.
2580 		 */
2581 		if (strcmp(sname, MD_LOCAL_NAME)) {
2582 			if ((sp = metasetname(sname, ep)) == NULL) {
2583 				mde_perror(ep, "");
2584 				md_exit(sp, 1);
2585 			}
2586 
2587 			if (!multi_node) {
2588 				usage(sp, gettext(
2589 				    "-w option only allowed on "
2590 				    "multi-owner diskset"));
2591 			}
2592 		}
2593 		parse_withdrawset(argc, argv);
2594 		/*NOTREACHED*/
2595 	}
2596 
2597 	/* must have set for everything else */
2598 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2599 		usage(sp, gettext("setname must be specified"));
2600 
2601 	/* add hosts or drives */
2602 	if (cmd == add) {
2603 		/*
2604 		 * In the multi node case start mddoors daemon.
2605 		 * mddoors itself takes care there will be
2606 		 * only one instance running, so starting it twice won't hurt
2607 		 */
2608 		if (multi_node) {
2609 			pclose(popen("/usr/lib/lvm/mddoors", "w"));
2610 		}
2611 
2612 		parse_add(argc, argv);
2613 		/*NOTREACHED*/
2614 	}
2615 
2616 	/* re-balance the replicas */
2617 	if (cmd == balance) {
2618 		parse_balance(argc, argv);
2619 		/*NOTREACHED*/
2620 	}
2621 
2622 	/* delete hosts or drives */
2623 	if (cmd == delete) {
2624 		parse_del(argc, argv);
2625 		/*NOTREACHED*/
2626 	}
2627 
2628 	/* check ownership */
2629 	if (cmd == isowner) {
2630 		parse_isowner(argc, argv);
2631 		/*NOTREACHED*/
2632 	}
2633 
2634 	/* purge the diskset */
2635 	if (cmd == purge) {
2636 		parse_purge(argc, argv);
2637 		/*NOTREACHED*/
2638 	}
2639 
2640 	/* query for data marks */
2641 	if (cmd == query) {
2642 		parse_query(argc, argv);
2643 		/*NOTREACHED*/
2644 	}
2645 
2646 	/* release ownership */
2647 	if (cmd == release) {
2648 		if (multi_node) {
2649 			/* Can't release multinode diskset */
2650 			usage(sp, gettext(
2651 			    "-r option not allowed on multi-owner diskset"));
2652 		} else {
2653 			parse_releaseset(argc, argv);
2654 			/*NOTREACHED*/
2655 		}
2656 	}
2657 
2658 	/* take ownership */
2659 	if (cmd == take) {
2660 		if (multi_node) {
2661 			/* Can't take multinode diskset */
2662 			usage(sp, gettext(
2663 			    "-t option not allowed on multi-owner diskset"));
2664 		} else {
2665 			parse_takeset(argc, argv);
2666 			/*NOTREACHED*/
2667 		}
2668 	}
2669 
2670 	/* take ownership of auto-take sets */
2671 	if (auto_take) {
2672 		parse_autotake(argc, argv);
2673 		/*NOTREACHED*/
2674 	}
2675 
2676 	/*NOTREACHED*/
2677 	return (0);
2678 }
2679