xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision 3e14f97f673e8a630f076077de35afdd43dc1587)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Metadevice diskset utility.
28  */
29 
30 #include <meta.h>
31 #include <sys/lvm/md_mddb.h>
32 #include <sdssc.h>
33 
/*
 * Command selector values for the metaset utility.  The enumerators
 * are numbered consecutively starting at zero.
 */
enum metaset_cmd {
	notspecified = 0,
	add,
	balance,
	delete,
	cluster,
	isowner,
	purge,
	query,
	release,
	take,
	/* Join a multinode diskset */
	join,
	/* Withdraw from a multinode diskset */
	withdraw
};
48 
/*
 * Sub-command selector values for the cluster interface.  The
 * enumerators are numbered consecutively starting at zero.
 */
enum cluster_cmd {
	ccnotspecified = 0,
	/* Return the version of the cluster I/F */
	clusterversion,
	/* List disks in a given diskset */
	clusterdisksin,
	/* back door for Cluster take */
	clustertake,
	/* back door for Cluster release */
	clusterrelease,
	/* back door for Cluster purge */
	clusterpurge,
	/* proxy the args after '--' to primary */
	clusterproxy
};
58 
59 static void
60 usage(
61 	mdsetname_t	*sp,
62 	char		*string)
63 {
64 	if ((string != NULL) && (*string != '\0'))
65 		md_eprintf("%s\n", string);
66 	(void) fprintf(stderr, gettext(
67 	    "usage:\t%s -s setname -a [-A enable | disable] -h hostname ...\n"
68 	    "	%s -s setname -a [-M] -h hostname ...\n"
69 	    "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
70 	    "	%s -s setname -d [-M] -h hostname ...\n"
71 	    "	%s -s setname -d [-M] -f -h all-hostnames\n"
72 	    "	%s -s setname -d [-M] [-f] drivename ...\n"
73 	    "	%s -s setname -d [-M] [-f] hostname ...\n"
74 	    "	%s -s setname -A enable | disable\n"
75 	    "	%s -s setname -t [-f]\n"
76 	    "	%s -s setname -r\n"
77 	    "	%s [-s setname] -j [-M]\n"
78 	    "	%s [-s setname] -w [-M]\n"
79 	    "	%s -s setname -P [-M]\n"
80 	    "	%s -s setname -b [-M]\n"
81 	    "	%s -s setname -o [-M] [-h hostname]\n"
82 	    "	%s [-s setname]\n"
83 	    "\n"
84 	    "		hostname = contents of /etc/nodename\n"
85 	    "		drivename = cNtNdN no slice\n"
86 	    "		[-M] for multi-owner set is optional except"
87 	    " on set creation\n"),
88 	    myname, myname, myname, myname, myname, myname, myname, myname,
89 	    myname, myname, myname, myname, myname, myname, myname, myname);
90 	md_exit(sp, (string == NULL) ? 0 : 1);
91 }
92 
93 /*
94  * The svm.sync rc script relies heavily on the metaset output.
95  * Any changes to the metaset output MUST verify that the rc script
96  * does not break. Not doing so may potentially leave the system
97  * unusable. You have been WARNED.
98  */
99 static int
100 printset(mdsetname_t *sp, md_error_t *ep)
101 {
102 	int			i, j;
103 	md_set_desc		*sd;
104 	md_drive_desc		*dd, *p;
105 	int			max_meds;
106 	md_mnnode_desc		*nd;
107 
108 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
109 		return (-1);
110 
111 	/*
112 	 * Only get set owner information for traditional diskset.
113 	 * This set owner information is stored in the node records
114 	 * for a MN diskset.
115 	 */
116 	if (!(MD_MNSET_DESC(sd))) {
117 		if (metaget_setownership(sp, ep) == -1)
118 			return (-1);
119 	}
120 
121 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
122 	    ep)) == NULL) && !mdisok(ep))
123 		return (-1);
124 
125 	if (MD_MNSET_DESC(sd)) {
126 		(void) printf(gettext(
127 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
128 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
129 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
130 		    (dd != NULL)) {
131 			(void) printf(gettext(
132 			    "Master and owner information unavailable "
133 			    "until joined (metaset -j)\n"));
134 		}
135 	} else {
136 		(void) printf(gettext(
137 		    "\nSet name = %s, Set number = %d\n"),
138 		    sp->setname, sp->setno);
139 	}
140 
141 	if (MD_MNSET_DESC(sd)) {
142 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
143 		    gettext("Host"), gettext("Owner"), gettext("Member"));
144 		nd = sd->sd_nodelist;
145 		while (nd) {
146 			/*
147 			 * Don't print nodes that aren't ok since they may be
148 			 * removed from config during a reconfig cycle.  If a
149 			 * node was being added to a diskset and the entire
150 			 * cluster went down but the node being added was unable
151 			 * to reboot, there's no way to know if that node had
152 			 * its own node record set to OK or not.  So, node
153 			 * record is left in ADD state during reconfig cycle.
154 			 * When that node reboots and returns to the cluster,
155 			 * the reconfig cycle will either remove the node
156 			 * record (if not marked OK on that node) or will mark
157 			 * it OK on all nodes.
158 			 * It is very important to only remove a node record
159 			 * from the other nodes when that node record is not
160 			 * marked OK on its own node - otherwise, different
161 			 * nodes would have different nodelists possibly
162 			 * causing different nodes to to choose different
163 			 * masters.
164 			 */
165 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
166 				nd = nd->nd_next;
167 				continue;
168 			}
169 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
170 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
171 				(void) printf(
172 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
173 				    nd->nd_nodename, gettext("multi-owner"),
174 				    gettext("Yes"));
175 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
176 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
177 				/* Should never be able to happen */
178 				(void) printf(
179 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
180 				    nd->nd_nodename, gettext("multi-owner"),
181 				    gettext("No"));
182 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
183 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
184 				(void) printf(
185 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
186 				    nd->nd_nodename, gettext(""),
187 				    gettext("Yes"));
188 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
189 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
190 				(void) printf(
191 				    gettext("  %-17.17s  %-12.12s  %-4.4s\n"),
192 				    nd->nd_nodename, gettext(""),
193 				    gettext("No"));
194 			}
195 			nd = nd->nd_next;
196 		}
197 	} else {
198 		(void) printf("\n%-19.19s %-5.5s\n",
199 		    gettext("Host"), gettext("Owner"));
200 		for (i = 0; i < MD_MAXSIDES; i++) {
201 			/* Skip empty slots */
202 			if (sd->sd_nodes[i][0] == '\0')
203 				continue;
204 
205 			/*
206 			 * Standard hostname field is 17 bytes but metaset will
207 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
208 			 */
209 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
210 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
211 			    (sd->sd_isown[i] ? gettext("Yes (auto)") :
212 			    gettext("No (auto)"))
213 			    : (sd->sd_isown[i] ? gettext("Yes") : "")));
214 		}
215 	}
216 
217 	if (sd->sd_med.n_cnt > 0)
218 		(void) printf("\n%-19.19s %-7.7s\n",
219 		    gettext("Mediator Host(s)"), gettext("Aliases"));
220 
221 	if ((max_meds = get_max_meds(ep)) == 0)
222 		return (-1);
223 
224 	for (i = 0; i < max_meds; i++) {
225 		if (sd->sd_med.n_lst[i].a_cnt == 0)
226 			continue;
227 		/*
228 		 * Standard hostname field is 17 bytes but metaset will
229 		 * display up to MD_MAX_NODENAME, def in meta_basic.h
230 		 */
231 		(void) printf("  %-17.*s   ", MD_MAX_NODENAME,
232 		    sd->sd_med.n_lst[i].a_nm[0]);
233 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
234 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
235 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
236 				(void) printf(gettext(", "));
237 		}
238 		(void) printf("\n");
239 	}
240 
241 	if (dd) {
242 		int	len = 0;
243 
244 
245 		/*
246 		 * Building a format string on the fly that will
247 		 * be used in (f)printf. This allows the length
248 		 * of the ctd to vary from small to large without
249 		 * looking horrible.
250 		 */
251 		for (p = dd; p != NULL; p = p->dd_next)
252 			len = max(len, strlen(p->dd_dnp->cname));
253 
254 		len += 2;
255 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
256 		    gettext("Drive"),
257 		    gettext("Dbase"));
258 		for (p = dd; p != NULL; p = p->dd_next) {
259 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
260 			    p->dd_dnp->cname,
261 			    (p->dd_dbcnt ? gettext("Yes") :
262 			    gettext("No")));
263 		}
264 	}
265 
266 	return (0);
267 }
268 
269 static int
270 printsets(mdsetname_t *sp, md_error_t *ep)
271 {
272 	int			i;
273 	mdsetname_t		*sp1;
274 	set_t			max_sets;
275 
276 	/*
277 	 * print setname given.
278 	 */
279 	if (! metaislocalset(sp)) {
280 		if (printset(sp, ep))
281 			return (-1);
282 		return (0);
283 	}
284 
285 	if ((max_sets = get_max_sets(ep)) == 0)
286 		return (-1);
287 
288 	/*
289 	 * Print all known sets
290 	 */
291 	for (i = 1; i < max_sets; i++) {
292 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
293 			if (! mdiserror(ep, MDE_NO_SET))
294 				break;
295 			mdclrerror(ep);
296 			continue;
297 		}
298 
299 		if (printset(sp1, ep))
300 			break;
301 	}
302 	if (! mdisok(ep))
303 		return (-1);
304 
305 	return (0);
306 }
307 
308 /*
309  * Print the current versionn of the cluster contract private interface.
310  */
311 static void
312 printclusterversion()
313 {
314 	(void) printf("%s\n", METASETIFVERSION);
315 }
316 
317 /*
318  * Print the disks that make up the given disk set. This is used
319  * exclusively by Sun Cluster and is contract private.
320  * Should never be called with sname of a Multinode diskset.
321  */
322 static int
323 printdisksin(char *sname, md_error_t *ep)
324 {
325 	mdsetname_t	*sp;
326 	md_drive_desc	*dd, *p;
327 
328 	if ((sp = metasetname(sname, ep)) == NULL) {
329 
330 		/*
331 		 * During a deletion of a set the associated service is
332 		 * put offline. The SC3.0 reservation code calls disksuite
333 		 * to find a list of disks associated with the set so that
334 		 * it can release the reservation on those disks. In this
335 		 * case there won't be any disks or even a set left. So just
336 		 * return.
337 		 */
338 		return (0);
339 	}
340 
341 	if (metaget_setownership(sp, ep) == -1)
342 		return (-1);
343 
344 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
345 	    ep)) == NULL) && !mdisok(ep))
346 		return (-1);
347 
348 	for (p = dd; p != NULL; p = p->dd_next)
349 		(void) printf("%s\n", p->dd_dnp->rname);
350 
351 	return (0);
352 }
353 
354 static void
355 parse_printset(int argc, char **argv)
356 {
357 	int		c;
358 	mdsetname_t	*sp = NULL;
359 	char		*sname = MD_LOCAL_NAME;
360 	md_error_t	status = mdnullerror;
361 	md_error_t	*ep = &status;
362 
363 	/* reset and parse args */
364 	optind = 1;
365 	opterr = 1;
366 	while ((c = getopt(argc, argv, "s:")) != -1) {
367 		switch (c) {
368 		case 's':
369 			sname = optarg;
370 			break;
371 		default:
372 			usage(sp, gettext("unknown options"));
373 		}
374 	}
375 
376 	argc -= optind;
377 	argv += optind;
378 
379 	if (argc != 0)
380 		usage(sp, gettext("too many args"));
381 
382 	if ((sp = metasetname(sname, ep)) == NULL) {
383 		mde_perror(ep, "");
384 		md_exit(sp, 1);
385 	}
386 
387 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
388 		mde_perror(ep, "");
389 		md_exit(sp, 1);
390 	}
391 
392 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
393 		mde_perror(ep, "");
394 		md_exit(sp, 1);
395 	}
396 
397 	md_exit(sp, 0);
398 }
399 
400 static void
401 parse_add(int argc, char **argv)
402 {
403 	int			c, created_set;
404 	int			hosts = FALSE;
405 	int			meds = FALSE;
406 	int			auto_take = FALSE;
407 	int			force_label = FALSE;
408 	int			default_size = TRUE;
409 	mdsetname_t		*sp = NULL;
410 	char			*sname = MD_LOCAL_NAME;
411 	md_error_t		status = mdnullerror;
412 	md_error_t		 *ep = &status;
413 	mddrivenamelist_t	*dnlp = NULL;
414 	mddrivenamelist_t	*p;
415 	daddr_t			dbsize, nblks;
416 	mdsetname_t		*local_sp = NULL;
417 	int			multi_node = 0;
418 	md_set_desc		*sd;
419 	rval_e			sdssc_rval;
420 
421 	/* reset and parse args */
422 	optind = 1;
423 	opterr = 1;
424 	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
425 		switch (c) {
426 		case 'M':
427 			multi_node = 1;
428 			break;
429 		case 'A':
430 			/* verified sub-option in main */
431 			if (strcmp(optarg, "enable") == 0)
432 				auto_take = TRUE;
433 			break;
434 		case 'a':
435 			break;
436 		case 'h':
437 		case 'm':
438 			if (meds == TRUE || hosts == TRUE)
439 				usage(sp, gettext(
440 				    "only one -m or -h option allowed"));
441 
442 			if (default_size == FALSE || force_label == TRUE)
443 				usage(sp, gettext(
444 				    "conflicting options"));
445 
446 			if (c == 'h')
447 				hosts = TRUE;
448 			else
449 				meds = TRUE;
450 			break;
451 		case 'l':
452 			if (hosts == TRUE || meds == TRUE)
453 				usage(sp, gettext(
454 				    "conflicting options"));
455 			if (sscanf(optarg, "%ld", &dbsize) != 1) {
456 				md_eprintf(gettext(
457 				    "%s: bad format\n"), optarg);
458 				usage(sp, "");
459 			}
460 
461 			default_size = FALSE;
462 			break;
463 		case 'L':
464 			/* Same criteria as -l */
465 			if (hosts == TRUE || meds == TRUE)
466 				usage(sp, gettext(
467 				    "conflicting options"));
468 			force_label = TRUE;
469 			break;
470 		case 's':
471 			sname = optarg;
472 			break;
473 		default:
474 			usage(sp, gettext(
475 			    "unknown options"));
476 		}
477 	}
478 
479 	/* Can only use -A enable when creating the single-node set */
480 	if (auto_take && hosts != TRUE)
481 		usage(sp, gettext("conflicting options"));
482 
483 	argc -= optind;
484 	argv += optind;
485 
486 	/*
487 	 * Add hosts
488 	 */
489 	if (hosts == TRUE) {
490 
491 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
492 			mde_perror(ep, "");
493 			md_exit(local_sp, 1);
494 		}
495 
496 		if (meta_lock(local_sp, TRUE, ep) != 0) {
497 			mde_perror(ep, "");
498 			md_exit(local_sp, 1);
499 		}
500 
501 		/*
502 		 * Keep track of Cluster set creation. Need to complete
503 		 * the transaction no matter if the set was created or not.
504 		 */
505 		created_set = 0;
506 
507 		/*
508 		 * Have no set, cannot take the lock, so only take the
509 		 * local lock.
510 		 */
511 		if ((sp = metasetname(sname, ep)) == NULL) {
512 			sdssc_rval = 0;
513 			if (multi_node) {
514 				/*
515 				 * When running on a cluster system that
516 				 * does not support MN disksets, the routine
517 				 * sdssc_mo_create_begin will be bound
518 				 * to the SVM routine not_bound_error
519 				 * which returns SDSSC_NOT_BOUND_ERROR.
520 				 *
521 				 * When running on a cluster system that
522 				 * does support MN disksets, the routine
523 				 * sdssc_mo_create_begin will be bound to
524 				 * the sdssc_mo_create_begin routine in
525 				 * library libsdssc_so.  A call to
526 				 * sdssc_mo_create_begin will return with
527 				 * either SDSSC_ERROR or SDSSC_OKAY. If
528 				 * an SDSSC_OKAY is returned, then the
529 				 * cluster framework has allocated a
530 				 * set number for this new set that is unique
531 				 * across traditional and MN disksets.
532 				 * Libmeta will get this unique set number
533 				 * by calling sdssc_get_index.
534 				 *
535 				 * When running on a non-cluster system,
536 				 * the routine sdssc_mo_create_begin
537 				 * will be bound to the SVM routine
538 				 * not_bound which returns SDSSC_NOT_BOUND.
539 				 * In this case, all sdssc routines will
540 				 * return SDSSC_NOT_BOUND.  No need to check
541 				 * for return value of SDSSC_NOT_BOUND since
542 				 * the libmeta call to get the set number
543 				 * (sdssc_get_index) will also fail with
544 				 * SDSSC_NOT_BOUND causing libmeta to
545 				 * determine its own set number.
546 				 */
547 				sdssc_rval = sdssc_mo_create_begin(sname, argc,
548 				    argv, SDSSC_PICK_SETNO);
549 				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
550 					(void) mderror(ep, MDE_NOT_MN, NULL);
551 					mde_perror(ep,
552 					"Cluster node does not support "
553 					"multi-owner diskset operations");
554 					md_exit(local_sp, 1);
555 				} else if (sdssc_rval == SDSSC_ERROR) {
556 					mde_perror(ep, "");
557 					md_exit(local_sp, 1);
558 				}
559 			} else {
560 				sdssc_rval = sdssc_create_begin(sname, argc,
561 				    argv, SDSSC_PICK_SETNO);
562 				if (sdssc_rval == SDSSC_ERROR) {
563 					mde_perror(ep, "");
564 					md_exit(local_sp, 1);
565 				}
566 			}
567 			/*
568 			 * Created diskset (as opposed to adding a
569 			 * host to an existing diskset).
570 			 */
571 			created_set = 1;
572 
573 			sp = Zalloc(sizeof (*sp));
574 			sp->setname = Strdup(sname);
575 			sp->lockfd = MD_NO_LOCK;
576 			mdclrerror(ep);
577 		} else {
578 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
579 				mde_perror(ep, "");
580 				md_exit(local_sp, 1);
581 			}
582 			if (MD_MNSET_DESC(sd)) {
583 				multi_node = 1;
584 			}
585 
586 			/*
587 			 * can't add hosts to an existing set & enable
588 			 * auto-take
589 			 */
590 			if (auto_take)
591 				usage(sp, gettext("conflicting options"));
592 
593 			/*
594 			 * Have a valid set, take the set lock also.
595 			 *
596 			 * A MN diskset does not use the set meta_lock but
597 			 * instead uses the clnt_lock of rpc.metad and the
598 			 * suspend/resume feature of the rpc.mdcommd.  Can't
599 			 * use set meta_lock since class 1 messages are
600 			 * grabbing this lock and if this thread is holding
601 			 * the set meta_lock then no rpc.mdcommd suspend
602 			 * can occur.
603 			 */
604 			if (!multi_node) {
605 				if (meta_lock(sp, TRUE, ep) != 0) {
606 					mde_perror(ep, "");
607 					md_exit(local_sp, 1);
608 				}
609 			}
610 		}
611 
612 		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
613 		    ep)) {
614 			if (created_set)
615 				sdssc_create_end(sname, SDSSC_CLEANUP);
616 			mde_perror(&status, "");
617 			if (!multi_node)
618 				(void) meta_unlock(sp, ep);
619 			md_exit(local_sp, 1);
620 		}
621 
622 		if (created_set)
623 			sdssc_create_end(sname, SDSSC_COMMIT);
624 
625 		else {
626 			/*
627 			 * If adding hosts to existing diskset,
628 			 * call DCS svcs
629 			 */
630 			sdssc_add_hosts(sname, argc, argv);
631 		}
632 		if (!multi_node)
633 			(void) meta_unlock(sp, ep);
634 		md_exit(local_sp, 0);
635 	}
636 
637 	/*
638 	 * Add mediators
639 	 */
640 	if (meds == TRUE) {
641 
642 		if ((sp = metasetname(sname, ep)) == NULL) {
643 			mde_perror(ep, "");
644 			md_exit(local_sp, 1);
645 		}
646 
647 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
648 			mde_perror(ep, "");
649 			md_exit(local_sp, 1);
650 		}
651 
652 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
653 			mde_perror(ep, "");
654 			md_exit(local_sp, 1);
655 		}
656 		if (MD_MNSET_DESC(sd)) {
657 			multi_node = 1;
658 		}
659 
660 		if (meta_lock(local_sp, TRUE, ep) != 0) {
661 			mde_perror(ep, "");
662 			md_exit(local_sp, 1);
663 		}
664 		/*
665 		 * A MN diskset does not use the set meta_lock but
666 		 * instead uses the clnt_lock of rpc.metad and the
667 		 * suspend/resume feature of the rpc.mdcommd.  Can't
668 		 * use set meta_lock since class 1 messages are
669 		 * grabbing this lock and if this thread is holding
670 		 * the set meta_lock then no rpc.mdcommd suspend
671 		 * can occur.
672 		 */
673 		if (!multi_node) {
674 			if (meta_lock(sp, TRUE, ep) != 0) {
675 				mde_perror(ep, "");
676 				md_exit(local_sp, 1);
677 			}
678 		}
679 
680 		if (meta_set_addmeds(sp, argc, argv, ep)) {
681 			mde_perror(&status, "");
682 			if (!multi_node)
683 				(void) meta_unlock(sp, ep);
684 			md_exit(local_sp, 1);
685 		}
686 
687 		if (!multi_node)
688 			(void) meta_unlock(sp, ep);
689 		md_exit(local_sp, 0);
690 	}
691 
692 	/*
693 	 * Add drives
694 	 */
695 	if ((sp = metasetname(sname, ep)) == NULL) {
696 		mde_perror(ep, "");
697 		md_exit(local_sp, 1);
698 	}
699 
700 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
701 		mde_perror(ep, "");
702 		md_exit(local_sp, 1);
703 	}
704 
705 	/* Determine if diskset is a MN diskset or not */
706 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
707 		mde_perror(ep, "");
708 		md_exit(local_sp, 1);
709 	}
710 	if (MD_MNSET_DESC(sd)) {
711 		multi_node = 1;
712 	}
713 
714 	if (meta_lock(local_sp, TRUE, ep) != 0) {
715 		mde_perror(ep, "");
716 		md_exit(local_sp, 1);
717 	}
718 
719 	/* Make sure database size is within limits */
720 	if (default_size == FALSE) {
721 		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
722 		    (!multi_node && dbsize < MDDB_MINBLKS))
723 			usage(sp, gettext(
724 			    "size (-l) is too small"));
725 
726 		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
727 		    (!multi_node && dbsize > MDDB_MAXBLKS))
728 			usage(sp, gettext(
729 			    "size (-l) is too big"));
730 	}
731 
732 	/*
733 	 * Have a valid set, take the set lock also.
734 	 *
735 	 * A MN diskset does not use the set meta_lock but
736 	 * instead uses the clnt_lock of rpc.metad and the
737 	 * suspend/resume feature of the rpc.mdcommd.  Can't
738 	 * use set meta_lock since class 1 messages are
739 	 * grabbing this lock and if this thread is holding
740 	 * the set meta_lock then no rpc.mdcommd suspend
741 	 * can occur.
742 	 */
743 	if (!multi_node) {
744 		if (meta_lock(sp, TRUE, ep) != 0) {
745 			mde_perror(ep, "");
746 			md_exit(local_sp, 1);
747 		}
748 	}
749 
750 
751 	/*
752 	 * If using the default size,
753 	 *   then let's adjust the default to the minimum
754 	 *   size currently in use.
755 	 */
756 	if (default_size) {
757 		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
758 		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
759 			mdclrerror(ep);
760 		else
761 			dbsize = nblks;	/* adjust replica size */
762 	}
763 
764 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
765 		mde_perror(ep, "");
766 		if (!multi_node)
767 			(void) meta_unlock(sp, ep);
768 		md_exit(local_sp, 1);
769 	}
770 
771 	if (c == 0) {
772 		md_perror(gettext(
773 		    "No drives specified to add.\n"));
774 		if (!multi_node)
775 			(void) meta_unlock(sp, ep);
776 		md_exit(local_sp, 1);
777 	}
778 
779 	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
780 		metafreedrivenamelist(dnlp);
781 		mde_perror(ep, "");
782 		if (!multi_node)
783 			(void) meta_unlock(sp, ep);
784 		md_exit(local_sp, 1);
785 	}
786 
787 	/*
788 	 * MN disksets don't have a device id in the master block
789 	 * For traditional disksets, check for the drive device
790 	 * id not fitting in the master block
791 	 */
792 	if (!multi_node) {
793 		for (p = dnlp; p != NULL; p = p->next) {
794 			int 		fd;
795 			ddi_devid_t	devid;
796 			mdname_t	*np;
797 
798 			np = metaslicename(p->drivenamep, 0, ep);
799 			if (np == NULL)
800 				continue;
801 
802 			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
803 				continue;
804 
805 			if (devid_get(fd, &devid) == 0) {
806 				size_t len;
807 
808 				len = devid_sizeof(devid);
809 				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
810 					(void) mddserror(ep,
811 					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
812 					    np->rname, NULL);
813 				devid_free(devid);
814 			} else {
815 				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
816 				    NULL, NULL, np->rname, NULL);
817 			}
818 			(void) close(fd);
819 		}
820 	}
821 
822 	/*
823 	 * MN disksets don't use DCS clustering services.
824 	 * For traditional disksets:
825 	 * There's not really much we can do here if this call fails.
826 	 * The drives have been added to the set and DiskSuite believes
827 	 * it owns the drives.
828 	 * Relase the set and hope for the best.
829 	 */
830 	if ((!multi_node) &&
831 	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
832 		(void) meta_set_release(sp, ep);
833 		(void) printf(gettext(
834 		    "Sun Clustering failed to make set primary\n"));
835 	}
836 
837 	metafreedrivenamelist(dnlp);
838 	if (!multi_node)
839 		(void) meta_unlock(sp, ep);
840 	md_exit(local_sp, 0);
841 }
842 
843 static void
844 parse_balance(int argc, char **argv)
845 {
846 	int		c;
847 	mdsetname_t	*sp = NULL;
848 	char		*sname = MD_LOCAL_NAME;
849 	md_error_t	status = mdnullerror;
850 	md_set_desc	*sd;
851 	int		multi_node = 0;
852 
853 	/* reset and parse args */
854 	optind = 1;
855 	opterr = 1;
856 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
857 		switch (c) {
858 		case 'M':
859 			break;
860 		case 'b':
861 			break;
862 		case 's':
863 			sname = optarg;
864 			break;
865 		default:
866 			usage(sp, gettext("unknown options"));
867 		}
868 	}
869 
870 	argc -= optind;
871 	argv += optind;
872 
873 	if (argc != 0)
874 		usage(sp, gettext("too many args"));
875 
876 	if ((sp = metasetname(sname, &status)) == NULL) {
877 		mde_perror(&status, "");
878 		md_exit(sp, 1);
879 	}
880 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
881 		mde_perror(&status, "");
882 		md_exit(sp, 1);
883 	}
884 	if (MD_MNSET_DESC(sd)) {
885 		multi_node = 1;
886 	}
887 	/*
888 	 * Have a valid set, take the set lock also.
889 	 *
890 	 * A MN diskset does not use the set meta_lock but
891 	 * instead uses the clnt_lock of rpc.metad and the
892 	 * suspend/resume feature of the rpc.mdcommd.  Can't
893 	 * use set meta_lock since class 1 messages are
894 	 * grabbing this lock and if this thread is holding
895 	 * the set meta_lock then no rpc.mdcommd suspend
896 	 * can occur.
897 	 */
898 	if (!multi_node) {
899 		if (meta_lock(sp, TRUE, &status) != 0) {
900 			mde_perror(&status, "");
901 			md_exit(sp, 1);
902 		}
903 	}
904 
905 	if (meta_set_balance(sp, &status) != 0) {
906 		mde_perror(&status, "");
907 		md_exit(sp, 1);
908 	}
909 	md_exit(sp, 0);
910 }
911 
912 static void
913 parse_autotake(int argc, char **argv)
914 {
915 	int			c;
916 	int			enable = 0;
917 	mdsetname_t		*sp = NULL;
918 	char			*sname = MD_LOCAL_NAME;
919 	md_error_t		status = mdnullerror;
920 	md_error_t		*ep = &status;
921 
922 	/* reset and parse args */
923 	optind = 1;
924 	opterr = 1;
925 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
926 		switch (c) {
927 		case 'A':
928 			/* verified sub-option in main */
929 			if (strcmp(optarg, "enable") == 0)
930 				enable = 1;
931 			break;
932 		case 's':
933 			/* verified presence of setname in main */
934 			sname = optarg;
935 			break;
936 		default:
937 			usage(sp, gettext("unknown options"));
938 		}
939 	}
940 
941 	if ((sp = metasetname(sname, ep)) == NULL) {
942 		mde_perror(ep, "");
943 		md_exit(sp, 1);
944 	}
945 
946 	if (meta_lock(sp, TRUE, ep) != 0) {
947 		mde_perror(ep, "");
948 		md_exit(sp, 1);
949 	}
950 
951 	if (meta_check_ownership(sp, ep) != 0) {
952 		mde_perror(ep, "");
953 		md_exit(sp, 1);
954 	}
955 
956 	if (meta_set_auto_take(sp, enable, ep) != 0) {
957 		mde_perror(ep, "");
958 		md_exit(sp, 1);
959 	}
960 
961 	md_exit(sp, 0);
962 }
963 
964 static void
965 parse_del(int argc, char **argv)
966 {
967 	int			c;
968 	mdsetname_t		*sp = NULL;
969 	char			*sname = MD_LOCAL_NAME;
970 	int			hosts = FALSE;
971 	int			meds = FALSE;
972 	int			forceflg = FALSE;
973 	md_error_t		status = mdnullerror;
974 	md_error_t		*ep = &status;
975 	mddrivenamelist_t	*dnlp = NULL;
976 	mdsetname_t		*local_sp = NULL;
977 	md_set_desc		*sd;
978 	int			multi_node = 0;
979 
980 	/* reset and parse args */
981 	optind = 1;
982 	opterr = 1;
983 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
984 		switch (c) {
985 		case 'M':
986 			break;
987 		case 'd':
988 			break;
989 		case 'f':
990 			forceflg = TRUE;
991 			break;
992 		case 'h':
993 		case 'm':
994 			if (meds == TRUE || hosts == TRUE)
995 				usage(sp, gettext(
996 				    "only one -m or -h option allowed"));
997 
998 			if (c == 'h')
999 				hosts = TRUE;
1000 			else
1001 				meds = TRUE;
1002 			break;
1003 		case 's':
1004 			sname = optarg;
1005 			break;
1006 		default:
1007 			usage(sp, gettext("unknown options"));
1008 		}
1009 	}
1010 
1011 	argc -= optind;
1012 	argv += optind;
1013 
1014 	if ((sp = metasetname(sname, ep)) == NULL) {
1015 		mde_perror(ep, "");
1016 		md_exit(local_sp, 1);
1017 	}
1018 
1019 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1020 		mde_perror(ep, "");
1021 		md_exit(local_sp, 1);
1022 	}
1023 
1024 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1025 		mde_perror(ep, "");
1026 		md_exit(local_sp, 1);
1027 	}
1028 	if (MD_MNSET_DESC(sd))
1029 		multi_node = 1;
1030 
1031 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1032 		mde_perror(ep, "");
1033 		md_exit(local_sp, 1);
1034 	}
1035 
1036 	/*
1037 	 * Have a valid set, take the set lock also.
1038 	 *
1039 	 * A MN diskset does not use the set meta_lock but
1040 	 * instead uses the clnt_lock of rpc.metad and the
1041 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1042 	 * use set meta_lock since class 1 messages are
1043 	 * grabbing this lock and if this thread is holding
1044 	 * the set meta_lock then no rpc.mdcommd suspend
1045 	 * can occur.
1046 	 */
1047 	if (!multi_node) {
1048 		if (meta_lock(sp, TRUE, ep) != 0) {
1049 			mde_perror(ep, "");
1050 			md_exit(local_sp, 1);
1051 		}
1052 	}
1053 
1054 	/*
1055 	 * Delete hosts
1056 	 */
1057 	if (hosts == TRUE) {
1058 		if (meta_check_ownership(sp, ep) != 0) {
1059 			/*
1060 			 * If we don't own the set bail out here otherwise
1061 			 * we could delete the node from the DCS service
1062 			 * yet not delete the host from the set.
1063 			 */
1064 			mde_perror(ep, "");
1065 			if (!multi_node)
1066 				(void) meta_unlock(sp, ep);
1067 			md_exit(local_sp, 1);
1068 		}
1069 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1070 			if (!metad_isautotakebyname(sname)) {
1071 				/*
1072 				 * SC could have been installed after the set
1073 				 * was created. We still want to be able to
1074 				 * delete these sets.
1075 				 */
1076 				md_perror(gettext(
1077 				    "Failed to delete hosts from DCS service"));
1078 				if (!multi_node)
1079 					(void) meta_unlock(sp, ep);
1080 				md_exit(local_sp, 1);
1081 			}
1082 		}
1083 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1084 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1085 				(void) printf(gettext(
1086 				    "Failed to restore host(s) in DCS "
1087 				    "database\n"));
1088 			}
1089 			mde_perror(ep, "");
1090 			if (!multi_node)
1091 				(void) meta_unlock(sp, ep);
1092 			md_exit(local_sp, 1);
1093 		}
1094 		if (!multi_node)
1095 			(void) meta_unlock(sp, ep);
1096 		md_exit(local_sp, 0);
1097 	}
1098 
1099 	/*
1100 	 * Delete mediators
1101 	 */
1102 	if (meds == TRUE) {
1103 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1104 			mde_perror(ep, "");
1105 			if (!multi_node)
1106 				(void) meta_unlock(sp, ep);
1107 			md_exit(local_sp, 1);
1108 		}
1109 		if (!multi_node)
1110 			(void) meta_unlock(sp, ep);
1111 		md_exit(local_sp, 0);
1112 	}
1113 
1114 	/*
1115 	 * Delete drives
1116 	 */
1117 
1118 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1119 		mde_perror(ep, "");
1120 		if (!multi_node)
1121 			(void) meta_unlock(sp, ep);
1122 		md_exit(local_sp, 1);
1123 	}
1124 
1125 	if (c == 0) {
1126 		md_perror(gettext(
1127 		    "No drives specified to delete.\n"));
1128 		if (!multi_node)
1129 			(void) meta_unlock(sp, ep);
1130 		md_exit(local_sp, 1);
1131 	}
1132 
1133 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1134 		metafreedrivenamelist(dnlp);
1135 		mde_perror(ep, "");
1136 		if (!multi_node)
1137 			(void) meta_unlock(sp, ep);
1138 		md_exit(local_sp, 1);
1139 	}
1140 
1141 	metafreedrivenamelist(dnlp);
1142 	if (!multi_node)
1143 		(void) meta_unlock(sp, ep);
1144 	md_exit(local_sp, 0);
1145 }
1146 
1147 static void
1148 parse_isowner(int argc, char **argv)
1149 {
1150 	int		c;
1151 	mdsetname_t	*sp = NULL;
1152 	char		*sname = MD_LOCAL_NAME;
1153 	md_error_t	status = mdnullerror;
1154 	md_error_t	*ep = &status;
1155 	char		*host = NULL;
1156 
1157 	/* reset and parse args */
1158 	optind = 1;
1159 	opterr = 1;
1160 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1161 		switch (c) {
1162 		case 'M':
1163 			break;
1164 		case 'o':
1165 			break;
1166 		case 'h':
1167 			if (host != NULL) {
1168 				usage(sp, gettext(
1169 				    "only one -h option allowed"));
1170 			}
1171 			host = optarg;
1172 			break;
1173 		case 's':
1174 			sname = optarg;
1175 			break;
1176 		default:
1177 			usage(sp, gettext("unknown options"));
1178 		}
1179 	}
1180 
1181 	argc -= optind;
1182 	argv += optind;
1183 
1184 	if (argc != 0)
1185 		usage(sp, gettext("too many args"));
1186 
1187 	if ((sp = metasetname(sname, ep)) == NULL) {
1188 		mde_perror(ep, "");
1189 		md_exit(sp, 1);
1190 	}
1191 
1192 	if (host == NULL) {
1193 		if (meta_check_ownership(sp, ep) != 0) {
1194 			mde_perror(ep, "");
1195 			md_exit(sp, 1);
1196 		}
1197 	} else {
1198 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1199 			mde_perror(ep, "");
1200 			md_exit(sp, 1);
1201 		}
1202 	}
1203 	md_exit(sp, 0);
1204 }
1205 
1206 static void
1207 parse_purge(int argc, char **argv)
1208 {
1209 	int		c;
1210 	mdsetname_t	*sp = NULL;
1211 	mdsetname_t	*local_sp = NULL;
1212 	md_drive_desc	*dd;
1213 	char		*sname = MD_LOCAL_NAME;
1214 	char		*thishost = mynode();
1215 	md_error_t	status = mdnullerror;
1216 	md_error_t	*ep = &status;
1217 	int		bypass_cluster_purge = 0;
1218 	int		forceflg = FALSE;
1219 	int		ret = 0;
1220 	int		multi_node = 0;
1221 	md_set_desc		*sd;
1222 
1223 	optind = 1;
1224 	opterr = 1;
1225 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1226 		switch (c) {
1227 		case 'M':
1228 			break;
1229 		case 'C':
1230 			bypass_cluster_purge = 1;
1231 			break;
1232 		case 'f':
1233 			forceflg = TRUE;
1234 			break;
1235 		case 'P':
1236 			break;
1237 		case 's':
1238 			sname = optarg;
1239 			break;
1240 		default:
1241 			usage(sp, gettext("unknown options"));
1242 		}
1243 	}
1244 
1245 	argc -= optind;
1246 	argv += optind;
1247 
1248 	if (argc != 0)
1249 		usage(sp, gettext("too many arguments"));
1250 
1251 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1252 		mde_perror(ep, "");
1253 		md_exit(local_sp, 1);
1254 	}
1255 
1256 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1257 		mde_perror(ep, "");
1258 		md_exit(local_sp, 1);
1259 	}
1260 
1261 	if ((sp = metasetname(sname, ep)) == NULL) {
1262 		mde_perror(ep, "");
1263 		md_exit(sp, 1);
1264 	}
1265 
1266 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1267 		mde_perror(ep, "");
1268 		md_exit(local_sp, 1);
1269 	}
1270 	if (MD_MNSET_DESC(sd))
1271 		multi_node = 1;
1272 
1273 	if (!multi_node) {
1274 		if (meta_lock(sp, TRUE, ep) != 0) {
1275 			mde_perror(ep, "");
1276 			md_exit(local_sp, 1);
1277 		}
1278 	}
1279 
1280 	/* Must not own the set if purging it from this host */
1281 	if (meta_check_ownership(sp, ep) == 0) {
1282 		/*
1283 		 * Need to see if there are disks in the set, if not then
1284 		 * there is no ownership but meta_check_ownership returns 0
1285 		 */
1286 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1287 		if (!mdisok(ep)) {
1288 			mde_perror(ep, "");
1289 			if (!multi_node)
1290 				(void) meta_unlock(sp, ep);
1291 			md_exit(local_sp, 1);
1292 		}
1293 		if (dd != NULL) {
1294 			(void) printf(gettext
1295 			    ("Must not be owner of the set when purging it\n"));
1296 			if (!multi_node)
1297 				(void) meta_unlock(sp, ep);
1298 			md_exit(local_sp, 1);
1299 		}
1300 	}
1301 	/*
1302 	 * Remove the node from the DCS service
1303 	 */
1304 	if (!bypass_cluster_purge) {
1305 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1306 			md_perror(gettext
1307 			    ("Failed to purge hosts from DCS service"));
1308 			if (!multi_node)
1309 				(void) meta_unlock(sp, ep);
1310 			md_exit(local_sp, 1);
1311 		}
1312 	}
1313 
1314 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1315 	    ep)) != 0) {
1316 		if (!bypass_cluster_purge) {
1317 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1318 			    SDSSC_ERROR) {
1319 				(void) printf(gettext(
1320 				    "Failed to restore host in DCS "
1321 				    "database\n"));
1322 			}
1323 		}
1324 		mde_perror(ep, "");
1325 		if (!multi_node)
1326 			(void) meta_unlock(sp, ep);
1327 		md_exit(local_sp, ret);
1328 	}
1329 
1330 	if (!multi_node)
1331 		(void) meta_unlock(sp, ep);
1332 	md_exit(local_sp, 0);
1333 }
1334 
1335 static void
1336 parse_query(int argc, char **argv)
1337 {
1338 	int		c;
1339 	mdsetname_t	*sp = NULL;
1340 	mddb_dtag_lst_t	*dtlp = NULL;
1341 	mddb_dtag_lst_t	*tdtlp;
1342 	char		*sname = MD_LOCAL_NAME;
1343 	md_error_t	status = mdnullerror;
1344 
1345 	/* reset and parse args */
1346 	optind = 1;
1347 	opterr = 1;
1348 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1349 		switch (c) {
1350 		case 'M':
1351 			break;
1352 		case 'q':
1353 			break;
1354 		case 's':
1355 			sname = optarg;
1356 			break;
1357 		default:
1358 			usage(sp, gettext("unknown options"));
1359 		}
1360 	}
1361 
1362 	argc -= optind;
1363 	argv += optind;
1364 
1365 	if (argc != 0)
1366 		usage(sp, gettext("too many args"));
1367 
1368 	if ((sp = metasetname(sname, &status)) == NULL) {
1369 		mde_perror(&status, "");
1370 		md_exit(sp, 1);
1371 	}
1372 
1373 	if (meta_lock(sp, TRUE, &status) != 0) {
1374 		mde_perror(&status, "");
1375 		md_exit(sp, 1);
1376 	}
1377 
1378 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1379 		mde_perror(&status, "");
1380 		md_exit(sp, 1);
1381 	}
1382 
1383 	if (dtlp != NULL)
1384 		(void) printf("The following tag(s) were found:\n");
1385 
1386 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1387 		dtlp = tdtlp->dtl_nx;
1388 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1389 		    tdtlp->dtl_dt.dt_hn,
1390 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1391 		Free(tdtlp);
1392 	}
1393 
1394 	md_exit(sp, 0);
1395 }
1396 
1397 /* Should never be called with sname of a Multinode diskset. */
1398 static void
1399 parse_releaseset(int argc, char **argv)
1400 {
1401 	int		c;
1402 	mdsetname_t	*sp = NULL;
1403 	md_error_t	status = mdnullerror;
1404 	md_error_t	*ep = &status;
1405 	char		*sname = MD_LOCAL_NAME;
1406 	sdssc_boolean_e	cluster_release = SDSSC_False;
1407 	sdssc_version_t	vers;
1408 	rval_e		rval;
1409 	md_set_desc	*sd;
1410 
1411 	/* reset and parse args */
1412 	optind = 1;
1413 	opterr = 1;
1414 	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1415 		switch (c) {
1416 		case 'C':
1417 			cluster_release = SDSSC_True;
1418 			break;
1419 		case 's':
1420 			sname = optarg;
1421 			break;
1422 		case 'r':
1423 			break;
1424 		default:
1425 			usage(sp, gettext("unknown options"));
1426 		}
1427 	}
1428 
1429 	argc -= optind;
1430 	argv += optind;
1431 
1432 	if (argc > 0)
1433 		usage(sp, gettext("too many args"));
1434 
1435 	(void) memset(&vers, 0, sizeof (vers));
1436 
1437 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1438 	    (vers.major == 3) &&
1439 	    (cluster_release == SDSSC_False)) {
1440 
1441 		/*
1442 		 * If the release is being done by the user via the CLI
1443 		 * we need to notify the DCS to release this node as being
1444 		 * the primary. The reason nothing else needs to be done
1445 		 * is due to the fact that the reservation code will exec
1446 		 * metaset -C release to complete the operation.
1447 		 */
1448 		rval = sdssc_notify_service(sname, Release_Primary);
1449 		if (rval == SDSSC_ERROR) {
1450 			(void) printf(gettext(
1451 			    "metaset: failed to notify DCS of release\n"));
1452 		}
1453 		md_exit(NULL, rval == SDSSC_ERROR);
1454 	}
1455 
1456 	if ((sp = metasetname(sname, ep)) == NULL) {
1457 
1458 		/*
1459 		 * It's entirely possible for the SC3.0 reservation code
1460 		 * to call for DiskSet to release a diskset and have that
1461 		 * diskset not exist. During a diskset removal DiskSuite
1462 		 * maybe able to remove all traces of the diskset before
1463 		 * the reservation code execs metaset -C release in which
1464 		 * case the metasetname will fail, but the overall command
1465 		 * shouldn't.
1466 		 */
1467 		if (vers.major == 3)
1468 			md_exit(sp, 0);
1469 		else {
1470 			mde_perror(ep, "");
1471 			md_exit(sp, 1);
1472 		}
1473 	}
1474 
1475 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1476 		mde_perror(ep, "");
1477 		md_exit(sp, 1);
1478 	}
1479 
1480 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1481 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1482 		md_exit(sp, 1);
1483 	}
1484 
1485 	if (meta_lock_nowait(sp, ep) != 0) {
1486 		mde_perror(ep, "");
1487 		md_exit(sp, 10);	/* special errcode */
1488 	}
1489 
1490 	if (meta_set_release(sp, ep)) {
1491 		mde_perror(ep, "");
1492 		md_exit(sp, 1);
1493 	}
1494 	md_exit(sp, 0);
1495 }
1496 
1497 /* Should never be called with sname of a Multinode diskset. */
1498 static void
1499 parse_takeset(int argc, char **argv)
1500 {
1501 	int		c;
1502 	mdsetname_t	*sp = NULL;
1503 	int		flags = 0;
1504 	char		*sname = MD_LOCAL_NAME;
1505 	mhd_mhiargs_t	mhiargs;
1506 	char 		*cp = NULL;
1507 	int		pos = -1;	/* position of timeout value */
1508 	int		usetag = 0;
1509 	static char	*nullopts[] = { NULL };
1510 	md_error_t	status = mdnullerror;
1511 	md_error_t	*ep = &status;
1512 	sdssc_boolean_e	cluster_take = SDSSC_False;
1513 	sdssc_version_t	vers;
1514 	rval_e		rval;
1515 	int		set_take_rval;
1516 
1517 	/* reset and parse args */
1518 	optind = 1;
1519 	opterr = 1;
1520 	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1521 		switch (c) {
1522 		case 'C':
1523 			cluster_take = SDSSC_True;
1524 			break;
1525 		case 'f':
1526 			flags |= TAKE_FORCE;
1527 			break;
1528 		case 's':
1529 			sname = optarg;
1530 			break;
1531 		case 't':
1532 			break;
1533 		case 'u':
1534 			usetag = atoi(optarg);
1535 			flags |= TAKE_USETAG;
1536 			break;
1537 		case 'y':
1538 			flags |= TAKE_USEIT;
1539 			break;
1540 		default:
1541 			usage(sp, gettext("unknown options"));
1542 		}
1543 	}
1544 
1545 	mhiargs = defmhiargs;
1546 
1547 	argc -= optind;
1548 	argv += optind;
1549 
1550 	if (argc > 1)
1551 		usage(sp, gettext("too many args"));
1552 
1553 	/*
1554 	 * If we have a list of timeout value overrides, handle it here
1555 	 */
1556 	while (argv[0] != NULL && *argv[0] != '\0') {
1557 		/*
1558 		 * The use of the nullopts[] "token list" here is to make
1559 		 * getsubopts() simply parse a comma separated list
1560 		 * returning either "" or the contents of the field, the
1561 		 * end condition is exaustion of the initial string, which
1562 		 * is modified in the process.
1563 		 */
1564 		(void) getsubopt(&argv[0], nullopts, &cp);
1565 
1566 		c = 0;			/* re-use c as temp value of timeout */
1567 
1568 		if (*cp != '-')		/* '-' uses default */
1569 			c = atoi(cp);
1570 
1571 		if (c < 0) {
1572 			usage(sp, gettext(
1573 			    "time out values must be > 0"));
1574 		}
1575 
1576 		if (++pos > 3) {
1577 			usage(sp, gettext(
1578 			    "too many timeout values specified."));
1579 		}
1580 
1581 		if (c == 0)		/* 0 or "" field uses default */
1582 			continue;
1583 
1584 		/*
1585 		 * Assign temp value to appropriate structure member based on
1586 		 * its position in the comma separated list.
1587 		 */
1588 		switch (pos) {
1589 			case 0:
1590 				mhiargs.mh_ff = c;
1591 				break;
1592 
1593 			case 1:
1594 				mhiargs.mh_tk.reinstate_resv_delay = c;
1595 				break;
1596 
1597 			case 2:
1598 				mhiargs.mh_tk.min_ownership_delay = c;
1599 				break;
1600 
1601 			case 3:
1602 				mhiargs.mh_tk.max_ownership_delay = c;
1603 				break;
1604 		}
1605 	}
1606 
1607 	(void) memset(&vers, 0, sizeof (vers));
1608 
1609 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1610 	    (vers.major == 3) &&
1611 	    (cluster_take == SDSSC_False)) {
1612 
1613 		/*
1614 		 * If the take is beging done by the user via the CLI we need
1615 		 * to notify the DCS to make this current node the primary.
1616 		 * The SC3.0 reservation code will in turn exec metaset with
1617 		 * the -C take arg to complete this operation.
1618 		 */
1619 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1620 		    SDSSC_ERROR) {
1621 			(void) printf(gettext(
1622 			    "metaset: failed to notify DCS of take\n"));
1623 		}
1624 		md_exit(NULL, rval == SDSSC_ERROR);
1625 	}
1626 
1627 	if ((sp = metasetname(sname, ep)) == NULL) {
1628 		mde_perror(ep, "");
1629 		md_exit(sp, 1);
1630 	}
1631 
1632 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1633 
1634 		/*
1635 		 * If we're running in a cluster environment and this
1636 		 * node already owns the set. Don't bother trying to
1637 		 * take the set again. There's one case where an adminstrator
1638 		 * is adding disks to a set for the first time. metaset
1639 		 * will take the ownership of the set at that point. During
1640 		 * that add operation SC3.0 notices activity on the device
1641 		 * and also tries to perform a take operation. The SC3.0 take
1642 		 * will fail because the adminstrative add has the set locked
1643 		 */
1644 		md_exit(sp, 0);
1645 	}
1646 
1647 	if (meta_lock_nowait(sp, ep) != 0) {
1648 		mde_perror(ep, "");
1649 		md_exit(sp, 10);	/* special errcode */
1650 	}
1651 
1652 	/*
1653 	 * If a 2 is returned from meta_set_take, this take was able to resolve
1654 	 * an unresolved replicated disk (i.e. a disk is now available that
1655 	 * had been missing during the import of the replicated diskset).
1656 	 * Need to release the diskset and re-take in order to have
1657 	 * the subdrivers re-snarf using the newly resolved (or newly mapped)
1658 	 * devids.  This also allows the namespace to be updated with the
1659 	 * correct major names in the case where the disk being replicated
1660 	 * was handled by a different driver than the replicated disk.
1661 	 */
1662 	set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
1663 	if (set_take_rval == 2) {
1664 		if (meta_set_release(sp, &status)) {
1665 			mde_perror(&status,
1666 			    "Need to release and take set to resolve names.");
1667 			md_exit(sp, 1);
1668 		}
1669 		metaflushdrivenames();
1670 		metaflushsetname(sp);
1671 		set_take_rval = meta_set_take(sp, &mhiargs,
1672 		    (flags | TAKE_RETAKE), usetag, &status);
1673 	}
1674 
1675 	if (set_take_rval == -1) {
1676 		mde_perror(&status, "");
1677 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1678 			md_exit(sp, 2);
1679 		if (mdismddberror(&status, MDE_DB_ACCOK))
1680 			md_exit(sp, 3);
1681 		if (mdismddberror(&status, MDE_DB_STALE))
1682 			md_exit(sp, 66);
1683 		md_exit(sp, 1);
1684 	}
1685 	md_exit(sp, 0);
1686 }
1687 
1688 /*
1689  * Joins a node to a specific set or to all multinode disksets known
1690  * by this node.  If set is specified then caller should have verified
1691  * that the set is a multinode diskset.
1692  *
1693  * If an error occurs, metaset exits with a 1.
1694  * If there is no error, metaset exits with a 0.
1695  */
1696 static void
1697 parse_joinset(int argc, char **argv)
1698 {
1699 	int		c;
1700 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1701 	char		*sname = MD_LOCAL_NAME;
1702 	md_error_t	status = mdnullerror;
1703 	md_error_t	*ep = &status;
1704 	md_set_desc	*sd;
1705 	char		buf[BUFSIZ];
1706 	char		*p = buf;
1707 	set_t		max_sets, setno;
1708 	int		err, cumm_err = 0;
1709 	size_t		bufsz;
1710 
1711 	bufsz = sizeof (buf);
1712 	/* reset and parse args */
1713 	optind = 1;
1714 	opterr = 1;
1715 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1716 		switch (c) {
1717 		case 'M':
1718 			break;
1719 		case 'j':
1720 			break;
1721 		case 's':
1722 			sname = optarg;
1723 			break;
1724 		default:
1725 			usage(sp, gettext("unknown options"));
1726 		}
1727 	}
1728 
1729 	argc -= optind;
1730 	argv += optind;
1731 
1732 	if (argc > 1)
1733 		usage(sp, gettext("too many args"));
1734 
1735 	/*
1736 	 * If no setname option was used, then join all disksets
1737 	 * that this node knows about.   Attempt to join all
1738 	 * disksets that this node knows about.
1739 	 *
1740 	 * Additional text is added to the error messages during
1741 	 * this section of code in order to help the user understand
1742 	 * why the 'join of all sets' failed and which set caused
1743 	 * the failure.
1744 	 */
1745 
1746 	/*
1747 	 * Hold local set lock throughout this call to keep
1748 	 * other actions from interfering (such as creating a new
1749 	 * set, etc.).
1750 	 */
1751 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1752 		mde_perror(ep, "");
1753 		md_exit(sp, 1);
1754 	}
1755 
1756 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1757 		mde_perror(ep, "");
1758 		md_exit(local_sp, 1);
1759 	}
1760 
1761 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1762 		/*
1763 		 * If no set name is given, then walk through all sets
1764 		 * on this node which could include:
1765 		 * 	- MN disksets
1766 		 *	- traditional disksets
1767 		 *	- non-existent disksets
1768 		 * Attempt to join the MN disksets.
1769 		 * If the join of one set fails, print out an error message
1770 		 * about that set and continue the walk.
1771 		 */
1772 		if ((max_sets = get_max_sets(ep)) == 0) {
1773 			mde_perror(ep, "");
1774 			md_exit(local_sp, 1);
1775 		}
1776 
1777 		/* Start walking through all possible disksets */
1778 		for (setno = 1; setno < max_sets; setno++) {
1779 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1780 				if (mdiserror(ep, MDE_NO_SET)) {
1781 					/* No set for this setno - continue */
1782 					mdclrerror(ep);
1783 					continue;
1784 				} else {
1785 					(void) sprintf(p, gettext(
1786 					"Unable to get set %d information"),
1787 					    setno);
1788 					mde_perror(ep, p);
1789 					cumm_err = 1;
1790 					mdclrerror(ep);
1791 					continue;
1792 				}
1793 			}
1794 
1795 			/* If setname is there, set desc should exist. */
1796 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1797 				(void) snprintf(p, bufsz, gettext(
1798 				    "Unable to get set %s desc information"),
1799 				    sp->setname);
1800 				mde_perror(ep, p);
1801 				cumm_err = 1;
1802 				mdclrerror(ep);
1803 				continue;
1804 			}
1805 
1806 			/* Only check MN disksets */
1807 			if (!MD_MNSET_DESC(sd)) {
1808 				continue;
1809 			}
1810 
1811 			/*
1812 			 * Return value of 0 is success.
1813 			 * Return value of -1 means a failure.
1814 			 * Return value of -2 means set could not be
1815 			 * joined, but shouldn't cause an error.
1816 			 * Reasons would be:
1817 			 * 	- no drives in set
1818 			 * 	- node already joined to set
1819 			 * Return value of -3 means joined stale set.
1820 			 * Can't check for all reasons here
1821 			 * since set isn't locked yet across all
1822 			 * nodes in the cluster.  The call
1823 			 * to libmeta routine, meta_set_join, will
1824 			 * lock across the cluster and perform
1825 			 * the checks.
1826 			 */
1827 			if ((err = meta_set_join(sp, ep)) == -1) {
1828 				/* Print error of diskset join failure */
1829 				(void) snprintf(p, bufsz,
1830 				    gettext("Join to diskset %s failed"),
1831 				    sp->setname);
1832 				mde_perror(ep, p);
1833 				cumm_err = 1;
1834 				mdclrerror(ep);
1835 				continue;
1836 			}
1837 
1838 			if (err == -3) {
1839 				/* Print error of diskset join failure */
1840 				(void) snprintf(p, bufsz,
1841 				    gettext("Joined to stale diskset %s"),
1842 				    sp->setname);
1843 				mde_perror(ep, p);
1844 				mdclrerror(ep);
1845 			}
1846 
1847 			mdclrerror(ep);
1848 		}
1849 
1850 		md_exit(local_sp, cumm_err);
1851 	}
1852 
1853 	/*
1854 	 * Code for a specific set is much simpler.
1855 	 * Error messages don't need extra text since specific setname
1856 	 * was used.
1857 	 * Don't need to lock the local set, just the specific set given.
1858 	 */
1859 	if ((sp = metasetname(sname, ep)) == NULL) {
1860 		mde_perror(ep, "");
1861 		md_exit(local_sp, 1);
1862 	}
1863 
1864 	/*
1865 	 * Fail command if meta_set_join returns -1.
1866 	 *
1867 	 * Return of 0 means that node joined set.
1868 	 *
1869 	 * Return of -2 means that node was unable to
1870 	 * join a set since that set had no drives
1871 	 * or that had already joined the set.  No
1872 	 * need to fail the command for these reasons.
1873 	 *
1874 	 * Return of -3 means that set is stale.
1875 	 * Return a value of 66 to historically match traditional disksets.
1876 	 */
1877 	if ((err = meta_set_join(sp, ep)) == -1) {
1878 		mde_perror(&status, "");
1879 		md_exit(local_sp, 1);
1880 	}
1881 
1882 	if (err == -3) {
1883 		/* Print error of diskset join failure */
1884 		(void) snprintf(p, bufsz,
1885 		    gettext("Joined to stale diskset %s"),
1886 		    sp->setname);
1887 		mde_perror(&status, "");
1888 		md_exit(local_sp, 66);
1889 	}
1890 
1891 	md_exit(local_sp, 0);
1892 }
1893 
1894 /*
1895  * Withdraws a node from a specific set or from all multinode disksets known
1896  * by this node.  If set is specified then caller should have verified
1897  * that the set is a multinode diskset.
1898  *
1899  * If an error occurs, metaset exits with a 1.
1900  * If there is no error, metaset exits with a 0.
1901  */
1902 static void
1903 parse_withdrawset(int argc, char **argv)
1904 {
1905 	int		c;
1906 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1907 	char		*sname = MD_LOCAL_NAME;
1908 	md_error_t	status = mdnullerror;
1909 	md_error_t	*ep = &status;
1910 	char		buf[BUFSIZ];
1911 	char		*p = buf;
1912 	md_set_desc	*sd;
1913 	set_t		max_sets, setno;
1914 	int		err, cumm_err = 0;
1915 	size_t		bufsz;
1916 
1917 	bufsz = sizeof (buf);
1918 	/* reset and parse args */
1919 	optind = 1;
1920 	opterr = 1;
1921 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1922 		switch (c) {
1923 		case 'M':
1924 			break;
1925 		case 'w':
1926 			break;
1927 		case 's':
1928 			sname = optarg;
1929 			break;
1930 		default:
1931 			usage(sp, gettext("unknown options"));
1932 		}
1933 	}
1934 
1935 	argc -= optind;
1936 	argv += optind;
1937 
1938 	if (argc > 1)
1939 		usage(sp, gettext("too many args"));
1940 
1941 	/*
1942 	 * If no setname option was used, then withdraw from all disksets
1943 	 * that this node knows about.
1944 	 *
1945 	 * Additional text is added to the error messages during
1946 	 * this section of code in order to help the user understand
1947 	 * why the 'withdraw from all sets' failed and which set caused
1948 	 * the failure.
1949 	 */
1950 
1951 	/*
1952 	 * Hold local set lock throughout this call to keep
1953 	 * other actions from interfering (such as creating a new
1954 	 * set, etc.).
1955 	 */
1956 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1957 		mde_perror(ep, "");
1958 		md_exit(sp, 1);
1959 	}
1960 
1961 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1962 		mde_perror(ep, "");
1963 		md_exit(local_sp, 1);
1964 	}
1965 
1966 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1967 		/*
1968 		 * If no set name is given, then walk through all sets
1969 		 * on this node which could include:
1970 		 * 	- MN disksets
1971 		 *	- traditional disksets
1972 		 *	- non-existent disksets
1973 		 * Attempt to withdraw from the MN disksets.
1974 		 * If the withdraw of one set fails, print out an error
1975 		 * message about that set and continue the walk.
1976 		 */
1977 		if ((max_sets = get_max_sets(ep)) == 0) {
1978 			mde_perror(ep, "");
1979 			md_exit(local_sp, 1);
1980 		}
1981 
1982 		/* Start walking through all possible disksets */
1983 		for (setno = 1; setno < max_sets; setno++) {
1984 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1985 				if (mdiserror(ep, MDE_NO_SET)) {
1986 					/* No set for this setno - continue */
1987 					mdclrerror(ep);
1988 					continue;
1989 				} else {
1990 					(void) sprintf(p, gettext(
1991 					    "Unable to get set %d information"),
1992 					    setno);
1993 					mde_perror(ep, p);
1994 					cumm_err = 1;
1995 					mdclrerror(ep);
1996 					continue;
1997 				}
1998 			}
1999 
2000 			/* If setname is there, set desc should exist. */
2001 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2002 				(void) snprintf(p, bufsz, gettext(
2003 				    "Unable to get set %s desc information"),
2004 				    sp->setname);
2005 				mde_perror(ep, p);
2006 				cumm_err = 1;
2007 				mdclrerror(ep);
2008 				continue;
2009 			}
2010 
2011 			/* Only check MN disksets */
2012 			if (!MD_MNSET_DESC(sd)) {
2013 				continue;
2014 			}
2015 
2016 			/*
2017 			 * Return value of 0 is success.
2018 			 * Return value of -1 means a failure.
2019 			 * Return value of -2 means set could not be
2020 			 * withdrawn from, but this shouldn't cause
2021 			 * an error.  Reasons would be:
2022 			 * 	- no drives in set
2023 			 * 	- node already withdrawn from set
2024 			 * Can't check for all reasons here
2025 			 * since set isn't locked yet across all
2026 			 * nodes in the cluster.  The call
2027 			 * to libmeta routine, meta_set_withdraw, will
2028 			 * lock across the cluster and perform
2029 			 * the checks.
2030 			 */
2031 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2032 				/* Print error of diskset withdraw failure */
2033 				(void) snprintf(p, bufsz,
2034 				    gettext("Withdraw from diskset %s failed"),
2035 				    sp->setname);
2036 				mde_perror(ep, p);
2037 				mdclrerror(ep);
2038 				cumm_err = 1;
2039 				continue;
2040 			}
2041 
2042 			if (err == -2) {
2043 				mdclrerror(ep);
2044 				continue;
2045 			}
2046 
2047 			mdclrerror(ep);
2048 		}
2049 		md_exit(local_sp, cumm_err);
2050 	}
2051 
2052 
2053 	/*
2054 	 * Code for a specific set is much simpler.
2055 	 * Error messages don't need extra text since specific setname
2056 	 * was used.
2057 	 * Don't need to lock the local set, just the specific set given.
2058 	 */
2059 	if ((sp = metasetname(sname, ep)) == NULL) {
2060 		mde_perror(ep, "");
2061 		md_exit(local_sp, 1);
2062 	}
2063 
2064 	/*
2065 	 * Fail command if meta_set_withdraw returns -1.
2066 	 *
2067 	 * Return of 0 means that node withdrew from set.
2068 	 *
2069 	 * Return of -2 means that node was unable to
2070 	 * withdraw from a set since that set had no drives
2071 	 * or node was not joined to set.  No
2072 	 * need to fail the command for these reasons.
2073 	 */
2074 	if (meta_set_withdraw(sp, ep) == -1) {
2075 		mde_perror(&status, "");
2076 		md_exit(local_sp, 1);
2077 	}
2078 
2079 	md_exit(local_sp, 0);
2080 }
2081 
2082 static void
2083 parse_cluster(int argc, char **argv, int multi_node)
2084 {
2085 	int			c, error, new_argc, x;
2086 	enum cluster_cmd	cmd = ccnotspecified;
2087 	char			*hostname = SDSSC_PROXY_PRIMARY;
2088 	char			*argument = NULL;
2089 	char			*sname = MD_LOCAL_NAME;
2090 	char			primary_node[SDSSC_NODE_NAME_LEN];
2091 	char			**new_argv = NULL;
2092 	char			**np = NULL;
2093 	mdsetname_t		*sp = NULL;
2094 	md_error_t		status = mdnullerror;
2095 	md_error_t		*ep = &status;
2096 
2097 	/* reset and parse args */
2098 	optind = 1;
2099 	opterr = 1;
2100 	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2101 		switch (c) {
2102 		case 'C':
2103 			if (cmd != ccnotspecified) {
2104 				md_exit(sp, -1);
2105 			}
2106 			argument = optarg;
2107 
2108 			if (strcmp(argument, "disksin") == 0) {
2109 				cmd = clusterdisksin;
2110 			} else if (strcmp(argument, "version") == 0) {
2111 				cmd = clusterversion;
2112 			} else if (strcmp(argument, "release") == 0) {
2113 				cmd = clusterrelease;
2114 			} else if (strcmp(argument, "take") == 0) {
2115 				cmd = clustertake;
2116 			} else if (strcmp(argument, "proxy") == 0) {
2117 				cmd = clusterproxy;
2118 			} else if (strcmp(argument, "purge") == 0) {
2119 				cmd = clusterpurge;
2120 			} else {
2121 				md_exit(sp, -1);
2122 			}
2123 
2124 			break;
2125 
2126 		case 'h':
2127 			hostname = optarg;
2128 			break;
2129 
2130 		case 's':
2131 			sname = optarg;
2132 			break;
2133 
2134 		case 'f':
2135 		case 't':
2136 		case 'u':
2137 		case 'y':
2138 		case 'r':
2139 			break;
2140 
2141 		default:
2142 			md_exit(sp, -1);
2143 		}
2144 	}
2145 
2146 	/* Now call the appropriate command function. */
2147 	switch (cmd) {
2148 	case clusterversion:
2149 		printclusterversion();
2150 		break;
2151 
2152 	case clusterdisksin:
2153 		if (printdisksin(sname, ep)) {
2154 			md_exit(sp, -1);
2155 		}
2156 		break;
2157 
2158 	case clusterrelease:
2159 		if (multi_node) {
2160 			usage(sp, gettext(
2161 			    "-C release is not allowed on multi-owner"
2162 			    " disksets"));
2163 		}
2164 		parse_releaseset(argc, argv);
2165 		break;
2166 
2167 	case clustertake:
2168 		if (multi_node) {
2169 			usage(sp, gettext(
2170 			    "-C take is not allowed on multi-owner disksets"));
2171 		}
2172 		parse_takeset(argc, argv);
2173 		break;
2174 
2175 	case clusterproxy:
2176 		if (multi_node) {
2177 			usage(sp, gettext(
2178 			    "-C proxy is not allowed on multi-owner disksets"));
2179 		}
2180 
2181 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2182 			(void) printf(gettext("Out of memory\n"));
2183 			md_exit(sp, 1);
2184 		}
2185 
2186 		np = new_argv;
2187 		new_argc = 0;
2188 		(void) memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2189 
2190 		for (x = 0; x < argc; x++) {
2191 			if (strcmp(argv[x], "-C") == 0) {
2192 
2193 				/*
2194 				 * Need to skip the '-C proxy' args so
2195 				 * just increase x by one and the work is
2196 				 * done.
2197 				 */
2198 				x++;
2199 			} else {
2200 				*np++ = strdup(argv[x]);
2201 				new_argc++;
2202 			}
2203 		}
2204 
2205 		switch (sdssc_get_primary_host(sname, primary_node,
2206 		    SDSSC_NODE_NAME_LEN)) {
2207 		case SDSSC_ERROR:
2208 			md_exit(sp, 1);
2209 			break;
2210 
2211 		case SDSSC_NO_SERVICE:
2212 			if (hostname != SDSSC_PROXY_PRIMARY) {
2213 				(void) strlcpy(primary_node, hostname,
2214 				    SDSSC_NODE_NAME_LEN);
2215 			}
2216 			break;
2217 		}
2218 
2219 		if (sdssc_cmd_proxy(new_argc, new_argv,
2220 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2221 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2222 			md_exit(sp, error);
2223 		} else {
2224 			(void) printf(gettext(
2225 			    "Couldn't proxy command\n"));
2226 			md_exit(sp, 1);
2227 		}
2228 		break;
2229 
2230 	case clusterpurge:
2231 		parse_purge(argc, argv);
2232 		break;
2233 
2234 	default:
2235 		break;
2236 	}
2237 
2238 	md_exit(sp, 0);
2239 }
2240 
2241 /*
2242  * parse args and do it
2243  */
2244 int
2245 main(int argc, char *argv[])
2246 {
2247 	enum metaset_cmd	cmd = notspecified;
2248 	md_error_t		status = mdnullerror;
2249 	md_error_t		*ep = &status;
2250 	mdsetname_t		*sp = NULL;
2251 	char			*hostname = SDSSC_PROXY_PRIMARY;
2252 	char			*sname = MD_LOCAL_NAME;
2253 	char			*auto_take_option = NULL;
2254 	char			primary_node[SDSSC_NODE_NAME_LEN];
2255 	int			error, c, stat;
2256 	int			auto_take = FALSE;
2257 	md_set_desc		*sd;
2258 	int			mflag = 0;
2259 	int			multi_node = 0;
2260 	rval_e			sdssc_res;
2261 
2262 	/*
2263 	 * Get the locale set up before calling any other routines
2264 	 * with messages to ouput.  Just in case we're not in a build
2265 	 * environment, make sure that TEXT_DOMAIN gets set to
2266 	 * something.
2267 	 */
2268 #if !defined(TEXT_DOMAIN)
2269 #define	TEXT_DOMAIN "SYS_TEST"
2270 #endif
2271 	(void) setlocale(LC_ALL, "");
2272 	(void) textdomain(TEXT_DOMAIN);
2273 
2274 	sdssc_res = sdssc_bind_library();
2275 	if (sdssc_res == SDSSC_ERROR) {
2276 		(void) printf(gettext(
2277 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2278 		exit(1);
2279 	}
2280 
2281 	/* initialize */
2282 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2283 		mde_perror(ep, "");
2284 		md_exit(sp, 1);
2285 	}
2286 
2287 	optind = 1;
2288 	opterr = 1;
2289 
2290 	/*
2291 	 * NOTE: The "C" option is strictly for cluster use. it is not
2292 	 * and should not be documented for the customer. - JST
2293 	 */
2294 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2295 	    != -1) {
2296 		switch (c) {
2297 		case 'M':
2298 			mflag = 1;
2299 			break;
2300 		case 'A':
2301 			auto_take = TRUE;
2302 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2303 			    strcmp(optarg, "disable") == 0))
2304 				usage(sp, gettext(
2305 				    "-A: enable or disable must be specified"));
2306 			auto_take_option = optarg;
2307 			break;
2308 		case 'a':
2309 			if (cmd != notspecified) {
2310 				usage(sp, gettext(
2311 				    "conflicting options"));
2312 			}
2313 			cmd = add;
2314 			break;
2315 		case 'b':
2316 			if (cmd != notspecified) {
2317 				usage(sp, gettext(
2318 				    "conflicting options"));
2319 			}
2320 			cmd = balance;
2321 			break;
2322 		case 'd':
2323 			if (cmd != notspecified) {
2324 				usage(sp, gettext(
2325 				    "conflicting options"));
2326 			}
2327 			cmd = delete;
2328 			break;
2329 		case 'C':	/* cluster commands */
2330 			if (cmd != notspecified) {
2331 				md_exit(sp, -1);    /* conflicting options */
2332 			}
2333 			cmd = cluster;
2334 			break;
2335 		case 'f':
2336 			break;
2337 		case 'h':
2338 			hostname = optarg;
2339 			break;
2340 		case 'j':
2341 			if (cmd != notspecified) {
2342 				usage(sp, gettext(
2343 				    "conflicting options"));
2344 			}
2345 			cmd = join;
2346 			break;
2347 		case 'l':
2348 			break;
2349 		case 'L':
2350 			break;
2351 		case 'm':
2352 			break;
2353 		case 'o':
2354 			if (cmd != notspecified) {
2355 				usage(sp, gettext(
2356 				    "conflicting options"));
2357 			}
2358 			cmd = isowner;
2359 			break;
2360 		case 'P':
2361 			if (cmd != notspecified) {
2362 				usage(sp, gettext(
2363 				    "conflicting options"));
2364 			}
2365 			cmd = purge;
2366 			break;
2367 		case 'q':
2368 			if (cmd != notspecified) {
2369 				usage(sp, gettext(
2370 				    "conflicting options"));
2371 			}
2372 			cmd = query;
2373 			break;
2374 		case 'r':
2375 			if (cmd != notspecified) {
2376 				usage(sp, gettext(
2377 				    "conflicting options"));
2378 			}
2379 			cmd = release;
2380 			break;
2381 		case 's':
2382 			sname = optarg;
2383 			break;
2384 		case 't':
2385 			if (cmd != notspecified) {
2386 				usage(sp, gettext(
2387 				    "conflicting options"));
2388 			}
2389 			cmd = take;
2390 			break;
2391 		case 'u':
2392 			break;
2393 		case 'w':
2394 			if (cmd != notspecified) {
2395 				usage(sp, gettext(
2396 				    "conflicting options"));
2397 			}
2398 			cmd = withdraw;
2399 			break;
2400 		case 'y':
2401 			break;
2402 		case '?':
2403 			if (optopt == '?')
2404 				usage(sp, NULL);
2405 			/*FALLTHROUGH*/
2406 		default:
2407 			if (cmd == cluster) {    /* cluster is silent */
2408 				md_exit(sp, -1);
2409 			} else {
2410 				usage(sp, gettext(
2411 				    "unknown command"));
2412 			}
2413 		}
2414 	}
2415 
2416 	/* check if suncluster is installed and -A enable specified */
2417 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2418 	    strcmp(auto_take_option, "enable") == 0) {
2419 		md_eprintf(gettext(
2420 		    "cannot enable auto-take when SunCluster is installed\n"));
2421 		md_exit(sp, 1);
2422 	}
2423 
2424 	/*
2425 	 * At this point we know that if the -A enable option is specified
2426 	 * for an auto-take diskset that SC is not installed on the machine, so
2427 	 * all of the sdssc calls will just be no-ops.
2428 	 */
2429 
2430 	/* list sets */
2431 	if (cmd == notspecified && auto_take == FALSE) {
2432 		parse_printset(argc, argv);
2433 		/*NOTREACHED*/
2434 	}
2435 
2436 	if (meta_check_root(ep) != 0) {
2437 		mde_perror(ep, "");
2438 		md_exit(sp, 1);
2439 	}
2440 
2441 	/* snarf MDDB */
2442 	if (meta_setup_db_locations(ep) != 0) {
2443 		mde_perror(ep, "");
2444 		md_exit(sp, 1);
2445 	}
2446 
2447 	/*
2448 	 * If sname is a diskset - check for multi_node.
2449 	 * It is possible for sname to not exist.
2450 	 */
2451 	if (strcmp(sname, MD_LOCAL_NAME)) {
2452 		if ((sp = metasetname(sname, ep)) != NULL) {
2453 			/* Set exists - check for MN diskset */
2454 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2455 				mde_perror(ep, "");
2456 				md_exit(sp, 1);
2457 			}
2458 			if (MD_MNSET_DESC(sd)) {
2459 				/*
2460 				 * If a MN diskset always set multi_node
2461 				 * regardless of whether the -M option was
2462 				 * used or not (mflag).
2463 				 */
2464 				multi_node = 1;
2465 			} else {
2466 				/*
2467 				 * If a traditional diskset, mflag must
2468 				 * not be set.
2469 				 */
2470 				if (mflag) {
2471 					usage(sp, gettext(
2472 					    "-M option only allowed "
2473 					    "on multi-owner diskset"));
2474 				}
2475 			}
2476 		} else {
2477 			/*
2478 			 * Set name does not exist, set multi_node
2479 			 * based on -M option.
2480 			 */
2481 			if (mflag) {
2482 				multi_node = 1;
2483 			}
2484 		}
2485 	}
2486 
2487 	if (auto_take && multi_node) {
2488 		/* Can't mix multinode and auto-take on a diskset */
2489 		usage(sp,
2490 		    gettext("-A option not allowed on multi-owner diskset"));
2491 	}
2492 
2493 	/*
2494 	 * MN disksets don't use DCS clustering services, so
2495 	 * do not get primary_node for MN diskset since no command
2496 	 * proxying is done to Primary cluster node.  Do not proxy
2497 	 * MN diskset commands of join and withdraw when issued without
2498 	 * a valid setname.
2499 	 * For traditional disksets: proxy all commands except a take
2500 	 * and release.  Use first host listed as the host to send the
2501 	 * command to if there isn't already a primary
2502 	 */
2503 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2504 	    (cmd != take) && (cmd != release) &&
2505 	    (cmd != cluster) && (cmd != join) &&
2506 	    (cmd != withdraw) && (cmd != purge)) {
2507 		stat = sdssc_get_primary_host(sname, primary_node,
2508 		    SDSSC_NODE_NAME_LEN);
2509 		switch (stat) {
2510 			case SDSSC_ERROR:
2511 				return (0);
2512 
2513 			case SDSSC_NO_SERVICE:
2514 				if (hostname != SDSSC_PROXY_PRIMARY) {
2515 					(void) strlcpy(primary_node, hostname,
2516 					    SDSSC_NODE_NAME_LEN);
2517 				} else {
2518 					(void) memset(primary_node, '\0',
2519 					    SDSSC_NODE_NAME_LEN);
2520 				}
2521 				break;
2522 		}
2523 
2524 		/*
2525 		 * We've got a complicated decision here regarding
2526 		 * the hostname. If we didn't get a primary host
2527 		 * and a host name wasn't supplied on the command line
2528 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2529 		 * use what's been found.
2530 		 */
2531 		if (sdssc_cmd_proxy(argc, argv,
2532 		    primary_node[0] == '\0' ?
2533 		    SDSSC_PROXY_PRIMARY : primary_node,
2534 		    &error) == SDSSC_PROXY_DONE) {
2535 			exit(error);
2536 		}
2537 	}
2538 
2539 	/* cluster-specific commands */
2540 	if (cmd == cluster) {
2541 		parse_cluster(argc, argv, multi_node);
2542 		/*NOTREACHED*/
2543 	}
2544 
2545 	/* join MultiNode diskset */
2546 	if (cmd == join) {
2547 		/*
2548 		 * If diskset specified, verify that it exists
2549 		 * and is a multinode diskset.
2550 		 */
2551 		if (strcmp(sname, MD_LOCAL_NAME)) {
2552 			if ((sp = metasetname(sname, ep)) == NULL) {
2553 				mde_perror(ep, "");
2554 				md_exit(sp, 1);
2555 			}
2556 
2557 			if (!multi_node) {
2558 				usage(sp, gettext(
2559 				    "-j option only allowed on "
2560 				    "multi-owner diskset"));
2561 			}
2562 		}
2563 		/*
2564 		 * Start mddoors daemon here.
2565 		 * mddoors itself takes care there will be only one
2566 		 * instance running, so starting it twice won't hurt
2567 		 */
2568 		(void) pclose(popen("/usr/lib/lvm/mddoors", "w"));
2569 		parse_joinset(argc, argv);
2570 		/*NOTREACHED*/
2571 	}
2572 
2573 	/* withdraw from MultiNode diskset */
2574 	if (cmd == withdraw) {
2575 		/*
2576 		 * If diskset specified, verify that it exists
2577 		 * and is a multinode diskset.
2578 		 */
2579 		if (strcmp(sname, MD_LOCAL_NAME)) {
2580 			if ((sp = metasetname(sname, ep)) == NULL) {
2581 				mde_perror(ep, "");
2582 				md_exit(sp, 1);
2583 			}
2584 
2585 			if (!multi_node) {
2586 				usage(sp, gettext(
2587 				    "-w option only allowed on "
2588 				    "multi-owner diskset"));
2589 			}
2590 		}
2591 		parse_withdrawset(argc, argv);
2592 		/*NOTREACHED*/
2593 	}
2594 
2595 	/* must have set for everything else */
2596 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2597 		usage(sp, gettext("setname must be specified"));
2598 
2599 	/* add hosts or drives */
2600 	if (cmd == add) {
2601 		/*
2602 		 * In the multi node case start mddoors daemon.
2603 		 * mddoors itself takes care there will be
2604 		 * only one instance running, so starting it twice won't hurt
2605 		 */
2606 		if (multi_node) {
2607 			(void) pclose(popen("/usr/lib/lvm/mddoors", "w"));
2608 		}
2609 
2610 		parse_add(argc, argv);
2611 		/*NOTREACHED*/
2612 	}
2613 
2614 	/* re-balance the replicas */
2615 	if (cmd == balance) {
2616 		parse_balance(argc, argv);
2617 		/*NOTREACHED*/
2618 	}
2619 
2620 	/* delete hosts or drives */
2621 	if (cmd == delete) {
2622 		parse_del(argc, argv);
2623 		/*NOTREACHED*/
2624 	}
2625 
2626 	/* check ownership */
2627 	if (cmd == isowner) {
2628 		parse_isowner(argc, argv);
2629 		/*NOTREACHED*/
2630 	}
2631 
2632 	/* purge the diskset */
2633 	if (cmd == purge) {
2634 		parse_purge(argc, argv);
2635 		/*NOTREACHED*/
2636 	}
2637 
2638 	/* query for data marks */
2639 	if (cmd == query) {
2640 		parse_query(argc, argv);
2641 		/*NOTREACHED*/
2642 	}
2643 
2644 	/* release ownership */
2645 	if (cmd == release) {
2646 		if (multi_node) {
2647 			/* Can't release multinode diskset */
2648 			usage(sp, gettext(
2649 			    "-r option not allowed on multi-owner diskset"));
2650 		} else {
2651 			parse_releaseset(argc, argv);
2652 			/*NOTREACHED*/
2653 		}
2654 	}
2655 
2656 	/* take ownership */
2657 	if (cmd == take) {
2658 		if (multi_node) {
2659 			/* Can't take multinode diskset */
2660 			usage(sp, gettext(
2661 			    "-t option not allowed on multi-owner diskset"));
2662 		} else {
2663 			parse_takeset(argc, argv);
2664 			/*NOTREACHED*/
2665 		}
2666 	}
2667 
2668 	/* take ownership of auto-take sets */
2669 	if (auto_take) {
2670 		parse_autotake(argc, argv);
2671 		/*NOTREACHED*/
2672 	}
2673 
2674 	/*NOTREACHED*/
2675 	return (0);
2676 }
2677