xref: /titanic_41/usr/src/cmd/lvm/util/metaset.c (revision fffafeb2cc01732fd6a28ed530e4424094685ece)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Metadevice diskset utility.
27  */
28 
29 #include <meta.h>
30 #include <sys/lvm/md_mddb.h>
31 #include <sdssc.h>
32 
/*
 * Top-level operation selected for this invocation.  Values are assigned
 * in declaration order starting at zero.  The option letter that selects
 * each operation is decided by code outside this definition; the notes
 * below only restate what the names suggest.
 */
enum metaset_cmd {
	notspecified = 0,	/* no operation selected */
	add,			/* add to a diskset */
	balance,		/* balance a diskset */
	delete,			/* delete from a diskset */
	cluster,		/* presumably a cluster_cmd request; confirm */
	isowner,		/* ownership check */
	purge,			/* purge a diskset */
	query,			/* query / print */
	release,		/* release a diskset */
	take,			/* take a diskset */
	join,			/* Join a multinode diskset */
	withdraw		/* Withdraw from a multinode diskset */
};
47 
/*
 * Sub-commands of the cluster interface.  Values are assigned in
 * declaration order starting at zero.
 */
enum cluster_cmd {
	ccnotspecified = 0,	/* no cluster sub-command selected */
	clusterversion,		/* Return the version of the cluster I/F */
	clusterdisksin,		/* List disks in a given diskset */
	clustertake,		/* back door for Cluster take */
	clusterrelease,		/* back door for Cluster release */
	clusterpurge,		/* back door for Cluster purge */
	clusterproxy		/* proxy the args after '--' to primary */
};
57 
58 static void
59 usage(
60 	mdsetname_t	*sp,
61 	char		*string)
62 {
63 	if ((string != NULL) && (*string != '\0'))
64 		md_eprintf("%s\n", string);
65 	(void) fprintf(stderr, gettext(
66 	    "usage:\t%s -s setname -a [-A enable | disable] -h hostname ...\n"
67 	    "	%s -s setname -a [-M] -h hostname ...\n"
68 	    "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
69 	    "	%s -s setname -d [-M] -h hostname ...\n"
70 	    "	%s -s setname -d [-M] -f -h all-hostnames\n"
71 	    "	%s -s setname -d [-M] [-f] drivename ...\n"
72 	    "	%s -s setname -d [-M] [-f] hostname ...\n"
73 	    "	%s -s setname -A enable | disable\n"
74 	    "	%s -s setname -t [-f]\n"
75 	    "	%s -s setname -r\n"
76 	    "	%s [-s setname] -j [-M]\n"
77 	    "	%s [-s setname] -w [-M]\n"
78 	    "	%s -s setname -P [-M]\n"
79 	    "	%s -s setname -b [-M]\n"
80 	    "	%s -s setname -o [-M] [-h hostname]\n"
81 	    "	%s [-s setname]\n"
82 	    "\n"
83 	    "		hostname = contents of /etc/nodename\n"
84 	    "		drivename = cNtNdN no slice\n"
85 	    "		[-M] for multi-owner set is optional except"
86 	    " on set creation\n"),
87 	    myname, myname, myname, myname, myname, myname, myname, myname,
88 	    myname, myname, myname, myname, myname, myname, myname, myname);
89 	md_exit(sp, (string == NULL) ? 0 : 1);
90 }
91 
92 /*
93  * The svm.sync rc script relies heavily on the metaset output.
94  * Any changes to the metaset output MUST verify that the rc script
95  * does not break. Not doing so may potentially leave the system
96  * unusable. You have been WARNED.
97  */
98 static int
99 printset(mdsetname_t *sp, md_error_t *ep)
100 {
101 	int			i, j;
102 	md_set_desc		*sd;
103 	md_drive_desc		*dd, *p;
104 	int			max_meds;
105 	md_mnnode_desc		*nd;
106 
107 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
108 		return (-1);
109 
110 	/*
111 	 * Only get set owner information for traditional diskset.
112 	 * This set owner information is stored in the node records
113 	 * for a MN diskset.
114 	 */
115 	if (!(MD_MNSET_DESC(sd))) {
116 		if (metaget_setownership(sp, ep) == -1)
117 			return (-1);
118 	}
119 
120 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
121 	    ep)) == NULL) && !mdisok(ep))
122 		return (-1);
123 
124 	if (MD_MNSET_DESC(sd)) {
125 		(void) printf(gettext(
126 		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
127 		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
128 		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
129 		    (dd != NULL)) {
130 			(void) printf(gettext(
131 			    "Master and owner information unavailable "
132 			    "until joined (metaset -j)\n"));
133 		}
134 	} else {
135 		(void) printf(gettext(
136 		    "\nSet name = %s, Set number = %d\n"),
137 		    sp->setname, sp->setno);
138 	}
139 
140 	if (MD_MNSET_DESC(sd)) {
141 		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
142 		    gettext("Host"), gettext("Owner"), gettext("Member"));
143 		nd = sd->sd_nodelist;
144 		while (nd) {
145 			/*
146 			 * Don't print nodes that aren't ok since they may be
147 			 * removed from config during a reconfig cycle.  If a
148 			 * node was being added to a diskset and the entire
149 			 * cluster went down but the node being added was unable
150 			 * to reboot, there's no way to know if that node had
151 			 * its own node record set to OK or not.  So, node
152 			 * record is left in ADD state during reconfig cycle.
153 			 * When that node reboots and returns to the cluster,
154 			 * the reconfig cycle will either remove the node
155 			 * record (if not marked OK on that node) or will mark
156 			 * it OK on all nodes.
157 			 * It is very important to only remove a node record
158 			 * from the other nodes when that node record is not
159 			 * marked OK on its own node - otherwise, different
160 			 * nodes would have different nodelists possibly
161 			 * causing different nodes to to choose different
162 			 * masters.
163 			 *
164 			 * Standard hostname field is 17 bytes but metaset
165 			 * will display up to MD_MAX_NODENAME, defined in
166 			 * meta_basic.h
167 			 */
168 			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
169 				nd = nd->nd_next;
170 				continue;
171 			}
172 			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
173 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
174 				(void) printf(
175 				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
176 				    MD_MAX_NODENAME,
177 				    nd->nd_nodename, gettext("multi-owner"),
178 				    gettext("Yes"));
179 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
180 			    (nd->nd_flags & MD_MN_NODE_OWN)) {
181 				/* Should never be able to happen */
182 				(void) printf(
183 				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
184 				    MD_MAX_NODENAME,
185 				    nd->nd_nodename, gettext("multi-owner"),
186 				    gettext("No"));
187 			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
188 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
189 				(void) printf(
190 				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
191 				    MD_MAX_NODENAME,
192 				    nd->nd_nodename, gettext(""),
193 				    gettext("Yes"));
194 			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
195 			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
196 				(void) printf(
197 				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
198 				    MD_MAX_NODENAME,
199 				    nd->nd_nodename, gettext(""),
200 				    gettext("No"));
201 			}
202 			nd = nd->nd_next;
203 		}
204 	} else {
205 		(void) printf("\n%-19.19s %-5.5s\n",
206 		    gettext("Host"), gettext("Owner"));
207 		for (i = 0; i < MD_MAXSIDES; i++) {
208 			/* Skip empty slots */
209 			if (sd->sd_nodes[i][0] == '\0')
210 				continue;
211 
212 			/*
213 			 * Standard hostname field is 17 bytes but metaset will
214 			 * display up to MD_MAX_NODENAME, def in meta_basic.h
215 			 */
216 			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
217 			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
218 			    (sd->sd_isown[i] ? gettext("Yes (auto)") :
219 			    gettext("No (auto)"))
220 			    : (sd->sd_isown[i] ? gettext("Yes") : "")));
221 		}
222 	}
223 
224 	if (sd->sd_med.n_cnt > 0)
225 		(void) printf("\n%-19.19s %-7.7s\n",
226 		    gettext("Mediator Host(s)"), gettext("Aliases"));
227 
228 	if ((max_meds = get_max_meds(ep)) == 0)
229 		return (-1);
230 
231 	for (i = 0; i < max_meds; i++) {
232 		if (sd->sd_med.n_lst[i].a_cnt == 0)
233 			continue;
234 		/*
235 		 * Standard hostname field is 17 bytes but metaset will
236 		 * display up to MD_MAX_NODENAME, def in meta_basic.h
237 		 */
238 		(void) printf("  %-17.*s   ", MD_MAX_NODENAME,
239 		    sd->sd_med.n_lst[i].a_nm[0]);
240 		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
241 			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
242 			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
243 				(void) printf(gettext(", "));
244 		}
245 		(void) printf("\n");
246 	}
247 
248 	if (dd) {
249 		int	len = 0;
250 
251 
252 		/*
253 		 * Building a format string on the fly that will
254 		 * be used in (f)printf. This allows the length
255 		 * of the ctd to vary from small to large without
256 		 * looking horrible.
257 		 */
258 		for (p = dd; p != NULL; p = p->dd_next)
259 			len = max(len, strlen(p->dd_dnp->cname));
260 
261 		len += 2;
262 		(void) printf("\n%-*.*s %-5.5s\n", len, len,
263 		    gettext("Drive"),
264 		    gettext("Dbase"));
265 		for (p = dd; p != NULL; p = p->dd_next) {
266 			(void) printf("\n%-*.*s %-5.5s\n", len, len,
267 			    p->dd_dnp->cname,
268 			    (p->dd_dbcnt ? gettext("Yes") :
269 			    gettext("No")));
270 		}
271 	}
272 
273 	return (0);
274 }
275 
276 static int
277 printsets(mdsetname_t *sp, md_error_t *ep)
278 {
279 	int			i;
280 	mdsetname_t		*sp1;
281 	set_t			max_sets;
282 
283 	/*
284 	 * print setname given.
285 	 */
286 	if (! metaislocalset(sp)) {
287 		if (printset(sp, ep))
288 			return (-1);
289 		return (0);
290 	}
291 
292 	if ((max_sets = get_max_sets(ep)) == 0)
293 		return (-1);
294 
295 	/*
296 	 * Print all known sets
297 	 */
298 	for (i = 1; i < max_sets; i++) {
299 		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
300 			if (! mdiserror(ep, MDE_NO_SET))
301 				break;
302 			mdclrerror(ep);
303 			continue;
304 		}
305 
306 		if (printset(sp1, ep))
307 			break;
308 	}
309 	if (! mdisok(ep))
310 		return (-1);
311 
312 	return (0);
313 }
314 
315 /*
316  * Print the current versionn of the cluster contract private interface.
317  */
318 static void
319 printclusterversion()
320 {
321 	(void) printf("%s\n", METASETIFVERSION);
322 }
323 
324 /*
325  * Print the disks that make up the given disk set. This is used
326  * exclusively by Sun Cluster and is contract private.
327  * Should never be called with sname of a Multinode diskset.
328  */
329 static int
330 printdisksin(char *sname, md_error_t *ep)
331 {
332 	mdsetname_t	*sp;
333 	md_drive_desc	*dd, *p;
334 
335 	if ((sp = metasetname(sname, ep)) == NULL) {
336 
337 		/*
338 		 * During a deletion of a set the associated service is
339 		 * put offline. The SC3.0 reservation code calls disksuite
340 		 * to find a list of disks associated with the set so that
341 		 * it can release the reservation on those disks. In this
342 		 * case there won't be any disks or even a set left. So just
343 		 * return.
344 		 */
345 		return (0);
346 	}
347 
348 	if (metaget_setownership(sp, ep) == -1)
349 		return (-1);
350 
351 	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
352 	    ep)) == NULL) && !mdisok(ep))
353 		return (-1);
354 
355 	for (p = dd; p != NULL; p = p->dd_next)
356 		(void) printf("%s\n", p->dd_dnp->rname);
357 
358 	return (0);
359 }
360 
361 static void
362 parse_printset(int argc, char **argv)
363 {
364 	int		c;
365 	mdsetname_t	*sp = NULL;
366 	char		*sname = MD_LOCAL_NAME;
367 	md_error_t	status = mdnullerror;
368 	md_error_t	*ep = &status;
369 
370 	/* reset and parse args */
371 	optind = 1;
372 	opterr = 1;
373 	while ((c = getopt(argc, argv, "s:")) != -1) {
374 		switch (c) {
375 		case 's':
376 			sname = optarg;
377 			break;
378 		default:
379 			usage(sp, gettext("unknown options"));
380 		}
381 	}
382 
383 	argc -= optind;
384 	argv += optind;
385 
386 	if (argc != 0)
387 		usage(sp, gettext("too many args"));
388 
389 	if ((sp = metasetname(sname, ep)) == NULL) {
390 		mde_perror(ep, "");
391 		md_exit(sp, 1);
392 	}
393 
394 	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
395 		mde_perror(ep, "");
396 		md_exit(sp, 1);
397 	}
398 
399 	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
400 		mde_perror(ep, "");
401 		md_exit(sp, 1);
402 	}
403 
404 	md_exit(sp, 0);
405 }
406 
407 static void
408 parse_add(int argc, char **argv)
409 {
410 	int			c, created_set;
411 	int			hosts = FALSE;
412 	int			meds = FALSE;
413 	int			auto_take = FALSE;
414 	int			force_label = FALSE;
415 	int			default_size = TRUE;
416 	mdsetname_t		*sp = NULL;
417 	char			*sname = MD_LOCAL_NAME;
418 	md_error_t		status = mdnullerror;
419 	md_error_t		 *ep = &status;
420 	mddrivenamelist_t	*dnlp = NULL;
421 	mddrivenamelist_t	*p;
422 	daddr_t			dbsize, nblks;
423 	mdsetname_t		*local_sp = NULL;
424 	int			multi_node = 0;
425 	md_set_desc		*sd;
426 	rval_e			sdssc_rval;
427 
428 	/* reset and parse args */
429 	optind = 1;
430 	opterr = 1;
431 	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
432 		switch (c) {
433 		case 'M':
434 			multi_node = 1;
435 			break;
436 		case 'A':
437 			/* verified sub-option in main */
438 			if (strcmp(optarg, "enable") == 0)
439 				auto_take = TRUE;
440 			break;
441 		case 'a':
442 			break;
443 		case 'h':
444 		case 'm':
445 			if (meds == TRUE || hosts == TRUE)
446 				usage(sp, gettext(
447 				    "only one -m or -h option allowed"));
448 
449 			if (default_size == FALSE || force_label == TRUE)
450 				usage(sp, gettext(
451 				    "conflicting options"));
452 
453 			if (c == 'h')
454 				hosts = TRUE;
455 			else
456 				meds = TRUE;
457 			break;
458 		case 'l':
459 			if (hosts == TRUE || meds == TRUE)
460 				usage(sp, gettext(
461 				    "conflicting options"));
462 			if (sscanf(optarg, "%ld", &dbsize) != 1) {
463 				md_eprintf(gettext(
464 				    "%s: bad format\n"), optarg);
465 				usage(sp, "");
466 			}
467 
468 			default_size = FALSE;
469 			break;
470 		case 'L':
471 			/* Same criteria as -l */
472 			if (hosts == TRUE || meds == TRUE)
473 				usage(sp, gettext(
474 				    "conflicting options"));
475 			force_label = TRUE;
476 			break;
477 		case 's':
478 			sname = optarg;
479 			break;
480 		default:
481 			usage(sp, gettext(
482 			    "unknown options"));
483 		}
484 	}
485 
486 	/* Can only use -A enable when creating the single-node set */
487 	if (auto_take && hosts != TRUE)
488 		usage(sp, gettext("conflicting options"));
489 
490 	argc -= optind;
491 	argv += optind;
492 
493 	/*
494 	 * Add hosts
495 	 */
496 	if (hosts == TRUE) {
497 
498 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
499 			mde_perror(ep, "");
500 			md_exit(local_sp, 1);
501 		}
502 
503 		if (meta_lock(local_sp, TRUE, ep) != 0) {
504 			mde_perror(ep, "");
505 			md_exit(local_sp, 1);
506 		}
507 
508 		/*
509 		 * Keep track of Cluster set creation. Need to complete
510 		 * the transaction no matter if the set was created or not.
511 		 */
512 		created_set = 0;
513 
514 		/*
515 		 * Have no set, cannot take the lock, so only take the
516 		 * local lock.
517 		 */
518 		if ((sp = metasetname(sname, ep)) == NULL) {
519 			sdssc_rval = 0;
520 			if (multi_node) {
521 				/*
522 				 * When running on a cluster system that
523 				 * does not support MN disksets, the routine
524 				 * sdssc_mo_create_begin will be bound
525 				 * to the SVM routine not_bound_error
526 				 * which returns SDSSC_NOT_BOUND_ERROR.
527 				 *
528 				 * When running on a cluster system that
529 				 * does support MN disksets, the routine
530 				 * sdssc_mo_create_begin will be bound to
531 				 * the sdssc_mo_create_begin routine in
532 				 * library libsdssc_so.  A call to
533 				 * sdssc_mo_create_begin will return with
534 				 * either SDSSC_ERROR or SDSSC_OKAY. If
535 				 * an SDSSC_OKAY is returned, then the
536 				 * cluster framework has allocated a
537 				 * set number for this new set that is unique
538 				 * across traditional and MN disksets.
539 				 * Libmeta will get this unique set number
540 				 * by calling sdssc_get_index.
541 				 *
542 				 * When running on a non-cluster system,
543 				 * the routine sdssc_mo_create_begin
544 				 * will be bound to the SVM routine
545 				 * not_bound which returns SDSSC_NOT_BOUND.
546 				 * In this case, all sdssc routines will
547 				 * return SDSSC_NOT_BOUND.  No need to check
548 				 * for return value of SDSSC_NOT_BOUND since
549 				 * the libmeta call to get the set number
550 				 * (sdssc_get_index) will also fail with
551 				 * SDSSC_NOT_BOUND causing libmeta to
552 				 * determine its own set number.
553 				 */
554 				sdssc_rval = sdssc_mo_create_begin(sname, argc,
555 				    argv, SDSSC_PICK_SETNO);
556 				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
557 					(void) mderror(ep, MDE_NOT_MN, NULL);
558 					mde_perror(ep,
559 					"Cluster node does not support "
560 					"multi-owner diskset operations");
561 					md_exit(local_sp, 1);
562 				} else if (sdssc_rval == SDSSC_ERROR) {
563 					mde_perror(ep, "");
564 					md_exit(local_sp, 1);
565 				}
566 			} else {
567 				sdssc_rval = sdssc_create_begin(sname, argc,
568 				    argv, SDSSC_PICK_SETNO);
569 				if (sdssc_rval == SDSSC_ERROR) {
570 					mde_perror(ep, "");
571 					md_exit(local_sp, 1);
572 				}
573 			}
574 			/*
575 			 * Created diskset (as opposed to adding a
576 			 * host to an existing diskset).
577 			 */
578 			created_set = 1;
579 
580 			sp = Zalloc(sizeof (*sp));
581 			sp->setname = Strdup(sname);
582 			sp->lockfd = MD_NO_LOCK;
583 			mdclrerror(ep);
584 		} else {
585 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
586 				mde_perror(ep, "");
587 				md_exit(local_sp, 1);
588 			}
589 			if (MD_MNSET_DESC(sd)) {
590 				multi_node = 1;
591 			}
592 
593 			/*
594 			 * can't add hosts to an existing set & enable
595 			 * auto-take
596 			 */
597 			if (auto_take)
598 				usage(sp, gettext("conflicting options"));
599 
600 			/*
601 			 * Have a valid set, take the set lock also.
602 			 *
603 			 * A MN diskset does not use the set meta_lock but
604 			 * instead uses the clnt_lock of rpc.metad and the
605 			 * suspend/resume feature of the rpc.mdcommd.  Can't
606 			 * use set meta_lock since class 1 messages are
607 			 * grabbing this lock and if this thread is holding
608 			 * the set meta_lock then no rpc.mdcommd suspend
609 			 * can occur.
610 			 */
611 			if (!multi_node) {
612 				if (meta_lock(sp, TRUE, ep) != 0) {
613 					mde_perror(ep, "");
614 					md_exit(local_sp, 1);
615 				}
616 			}
617 		}
618 
619 		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
620 		    ep)) {
621 			if (created_set)
622 				sdssc_create_end(sname, SDSSC_CLEANUP);
623 			mde_perror(&status, "");
624 			if (!multi_node)
625 				(void) meta_unlock(sp, ep);
626 			md_exit(local_sp, 1);
627 		}
628 
629 		if (created_set)
630 			sdssc_create_end(sname, SDSSC_COMMIT);
631 
632 		else {
633 			/*
634 			 * If adding hosts to existing diskset,
635 			 * call DCS svcs
636 			 */
637 			sdssc_add_hosts(sname, argc, argv);
638 		}
639 		if (!multi_node)
640 			(void) meta_unlock(sp, ep);
641 		md_exit(local_sp, 0);
642 	}
643 
644 	/*
645 	 * Add mediators
646 	 */
647 	if (meds == TRUE) {
648 
649 		if ((sp = metasetname(sname, ep)) == NULL) {
650 			mde_perror(ep, "");
651 			md_exit(local_sp, 1);
652 		}
653 
654 		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
655 			mde_perror(ep, "");
656 			md_exit(local_sp, 1);
657 		}
658 
659 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
660 			mde_perror(ep, "");
661 			md_exit(local_sp, 1);
662 		}
663 		if (MD_MNSET_DESC(sd)) {
664 			multi_node = 1;
665 		}
666 
667 		if (meta_lock(local_sp, TRUE, ep) != 0) {
668 			mde_perror(ep, "");
669 			md_exit(local_sp, 1);
670 		}
671 		/*
672 		 * A MN diskset does not use the set meta_lock but
673 		 * instead uses the clnt_lock of rpc.metad and the
674 		 * suspend/resume feature of the rpc.mdcommd.  Can't
675 		 * use set meta_lock since class 1 messages are
676 		 * grabbing this lock and if this thread is holding
677 		 * the set meta_lock then no rpc.mdcommd suspend
678 		 * can occur.
679 		 */
680 		if (!multi_node) {
681 			if (meta_lock(sp, TRUE, ep) != 0) {
682 				mde_perror(ep, "");
683 				md_exit(local_sp, 1);
684 			}
685 		}
686 
687 		if (meta_set_addmeds(sp, argc, argv, ep)) {
688 			mde_perror(&status, "");
689 			if (!multi_node)
690 				(void) meta_unlock(sp, ep);
691 			md_exit(local_sp, 1);
692 		}
693 
694 		if (!multi_node)
695 			(void) meta_unlock(sp, ep);
696 		md_exit(local_sp, 0);
697 	}
698 
699 	/*
700 	 * Add drives
701 	 */
702 	if ((sp = metasetname(sname, ep)) == NULL) {
703 		mde_perror(ep, "");
704 		md_exit(local_sp, 1);
705 	}
706 
707 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
708 		mde_perror(ep, "");
709 		md_exit(local_sp, 1);
710 	}
711 
712 	/* Determine if diskset is a MN diskset or not */
713 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
714 		mde_perror(ep, "");
715 		md_exit(local_sp, 1);
716 	}
717 	if (MD_MNSET_DESC(sd)) {
718 		multi_node = 1;
719 	}
720 
721 	if (meta_lock(local_sp, TRUE, ep) != 0) {
722 		mde_perror(ep, "");
723 		md_exit(local_sp, 1);
724 	}
725 
726 	/* Make sure database size is within limits */
727 	if (default_size == FALSE) {
728 		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
729 		    (!multi_node && dbsize < MDDB_MINBLKS))
730 			usage(sp, gettext(
731 			    "size (-l) is too small"));
732 
733 		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
734 		    (!multi_node && dbsize > MDDB_MAXBLKS))
735 			usage(sp, gettext(
736 			    "size (-l) is too big"));
737 	}
738 
739 	/*
740 	 * Have a valid set, take the set lock also.
741 	 *
742 	 * A MN diskset does not use the set meta_lock but
743 	 * instead uses the clnt_lock of rpc.metad and the
744 	 * suspend/resume feature of the rpc.mdcommd.  Can't
745 	 * use set meta_lock since class 1 messages are
746 	 * grabbing this lock and if this thread is holding
747 	 * the set meta_lock then no rpc.mdcommd suspend
748 	 * can occur.
749 	 */
750 	if (!multi_node) {
751 		if (meta_lock(sp, TRUE, ep) != 0) {
752 			mde_perror(ep, "");
753 			md_exit(local_sp, 1);
754 		}
755 	}
756 
757 
758 	/*
759 	 * If using the default size,
760 	 *   then let's adjust the default to the minimum
761 	 *   size currently in use.
762 	 */
763 	if (default_size) {
764 		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
765 		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
766 			mdclrerror(ep);
767 		else
768 			dbsize = nblks;	/* adjust replica size */
769 	}
770 
771 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
772 		mde_perror(ep, "");
773 		if (!multi_node)
774 			(void) meta_unlock(sp, ep);
775 		md_exit(local_sp, 1);
776 	}
777 
778 	if (c == 0) {
779 		md_perror(gettext(
780 		    "No drives specified to add.\n"));
781 		if (!multi_node)
782 			(void) meta_unlock(sp, ep);
783 		md_exit(local_sp, 1);
784 	}
785 
786 	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
787 		metafreedrivenamelist(dnlp);
788 		mde_perror(ep, "");
789 		if (!multi_node)
790 			(void) meta_unlock(sp, ep);
791 		md_exit(local_sp, 1);
792 	}
793 
794 	/*
795 	 * MN disksets don't have a device id in the master block
796 	 * For traditional disksets, check for the drive device
797 	 * id not fitting in the master block
798 	 */
799 	if (!multi_node) {
800 		for (p = dnlp; p != NULL; p = p->next) {
801 			int 		fd;
802 			ddi_devid_t	devid;
803 			mdname_t	*np;
804 
805 			np = metaslicename(p->drivenamep, 0, ep);
806 			if (np == NULL)
807 				continue;
808 
809 			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
810 				continue;
811 
812 			if (devid_get(fd, &devid) == 0) {
813 				size_t len;
814 
815 				len = devid_sizeof(devid);
816 				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
817 					(void) mddserror(ep,
818 					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
819 					    np->rname, NULL);
820 				devid_free(devid);
821 			} else {
822 				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
823 				    NULL, NULL, np->rname, NULL);
824 			}
825 			(void) close(fd);
826 		}
827 	}
828 
829 	/*
830 	 * MN disksets don't use DCS clustering services.
831 	 * For traditional disksets:
832 	 * There's not really much we can do here if this call fails.
833 	 * The drives have been added to the set and DiskSuite believes
834 	 * it owns the drives.
835 	 * Relase the set and hope for the best.
836 	 */
837 	if ((!multi_node) &&
838 	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
839 		(void) meta_set_release(sp, ep);
840 		(void) printf(gettext(
841 		    "Sun Clustering failed to make set primary\n"));
842 	}
843 
844 	metafreedrivenamelist(dnlp);
845 	if (!multi_node)
846 		(void) meta_unlock(sp, ep);
847 	md_exit(local_sp, 0);
848 }
849 
850 static void
851 parse_balance(int argc, char **argv)
852 {
853 	int		c;
854 	mdsetname_t	*sp = NULL;
855 	char		*sname = MD_LOCAL_NAME;
856 	md_error_t	status = mdnullerror;
857 	md_set_desc	*sd;
858 	int		multi_node = 0;
859 
860 	/* reset and parse args */
861 	optind = 1;
862 	opterr = 1;
863 	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
864 		switch (c) {
865 		case 'M':
866 			break;
867 		case 'b':
868 			break;
869 		case 's':
870 			sname = optarg;
871 			break;
872 		default:
873 			usage(sp, gettext("unknown options"));
874 		}
875 	}
876 
877 	argc -= optind;
878 	argv += optind;
879 
880 	if (argc != 0)
881 		usage(sp, gettext("too many args"));
882 
883 	if ((sp = metasetname(sname, &status)) == NULL) {
884 		mde_perror(&status, "");
885 		md_exit(sp, 1);
886 	}
887 	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
888 		mde_perror(&status, "");
889 		md_exit(sp, 1);
890 	}
891 	if (MD_MNSET_DESC(sd)) {
892 		multi_node = 1;
893 	}
894 	/*
895 	 * Have a valid set, take the set lock also.
896 	 *
897 	 * A MN diskset does not use the set meta_lock but
898 	 * instead uses the clnt_lock of rpc.metad and the
899 	 * suspend/resume feature of the rpc.mdcommd.  Can't
900 	 * use set meta_lock since class 1 messages are
901 	 * grabbing this lock and if this thread is holding
902 	 * the set meta_lock then no rpc.mdcommd suspend
903 	 * can occur.
904 	 */
905 	if (!multi_node) {
906 		if (meta_lock(sp, TRUE, &status) != 0) {
907 			mde_perror(&status, "");
908 			md_exit(sp, 1);
909 		}
910 	}
911 
912 	if (meta_set_balance(sp, &status) != 0) {
913 		mde_perror(&status, "");
914 		md_exit(sp, 1);
915 	}
916 	md_exit(sp, 0);
917 }
918 
919 static void
920 parse_autotake(int argc, char **argv)
921 {
922 	int			c;
923 	int			enable = 0;
924 	mdsetname_t		*sp = NULL;
925 	char			*sname = MD_LOCAL_NAME;
926 	md_error_t		status = mdnullerror;
927 	md_error_t		*ep = &status;
928 
929 	/* reset and parse args */
930 	optind = 1;
931 	opterr = 1;
932 	while ((c = getopt(argc, argv, "A:s:")) != -1) {
933 		switch (c) {
934 		case 'A':
935 			/* verified sub-option in main */
936 			if (strcmp(optarg, "enable") == 0)
937 				enable = 1;
938 			break;
939 		case 's':
940 			/* verified presence of setname in main */
941 			sname = optarg;
942 			break;
943 		default:
944 			usage(sp, gettext("unknown options"));
945 		}
946 	}
947 
948 	if ((sp = metasetname(sname, ep)) == NULL) {
949 		mde_perror(ep, "");
950 		md_exit(sp, 1);
951 	}
952 
953 	if (meta_lock(sp, TRUE, ep) != 0) {
954 		mde_perror(ep, "");
955 		md_exit(sp, 1);
956 	}
957 
958 	if (meta_check_ownership(sp, ep) != 0) {
959 		mde_perror(ep, "");
960 		md_exit(sp, 1);
961 	}
962 
963 	if (meta_set_auto_take(sp, enable, ep) != 0) {
964 		mde_perror(ep, "");
965 		md_exit(sp, 1);
966 	}
967 
968 	md_exit(sp, 0);
969 }
970 
971 static void
972 parse_del(int argc, char **argv)
973 {
974 	int			c;
975 	mdsetname_t		*sp = NULL;
976 	char			*sname = MD_LOCAL_NAME;
977 	int			hosts = FALSE;
978 	int			meds = FALSE;
979 	int			forceflg = FALSE;
980 	md_error_t		status = mdnullerror;
981 	md_error_t		*ep = &status;
982 	mddrivenamelist_t	*dnlp = NULL;
983 	mdsetname_t		*local_sp = NULL;
984 	md_set_desc		*sd;
985 	int			multi_node = 0;
986 
987 	/* reset and parse args */
988 	optind = 1;
989 	opterr = 1;
990 	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
991 		switch (c) {
992 		case 'M':
993 			break;
994 		case 'd':
995 			break;
996 		case 'f':
997 			forceflg = TRUE;
998 			break;
999 		case 'h':
1000 		case 'm':
1001 			if (meds == TRUE || hosts == TRUE)
1002 				usage(sp, gettext(
1003 				    "only one -m or -h option allowed"));
1004 
1005 			if (c == 'h')
1006 				hosts = TRUE;
1007 			else
1008 				meds = TRUE;
1009 			break;
1010 		case 's':
1011 			sname = optarg;
1012 			break;
1013 		default:
1014 			usage(sp, gettext("unknown options"));
1015 		}
1016 	}
1017 
1018 	argc -= optind;
1019 	argv += optind;
1020 
1021 	if ((sp = metasetname(sname, ep)) == NULL) {
1022 		mde_perror(ep, "");
1023 		md_exit(local_sp, 1);
1024 	}
1025 
1026 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1027 		mde_perror(ep, "");
1028 		md_exit(local_sp, 1);
1029 	}
1030 
1031 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1032 		mde_perror(ep, "");
1033 		md_exit(local_sp, 1);
1034 	}
1035 	if (MD_MNSET_DESC(sd))
1036 		multi_node = 1;
1037 
1038 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1039 		mde_perror(ep, "");
1040 		md_exit(local_sp, 1);
1041 	}
1042 
1043 	/*
1044 	 * Have a valid set, take the set lock also.
1045 	 *
1046 	 * A MN diskset does not use the set meta_lock but
1047 	 * instead uses the clnt_lock of rpc.metad and the
1048 	 * suspend/resume feature of the rpc.mdcommd.  Can't
1049 	 * use set meta_lock since class 1 messages are
1050 	 * grabbing this lock and if this thread is holding
1051 	 * the set meta_lock then no rpc.mdcommd suspend
1052 	 * can occur.
1053 	 */
1054 	if (!multi_node) {
1055 		if (meta_lock(sp, TRUE, ep) != 0) {
1056 			mde_perror(ep, "");
1057 			md_exit(local_sp, 1);
1058 		}
1059 	}
1060 
1061 	/*
1062 	 * Delete hosts
1063 	 */
1064 	if (hosts == TRUE) {
1065 		if (meta_check_ownership(sp, ep) != 0) {
1066 			/*
1067 			 * If we don't own the set bail out here otherwise
1068 			 * we could delete the node from the DCS service
1069 			 * yet not delete the host from the set.
1070 			 */
1071 			mde_perror(ep, "");
1072 			if (!multi_node)
1073 				(void) meta_unlock(sp, ep);
1074 			md_exit(local_sp, 1);
1075 		}
1076 		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1077 			if (!metad_isautotakebyname(sname)) {
1078 				/*
1079 				 * SC could have been installed after the set
1080 				 * was created. We still want to be able to
1081 				 * delete these sets.
1082 				 */
1083 				md_perror(gettext(
1084 				    "Failed to delete hosts from DCS service"));
1085 				if (!multi_node)
1086 					(void) meta_unlock(sp, ep);
1087 				md_exit(local_sp, 1);
1088 			}
1089 		}
1090 		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1091 			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1092 				(void) printf(gettext(
1093 				    "Failed to restore host(s) in DCS "
1094 				    "database\n"));
1095 			}
1096 			mde_perror(ep, "");
1097 			if (!multi_node)
1098 				(void) meta_unlock(sp, ep);
1099 			md_exit(local_sp, 1);
1100 		}
1101 		if (!multi_node)
1102 			(void) meta_unlock(sp, ep);
1103 		md_exit(local_sp, 0);
1104 	}
1105 
1106 	/*
1107 	 * Delete mediators
1108 	 */
1109 	if (meds == TRUE) {
1110 		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1111 			mde_perror(ep, "");
1112 			if (!multi_node)
1113 				(void) meta_unlock(sp, ep);
1114 			md_exit(local_sp, 1);
1115 		}
1116 		if (!multi_node)
1117 			(void) meta_unlock(sp, ep);
1118 		md_exit(local_sp, 0);
1119 	}
1120 
1121 	/*
1122 	 * Delete drives
1123 	 */
1124 
1125 	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1126 		mde_perror(ep, "");
1127 		if (!multi_node)
1128 			(void) meta_unlock(sp, ep);
1129 		md_exit(local_sp, 1);
1130 	}
1131 
1132 	if (c == 0) {
1133 		md_perror(gettext(
1134 		    "No drives specified to delete.\n"));
1135 		if (!multi_node)
1136 			(void) meta_unlock(sp, ep);
1137 		md_exit(local_sp, 1);
1138 	}
1139 
1140 	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1141 		metafreedrivenamelist(dnlp);
1142 		mde_perror(ep, "");
1143 		if (!multi_node)
1144 			(void) meta_unlock(sp, ep);
1145 		md_exit(local_sp, 1);
1146 	}
1147 
1148 	metafreedrivenamelist(dnlp);
1149 	if (!multi_node)
1150 		(void) meta_unlock(sp, ep);
1151 	md_exit(local_sp, 0);
1152 }
1153 
1154 static void
1155 parse_isowner(int argc, char **argv)
1156 {
1157 	int		c;
1158 	mdsetname_t	*sp = NULL;
1159 	char		*sname = MD_LOCAL_NAME;
1160 	md_error_t	status = mdnullerror;
1161 	md_error_t	*ep = &status;
1162 	char		*host = NULL;
1163 
1164 	/* reset and parse args */
1165 	optind = 1;
1166 	opterr = 1;
1167 	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1168 		switch (c) {
1169 		case 'M':
1170 			break;
1171 		case 'o':
1172 			break;
1173 		case 'h':
1174 			if (host != NULL) {
1175 				usage(sp, gettext(
1176 				    "only one -h option allowed"));
1177 			}
1178 			host = optarg;
1179 			break;
1180 		case 's':
1181 			sname = optarg;
1182 			break;
1183 		default:
1184 			usage(sp, gettext("unknown options"));
1185 		}
1186 	}
1187 
1188 	argc -= optind;
1189 	argv += optind;
1190 
1191 	if (argc != 0)
1192 		usage(sp, gettext("too many args"));
1193 
1194 	if ((sp = metasetname(sname, ep)) == NULL) {
1195 		mde_perror(ep, "");
1196 		md_exit(sp, 1);
1197 	}
1198 
1199 	if (host == NULL) {
1200 		if (meta_check_ownership(sp, ep) != 0) {
1201 			mde_perror(ep, "");
1202 			md_exit(sp, 1);
1203 		}
1204 	} else {
1205 		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1206 			mde_perror(ep, "");
1207 			md_exit(sp, 1);
1208 		}
1209 	}
1210 	md_exit(sp, 0);
1211 }
1212 
1213 static void
1214 parse_purge(int argc, char **argv)
1215 {
1216 	int		c;
1217 	mdsetname_t	*sp = NULL;
1218 	mdsetname_t	*local_sp = NULL;
1219 	md_drive_desc	*dd;
1220 	char		*sname = MD_LOCAL_NAME;
1221 	char		*thishost = mynode();
1222 	md_error_t	status = mdnullerror;
1223 	md_error_t	*ep = &status;
1224 	int		bypass_cluster_purge = 0;
1225 	int		forceflg = FALSE;
1226 	int		ret = 0;
1227 	int		multi_node = 0;
1228 	md_set_desc		*sd;
1229 
1230 	optind = 1;
1231 	opterr = 1;
1232 	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1233 		switch (c) {
1234 		case 'M':
1235 			break;
1236 		case 'C':
1237 			bypass_cluster_purge = 1;
1238 			break;
1239 		case 'f':
1240 			forceflg = TRUE;
1241 			break;
1242 		case 'P':
1243 			break;
1244 		case 's':
1245 			sname = optarg;
1246 			break;
1247 		default:
1248 			usage(sp, gettext("unknown options"));
1249 		}
1250 	}
1251 
1252 	argc -= optind;
1253 	argv += optind;
1254 
1255 	if (argc != 0)
1256 		usage(sp, gettext("too many arguments"));
1257 
1258 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1259 		mde_perror(ep, "");
1260 		md_exit(local_sp, 1);
1261 	}
1262 
1263 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1264 		mde_perror(ep, "");
1265 		md_exit(local_sp, 1);
1266 	}
1267 
1268 	if ((sp = metasetname(sname, ep)) == NULL) {
1269 		mde_perror(ep, "");
1270 		md_exit(sp, 1);
1271 	}
1272 
1273 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1274 		mde_perror(ep, "");
1275 		md_exit(local_sp, 1);
1276 	}
1277 	if (MD_MNSET_DESC(sd))
1278 		multi_node = 1;
1279 
1280 	if (!multi_node) {
1281 		if (meta_lock(sp, TRUE, ep) != 0) {
1282 			mde_perror(ep, "");
1283 			md_exit(local_sp, 1);
1284 		}
1285 	}
1286 
1287 	/* Must not own the set if purging it from this host */
1288 	if (meta_check_ownership(sp, ep) == 0) {
1289 		/*
1290 		 * Need to see if there are disks in the set, if not then
1291 		 * there is no ownership but meta_check_ownership returns 0
1292 		 */
1293 		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1294 		if (!mdisok(ep)) {
1295 			mde_perror(ep, "");
1296 			if (!multi_node)
1297 				(void) meta_unlock(sp, ep);
1298 			md_exit(local_sp, 1);
1299 		}
1300 		if (dd != NULL) {
1301 			(void) printf(gettext
1302 			    ("Must not be owner of the set when purging it\n"));
1303 			if (!multi_node)
1304 				(void) meta_unlock(sp, ep);
1305 			md_exit(local_sp, 1);
1306 		}
1307 	}
1308 	/*
1309 	 * Remove the node from the DCS service
1310 	 */
1311 	if (!bypass_cluster_purge) {
1312 		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1313 			md_perror(gettext
1314 			    ("Failed to purge hosts from DCS service"));
1315 			if (!multi_node)
1316 				(void) meta_unlock(sp, ep);
1317 			md_exit(local_sp, 1);
1318 		}
1319 	}
1320 
1321 	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1322 	    ep)) != 0) {
1323 		if (!bypass_cluster_purge) {
1324 			if (sdssc_add_hosts(sname, 1, &thishost) ==
1325 			    SDSSC_ERROR) {
1326 				(void) printf(gettext(
1327 				    "Failed to restore host in DCS "
1328 				    "database\n"));
1329 			}
1330 		}
1331 		mde_perror(ep, "");
1332 		if (!multi_node)
1333 			(void) meta_unlock(sp, ep);
1334 		md_exit(local_sp, ret);
1335 	}
1336 
1337 	if (!multi_node)
1338 		(void) meta_unlock(sp, ep);
1339 	md_exit(local_sp, 0);
1340 }
1341 
1342 static void
1343 parse_query(int argc, char **argv)
1344 {
1345 	int		c;
1346 	mdsetname_t	*sp = NULL;
1347 	mddb_dtag_lst_t	*dtlp = NULL;
1348 	mddb_dtag_lst_t	*tdtlp;
1349 	char		*sname = MD_LOCAL_NAME;
1350 	md_error_t	status = mdnullerror;
1351 
1352 	/* reset and parse args */
1353 	optind = 1;
1354 	opterr = 1;
1355 	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1356 		switch (c) {
1357 		case 'M':
1358 			break;
1359 		case 'q':
1360 			break;
1361 		case 's':
1362 			sname = optarg;
1363 			break;
1364 		default:
1365 			usage(sp, gettext("unknown options"));
1366 		}
1367 	}
1368 
1369 	argc -= optind;
1370 	argv += optind;
1371 
1372 	if (argc != 0)
1373 		usage(sp, gettext("too many args"));
1374 
1375 	if ((sp = metasetname(sname, &status)) == NULL) {
1376 		mde_perror(&status, "");
1377 		md_exit(sp, 1);
1378 	}
1379 
1380 	if (meta_lock(sp, TRUE, &status) != 0) {
1381 		mde_perror(&status, "");
1382 		md_exit(sp, 1);
1383 	}
1384 
1385 	if (meta_set_query(sp, &dtlp, &status) != 0) {
1386 		mde_perror(&status, "");
1387 		md_exit(sp, 1);
1388 	}
1389 
1390 	if (dtlp != NULL)
1391 		(void) printf("The following tag(s) were found:\n");
1392 
1393 	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1394 		dtlp = tdtlp->dtl_nx;
1395 		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1396 		    tdtlp->dtl_dt.dt_hn,
1397 		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1398 		Free(tdtlp);
1399 	}
1400 
1401 	md_exit(sp, 0);
1402 }
1403 
1404 /* Should never be called with sname of a Multinode diskset. */
1405 static void
1406 parse_releaseset(int argc, char **argv)
1407 {
1408 	int		c;
1409 	mdsetname_t	*sp = NULL;
1410 	md_error_t	status = mdnullerror;
1411 	md_error_t	*ep = &status;
1412 	char		*sname = MD_LOCAL_NAME;
1413 	sdssc_boolean_e	cluster_release = SDSSC_False;
1414 	sdssc_version_t	vers;
1415 	rval_e		rval;
1416 	md_set_desc	*sd;
1417 
1418 	/* reset and parse args */
1419 	optind = 1;
1420 	opterr = 1;
1421 	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1422 		switch (c) {
1423 		case 'C':
1424 			cluster_release = SDSSC_True;
1425 			break;
1426 		case 's':
1427 			sname = optarg;
1428 			break;
1429 		case 'r':
1430 			break;
1431 		default:
1432 			usage(sp, gettext("unknown options"));
1433 		}
1434 	}
1435 
1436 	argc -= optind;
1437 	argv += optind;
1438 
1439 	if (argc > 0)
1440 		usage(sp, gettext("too many args"));
1441 
1442 	(void) memset(&vers, 0, sizeof (vers));
1443 
1444 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1445 	    (vers.major == 3) &&
1446 	    (cluster_release == SDSSC_False)) {
1447 
1448 		/*
1449 		 * If the release is being done by the user via the CLI
1450 		 * we need to notify the DCS to release this node as being
1451 		 * the primary. The reason nothing else needs to be done
1452 		 * is due to the fact that the reservation code will exec
1453 		 * metaset -C release to complete the operation.
1454 		 */
1455 		rval = sdssc_notify_service(sname, Release_Primary);
1456 		if (rval == SDSSC_ERROR) {
1457 			(void) printf(gettext(
1458 			    "metaset: failed to notify DCS of release\n"));
1459 		}
1460 		md_exit(NULL, rval == SDSSC_ERROR);
1461 	}
1462 
1463 	if ((sp = metasetname(sname, ep)) == NULL) {
1464 
1465 		/*
1466 		 * It's entirely possible for the SC3.0 reservation code
1467 		 * to call for DiskSet to release a diskset and have that
1468 		 * diskset not exist. During a diskset removal DiskSuite
1469 		 * maybe able to remove all traces of the diskset before
1470 		 * the reservation code execs metaset -C release in which
1471 		 * case the metasetname will fail, but the overall command
1472 		 * shouldn't.
1473 		 */
1474 		if (vers.major == 3)
1475 			md_exit(sp, 0);
1476 		else {
1477 			mde_perror(ep, "");
1478 			md_exit(sp, 1);
1479 		}
1480 	}
1481 
1482 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1483 		mde_perror(ep, "");
1484 		md_exit(sp, 1);
1485 	}
1486 
1487 	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1488 		md_eprintf(gettext("cannot release auto-take diskset\n"));
1489 		md_exit(sp, 1);
1490 	}
1491 
1492 	if (meta_lock_nowait(sp, ep) != 0) {
1493 		mde_perror(ep, "");
1494 		md_exit(sp, 10);	/* special errcode */
1495 	}
1496 
1497 	if (meta_set_release(sp, ep)) {
1498 		mde_perror(ep, "");
1499 		md_exit(sp, 1);
1500 	}
1501 	md_exit(sp, 0);
1502 }
1503 
1504 /* Should never be called with sname of a Multinode diskset. */
1505 static void
1506 parse_takeset(int argc, char **argv)
1507 {
1508 	int		c;
1509 	mdsetname_t	*sp = NULL;
1510 	int		flags = 0;
1511 	char		*sname = MD_LOCAL_NAME;
1512 	mhd_mhiargs_t	mhiargs;
1513 	char 		*cp = NULL;
1514 	int		pos = -1;	/* position of timeout value */
1515 	int		usetag = 0;
1516 	static char	*nullopts[] = { NULL };
1517 	md_error_t	status = mdnullerror;
1518 	md_error_t	*ep = &status;
1519 	sdssc_boolean_e	cluster_take = SDSSC_False;
1520 	sdssc_version_t	vers;
1521 	rval_e		rval;
1522 	int		set_take_rval;
1523 
1524 	/* reset and parse args */
1525 	optind = 1;
1526 	opterr = 1;
1527 	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1528 		switch (c) {
1529 		case 'C':
1530 			cluster_take = SDSSC_True;
1531 			break;
1532 		case 'f':
1533 			flags |= TAKE_FORCE;
1534 			break;
1535 		case 's':
1536 			sname = optarg;
1537 			break;
1538 		case 't':
1539 			break;
1540 		case 'u':
1541 			usetag = atoi(optarg);
1542 			flags |= TAKE_USETAG;
1543 			break;
1544 		case 'y':
1545 			flags |= TAKE_USEIT;
1546 			break;
1547 		default:
1548 			usage(sp, gettext("unknown options"));
1549 		}
1550 	}
1551 
1552 	mhiargs = defmhiargs;
1553 
1554 	argc -= optind;
1555 	argv += optind;
1556 
1557 	if (argc > 1)
1558 		usage(sp, gettext("too many args"));
1559 
1560 	/*
1561 	 * If we have a list of timeout value overrides, handle it here
1562 	 */
1563 	while (argv[0] != NULL && *argv[0] != '\0') {
1564 		/*
1565 		 * The use of the nullopts[] "token list" here is to make
1566 		 * getsubopts() simply parse a comma separated list
1567 		 * returning either "" or the contents of the field, the
1568 		 * end condition is exaustion of the initial string, which
1569 		 * is modified in the process.
1570 		 */
1571 		(void) getsubopt(&argv[0], nullopts, &cp);
1572 
1573 		c = 0;			/* re-use c as temp value of timeout */
1574 
1575 		if (*cp != '-')		/* '-' uses default */
1576 			c = atoi(cp);
1577 
1578 		if (c < 0) {
1579 			usage(sp, gettext(
1580 			    "time out values must be > 0"));
1581 		}
1582 
1583 		if (++pos > 3) {
1584 			usage(sp, gettext(
1585 			    "too many timeout values specified."));
1586 		}
1587 
1588 		if (c == 0)		/* 0 or "" field uses default */
1589 			continue;
1590 
1591 		/*
1592 		 * Assign temp value to appropriate structure member based on
1593 		 * its position in the comma separated list.
1594 		 */
1595 		switch (pos) {
1596 			case 0:
1597 				mhiargs.mh_ff = c;
1598 				break;
1599 
1600 			case 1:
1601 				mhiargs.mh_tk.reinstate_resv_delay = c;
1602 				break;
1603 
1604 			case 2:
1605 				mhiargs.mh_tk.min_ownership_delay = c;
1606 				break;
1607 
1608 			case 3:
1609 				mhiargs.mh_tk.max_ownership_delay = c;
1610 				break;
1611 		}
1612 	}
1613 
1614 	(void) memset(&vers, 0, sizeof (vers));
1615 
1616 	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1617 	    (vers.major == 3) &&
1618 	    (cluster_take == SDSSC_False)) {
1619 
1620 		/*
1621 		 * If the take is beging done by the user via the CLI we need
1622 		 * to notify the DCS to make this current node the primary.
1623 		 * The SC3.0 reservation code will in turn exec metaset with
1624 		 * the -C take arg to complete this operation.
1625 		 */
1626 		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1627 		    SDSSC_ERROR) {
1628 			(void) printf(gettext(
1629 			    "metaset: failed to notify DCS of take\n"));
1630 		}
1631 		md_exit(NULL, rval == SDSSC_ERROR);
1632 	}
1633 
1634 	if ((sp = metasetname(sname, ep)) == NULL) {
1635 		mde_perror(ep, "");
1636 		md_exit(sp, 1);
1637 	}
1638 
1639 	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1640 
1641 		/*
1642 		 * If we're running in a cluster environment and this
1643 		 * node already owns the set. Don't bother trying to
1644 		 * take the set again. There's one case where an adminstrator
1645 		 * is adding disks to a set for the first time. metaset
1646 		 * will take the ownership of the set at that point. During
1647 		 * that add operation SC3.0 notices activity on the device
1648 		 * and also tries to perform a take operation. The SC3.0 take
1649 		 * will fail because the adminstrative add has the set locked
1650 		 */
1651 		md_exit(sp, 0);
1652 	}
1653 
1654 	if (meta_lock_nowait(sp, ep) != 0) {
1655 		mde_perror(ep, "");
1656 		md_exit(sp, 10);	/* special errcode */
1657 	}
1658 
1659 	/*
1660 	 * If a 2 is returned from meta_set_take, this take was able to resolve
1661 	 * an unresolved replicated disk (i.e. a disk is now available that
1662 	 * had been missing during the import of the replicated diskset).
1663 	 * Need to release the diskset and re-take in order to have
1664 	 * the subdrivers re-snarf using the newly resolved (or newly mapped)
1665 	 * devids.  This also allows the namespace to be updated with the
1666 	 * correct major names in the case where the disk being replicated
1667 	 * was handled by a different driver than the replicated disk.
1668 	 */
1669 	set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
1670 	if (set_take_rval == 2) {
1671 		if (meta_set_release(sp, &status)) {
1672 			mde_perror(&status,
1673 			    "Need to release and take set to resolve names.");
1674 			md_exit(sp, 1);
1675 		}
1676 		metaflushdrivenames();
1677 		metaflushsetname(sp);
1678 		set_take_rval = meta_set_take(sp, &mhiargs,
1679 		    (flags | TAKE_RETAKE), usetag, &status);
1680 	}
1681 
1682 	if (set_take_rval == -1) {
1683 		mde_perror(&status, "");
1684 		if (mdismddberror(&status, MDE_DB_TAGDATA))
1685 			md_exit(sp, 2);
1686 		if (mdismddberror(&status, MDE_DB_ACCOK))
1687 			md_exit(sp, 3);
1688 		if (mdismddberror(&status, MDE_DB_STALE))
1689 			md_exit(sp, 66);
1690 		md_exit(sp, 1);
1691 	}
1692 	md_exit(sp, 0);
1693 }
1694 
1695 /*
1696  * Joins a node to a specific set or to all multinode disksets known
1697  * by this node.  If set is specified then caller should have verified
1698  * that the set is a multinode diskset.
1699  *
1700  * If an error occurs, metaset exits with a 1.
1701  * If there is no error, metaset exits with a 0.
1702  */
1703 static void
1704 parse_joinset(int argc, char **argv)
1705 {
1706 	int		c;
1707 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1708 	char		*sname = MD_LOCAL_NAME;
1709 	md_error_t	status = mdnullerror;
1710 	md_error_t	*ep = &status;
1711 	md_set_desc	*sd;
1712 	char		buf[BUFSIZ];
1713 	char		*p = buf;
1714 	set_t		max_sets, setno;
1715 	int		err, cumm_err = 0;
1716 	size_t		bufsz;
1717 
1718 	bufsz = sizeof (buf);
1719 	/* reset and parse args */
1720 	optind = 1;
1721 	opterr = 1;
1722 	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1723 		switch (c) {
1724 		case 'M':
1725 			break;
1726 		case 'j':
1727 			break;
1728 		case 's':
1729 			sname = optarg;
1730 			break;
1731 		default:
1732 			usage(sp, gettext("unknown options"));
1733 		}
1734 	}
1735 
1736 	argc -= optind;
1737 	argv += optind;
1738 
1739 	if (argc > 1)
1740 		usage(sp, gettext("too many args"));
1741 
1742 	/*
1743 	 * If no setname option was used, then join all disksets
1744 	 * that this node knows about.   Attempt to join all
1745 	 * disksets that this node knows about.
1746 	 *
1747 	 * Additional text is added to the error messages during
1748 	 * this section of code in order to help the user understand
1749 	 * why the 'join of all sets' failed and which set caused
1750 	 * the failure.
1751 	 */
1752 
1753 	/*
1754 	 * Hold local set lock throughout this call to keep
1755 	 * other actions from interfering (such as creating a new
1756 	 * set, etc.).
1757 	 */
1758 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1759 		mde_perror(ep, "");
1760 		md_exit(sp, 1);
1761 	}
1762 
1763 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1764 		mde_perror(ep, "");
1765 		md_exit(local_sp, 1);
1766 	}
1767 
1768 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1769 		/*
1770 		 * If no set name is given, then walk through all sets
1771 		 * on this node which could include:
1772 		 * 	- MN disksets
1773 		 *	- traditional disksets
1774 		 *	- non-existent disksets
1775 		 * Attempt to join the MN disksets.
1776 		 * If the join of one set fails, print out an error message
1777 		 * about that set and continue the walk.
1778 		 */
1779 		if ((max_sets = get_max_sets(ep)) == 0) {
1780 			mde_perror(ep, "");
1781 			md_exit(local_sp, 1);
1782 		}
1783 
1784 		/* Start walking through all possible disksets */
1785 		for (setno = 1; setno < max_sets; setno++) {
1786 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1787 				if (mdiserror(ep, MDE_NO_SET)) {
1788 					/* No set for this setno - continue */
1789 					mdclrerror(ep);
1790 					continue;
1791 				} else {
1792 					(void) sprintf(p, gettext(
1793 					"Unable to get set %d information"),
1794 					    setno);
1795 					mde_perror(ep, p);
1796 					cumm_err = 1;
1797 					mdclrerror(ep);
1798 					continue;
1799 				}
1800 			}
1801 
1802 			/* If setname is there, set desc should exist. */
1803 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1804 				(void) snprintf(p, bufsz, gettext(
1805 				    "Unable to get set %s desc information"),
1806 				    sp->setname);
1807 				mde_perror(ep, p);
1808 				cumm_err = 1;
1809 				mdclrerror(ep);
1810 				continue;
1811 			}
1812 
1813 			/* Only check MN disksets */
1814 			if (!MD_MNSET_DESC(sd)) {
1815 				continue;
1816 			}
1817 
1818 			/*
1819 			 * Return value of 0 is success.
1820 			 * Return value of -1 means a failure.
1821 			 * Return value of -2 means set could not be
1822 			 * joined, but shouldn't cause an error.
1823 			 * Reasons would be:
1824 			 * 	- no drives in set
1825 			 * 	- node already joined to set
1826 			 * Return value of -3 means joined stale set.
1827 			 * Can't check for all reasons here
1828 			 * since set isn't locked yet across all
1829 			 * nodes in the cluster.  The call
1830 			 * to libmeta routine, meta_set_join, will
1831 			 * lock across the cluster and perform
1832 			 * the checks.
1833 			 */
1834 			if ((err = meta_set_join(sp, ep)) == -1) {
1835 				/* Print error of diskset join failure */
1836 				(void) snprintf(p, bufsz,
1837 				    gettext("Join to diskset %s failed"),
1838 				    sp->setname);
1839 				mde_perror(ep, p);
1840 				cumm_err = 1;
1841 				mdclrerror(ep);
1842 				continue;
1843 			}
1844 
1845 			if (err == -3) {
1846 				/* Print error of diskset join failure */
1847 				(void) snprintf(p, bufsz,
1848 				    gettext("Joined to stale diskset %s"),
1849 				    sp->setname);
1850 				mde_perror(ep, p);
1851 				mdclrerror(ep);
1852 			}
1853 
1854 			mdclrerror(ep);
1855 		}
1856 
1857 		md_exit(local_sp, cumm_err);
1858 	}
1859 
1860 	/*
1861 	 * Code for a specific set is much simpler.
1862 	 * Error messages don't need extra text since specific setname
1863 	 * was used.
1864 	 * Don't need to lock the local set, just the specific set given.
1865 	 */
1866 	if ((sp = metasetname(sname, ep)) == NULL) {
1867 		mde_perror(ep, "");
1868 		md_exit(local_sp, 1);
1869 	}
1870 
1871 	/*
1872 	 * Fail command if meta_set_join returns -1.
1873 	 *
1874 	 * Return of 0 means that node joined set.
1875 	 *
1876 	 * Return of -2 means that node was unable to
1877 	 * join a set since that set had no drives
1878 	 * or that had already joined the set.  No
1879 	 * need to fail the command for these reasons.
1880 	 *
1881 	 * Return of -3 means that set is stale.
1882 	 * Return a value of 66 to historically match traditional disksets.
1883 	 */
1884 	if ((err = meta_set_join(sp, ep)) == -1) {
1885 		mde_perror(&status, "");
1886 		md_exit(local_sp, 1);
1887 	}
1888 
1889 	if (err == -3) {
1890 		/* Print error of diskset join failure */
1891 		(void) snprintf(p, bufsz,
1892 		    gettext("Joined to stale diskset %s"),
1893 		    sp->setname);
1894 		mde_perror(&status, "");
1895 		md_exit(local_sp, 66);
1896 	}
1897 
1898 	md_exit(local_sp, 0);
1899 }
1900 
1901 /*
1902  * Withdraws a node from a specific set or from all multinode disksets known
1903  * by this node.  If set is specified then caller should have verified
1904  * that the set is a multinode diskset.
1905  *
1906  * If an error occurs, metaset exits with a 1.
1907  * If there is no error, metaset exits with a 0.
1908  */
1909 static void
1910 parse_withdrawset(int argc, char **argv)
1911 {
1912 	int		c;
1913 	mdsetname_t	*sp = NULL, *local_sp = NULL;
1914 	char		*sname = MD_LOCAL_NAME;
1915 	md_error_t	status = mdnullerror;
1916 	md_error_t	*ep = &status;
1917 	char		buf[BUFSIZ];
1918 	char		*p = buf;
1919 	md_set_desc	*sd;
1920 	set_t		max_sets, setno;
1921 	int		err, cumm_err = 0;
1922 	size_t		bufsz;
1923 
1924 	bufsz = sizeof (buf);
1925 	/* reset and parse args */
1926 	optind = 1;
1927 	opterr = 1;
1928 	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1929 		switch (c) {
1930 		case 'M':
1931 			break;
1932 		case 'w':
1933 			break;
1934 		case 's':
1935 			sname = optarg;
1936 			break;
1937 		default:
1938 			usage(sp, gettext("unknown options"));
1939 		}
1940 	}
1941 
1942 	argc -= optind;
1943 	argv += optind;
1944 
1945 	if (argc > 1)
1946 		usage(sp, gettext("too many args"));
1947 
1948 	/*
1949 	 * If no setname option was used, then withdraw from all disksets
1950 	 * that this node knows about.
1951 	 *
1952 	 * Additional text is added to the error messages during
1953 	 * this section of code in order to help the user understand
1954 	 * why the 'withdraw from all sets' failed and which set caused
1955 	 * the failure.
1956 	 */
1957 
1958 	/*
1959 	 * Hold local set lock throughout this call to keep
1960 	 * other actions from interfering (such as creating a new
1961 	 * set, etc.).
1962 	 */
1963 	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1964 		mde_perror(ep, "");
1965 		md_exit(sp, 1);
1966 	}
1967 
1968 	if (meta_lock(local_sp, TRUE, ep) != 0) {
1969 		mde_perror(ep, "");
1970 		md_exit(local_sp, 1);
1971 	}
1972 
1973 	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1974 		/*
1975 		 * If no set name is given, then walk through all sets
1976 		 * on this node which could include:
1977 		 * 	- MN disksets
1978 		 *	- traditional disksets
1979 		 *	- non-existent disksets
1980 		 * Attempt to withdraw from the MN disksets.
1981 		 * If the withdraw of one set fails, print out an error
1982 		 * message about that set and continue the walk.
1983 		 */
1984 		if ((max_sets = get_max_sets(ep)) == 0) {
1985 			mde_perror(ep, "");
1986 			md_exit(local_sp, 1);
1987 		}
1988 
1989 		/* Start walking through all possible disksets */
1990 		for (setno = 1; setno < max_sets; setno++) {
1991 			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1992 				if (mdiserror(ep, MDE_NO_SET)) {
1993 					/* No set for this setno - continue */
1994 					mdclrerror(ep);
1995 					continue;
1996 				} else {
1997 					(void) sprintf(p, gettext(
1998 					    "Unable to get set %d information"),
1999 					    setno);
2000 					mde_perror(ep, p);
2001 					cumm_err = 1;
2002 					mdclrerror(ep);
2003 					continue;
2004 				}
2005 			}
2006 
2007 			/* If setname is there, set desc should exist. */
2008 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2009 				(void) snprintf(p, bufsz, gettext(
2010 				    "Unable to get set %s desc information"),
2011 				    sp->setname);
2012 				mde_perror(ep, p);
2013 				cumm_err = 1;
2014 				mdclrerror(ep);
2015 				continue;
2016 			}
2017 
2018 			/* Only check MN disksets */
2019 			if (!MD_MNSET_DESC(sd)) {
2020 				continue;
2021 			}
2022 
2023 			/*
2024 			 * Return value of 0 is success.
2025 			 * Return value of -1 means a failure.
2026 			 * Return value of -2 means set could not be
2027 			 * withdrawn from, but this shouldn't cause
2028 			 * an error.  Reasons would be:
2029 			 * 	- no drives in set
2030 			 * 	- node already withdrawn from set
2031 			 * Can't check for all reasons here
2032 			 * since set isn't locked yet across all
2033 			 * nodes in the cluster.  The call
2034 			 * to libmeta routine, meta_set_withdraw, will
2035 			 * lock across the cluster and perform
2036 			 * the checks.
2037 			 */
2038 			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2039 				/* Print error of diskset withdraw failure */
2040 				(void) snprintf(p, bufsz,
2041 				    gettext("Withdraw from diskset %s failed"),
2042 				    sp->setname);
2043 				mde_perror(ep, p);
2044 				mdclrerror(ep);
2045 				cumm_err = 1;
2046 				continue;
2047 			}
2048 
2049 			if (err == -2) {
2050 				mdclrerror(ep);
2051 				continue;
2052 			}
2053 
2054 			mdclrerror(ep);
2055 		}
2056 		md_exit(local_sp, cumm_err);
2057 	}
2058 
2059 
2060 	/*
2061 	 * Code for a specific set is much simpler.
2062 	 * Error messages don't need extra text since specific setname
2063 	 * was used.
2064 	 * Don't need to lock the local set, just the specific set given.
2065 	 */
2066 	if ((sp = metasetname(sname, ep)) == NULL) {
2067 		mde_perror(ep, "");
2068 		md_exit(local_sp, 1);
2069 	}
2070 
2071 	/*
2072 	 * Fail command if meta_set_withdraw returns -1.
2073 	 *
2074 	 * Return of 0 means that node withdrew from set.
2075 	 *
2076 	 * Return of -2 means that node was unable to
2077 	 * withdraw from a set since that set had no drives
2078 	 * or node was not joined to set.  No
2079 	 * need to fail the command for these reasons.
2080 	 */
2081 	if (meta_set_withdraw(sp, ep) == -1) {
2082 		mde_perror(&status, "");
2083 		md_exit(local_sp, 1);
2084 	}
2085 
2086 	md_exit(local_sp, 0);
2087 }
2088 
2089 static void
2090 parse_cluster(int argc, char **argv, int multi_node)
2091 {
2092 	int			c, error, new_argc, x;
2093 	enum cluster_cmd	cmd = ccnotspecified;
2094 	char			*hostname = SDSSC_PROXY_PRIMARY;
2095 	char			*argument = NULL;
2096 	char			*sname = MD_LOCAL_NAME;
2097 	char			primary_node[SDSSC_NODE_NAME_LEN];
2098 	char			**new_argv = NULL;
2099 	char			**np = NULL;
2100 	mdsetname_t		*sp = NULL;
2101 	md_error_t		status = mdnullerror;
2102 	md_error_t		*ep = &status;
2103 
2104 	/* reset and parse args */
2105 	optind = 1;
2106 	opterr = 1;
2107 	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2108 		switch (c) {
2109 		case 'C':
2110 			if (cmd != ccnotspecified) {
2111 				md_exit(sp, -1);
2112 			}
2113 			argument = optarg;
2114 
2115 			if (strcmp(argument, "disksin") == 0) {
2116 				cmd = clusterdisksin;
2117 			} else if (strcmp(argument, "version") == 0) {
2118 				cmd = clusterversion;
2119 			} else if (strcmp(argument, "release") == 0) {
2120 				cmd = clusterrelease;
2121 			} else if (strcmp(argument, "take") == 0) {
2122 				cmd = clustertake;
2123 			} else if (strcmp(argument, "proxy") == 0) {
2124 				cmd = clusterproxy;
2125 			} else if (strcmp(argument, "purge") == 0) {
2126 				cmd = clusterpurge;
2127 			} else {
2128 				md_exit(sp, -1);
2129 			}
2130 
2131 			break;
2132 
2133 		case 'h':
2134 			hostname = optarg;
2135 			break;
2136 
2137 		case 's':
2138 			sname = optarg;
2139 			break;
2140 
2141 		case 'f':
2142 		case 't':
2143 		case 'u':
2144 		case 'y':
2145 		case 'r':
2146 			break;
2147 
2148 		default:
2149 			md_exit(sp, -1);
2150 		}
2151 	}
2152 
2153 	/* Now call the appropriate command function. */
2154 	switch (cmd) {
2155 	case clusterversion:
2156 		printclusterversion();
2157 		break;
2158 
2159 	case clusterdisksin:
2160 		if (printdisksin(sname, ep)) {
2161 			md_exit(sp, -1);
2162 		}
2163 		break;
2164 
2165 	case clusterrelease:
2166 		if (multi_node) {
2167 			usage(sp, gettext(
2168 			    "-C release is not allowed on multi-owner"
2169 			    " disksets"));
2170 		}
2171 		parse_releaseset(argc, argv);
2172 		break;
2173 
2174 	case clustertake:
2175 		if (multi_node) {
2176 			usage(sp, gettext(
2177 			    "-C take is not allowed on multi-owner disksets"));
2178 		}
2179 		parse_takeset(argc, argv);
2180 		break;
2181 
2182 	case clusterproxy:
2183 		if (multi_node) {
2184 			usage(sp, gettext(
2185 			    "-C proxy is not allowed on multi-owner disksets"));
2186 		}
2187 
2188 		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2189 			(void) printf(gettext("Out of memory\n"));
2190 			md_exit(sp, 1);
2191 		}
2192 
2193 		np = new_argv;
2194 		new_argc = 0;
2195 		(void) memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2196 
2197 		for (x = 0; x < argc; x++) {
2198 			if (strcmp(argv[x], "-C") == 0) {
2199 
2200 				/*
2201 				 * Need to skip the '-C proxy' args so
2202 				 * just increase x by one and the work is
2203 				 * done.
2204 				 */
2205 				x++;
2206 			} else {
2207 				*np++ = strdup(argv[x]);
2208 				new_argc++;
2209 			}
2210 		}
2211 
2212 		switch (sdssc_get_primary_host(sname, primary_node,
2213 		    SDSSC_NODE_NAME_LEN)) {
2214 		case SDSSC_ERROR:
2215 			md_exit(sp, 1);
2216 			break;
2217 
2218 		case SDSSC_NO_SERVICE:
2219 			if (hostname != SDSSC_PROXY_PRIMARY) {
2220 				(void) strlcpy(primary_node, hostname,
2221 				    SDSSC_NODE_NAME_LEN);
2222 			}
2223 			break;
2224 		}
2225 
2226 		if (sdssc_cmd_proxy(new_argc, new_argv,
2227 		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2228 		    primary_node, &error) == SDSSC_PROXY_DONE) {
2229 			md_exit(sp, error);
2230 		} else {
2231 			(void) printf(gettext(
2232 			    "Couldn't proxy command\n"));
2233 			md_exit(sp, 1);
2234 		}
2235 		break;
2236 
2237 	case clusterpurge:
2238 		parse_purge(argc, argv);
2239 		break;
2240 
2241 	default:
2242 		break;
2243 	}
2244 
2245 	md_exit(sp, 0);
2246 }
2247 
2248 /*
2249  * parse args and do it
2250  */
2251 int
2252 main(int argc, char *argv[])
2253 {
2254 	enum metaset_cmd	cmd = notspecified;
2255 	md_error_t		status = mdnullerror;
2256 	md_error_t		*ep = &status;
2257 	mdsetname_t		*sp = NULL;
2258 	char			*hostname = SDSSC_PROXY_PRIMARY;
2259 	char			*sname = MD_LOCAL_NAME;
2260 	char			*auto_take_option = NULL;
2261 	char			primary_node[SDSSC_NODE_NAME_LEN];
2262 	int			error, c, stat;
2263 	int			auto_take = FALSE;
2264 	md_set_desc		*sd;
2265 	int			mflag = 0;
2266 	int			multi_node = 0;
2267 	rval_e			sdssc_res;
2268 
2269 	/*
2270 	 * Get the locale set up before calling any other routines
2271 	 * with messages to output.  Just in case we're not in a build
2272 	 * environment, make sure that TEXT_DOMAIN gets set to
2273 	 * something.
2274 	 */
2275 #if !defined(TEXT_DOMAIN)
2276 #define	TEXT_DOMAIN "SYS_TEST"
2277 #endif
2278 	(void) setlocale(LC_ALL, "");
2279 	(void) textdomain(TEXT_DOMAIN);
2280 
2281 	sdssc_res = sdssc_bind_library();
2282 	if (sdssc_res == SDSSC_ERROR) {
2283 		(void) printf(gettext(
2284 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2285 		exit(1);
2286 	}
2287 
2288 	/* initialize */
2289 	if (md_init(argc, argv, 0, 1, ep) != 0) {
2290 		mde_perror(ep, "");
2291 		md_exit(sp, 1);
2292 	}
2293 
2294 	optind = 1;
2295 	opterr = 1;
2296 
2297 	/*
2298 	 * NOTE: The "C" option is strictly for cluster use. it is not
2299 	 * and should not be documented for the customer. - JST
2300 	 */
2301 	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2302 	    != -1) {
2303 		switch (c) {
2304 		case 'M':
2305 			mflag = 1;
2306 			break;
2307 		case 'A':
2308 			auto_take = TRUE;
2309 			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2310 			    strcmp(optarg, "disable") == 0))
2311 				usage(sp, gettext(
2312 				    "-A: enable or disable must be specified"));
2313 			auto_take_option = optarg;
2314 			break;
2315 		case 'a':
2316 			if (cmd != notspecified) {
2317 				usage(sp, gettext(
2318 				    "conflicting options"));
2319 			}
2320 			cmd = add;
2321 			break;
2322 		case 'b':
2323 			if (cmd != notspecified) {
2324 				usage(sp, gettext(
2325 				    "conflicting options"));
2326 			}
2327 			cmd = balance;
2328 			break;
2329 		case 'd':
2330 			if (cmd != notspecified) {
2331 				usage(sp, gettext(
2332 				    "conflicting options"));
2333 			}
2334 			cmd = delete;
2335 			break;
2336 		case 'C':	/* cluster commands */
2337 			if (cmd != notspecified) {
2338 				md_exit(sp, -1);    /* conflicting options */
2339 			}
2340 			cmd = cluster;
2341 			break;
2342 		case 'f':
2343 			break;
2344 		case 'h':
2345 			hostname = optarg;
2346 			break;
2347 		case 'j':
2348 			if (cmd != notspecified) {
2349 				usage(sp, gettext(
2350 				    "conflicting options"));
2351 			}
2352 			cmd = join;
2353 			break;
2354 		case 'l':
2355 			break;
2356 		case 'L':
2357 			break;
2358 		case 'm':
2359 			break;
2360 		case 'o':
2361 			if (cmd != notspecified) {
2362 				usage(sp, gettext(
2363 				    "conflicting options"));
2364 			}
2365 			cmd = isowner;
2366 			break;
2367 		case 'P':
2368 			if (cmd != notspecified) {
2369 				usage(sp, gettext(
2370 				    "conflicting options"));
2371 			}
2372 			cmd = purge;
2373 			break;
2374 		case 'q':
2375 			if (cmd != notspecified) {
2376 				usage(sp, gettext(
2377 				    "conflicting options"));
2378 			}
2379 			cmd = query;
2380 			break;
2381 		case 'r':
2382 			if (cmd != notspecified) {
2383 				usage(sp, gettext(
2384 				    "conflicting options"));
2385 			}
2386 			cmd = release;
2387 			break;
2388 		case 's':
2389 			sname = optarg;
2390 			break;
2391 		case 't':
2392 			if (cmd != notspecified) {
2393 				usage(sp, gettext(
2394 				    "conflicting options"));
2395 			}
2396 			cmd = take;
2397 			break;
2398 		case 'u':
2399 			break;
2400 		case 'w':
2401 			if (cmd != notspecified) {
2402 				usage(sp, gettext(
2403 				    "conflicting options"));
2404 			}
2405 			cmd = withdraw;
2406 			break;
2407 		case 'y':
2408 			break;
2409 		case '?':
2410 			if (optopt == '?')
2411 				usage(sp, NULL);
2412 			/*FALLTHROUGH*/
2413 		default:
2414 			if (cmd == cluster) {    /* cluster is silent */
2415 				md_exit(sp, -1);
2416 			} else {
2417 				usage(sp, gettext(
2418 				    "unknown command"));
2419 			}
2420 		}
2421 	}
2422 
2423 	/* check if suncluster is installed and -A enable specified */
2424 	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2425 	    strcmp(auto_take_option, "enable") == 0) {
2426 		md_eprintf(gettext(
2427 		    "cannot enable auto-take when SunCluster is installed\n"));
2428 		md_exit(sp, 1);
2429 	}
2430 
2431 	/*
2432 	 * At this point we know that if the -A enable option is specified
2433 	 * for an auto-take diskset that SC is not installed on the machine, so
2434 	 * all of the sdssc calls will just be no-ops.
2435 	 */
2436 
2437 	/* list sets */
2438 	if (cmd == notspecified && auto_take == FALSE) {
2439 		parse_printset(argc, argv);
2440 		/*NOTREACHED*/
2441 	}
2442 
2443 	if (meta_check_root(ep) != 0) {
2444 		mde_perror(ep, "");
2445 		md_exit(sp, 1);
2446 	}
2447 
2448 	/* snarf MDDB */
2449 	if (meta_setup_db_locations(ep) != 0) {
2450 		mde_perror(ep, "");
2451 		md_exit(sp, 1);
2452 	}
2453 
2454 	/*
2455 	 * If sname is a diskset - check for multi_node.
2456 	 * It is possible for sname to not exist.
2457 	 */
2458 	if (strcmp(sname, MD_LOCAL_NAME)) {
2459 		if ((sp = metasetname(sname, ep)) != NULL) {
2460 			/* Set exists - check for MN diskset */
2461 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2462 				mde_perror(ep, "");
2463 				md_exit(sp, 1);
2464 			}
2465 			if (MD_MNSET_DESC(sd)) {
2466 				/*
2467 				 * If a MN diskset always set multi_node
2468 				 * regardless of whether the -M option was
2469 				 * used or not (mflag).
2470 				 */
2471 				multi_node = 1;
2472 			} else {
2473 				/*
2474 				 * If a traditional diskset, mflag must
2475 				 * not be set.
2476 				 */
2477 				if (mflag) {
2478 					usage(sp, gettext(
2479 					    "-M option only allowed "
2480 					    "on multi-owner diskset"));
2481 				}
2482 			}
2483 		} else {
2484 			/*
2485 			 * Set name does not exist, set multi_node
2486 			 * based on -M option.
2487 			 */
2488 			if (mflag) {
2489 				multi_node = 1;
2490 			}
2491 		}
2492 	}
2493 
2494 	if (auto_take && multi_node) {
2495 		/* Can't mix multinode and auto-take on a diskset */
2496 		usage(sp,
2497 		    gettext("-A option not allowed on multi-owner diskset"));
2498 	}
2499 
2500 	/*
2501 	 * MN disksets don't use DCS clustering services, so
2502 	 * do not get primary_node for MN diskset since no command
2503 	 * proxying is done to Primary cluster node.  Do not proxy
2504 	 * MN diskset commands of join and withdraw when issued without
2505 	 * a valid setname.
2506 	 * For traditional disksets: proxy all commands except a take
2507 	 * and release.  Use first host listed as the host to send the
2508 	 * command to if there isn't already a primary
2509 	 */
2510 	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2511 	    (cmd != take) && (cmd != release) &&
2512 	    (cmd != cluster) && (cmd != join) &&
2513 	    (cmd != withdraw) && (cmd != purge)) {
2514 		stat = sdssc_get_primary_host(sname, primary_node,
2515 		    SDSSC_NODE_NAME_LEN);
2516 		switch (stat) {
2517 			case SDSSC_ERROR:
2518 				return (0);
2519 
2520 			case SDSSC_NO_SERVICE:
2521 				if (hostname != SDSSC_PROXY_PRIMARY) {
2522 					(void) strlcpy(primary_node, hostname,
2523 					    SDSSC_NODE_NAME_LEN);
2524 				} else {
2525 					(void) memset(primary_node, '\0',
2526 					    SDSSC_NODE_NAME_LEN);
2527 				}
2528 				break;
2529 		}
2530 
2531 		/*
2532 		 * We've got a complicated decision here regarding
2533 		 * the hostname. If we didn't get a primary host
2534 		 * and a host name wasn't supplied on the command line
2535 		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2536 		 * use what's been found.
2537 		 */
2538 		if (sdssc_cmd_proxy(argc, argv,
2539 		    primary_node[0] == '\0' ?
2540 		    SDSSC_PROXY_PRIMARY : primary_node,
2541 		    &error) == SDSSC_PROXY_DONE) {
2542 			exit(error);
2543 		}
2544 	}
2545 
2546 	/* cluster-specific commands */
2547 	if (cmd == cluster) {
2548 		parse_cluster(argc, argv, multi_node);
2549 		/*NOTREACHED*/
2550 	}
2551 
2552 	/* join MultiNode diskset */
2553 	if (cmd == join) {
2554 		/*
2555 		 * If diskset specified, verify that it exists
2556 		 * and is a multinode diskset.
2557 		 */
2558 		if (strcmp(sname, MD_LOCAL_NAME)) {
2559 			if ((sp = metasetname(sname, ep)) == NULL) {
2560 				mde_perror(ep, "");
2561 				md_exit(sp, 1);
2562 			}
2563 
2564 			if (!multi_node) {
2565 				usage(sp, gettext(
2566 				    "-j option only allowed on "
2567 				    "multi-owner diskset"));
2568 			}
2569 		}
2570 		/*
2571 		 * Start mddoors daemon here.
2572 		 * mddoors itself takes care there will be only one
2573 		 * instance running, so starting it twice won't hurt
2574 		 */
2575 		(void) pclose(popen("/usr/lib/lvm/mddoors", "w"));
2576 		parse_joinset(argc, argv);
2577 		/*NOTREACHED*/
2578 	}
2579 
2580 	/* withdraw from MultiNode diskset */
2581 	if (cmd == withdraw) {
2582 		/*
2583 		 * If diskset specified, verify that it exists
2584 		 * and is a multinode diskset.
2585 		 */
2586 		if (strcmp(sname, MD_LOCAL_NAME)) {
2587 			if ((sp = metasetname(sname, ep)) == NULL) {
2588 				mde_perror(ep, "");
2589 				md_exit(sp, 1);
2590 			}
2591 
2592 			if (!multi_node) {
2593 				usage(sp, gettext(
2594 				    "-w option only allowed on "
2595 				    "multi-owner diskset"));
2596 			}
2597 		}
2598 		parse_withdrawset(argc, argv);
2599 		/*NOTREACHED*/
2600 	}
2601 
2602 	/* must have set for everything else */
2603 	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2604 		usage(sp, gettext("setname must be specified"));
2605 
2606 	/* add hosts or drives */
2607 	if (cmd == add) {
2608 		/*
2609 		 * In the multi node case start mddoors daemon.
2610 		 * mddoors itself takes care there will be
2611 		 * only one instance running, so starting it twice won't hurt
2612 		 */
2613 		if (multi_node) {
2614 			(void) pclose(popen("/usr/lib/lvm/mddoors", "w"));
2615 		}
2616 
2617 		parse_add(argc, argv);
2618 		/*NOTREACHED*/
2619 	}
2620 
2621 	/* re-balance the replicas */
2622 	if (cmd == balance) {
2623 		parse_balance(argc, argv);
2624 		/*NOTREACHED*/
2625 	}
2626 
2627 	/* delete hosts or drives */
2628 	if (cmd == delete) {
2629 		parse_del(argc, argv);
2630 		/*NOTREACHED*/
2631 	}
2632 
2633 	/* check ownership */
2634 	if (cmd == isowner) {
2635 		parse_isowner(argc, argv);
2636 		/*NOTREACHED*/
2637 	}
2638 
2639 	/* purge the diskset */
2640 	if (cmd == purge) {
2641 		parse_purge(argc, argv);
2642 		/*NOTREACHED*/
2643 	}
2644 
2645 	/* query for data marks */
2646 	if (cmd == query) {
2647 		parse_query(argc, argv);
2648 		/*NOTREACHED*/
2649 	}
2650 
2651 	/* release ownership */
2652 	if (cmd == release) {
2653 		if (multi_node) {
2654 			/* Can't release multinode diskset */
2655 			usage(sp, gettext(
2656 			    "-r option not allowed on multi-owner diskset"));
2657 		} else {
2658 			parse_releaseset(argc, argv);
2659 			/*NOTREACHED*/
2660 		}
2661 	}
2662 
2663 	/* take ownership */
2664 	if (cmd == take) {
2665 		if (multi_node) {
2666 			/* Can't take multinode diskset */
2667 			usage(sp, gettext(
2668 			    "-t option not allowed on multi-owner diskset"));
2669 		} else {
2670 			parse_takeset(argc, argv);
2671 			/*NOTREACHED*/
2672 		}
2673 	}
2674 
2675 	/* take ownership of auto-take sets */
2676 	if (auto_take) {
2677 		parse_autotake(argc, argv);
2678 		/*NOTREACHED*/
2679 	}
2680 
2681 	/*NOTREACHED*/
2682 	return (0);
2683 }
2684