xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c (revision 1687f56b4e67e718c63ad22d6206a0d6bd51820d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Just in case we're not in a build environment, make sure that
28  * TEXT_DOMAIN gets set to something.
29  */
30 #if !defined(TEXT_DOMAIN)
31 #define	TEXT_DOMAIN "SYS_TEST"
32 #endif
33 
34 /*
35  * interface between user land and the set records
36  */
37 
38 #include <meta.h>
39 #include <metad.h>
40 #include <sdssc.h>
41 #include <syslog.h>
42 #include <sys/cladm.h>
43 #include "meta_set_prv.h"
44 
45 #include <sys/sysevent/eventdefs.h>
46 #include <sys/sysevent/svm.h>
47 
/*
 * Set record cache state.
 *
 * setsnarfdone tracks how far the cache has been populated:
 *	0 - nothing snarfed (initial value; also restored when set_snarf()
 *	    fails or sr_cache_del() empties the cache)
 *	1 - set_snarf() is in progress
 *	2 - set_snarf() completed
 *	3 - sr_validate() has validated the snarfed records
 */
48 static	md_set_record	*setrecords = NULL; /* head of cache linked list */
49 static	int		setsnarfdone = 0;
50 
/*
 * Singly linked list of side/key pairs; sr_sidenms() uses it to collect
 * the pairs that are still referenced by cached drive records.
 */
51 typedef struct key_lst_t {
52 	side_t			kl_side;
53 	mdkey_t			kl_key;
54 	struct key_lst_t	*kl_next;
55 } key_lst_t;
56 
/*
 * Singly linked list of database record ids, maintained by url_addl(),
 * url_findl() and url_freel().
 */
57 typedef struct ur_recid_lst {
58 	mddb_recid_t		url_recid;
59 	struct	ur_recid_lst	*url_nx;
60 } ur_recid_lst_t;
61 
/*
 * url_used collects the record ids seen in use during set_snarf();
 * url_tode collects the record ids set_snarf() will delete.
 */
62 static ur_recid_lst_t		*url_used = NULL;
63 static ur_recid_lst_t		*url_tode = NULL;
64 
65 static void
url_addl(ur_recid_lst_t ** urlpp,mddb_recid_t recid)66 url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid)
67 {
68 	/* Run to the end of the list */
69 	for (/* void */; (*urlpp != NULL); urlpp = &(*urlpp)->url_nx)
70 		if ((*urlpp)->url_recid == recid)
71 			return;
72 
73 	/* Add the new member */
74 	*urlpp = Zalloc(sizeof (**urlpp));
75 	if (*urlpp == NULL)
76 		return;
77 
78 	(*urlpp)->url_recid = recid;
79 }
80 
81 static int
url_findl(ur_recid_lst_t * urlp,mddb_recid_t recid)82 url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid)
83 {
84 	while (urlp != NULL) {
85 		if (urlp->url_recid == recid)
86 			return (1);
87 		urlp = urlp->url_nx;
88 	}
89 	return (0);
90 }
91 
92 static void
url_freel(ur_recid_lst_t ** urlpp)93 url_freel(ur_recid_lst_t **urlpp)
94 {
95 	ur_recid_lst_t	*urlp;
96 	ur_recid_lst_t	*turlp;
97 
98 	for (turlp = *urlpp; turlp != NULL; turlp = urlp) {
99 		urlp = turlp->url_nx;
100 		Free(turlp);
101 	}
102 	*urlpp = (ur_recid_lst_t *)NULL;
103 }
104 
105 static int
ckncvt_set_record(mddb_userreq_t * reqp,md_error_t * ep)106 ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep)
107 {
108 	mddb_userreq_t	req;
109 	md_set_record	*sr;
110 	int		recs[3];
111 
112 	if (reqp->ur_size == sizeof (*sr))
113 		return (0);
114 
115 	if (! md_in_daemon) {
116 		if (reqp->ur_size >= sizeof (*sr))
117 			return (0);
118 
119 		reqp->ur_data = (uintptr_t)Realloc((void *)(uintptr_t)
120 		    reqp->ur_data, sizeof (*sr));
121 		(void) memset(
122 		    ((char *)(uintptr_t)reqp->ur_data) + reqp->ur_size,
123 		    '\0', sizeof (*sr) - reqp->ur_size);
124 		reqp->ur_size = sizeof (*sr);
125 		return (0);
126 	}
127 
128 	/*
129 	 * If here, then the daemon is calling, and so the automatic
130 	 * conversion will be performed.
131 	 */
132 
133 	/* shorthand */
134 	req = *reqp;			/* structure assignment */
135 	sr = (md_set_record *)(uintptr_t)req.ur_data;
136 
137 	if (sr->sr_flags & MD_SR_CVT)
138 		return (0);
139 
140 	/* Leave multi-node set records alone */
141 	if (MD_MNSET_REC(sr)) {
142 		return (0);
143 	}
144 
145 	/* Mark the old record as converted */
146 	sr->sr_flags |= MD_SR_CVT;
147 
148 	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
149 
150 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
151 		return (mdstealerror(ep, &req.ur_mde));
152 
153 	/* Create space for the new record */
154 	METAD_SETUP_SR(MD_DB_CREATE, 0);
155 	req.ur_size = sizeof (*sr);
156 
157 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
158 		return (mdstealerror(ep, &req.ur_mde));
159 
160 	/* Allocate the new record */
161 	sr = Zalloc(sizeof (*sr));
162 
163 	/* copy all the data from the record being converted */
164 	(void) memmove(sr, (void *)(uintptr_t)reqp->ur_data, reqp->ur_size);
165 	sr->sr_flags &= ~MD_SR_CVT;
166 
167 	/* adjust the selfid to point to the new record */
168 	sr->sr_selfid = req.ur_recid;
169 
170 	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
171 	req.ur_size = sizeof (*sr);
172 	req.ur_data = (uintptr_t)sr;
173 
174 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
175 		Free(sr);
176 		return (mdstealerror(ep, &req.ur_mde));
177 	}
178 
179 	/* Commit the old and the new */
180 	recs[0] = ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid;
181 	recs[1] = sr->sr_selfid;
182 	recs[2] = 0;
183 
184 	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
185 	req.ur_size = sizeof (recs);
186 	req.ur_data = (uintptr_t)recs;
187 
188 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
189 		Free(sr);
190 		return (mdstealerror(ep, &req.ur_mde));
191 	}
192 
193 	/* Add the the old record to the list of records to delete */
194 	url_addl(&url_tode,
195 	    ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid);
196 
197 	/* Free the old records space */
198 	Free((void *)(uintptr_t)reqp->ur_data);
199 
200 	/* Adjust the reqp structure to point to the new record and size */
201 	reqp->ur_recid = sr->sr_selfid;
202 	reqp->ur_size = sizeof (*sr);
203 	reqp->ur_data = (uintptr_t)sr;
204 
205 	return (0);
206 }
207 
208 mddb_userreq_t *
get_db_rec(md_ur_get_cmd_t cmd,set_t setno,mddb_type_t type,uint_t type2,mddb_recid_t * idp,md_error_t * ep)209 get_db_rec(
210 	md_ur_get_cmd_t	cmd,
211 	set_t		setno,
212 	mddb_type_t	type,
213 	uint_t		type2,
214 	mddb_recid_t	*idp,
215 	md_error_t	*ep
216 )
217 {
218 	mddb_userreq_t	*reqp = Zalloc(sizeof (*reqp));
219 	mdsetname_t	*sp;
220 	md_set_desc	*sd;
221 	int		ureq;
222 
223 	if ((sp = metasetnosetname(setno, ep)) == NULL) {
224 		Free(reqp);
225 		return (NULL);
226 	}
227 
228 	if (metaislocalset(sp)) {
229 		ureq = MD_DB_USERREQ;
230 	} else {
231 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
232 			Free(reqp);
233 			return (NULL);
234 		}
235 		ureq = MD_MNSET_DESC(sd) ? MD_MN_DB_USERREQ : MD_DB_USERREQ;
236 	}
237 
238 	reqp->ur_setno = setno;
239 	reqp->ur_type = type;
240 	reqp->ur_type2 = type2;
241 
242 	switch (cmd) {
243 	case MD_UR_GET_NEXT:
244 		reqp->ur_cmd = MD_DB_GETNEXTREC;
245 		reqp->ur_recid = *idp;
246 		if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
247 			(void) mdstealerror(ep, &reqp->ur_mde);
248 			Free(reqp);
249 			return (NULL);
250 		}
251 		*idp = reqp->ur_recid;
252 		break;
253 	case MD_UR_GET_WKEY:
254 		reqp->ur_recid = *idp;
255 		break;
256 	}
257 
258 	if (*idp <= 0) {
259 		Free(reqp);
260 		return (NULL);
261 	}
262 
263 	reqp->ur_cmd = MD_DB_GETSIZE;
264 	if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
265 		(void) mdstealerror(ep, &reqp->ur_mde);
266 		Free(reqp);
267 
268 		*idp = 0;
269 		return (NULL);
270 	}
271 
272 	reqp->ur_cmd = MD_DB_GETDATA;
273 	reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size);
274 	if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
275 		(void) mdstealerror(ep, &reqp->ur_mde);
276 		Free((void *)(uintptr_t)reqp->ur_data);
277 		Free(reqp);
278 		*idp = 0;
279 		return (NULL);
280 	}
281 
282 	switch (reqp->ur_type) {
283 	case MDDB_USER:
284 		switch (reqp->ur_type2) {
285 		case MDDB_UR_SR:
286 			if (ckncvt_set_record(reqp, ep)) {
287 				Free((void *)(uintptr_t)reqp->ur_data);
288 				Free(reqp);
289 				return (NULL);
290 			}
291 			break;
292 		}
293 		break;
294 	}
295 
296 	return (reqp);
297 }
298 
299 void *
get_ur_rec(set_t setno,md_ur_get_cmd_t cmd,uint_t type2,mddb_recid_t * idp,md_error_t * ep)300 get_ur_rec(
301 	set_t		setno,
302 	md_ur_get_cmd_t	cmd,
303 	uint_t		type2,
304 	mddb_recid_t	*idp,
305 	md_error_t	*ep
306 )
307 {
308 	mddb_userreq_t	*reqp = NULL;
309 	void		*ret_val;
310 
311 	assert(idp != NULL);
312 
313 	reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep);
314 	if (reqp == NULL)
315 		return (NULL);
316 
317 	ret_val = (void *)(uintptr_t)reqp->ur_data;
318 	Free(reqp);
319 	return (ret_val);
320 }
321 
322 /*
323  * Called by rpc.metad on startup of disksets to cleanup
324  * the host entries associated with a diskset.  This is needed if
325  * a node failed or the metaset command was killed during the addition
326  * of a node to a diskset.
327  *
328  * This is called for all traditional disksets.
329  * This is only called for MNdisksets when in there is only one node
330  * in all of the MN disksets and this node is not running SunCluster.
331  * (Otherwise, the cleanup of the host entries is handled by a
332  * reconfig cycle that the SunCluster software calls).
333  */
334 static int
sr_hosts(md_set_record * sr)335 sr_hosts(md_set_record *sr)
336 {
337 	int		i;
338 	int		nid = 0;
339 	int		self_in_set = FALSE;
340 	md_error_t	xep = mdnullerror;
341 	md_mnnode_record	*nr;
342 	md_mnset_record		*mnsr;
343 
344 	if (MD_MNSET_REC(sr)) {
345 		mnsr = (struct md_mnset_record *)sr;
346 		nr = mnsr->sr_nodechain;
347 		/*
348 		 * Already guaranteed to be only 1 node in set which
349 		 * is mynode (done in sr_validate).
350 		 * Now, check if node is in the OK state.  If not in
351 		 * the OK state, leave self_in_set FALSE so that
352 		 * set will be removed.
353 		 */
354 		if (nr->nr_flags & MD_MN_NODE_OK)
355 			self_in_set = TRUE;
356 	} else {
357 		for (i = 0; i < MD_MAXSIDES; i++) {
358 			/* Skip empty slots */
359 			if (sr->sr_nodes[i][0] == '\0')
360 				continue;
361 
362 			/* Make sure we are in the set and skip this node */
363 			if (strcmp(sr->sr_nodes[i], mynode()) == 0) {
364 				self_in_set = TRUE;
365 				break;
366 			}
367 		}
368 	}
369 
370 	if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr)))) {
371 		/*
372 		 * Under some circumstances (/etc/cluster/nodeid file is
373 		 * missing) it is possible for the call to _cladm() to
374 		 * return 0 and a nid of 0. In this instance do not remove
375 		 * the set as it is Sun Cluster error that needs to be fixed.
376 		 */
377 		if (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0 && nid > 0) {
378 
379 			/*
380 			 * See if we've got a node which has been booted in
381 			 * non-cluster mode. If true the nodeid will match
382 			 * one of the sr_nodes values because the conversion
383 			 * from nodeid to hostname failed to occur.
384 			 */
385 			for (i = 0; i < MD_MAXSIDES; i++) {
386 				if (sr->sr_nodes[i][0] == 0)
387 					continue;
388 				if (atoi(sr->sr_nodes[i]) == nid)
389 					self_in_set = TRUE;
390 			}
391 
392 			/* If we aren't in the set, delete the set */
393 			if (self_in_set == FALSE) {
394 				syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
395 				    "Removing set %s from database\n"),
396 				    sr->sr_setname);
397 				s_delset(sr->sr_setname, &xep);
398 				if (! mdisok(&xep))
399 					mdclrerror(&xep);
400 				return (1);
401 			}
402 		} else {
403 			/*
404 			 * Send a message to syslog and return without
405 			 * deleting any sets
406 			 */
407 			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
408 			    "Call to _cladm failed for set %s nodeid %d\n"),
409 			    sr->sr_setname, nid);
410 			return (1);
411 		}
412 	}
413 	return (0);
414 }
415 
416 void
sr_del_drv(md_set_record * sr,mddb_recid_t recid)417 sr_del_drv(md_set_record *sr, mddb_recid_t recid)
418 {
419 	mddb_userreq_t		req;
420 	md_error_t		xep = mdnullerror;
421 
422 	if (!s_ownset(sr->sr_setno, &xep)) {
423 		if (! mdisok(&xep))
424 			mdclrerror(&xep);
425 		goto skip;
426 	}
427 
428 	/* delete the replicas? */
429 	/* release ownership of the drive? */
430 	/* NOTE: We may not have a name, so both of the above are ugly! */
431 
432 skip:
433 	(void) memset(&req, 0, sizeof (req));
434 	METAD_SETUP_DR(MD_DB_DELETE, recid)
435 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
436 		mdclrerror(&req.ur_mde);
437 
438 	dr_cache_del(sr, recid);
439 }
440 
441 static void
sr_drvs(md_set_record * sr)442 sr_drvs(md_set_record *sr)
443 {
444 	md_drive_record		*dr;
445 	int			i;
446 	int			modified = 0;
447 	int			sidesok;
448 	mdnm_params_t		nm;
449 	static	char		device_name[MAXPATHLEN];
450 	md_error_t		xep = mdnullerror;
451 	md_mnnode_record	*nr;
452 	md_mnset_record		*mnsr;
453 
454 	for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
455 		/* If we were mid-add, cleanup */
456 		if ((dr->dr_flags & MD_DR_ADD)) {
457 			sr_del_drv(sr, dr->dr_selfid);
458 			modified++;
459 			continue;
460 		}
461 
462 		sidesok = TRUE;
463 		if (MD_MNSET_REC(sr)) {
464 			mnsr = (md_mnset_record *)sr;
465 			nr = mnsr->sr_nodechain;
466 			/*
467 			 * MultiNode disksets only have entries for
468 			 * their side in the local set.  Verify
469 			 * that drive has a name associated with
470 			 * this node's side.
471 			 */
472 			while (nr) {
473 				/* Find my node */
474 				if (strcmp(mynode(), nr->nr_nodename) != 0) {
475 					nr = nr->nr_next;
476 					continue;
477 				}
478 
479 				(void) memset(&nm, '\0', sizeof (nm));
480 				nm.setno = MD_LOCAL_SET;
481 				nm.side = nr->nr_nodeid;
482 				nm.key = dr->dr_key;
483 				nm.devname = (uintptr_t)device_name;
484 
485 				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
486 				    NULL) != 0) {
487 					if (! mdissyserror(&nm.mde, ENOENT)) {
488 						mdclrerror(&nm.mde);
489 						return;
490 					}
491 				}
492 
493 				/*
494 				 * If entry is found for this node, then
495 				 * break out of loop walking through
496 				 * node list.  For a multi-node diskset,
497 				 * there should only be an entry for
498 				 * this node.
499 				 */
500 				if (nm.key != MD_KEYWILD &&
501 				    ! mdissyserror(&nm.mde, ENOENT)) {
502 					break;
503 				}
504 
505 				/*
506 				 * If entry is not found for this node,
507 				 * then delete the drive.  No need to
508 				 * continue through the node loop since
509 				 * our node has already been found.
510 				 */
511 				sidesok = FALSE;
512 				mdclrerror(&nm.mde);
513 
514 				/* If we are missing a sidename, cleanup */
515 				sr_del_drv(sr, dr->dr_selfid);
516 				modified++;
517 
518 				break;
519 			}
520 		} else  {
521 			for (i = 0; i < MD_MAXSIDES; i++) {
522 				/* Skip empty slots */
523 				if (sr->sr_nodes[i][0] == '\0')
524 					continue;
525 
526 				(void) memset(&nm, '\0', sizeof (nm));
527 				nm.setno = MD_LOCAL_SET;
528 				nm.side = i + SKEW;
529 				nm.key = dr->dr_key;
530 				nm.devname = (uintptr_t)device_name;
531 
532 				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
533 				    NULL) != 0) {
534 					if (! mdissyserror(&nm.mde, ENOENT)) {
535 						mdclrerror(&nm.mde);
536 						return;
537 					}
538 				}
539 
540 				if (nm.key != MD_KEYWILD &&
541 				    ! mdissyserror(&nm.mde, ENOENT))
542 					continue;
543 
544 				sidesok = FALSE;
545 				mdclrerror(&nm.mde);
546 
547 				/* If we are missing a sidename, cleanup */
548 				sr_del_drv(sr, dr->dr_selfid);
549 				modified++;
550 
551 				break;
552 			}
553 		}
554 
555 		if (sidesok == FALSE)
556 			continue;
557 
558 		/*
559 		 * If we got this far, the drive record is either in the OK
560 		 * or DEL state, if it is in the DEL state and the sidenames
561 		 * all checked out, then we will make it OK.
562 		 */
563 		if ((dr->dr_flags & MD_DR_OK))
564 			continue;
565 
566 		dr->dr_flags = MD_DR_OK;
567 
568 		modified++;
569 	}
570 
571 	if (modified) {
572 		commitset(sr, FALSE, &xep);
573 		if (! mdisok(&xep))
574 			mdclrerror(&xep);
575 	}
576 }
577 
578 static void
add_key_to_lst(key_lst_t ** klpp,side_t side,mdkey_t key)579 add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key)
580 {
581 	key_lst_t	*klp;
582 
583 	assert(klpp != NULL);
584 
585 	for (/* void */; *klpp != NULL; klpp = &(*klpp)->kl_next)
586 		/* void */;
587 
588 	/* allocate new list element */
589 	klp = *klpp = Zalloc(sizeof (*klp));
590 
591 	klp->kl_side = side;
592 	klp->kl_key  = key;
593 }
594 
#ifdef DUMPKEYLST
/*
 * Debug aid, built only when DUMPKEYLST is defined: print tag followed
 * by every side/key pair of the list klp through md_eprintf().
 */
static void
pr_key_lst(char *tag, key_lst_t *klp)
{
	key_lst_t	*cur = klp;

	md_eprintf("Tag=%s\n", tag);
	while (cur != NULL) {
		md_eprintf("side=%d, key=%lu\n", cur->kl_side, cur->kl_key);
		cur = cur->kl_next;
	}
}
#endif	/* DUMPKEYLST */
606 
607 static int
key_in_key_lst(key_lst_t * klp,side_t side,mdkey_t key)608 key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key)
609 {
610 	key_lst_t	*tklp;
611 
612 	for (tklp = klp; tklp != NULL; tklp = tklp->kl_next)
613 		if (tklp->kl_side == side && tklp->kl_key == key)
614 			return (1);
615 
616 	return (0);
617 }
618 
619 static void
destroy_key_lst(key_lst_t ** klpp)620 destroy_key_lst(key_lst_t **klpp)
621 {
622 	key_lst_t	*tklp, *klp;
623 
624 	assert(klpp != NULL);
625 
626 	tklp = klp = *klpp;
627 	while (klp != NULL) {
628 		tklp = klp;
629 		klp = klp->kl_next;
630 		Free(tklp);
631 	}
632 	*klpp = NULL;
633 }
634 
635 static void
sr_sidenms(void)636 sr_sidenms(void)
637 {
638 	md_drive_record		*dr;
639 	md_set_record		*sr;
640 	key_lst_t		*use = NULL;
641 	mdnm_params_t		nm;
642 	int			i;
643 	md_mnset_record		*mnsr;
644 	md_mnnode_record	*nr;
645 	side_t			myside = 0;
646 
647 	/*
648 	 * We now go through the list of set and drive records collecting
649 	 * the key/side pairs that are being used.
650 	 */
651 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
652 		/*
653 		 * To handle the multi-node diskset case, get the sideno
654 		 * associated with this node.  This sideno will be the
655 		 * same across all multi-node disksets.
656 		 */
657 		if ((myside == 0) && (MD_MNSET_REC(sr))) {
658 			mnsr = (struct md_mnset_record *)sr;
659 			nr = mnsr->sr_nodechain;
660 			while (nr) {
661 				if (strcmp(mynode(), nr->nr_nodename) == 0) {
662 					myside = nr->nr_nodeid;
663 					break;
664 				}
665 				nr = nr->nr_next;
666 			}
667 			/*
668 			 * If this node is not in this MNset -
669 			 * then skip this set.
670 			 */
671 			if (!nr) {
672 				continue;
673 			}
674 		}
675 
676 		for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
677 			if (MD_MNSET_REC(sr)) {
678 				/*
679 				 * There are no non-local sidenames in the
680 				 * local set for a multi-node diskset.
681 				 */
682 				add_key_to_lst(&use, myside, dr->dr_key);
683 			} else {
684 				for (i = 0; i < MD_MAXSIDES; i++) {
685 					/* Skip empty slots */
686 					if (sr->sr_nodes[i][0] == '\0')
687 						continue;
688 
689 					add_key_to_lst(&use, i + SKEW,
690 					    dr->dr_key);
691 				}
692 			}
693 		}
694 	}
695 
696 #ifdef DUMPKEYLST
697 	pr_key_lst("use", use);
698 #endif	/* DUMPKEYLST */
699 
700 	/*
701 	 * We take the list above and get all non-local sidenames, checking
702 	 * each to see if they are in use, if they are not used, we delete them.
703 	 * Do the check for myside to cover multinode disksets.
704 	 * Then do the check for MD_MAXSIDES to cover non-multinode disksets.
705 	 * If any multi-node disksets were present, myside would be non-zero.
706 	 * myside is the same for all multi-node disksets for this node.
707 	 */
708 	if (myside) {
709 		(void) memset(&nm, '\0', sizeof (nm));
710 		nm.setno = MD_LOCAL_SET;
711 		nm.side = myside;
712 		nm.key = MD_KEYWILD;
713 
714 		/*CONSTCOND*/
715 		while (1) {
716 			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
717 			    NULL) != 0) {
718 				mdclrerror(&nm.mde);
719 				break;
720 			}
721 
722 			if (nm.key == MD_KEYWILD)
723 				break;
724 
725 			if (! key_in_key_lst(use, nm.side, nm.key)) {
726 				if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
727 				    NULL) != 0) {
728 					mdclrerror(&nm.mde);
729 					continue;
730 				}
731 			}
732 		}
733 	}
734 	/* Now handle the non-multinode disksets */
735 	for (i = 0; i < MD_MAXSIDES; i++) {
736 		(void) memset(&nm, '\0', sizeof (nm));
737 		nm.setno = MD_LOCAL_SET;
738 		nm.side = i + SKEW;
739 		nm.key = MD_KEYWILD;
740 
741 		/*CONSTCOND*/
742 		while (1) {
743 			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
744 			    NULL) != 0) {
745 				mdclrerror(&nm.mde);
746 				break;
747 			}
748 
749 			if (nm.key == MD_KEYWILD)
750 				break;
751 
752 			if (! key_in_key_lst(use, nm.side, nm.key)) {
753 				if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
754 				    NULL) != 0) {
755 					mdclrerror(&nm.mde);
756 					continue;
757 				}
758 			}
759 		}
760 	}
761 
762 	/* Cleanup */
763 	destroy_key_lst(&use);
764 }
765 
766 void
sr_validate(void)767 sr_validate(void)
768 {
769 	md_set_record			*sr;
770 	md_error_t			xep = mdnullerror;
771 	int				mnset_single_node;
772 	md_mnnode_record		*nr;
773 	md_mnset_record			*mnsr;
774 
775 	assert(setsnarfdone != 0);
776 
777 	/* We have validated the records already */
778 	if (setsnarfdone == 3)
779 		return;
780 
781 	/*
782 	 * Check if we are in a single node non-SC3.x environmemnt
783 	 */
784 	mnset_single_node = meta_mn_singlenode();
785 	/*
786 	 * If a possible single_node situation, verify that all
787 	 * MN disksets have only one node (which is mynode()).
788 	 */
789 	if (mnset_single_node) {
790 		for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
791 			if (MD_MNSET_REC(sr)) {
792 				mnsr = (struct md_mnset_record *)sr;
793 				nr = mnsr->sr_nodechain;
794 				/*
795 				 * If next pointer is non-null (more than
796 				 * one node in list) or if the single node
797 				 * isn't my node - reset single node flag.
798 				 */
799 				if ((nr->nr_next) ||
800 				    (strcmp(nr->nr_nodename, mynode()) != 0)) {
801 					mnset_single_node = 0;
802 					break;
803 				}
804 			}
805 		}
806 	}
807 
808 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
809 		/*
810 		 * If a MN diskset and not in the single node
811 		 * situation, then don't validate the MN set.
812 		 * This is done during a reconfig cycle since all
813 		 * nodes must take the same action.
814 		 */
815 		if (MD_MNSET_REC(sr) && (mnset_single_node == 0))
816 			continue;
817 
818 		/* Since we do "partial" snarf's, we only check new entries */
819 		if (! (sr->sr_flags & MD_SR_CHECK))
820 			continue;
821 
822 		/* If we were mid-add, cleanup */
823 		if ((sr->sr_flags & MD_SR_ADD)) {
824 			s_delset(sr->sr_setname, &xep);
825 			if (! mdisok(&xep))
826 				mdclrerror(&xep);
827 			continue;
828 		}
829 
830 		/* Make sure we are in the set. */
831 		if (sr_hosts(sr))
832 			continue;
833 
834 		/* Check has been done, clear the flag */
835 		if ((sr->sr_flags & MD_SR_CHECK))
836 			sr->sr_flags &= ~MD_SR_CHECK;
837 
838 		/*
839 		 * If we got here, we are in the set, make sure the flags make
840 		 * sense.
841 		 */
842 		if (! (sr->sr_flags & MD_SR_OK)) {
843 			sr->sr_flags &= ~MD_SR_STATE_FLAGS;
844 			sr->sr_flags |= MD_SR_OK;
845 			commitset(sr, FALSE, &xep);
846 			if (! mdisok(&xep))
847 				mdclrerror(&xep);
848 		}
849 
850 		/* Make sure all the drives are in a stable state. */
851 		sr_drvs(sr);
852 	}
853 
854 	/* Cleanup any stray sidenames */
855 	sr_sidenms();
856 
857 	setsnarfdone = 3;
858 }
859 
860 static md_set_record *
sr_in_cache(mddb_recid_t recid)861 sr_in_cache(mddb_recid_t recid)
862 {
863 	md_set_record *tsr;
864 
865 	for (tsr = setrecords; tsr != NULL; tsr = tsr->sr_next)
866 		if (tsr->sr_selfid == recid)
867 			return (tsr);
868 	return ((md_set_record *)NULL);
869 }
870 
871 int
set_snarf(md_error_t * ep)872 set_snarf(md_error_t *ep)
873 {
874 	md_set_record			*sr;
875 	md_mnset_record			*mnsr;
876 	md_set_record			*tsr;
877 	md_drive_record			*dr;
878 	mddb_userreq_t			*reqp;
879 	ur_recid_lst_t			*urlp;
880 	mddb_recid_t			id;
881 	mddb_recid_t			*p;
882 	md_error_t			xep = mdnullerror;
883 	md_mnnode_record		*nr;
884 	mddb_set_node_params_t		snp;
885 	int				nodecnt;
886 	mndiskset_membershiplist_t	 *nl, *nl2;
887 
888 	/* We have done the snarf call */
889 	if (setsnarfdone != 0)
890 		return (0);
891 
892 	if (meta_setup_db_locations(ep) != 0) {
893 		if (! mdismddberror(ep, MDE_DB_STALE))
894 			return (-1);
895 		mdclrerror(ep);
896 	}
897 
898 	/*
899 	 * Get membershiplist from API routine.
900 	 * If there's an error, just use a NULL
901 	 * nodelist.
902 	 */
903 	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
904 		nodecnt = 0;  /* no nodes are alive */
905 		nl = NULL;
906 		mdclrerror(ep);
907 	}
908 
909 	/* Let sr_cache_add and dr_cache_add know we are doing the snarf */
910 	setsnarfdone = 1;
911 
912 	/* Go get the set records */
913 	id = 0;
914 	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR,
915 	    &id, ep)) != NULL) {
916 		sr->sr_next = NULL;
917 		sr->sr_drivechain = NULL;
918 
919 		/*
920 		 * Cluster nodename support
921 		 * Convert nodeid -> nodename
922 		 * Don't do this for MN disksets since we've already stored
923 		 * both the nodeid and name.
924 		 */
925 		if (!(MD_MNSET_REC(sr)))
926 			sdssc_cm_sr_nid2nm(sr);
927 
928 		/* If we were mid-cvt, cleanup */
929 		if (sr->sr_flags & MD_SR_CVT) {
930 			/* If the daemon is calling, cleanup */
931 			if (md_in_daemon)
932 				url_addl(&url_tode, sr->sr_selfid);
933 			continue;
934 		}
935 
936 		if (md_in_daemon)
937 			url_addl(&url_used, sr->sr_selfid);
938 
939 		/* Skip cached records */
940 		tsr = sr_in_cache(sr->sr_selfid);
941 		if (tsr != (md_set_record *)NULL) {
942 			if (MD_MNSET_REC(sr)) {
943 				mnsr = (struct md_mnset_record *)sr;
944 				Free(mnsr);
945 			} else {
946 				Free(sr);
947 			}
948 			if (md_in_daemon)
949 				for (dr = tsr->sr_drivechain;
950 				    dr != (md_drive_record *)NULL;
951 				    dr = dr->dr_next)
952 					url_addl(&url_used, dr->dr_selfid);
953 			continue;
954 		}
955 
956 		/* Mark the record as one to be checked */
957 		sr->sr_flags |= MD_SR_CHECK;
958 
959 		sr_cache_add(sr);
960 
961 		/* If MNdiskset, go get the node records */
962 		if (MD_MNSET_REC(sr)) {
963 			mnsr = (struct md_mnset_record *)sr;
964 			mnsr->sr_nodechain = NULL;
965 			p = &mnsr->sr_noderec;
966 			while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
967 			    MDDB_UR_NR, p, ep)) != NULL) {
968 				nr->nr_next = NULL;
969 
970 				if (md_in_daemon)
971 					url_addl(&url_used, nr->nr_selfid);
972 
973 				/*
974 				 * Turn off ALIVE node flag based on member
975 				 * list.
976 				 * If ALIVE flag is not set, reset OWN flag.
977 				 * If this node is mynode, set the OWN flag
978 				 * to match the ownership of the diskset.
979 				 */
980 				if (md_in_daemon) {
981 					nr->nr_flags &= ~MD_MN_NODE_ALIVE;
982 					nl2 = nl;
983 					while (nl2) {
984 						/*
985 						 * If in member list,
986 						 * set alive.
987 						 */
988 						if (nl2->msl_node_id ==
989 						    nr->nr_nodeid) {
990 							nr->nr_flags |=
991 							    MD_MN_NODE_ALIVE;
992 							break;
993 						}
994 						nl2 = nl2->next;
995 					}
996 					/*
997 					 * If mynode is in member list, then
998 					 * check to see if set is snarfed.
999 					 * If set snarfed, set own flag;
1000 					 * otherwise reset it.
1001 					 * Don't change master even if
1002 					 * node isn't an owner node, since
1003 					 * node may be master, but hasn't
1004 					 * joined the set yet.
1005 					 */
1006 					if (nr->nr_flags & MD_MN_NODE_ALIVE) {
1007 					    if (strcmp(nr->nr_nodename,
1008 						mynode()) == 0) {
1009 						    if (s_ownset(
1010 							mnsr->sr_setno, ep)) {
1011 							nr->nr_flags |=
1012 							    MD_MN_NODE_OWN;
1013 						    } else {
1014 							nr->nr_flags &=
1015 							    ~MD_MN_NODE_OWN;
1016 						    }
1017 					    }
1018 					} else {
1019 					    if (strcmp(nr->nr_nodename,
1020 						mynode()) == 0) {
1021 						/*
1022 						 * If my node isn't in member
1023 						 * list then reset master.
1024 						 */
1025 						mnsr = (struct
1026 						    md_mnset_record *)sr;
1027 						mnsr->sr_master_nodeid =
1028 							MD_MN_INVALID_NID;
1029 						mnsr->sr_master_nodenm[0] =
1030 							'\0';
1031 					    }
1032 					    nr->nr_flags &= ~MD_MN_NODE_OWN;
1033 					}
1034 				}
1035 
1036 				/*
1037 				 * Must grab nr_nextrec now since
1038 				 * mnnr_cache_add may change it
1039 				 * (mnnr_cache_add is storing the nodes in
1040 				 * an ascending nodeid order list in order
1041 				 * to support reconfig).
1042 				 */
1043 				if (nr->nr_nextrec != 0)
1044 					p = &nr->nr_nextrec;
1045 				else
1046 					p = NULL;
1047 
1048 				mnnr_cache_add((struct md_mnset_record *)sr,
1049 				    nr);
1050 
1051 				if ((md_in_daemon) &&
1052 				    (strcmp(nr->nr_nodename, mynode()) == 0)) {
1053 					(void) memset(&snp, 0, sizeof (snp));
1054 					snp.sn_nodeid = nr->nr_nodeid;
1055 					snp.sn_setno = mnsr->sr_setno;
1056 					if (metaioctl(MD_MN_SET_NODEID, &snp,
1057 					    &snp.sn_mde, NULL) != 0) {
1058 						(void) mdstealerror(ep,
1059 						    &snp.sn_mde);
1060 					}
1061 				}
1062 
1063 				if (p == NULL)
1064 					break;
1065 			}
1066 			if (! mdisok(ep)) {
1067 				if (! mdissyserror(ep, ENOENT))
1068 					goto out;
1069 				mdclrerror(ep);
1070 			}
1071 		}
1072 
1073 		if (sr->sr_driverec == 0)
1074 			continue;
1075 
1076 		/* Go get the drive records */
1077 		p = &sr->sr_driverec;
1078 		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
1079 		    MDDB_UR_DR, p, ep)) != NULL) {
1080 			dr->dr_next = NULL;
1081 
1082 			if (md_in_daemon)
1083 				url_addl(&url_used, dr->dr_selfid);
1084 
1085 			dr_cache_add(sr, dr);
1086 
1087 			if (dr->dr_nextrec == 0)
1088 				break;
1089 
1090 			p = &dr->dr_nextrec;
1091 		}
1092 		if (! mdisok(ep)) {
1093 			if (! mdissyserror(ep, ENOENT))
1094 				goto out;
1095 			mdclrerror(ep);
1096 			/*
1097 			 * If dr_nextrec was not valid, or we had some
1098 			 * problem getting the record, we end up here.
1099 			 * get_ur_rec() zeroes the recid we passed in,
1100 			 * if we had a failure getting a record using a key,
1101 			 * so we simply commit the set record and valid
1102 			 * drive records, if this fails, we hand an error
1103 			 * back to the caller.
1104 			 */
1105 			commitset(sr, FALSE, ep);
1106 			if (! mdisok(ep))
1107 				goto out;
1108 		}
1109 	}
1110 	if (! mdisok(ep)) {
1111 		if (! mdissyserror(ep, ENOENT))
1112 			goto out;
1113 		mdclrerror(ep);
1114 	}
1115 
1116 	/*
1117 	 * If the daemon called, go through the USER records and cleanup
1118 	 * any that are not used by valid sets.
1119 	 */
1120 	if (md_in_daemon) {
1121 		id = 0;
1122 		/* Make a list of records to delete */
1123 		while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET,
1124 		    MDDB_USER, 0, &id, ep)) != NULL) {
1125 			if (reqp->ur_type2 != MDDB_UR_SR &&
1126 			    reqp->ur_type2 != MDDB_UR_DR) {
1127 				Free((void *)(uintptr_t)reqp->ur_data);
1128 				Free(reqp);
1129 				continue;
1130 			}
1131 			if (! url_findl(url_used, reqp->ur_recid))
1132 				url_addl(&url_tode, reqp->ur_recid);
1133 			Free((void *)(uintptr_t)reqp->ur_data);
1134 			Free(reqp);
1135 		}
1136 		if (! mdisok(ep)) {
1137 			if (! mdissyserror(ep, ENOENT))
1138 				goto out;
1139 			mdclrerror(ep);
1140 		}
1141 
1142 		/* Delete all the delete listed records */
1143 		for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) {
1144 			s_delrec(urlp->url_recid, &xep);
1145 			if (! mdisok(&xep))
1146 				mdclrerror(&xep);
1147 		}
1148 	}
1149 
1150 	url_freel(&url_used);
1151 	url_freel(&url_tode);
1152 
1153 	if (nodecnt)
1154 		meta_free_nodelist(nl);
1155 
1156 	/* Mark the snarf complete */
1157 	setsnarfdone = 2;
1158 	return (0);
1159 
1160 out:
1161 	url_freel(&url_used);
1162 	url_freel(&url_tode);
1163 
1164 	sr_cache_flush(1);
1165 
1166 	if (nodecnt)
1167 		meta_free_nodelist(nl);
1168 
1169 	/* Snarf failed, reset state */
1170 	setsnarfdone = 0;
1171 
1172 	return (-1);
1173 }
1174 
1175 void
sr_cache_add(md_set_record * sr)1176 sr_cache_add(md_set_record *sr)
1177 {
1178 	md_set_record *tsr;
1179 
1180 	assert(setsnarfdone != 0);
1181 
1182 	if (setrecords == NULL) {
1183 		setrecords = sr;
1184 		return;
1185 	}
1186 
1187 	for (tsr = setrecords; tsr->sr_next != NULL; tsr = tsr->sr_next)
1188 		/* void */;
1189 	tsr->sr_next = sr;
1190 }
1191 
1192 void
sr_cache_del(mddb_recid_t recid)1193 sr_cache_del(mddb_recid_t recid)
1194 {
1195 	md_set_record	*sr, *tsr;
1196 	md_mnset_record	*mnsr;
1197 
1198 	assert(setsnarfdone != 0);
1199 
1200 	for (sr = tsr = setrecords; sr != NULL; tsr = sr, sr = sr->sr_next) {
1201 		if (sr->sr_selfid != recid)
1202 			continue;
1203 		if (sr == setrecords)
1204 			setrecords = sr->sr_next;
1205 		else
1206 			tsr->sr_next = sr->sr_next;
1207 		if (MD_MNSET_REC(sr)) {
1208 			mnsr = (struct md_mnset_record *)sr;
1209 			Free(mnsr);
1210 		} else {
1211 			Free(sr);
1212 		}
1213 		break;
1214 	}
1215 	if (setrecords == NULL)
1216 		setsnarfdone = 0;
1217 }
1218 
1219 void
dr_cache_add(md_set_record * sr,md_drive_record * dr)1220 dr_cache_add(md_set_record *sr, md_drive_record *dr)
1221 {
1222 	md_drive_record	*tdr;
1223 
1224 	assert(setsnarfdone != 0);
1225 
1226 	assert(sr != NULL);
1227 
1228 	if (sr->sr_drivechain == NULL) {
1229 		sr->sr_drivechain = dr;
1230 		sr->sr_driverec = dr->dr_selfid;
1231 		return;
1232 	}
1233 
1234 	for (tdr = sr->sr_drivechain; tdr->dr_next != NULL; tdr = tdr->dr_next)
1235 		/* void */;
1236 
1237 	tdr->dr_next = dr;
1238 	tdr->dr_nextrec = dr->dr_selfid;
1239 }
1240 
1241 void
dr_cache_del(md_set_record * sr,mddb_recid_t recid)1242 dr_cache_del(md_set_record *sr, mddb_recid_t recid)
1243 {
1244 	md_drive_record *dr;
1245 	md_drive_record *tdr;
1246 
1247 	assert(setsnarfdone != 0);
1248 
1249 	assert(sr != NULL);
1250 
1251 	for (dr = tdr = sr->sr_drivechain; dr != NULL;
1252 	    tdr = dr, dr = dr->dr_next) {
1253 		if (dr->dr_selfid != recid)
1254 			continue;
1255 
1256 		if (dr == sr->sr_drivechain) {
1257 			sr->sr_drivechain = dr->dr_next;
1258 			sr->sr_driverec = dr->dr_nextrec;
1259 		} else {
1260 			tdr->dr_next = dr->dr_next;
1261 			tdr->dr_nextrec = dr->dr_nextrec;
1262 		}
1263 		Free(dr);
1264 		break;
1265 	}
1266 }
1267 
1268 /*
1269  * Nodes must be kept in ascending node id order in order to
1270  * support reconfig.
1271  *
1272  * This routine may change nr->nr_next and nr->nr_nextrec.
1273  */
1274 void
mnnr_cache_add(md_mnset_record * mnsr,md_mnnode_record * nr)1275 mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr)
1276 {
1277 	md_mnnode_record	*tnr, *tnr_prev;
1278 
1279 	assert(mnsr != NULL);
1280 
1281 	if (mnsr->sr_nodechain == NULL) {
1282 		mnsr->sr_nodechain = nr;
1283 		mnsr->sr_noderec = nr->nr_selfid;
1284 		return;
1285 	}
1286 
1287 	/*
1288 	 * If new_record->nodeid < first_record->nodeid,
1289 	 * put new_record at beginning of list.
1290 	 */
1291 	if (nr->nr_nodeid < mnsr->sr_nodechain->nr_nodeid) {
1292 		nr->nr_next = mnsr->sr_nodechain;
1293 		nr->nr_nextrec = mnsr->sr_noderec;
1294 		mnsr->sr_nodechain = nr;
1295 		mnsr->sr_noderec = nr->nr_selfid;
1296 		return;
1297 	}
1298 
1299 	/*
1300 	 * Walk list looking for place to insert record.
1301 	 */
1302 
1303 	tnr_prev = mnsr->sr_nodechain;
1304 	tnr = tnr_prev->nr_next;
1305 	while (tnr) {
1306 		/* Insert new record between tnr_prev and tnr */
1307 		if (nr->nr_nodeid < tnr->nr_nodeid) {
1308 			nr->nr_next = tnr;
1309 			nr->nr_nextrec = tnr->nr_selfid; /* tnr's recid */
1310 			tnr_prev->nr_next = nr;
1311 			tnr_prev->nr_nextrec = nr->nr_selfid;
1312 			return;
1313 		}
1314 		tnr_prev = tnr;
1315 		tnr = tnr->nr_next;
1316 	}
1317 
1318 	/*
1319 	 * Add record to end of list.
1320 	 */
1321 	tnr_prev->nr_next = nr;
1322 	tnr_prev->nr_nextrec = nr->nr_selfid;
1323 }
1324 
1325 void
mnnr_cache_del(md_mnset_record * mnsr,mddb_recid_t recid)1326 mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid)
1327 {
1328 	md_mnnode_record *nr;
1329 	md_mnnode_record *tnr;
1330 
1331 	assert(mnsr != NULL);
1332 
1333 	tnr = 0;
1334 	nr = mnsr->sr_nodechain;
1335 	while (nr) {
1336 		if (nr->nr_selfid != recid) {
1337 			tnr = nr;
1338 			nr = nr->nr_next;
1339 			continue;
1340 		}
1341 
1342 		if (nr == mnsr->sr_nodechain) {
1343 			mnsr->sr_nodechain = nr->nr_next;
1344 			mnsr->sr_noderec = nr->nr_nextrec;
1345 		} else {
1346 			tnr->nr_next = nr->nr_next;
1347 			tnr->nr_nextrec = nr->nr_nextrec;
1348 		}
1349 		Free(nr);
1350 		break;
1351 	}
1352 }
1353 
1354 int
metad_isautotakebyname(char * setname)1355 metad_isautotakebyname(char *setname)
1356 {
1357 	md_error_t	error = mdnullerror;
1358 	md_set_record	*sr;
1359 
1360 	if (md_in_daemon) {
1361 		assert(setsnarfdone != 0);
1362 	} else if (set_snarf(&error)) {
1363 		mdclrerror(&error);
1364 		return (0);
1365 	}
1366 
1367 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1368 		if (strcmp(setname, sr->sr_setname) == 0) {
1369 			if (sr->sr_flags & MD_SR_AUTO_TAKE)
1370 				return (1);
1371 			return (0);
1372 		}
1373 	}
1374 
1375 	return (0);
1376 }
1377 
1378 int
metad_isautotakebynum(set_t setno)1379 metad_isautotakebynum(set_t setno)
1380 {
1381 	md_error_t	error = mdnullerror;
1382 	md_set_record	*sr;
1383 
1384 	if (md_in_daemon) {
1385 		assert(setsnarfdone != 0);
1386 	} else if (set_snarf(&error)) {
1387 		mdclrerror(&error);
1388 		return (0);
1389 	}
1390 
1391 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1392 		if (setno == sr->sr_setno) {
1393 			if (sr->sr_flags & MD_SR_AUTO_TAKE)
1394 				return (1);
1395 			return (0);
1396 		}
1397 	}
1398 
1399 	return (0);
1400 }
1401 
1402 md_set_record *
metad_getsetbyname(char * setname,md_error_t * ep)1403 metad_getsetbyname(char *setname, md_error_t *ep)
1404 {
1405 	md_set_record	*sr;
1406 	char		buf[100];
1407 
1408 	assert(setsnarfdone != 0);
1409 
1410 	for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1411 		if (strcmp(setname, sr->sr_setname) == 0)
1412 			return (sr);
1413 
1414 	(void) snprintf(buf, sizeof (buf), "setname \"%s\"", setname);
1415 	(void) mderror(ep, MDE_NO_SET, buf);
1416 	return (NULL);
1417 }
1418 
1419 md_set_record *
metad_getsetbynum(set_t setno,md_error_t * ep)1420 metad_getsetbynum(set_t setno, md_error_t *ep)
1421 {
1422 	md_set_record	*sr;
1423 	char		buf[100];
1424 
1425 	if (md_in_daemon)
1426 		assert(setsnarfdone != 0);
1427 	else if (set_snarf(ep))		/* BYPASS DAEMON mode */
1428 		return (NULL);
1429 
1430 	for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1431 		if (setno == sr->sr_setno)
1432 			return (sr);
1433 
1434 	(void) sprintf(buf, "setno %u", setno);
1435 	(void) mderror(ep, MDE_NO_SET, buf);
1436 	return (NULL);
1437 }
1438 
1439 
1440 /*
1441  * Commit the set record and all of its associated records
1442  * (drive records, node records for a MNset) to the local mddb.
1443  */
1444 void
commitset(md_set_record * sr,int inc_genid,md_error_t * ep)1445 commitset(md_set_record *sr, int inc_genid, md_error_t *ep)
1446 {
1447 	int		drc, nrc, rc;
1448 	int		*recs;
1449 	uint_t		size;
1450 	md_drive_record	*dr;
1451 	mddb_userreq_t	req;
1452 	md_mnset_record	*mnsr;
1453 	md_mnnode_record	*nr;
1454 
1455 	assert(setsnarfdone != 0);
1456 
1457 	/*
1458 	 * Cluster nodename support
1459 	 * Convert nodename -> nodeid
1460 	 * Don't do this for MN disksets since we've already stored
1461 	 * both the nodeid and name.
1462 	 */
1463 	if (!(MD_MNSET_REC(sr)))
1464 		sdssc_cm_sr_nm2nid(sr);
1465 
1466 	/* Send down to kernel the data in mddb USER set record */
1467 	if (inc_genid)
1468 		sr->sr_genid++;
1469 	(void) memset(&req, 0, sizeof (req));
1470 	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
1471 	if (MD_MNSET_REC(sr)) {
1472 		req.ur_size = sizeof (*mnsr);
1473 	} else {
1474 		req.ur_size = sizeof (*sr);
1475 	}
1476 	req.ur_data = (uintptr_t)sr;
1477 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1478 		(void) mdstealerror(ep, &req.ur_mde);
1479 		return;
1480 	}
1481 
1482 	/*
1483 	 * Walk through the drive records associated with this set record
1484 	 * and send down to kernel the data in mddb USER drive record.
1485 	 */
1486 	drc = 0;
1487 	dr = sr->sr_drivechain;
1488 	while (dr) {
1489 		if (inc_genid)
1490 			dr->dr_genid++;
1491 		METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid)
1492 		req.ur_size = sizeof (*dr);
1493 		req.ur_data = (uintptr_t)dr;
1494 		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1495 			(void) mdstealerror(ep, &req.ur_mde);
1496 			return;
1497 		}
1498 		drc++;
1499 		dr = dr->dr_next;
1500 	}
1501 
1502 
1503 	/*
1504 	 * If this set is a multi-node set -
1505 	 * walk through the node records associated with this set record
1506 	 * and send down to kernel the data in mddb USER node record.
1507 	 */
1508 	nrc = 0;
1509 	if (MD_MNSET_REC(sr)) {
1510 		mnsr = (struct md_mnset_record *)sr;
1511 		nr = mnsr->sr_nodechain;
1512 		while (nr) {
1513 			if (inc_genid)
1514 				nr->nr_genid++;
1515 			METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid)
1516 			req.ur_size = sizeof (*nr);
1517 			req.ur_data = (uint64_t)(uintptr_t)nr;
1518 			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL)
1519 			    != 0) {
1520 				(void) mdstealerror(ep, &req.ur_mde);
1521 				return;
1522 			}
1523 			nrc++;
1524 			nr = nr->nr_next;
1525 		}
1526 	}
1527 
1528 	/*
1529 	 * Set up list of mddb USER recids containing set and drive records
1530 	 * and node records if a MNset.
1531 	 */
1532 	rc = 0;
1533 	size = (nrc + drc + 2) * sizeof (int);
1534 	recs = Zalloc(size);
1535 	/* First recid in list is the set record's id */
1536 	recs[rc] = sr->sr_selfid;
1537 	rc++;
1538 	dr = sr->sr_drivechain;
1539 	while (dr) {
1540 		/* Now, fill in the drive record ids */
1541 		recs[rc] = dr->dr_selfid;
1542 		dr = dr->dr_next;
1543 		rc++;
1544 	}
1545 	if (MD_MNSET_REC(sr)) {
1546 		nr = mnsr->sr_nodechain;
1547 		while (nr) {
1548 			/* If a MNset, fill in the node record ids */
1549 			recs[rc] = nr->nr_selfid;
1550 			nr = nr->nr_next;
1551 			rc++;
1552 		}
1553 	}
1554 	/* Set last record to null recid */
1555 	recs[rc] = 0;
1556 
1557 	/* Write out the set and drive and node records to the local mddb */
1558 	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
1559 	req.ur_size = size;
1560 	req.ur_data = (uintptr_t)recs;
1561 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1562 		(void) mdstealerror(ep, &req.ur_mde);
1563 		return;
1564 	}
1565 
1566 	/*
1567 	 * Cluster nodename support
1568 	 * Convert nodeid -> nodename
1569 	 * Don't do this for MN disksets since we've already stored
1570 	 * both the nodeid and name.
1571 	 */
1572 	if (!(MD_MNSET_REC(sr)))
1573 		sdssc_cm_sr_nid2nm(sr);
1574 
1575 	Free(recs);
1576 }
1577 
1578 /*
1579  * This routine only handles returns a md_set_record structure even
1580  * if the set record describes a MN set.  This will allow pre-MN
1581  * SVM RPC code to access a MN set record and to display it.
1582  *
1583  * The MN SVM RPC code detects if the set record returned describes
1584  * a MN set and then will copy it using mnsetdup.
1585  */
1586 md_set_record *
setdup(md_set_record * sr)1587 setdup(md_set_record *sr)
1588 {
1589 	md_set_record		*tsr = NULL;
1590 	md_drive_record		**tdrpp = NULL;
1591 
1592 	if (sr && (tsr = Malloc(sizeof (*sr))) != NULL) {
1593 		(void) memmove(tsr, sr, sizeof (*sr));
1594 		tsr->sr_next = NULL;
1595 		tdrpp = &tsr->sr_drivechain;
1596 		while (*tdrpp) {
1597 			*tdrpp = drdup(*tdrpp);
1598 			tdrpp = &(*tdrpp)->dr_next;
1599 		}
1600 	}
1601 	return (tsr);
1602 }
1603 
1604 /*
1605  * This routine only copies MN set records.   If a non-MN set
1606  * record was passed in NULL pointer will be returned.
1607  */
1608 md_mnset_record *
mnsetdup(md_mnset_record * mnsr)1609 mnsetdup(md_mnset_record *mnsr)
1610 {
1611 	md_mnset_record		*tmnsr = NULL;
1612 	md_drive_record		**tdrpp = NULL;
1613 	md_mnnode_record	**tnrpp = NULL;
1614 
1615 	if (!MD_MNSET_REC(mnsr)) {
1616 		return (NULL);
1617 	}
1618 
1619 	if (mnsr && (tmnsr = Malloc(sizeof (*mnsr))) != NULL) {
1620 		(void) memmove(tmnsr, mnsr, sizeof (*mnsr));
1621 		tmnsr->sr_next = NULL;
1622 		tdrpp = &tmnsr->sr_drivechain;
1623 		while (*tdrpp) {
1624 			*tdrpp = drdup(*tdrpp);
1625 			tdrpp = &(*tdrpp)->dr_next;
1626 		}
1627 		tnrpp = &tmnsr->sr_nodechain;
1628 		while (*tnrpp) {
1629 			*tnrpp = nrdup(*tnrpp);
1630 			tnrpp = &(*tnrpp)->nr_next;
1631 		}
1632 	}
1633 	return (tmnsr);
1634 }
1635 
1636 md_drive_record *
drdup(md_drive_record * dr)1637 drdup(md_drive_record *dr)
1638 {
1639 	md_drive_record		*tdr = NULL;
1640 
1641 	if (dr && (tdr = Malloc(sizeof (*dr))) != NULL)
1642 		(void) memmove(tdr, dr, sizeof (*dr));
1643 	return (tdr);
1644 }
1645 
1646 md_mnnode_record *
nrdup(md_mnnode_record * nr)1647 nrdup(md_mnnode_record *nr)
1648 {
1649 	md_mnnode_record	*tnr = NULL;
1650 
1651 	if (nr && (tnr = Malloc(sizeof (*nr))) != NULL)
1652 		(void) memmove(tnr, nr, sizeof (*nr));
1653 	return (tnr);
1654 }
1655 
1656 /*
1657  * Duplicate parts of the drive decriptor list for this node.
1658  * Only duplicate the drive name string in the mddrivename structure, don't
1659  * need to copy any other pointers since only interested in the flags and
1660  * the drive name (i.e. other pointers will be set to NULL).
1661  *	Returns NULL if failure due to Malloc failure.
1662  *	Returns pointer (non-NULL) to dup'd list if successful.
1663  */
1664 md_drive_desc *
dd_list_dup(md_drive_desc * dd)1665 dd_list_dup(md_drive_desc *dd)
1666 {
1667 	md_drive_desc	*orig_dd;
1668 	md_drive_desc	*copy_dd = NULL, *copy_dd_prev = NULL;
1669 	md_drive_desc	*copy_dd_head = NULL;
1670 	mddrivename_t	*copy_dnp;
1671 	char		*copy_cname;
1672 	char		*copy_devid;
1673 
1674 	if (dd == NULL)
1675 		return (NULL);
1676 
1677 	orig_dd = dd;
1678 
1679 	while (orig_dd) {
1680 		copy_dd = Zalloc(sizeof (*copy_dd));
1681 		copy_dnp = Zalloc(sizeof (mddrivename_t));
1682 		copy_cname = Zalloc(sizeof (orig_dd->dd_dnp->cname));
1683 		if (orig_dd->dd_dnp->devid) {
1684 			copy_devid = Zalloc(sizeof (orig_dd->dd_dnp->devid));
1685 		} else {
1686 			copy_devid = NULL;
1687 		}
1688 		copy_dd->dd_next = NULL;
1689 		if ((copy_dd == NULL) || (copy_dnp == NULL) ||
1690 		    (copy_cname == NULL)) {
1691 			while (copy_dd_head) {
1692 				copy_dd = copy_dd_head->dd_next;
1693 				Free(copy_dd_head);
1694 				copy_dd_head = copy_dd;
1695 			}
1696 			if (copy_dnp)
1697 				Free(copy_dnp);
1698 			if (copy_dd)
1699 				Free(copy_dd);
1700 			if (copy_cname)
1701 				Free(copy_cname);
1702 			if (copy_devid)
1703 				Free(copy_devid);
1704 			return (NULL);
1705 		}
1706 		(void) memmove(copy_dd, orig_dd, sizeof (*orig_dd));
1707 		(void) strlcpy(copy_cname, orig_dd->dd_dnp->cname,
1708 		    sizeof (orig_dd->dd_dnp->cname));
1709 		copy_dd->dd_next = NULL;
1710 		copy_dd->dd_dnp = copy_dnp;
1711 		copy_dd->dd_dnp->cname = copy_cname;
1712 		if (copy_devid) {
1713 			(void) strlcpy(copy_devid, orig_dd->dd_dnp->devid,
1714 			    sizeof (orig_dd->dd_dnp->devid));
1715 		}
1716 
1717 		if (copy_dd_prev == NULL) {
1718 			copy_dd_head = copy_dd;
1719 			copy_dd_prev = copy_dd;
1720 		} else {
1721 			copy_dd_prev->dd_next = copy_dd;
1722 			copy_dd_prev = copy_dd;
1723 		}
1724 		orig_dd = orig_dd->dd_next;
1725 	}
1726 	copy_dd->dd_next = NULL;
1727 	return (copy_dd_head);
1728 }
1729 
1730 void
sr_cache_flush(int flushnames)1731 sr_cache_flush(int flushnames)
1732 {
1733 	md_set_record	*sr, *tsr;
1734 	md_mnset_record	*mnsr;
1735 	md_drive_record *dr, *tdr;
1736 	md_mnnode_record *nr, *tnr;
1737 
1738 	sr = tsr = setrecords;
1739 	while (sr != NULL) {
1740 		dr = tdr = sr->sr_drivechain;
1741 		while (dr != NULL) {
1742 			tdr = dr;
1743 			dr = dr->dr_next;
1744 			Free(tdr);
1745 		}
1746 		tsr = sr;
1747 		sr = sr->sr_next;
1748 		if (MD_MNSET_REC(tsr)) {
1749 			mnsr = (struct md_mnset_record *)tsr;
1750 			nr = tnr = mnsr->sr_nodechain;
1751 			while (nr != NULL) {
1752 				tnr = nr;
1753 				nr = nr->nr_next;
1754 				Free(tnr);
1755 			}
1756 			Free(mnsr);
1757 		} else {
1758 			Free(tsr);
1759 		}
1760 	}
1761 
1762 	setrecords = NULL;
1763 
1764 	setsnarfdone = 0;
1765 
1766 	/* This will cause the other caches to be cleared */
1767 	if (flushnames)
1768 		metaflushnames(0);
1769 }
1770 
1771 void
sr_cache_flush_setno(set_t setno)1772 sr_cache_flush_setno(set_t setno)
1773 {
1774 	md_set_record	*sr, *tsr;
1775 	md_mnset_record	*mnsr;
1776 	md_drive_record *dr, *tdr;
1777 
1778 	assert(setsnarfdone != 0);
1779 
1780 	for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) {
1781 		if (sr->sr_setno != setno)
1782 			continue;
1783 
1784 		dr = tdr = sr->sr_drivechain;
1785 		while (dr != NULL) {
1786 			tdr = dr;
1787 			dr = dr->dr_next;
1788 			Free(tdr);
1789 		}
1790 		if (sr == setrecords)
1791 			setrecords = sr->sr_next;
1792 		else
1793 			tsr->sr_next = sr->sr_next;
1794 		if (MD_MNSET_REC(sr)) {
1795 			mnsr = (struct md_mnset_record *)sr;
1796 			Free(mnsr);
1797 		} else {
1798 			Free(sr);
1799 		}
1800 		break;
1801 	}
1802 
1803 	setsnarfdone = 0;
1804 
1805 	/* This will cause the other caches to be cleared */
1806 	metaflushnames(0);
1807 }
1808 
1809 int
s_ownset(set_t setno,md_error_t * ep)1810 s_ownset(set_t setno, md_error_t *ep)
1811 {
1812 	mddb_ownset_t		ownset_arg;
1813 
1814 	ownset_arg.setno = setno;
1815 	ownset_arg.owns_set = MD_SETOWNER_NONE;
1816 
1817 	if (metaioctl(MD_DB_OWNSET, &ownset_arg, ep, NULL) != 0)
1818 		return (0);
1819 
1820 	return (ownset_arg.owns_set);
1821 }
1822 
1823 void
s_delset(char * setname,md_error_t * ep)1824 s_delset(char *setname, md_error_t *ep)
1825 {
1826 	md_set_record		*sr;
1827 	md_set_record		*tsr;
1828 	md_drive_record		*dr;
1829 	md_drive_record		*tdr;
1830 	md_mnnode_record	*nr, *tnr;
1831 	mddb_userreq_t		req;
1832 	char			stringbuf[100];
1833 	int			i;
1834 	mdsetname_t		*sp = NULL;
1835 	mddrivename_t		*dn = NULL;
1836 	mdname_t		*np = NULL;
1837 	md_dev64_t		dev;
1838 	side_t			myside = MD_SIDEWILD;
1839 	md_error_t		xep = mdnullerror;
1840 	md_mnset_record		*mnsr;
1841 	int			num_sets = 0;
1842 	int			num_mn_sets = 0;
1843 
1844 	(void) memset(&req, 0, sizeof (mddb_userreq_t));
1845 
1846 	if ((sr = getsetbyname(setname, ep)) == NULL)
1847 		return;
1848 
1849 	sp = metasetnosetname(sr->sr_setno, &xep);
1850 	mdclrerror(&xep);
1851 
1852 	if (MD_MNSET_REC(sr)) {
1853 		/*
1854 		 * If this node is a set owner, halt the set before
1855 		 * deleting the set records.  Ignore any errors since
1856 		 * s_ownset and halt_set could fail if panic had occurred
1857 		 * during the add/delete of a node.
1858 		 */
1859 		if (s_ownset(sr->sr_setno, &xep)) {
1860 			mdclrerror(&xep);
1861 			if (halt_set(sp, &xep))
1862 				mdclrerror(&xep);
1863 		}
1864 	}
1865 
1866 	(void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname);
1867 	(void) unlink(stringbuf);
1868 	(void) unlink(meta_lock_name(sr->sr_setno));
1869 
1870 	if (MD_MNSET_REC(sr)) {
1871 		mnsr = (struct md_mnset_record *)sr;
1872 		nr = mnsr->sr_nodechain;
1873 		while (nr) {
1874 			/* Setting myside for later use */
1875 			if (strcmp(mynode(), nr->nr_nodename) == 0)
1876 				myside = nr->nr_nodeid;
1877 
1878 			(void) memset(&req, 0, sizeof (req));
1879 			METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid)
1880 			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde,
1881 			    NULL) != 0) {
1882 				(void) mdstealerror(ep, &req.ur_mde);
1883 				free_sr(sr);
1884 				return;
1885 			}
1886 			tnr = nr;
1887 			nr = nr->nr_next;
1888 
1889 			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
1890 			    sr->sr_setno, tnr->nr_nodeid);
1891 
1892 			mnnr_cache_del((struct md_mnset_record *)sr,
1893 			    tnr->nr_selfid);
1894 		}
1895 	} else {
1896 		for (i = 0; i < MD_MAXSIDES; i++) {
1897 			/* Skip empty slots */
1898 			if (sr->sr_nodes[i][0] == '\0')
1899 				continue;
1900 
1901 			if (strcmp(mynode(), sr->sr_nodes[i]) == 0)
1902 				myside = i;
1903 
1904 			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
1905 			    sr->sr_setno, i);
1906 		}
1907 	}
1908 
1909 	dr = sr->sr_drivechain;
1910 	while (dr) {
1911 		(void) memset(&req, 0, sizeof (req));
1912 		METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid)
1913 		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1914 			(void) mdstealerror(ep, &req.ur_mde);
1915 			free_sr(sr);
1916 			return;
1917 		}
1918 		tdr = dr;
1919 		dr = dr->dr_next;
1920 
1921 		dev = NODEV64;
1922 		if (myside != MD_SIDEWILD && sp != NULL) {
1923 			dn = metadrivename_withdrkey(sp, myside,
1924 			    tdr->dr_key, MD_BASICNAME_OK, &xep);
1925 			if (dn != NULL) {
1926 				uint_t	rep_slice;
1927 
1928 				np = NULL;
1929 				if (meta_replicaslice(dn, &rep_slice,
1930 				    &xep) == 0) {
1931 					np = metaslicename(dn, rep_slice, &xep);
1932 				}
1933 
1934 				if (np != NULL)
1935 					dev = np->dev;
1936 				else
1937 					mdclrerror(&xep);
1938 			} else
1939 				mdclrerror(&xep);
1940 		} else
1941 			mdclrerror(&xep);
1942 
1943 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
1944 		    sr->sr_setno, dev);
1945 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
1946 		    MD_LOCAL_SET, dev);
1947 
1948 		dr_cache_del(sr, tdr->dr_selfid);
1949 
1950 	}
1951 
1952 	(void) memset(&req, 0, sizeof (req));
1953 	METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid)
1954 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1955 		(void) mdstealerror(ep, &req.ur_mde);
1956 		free_sr(sr);
1957 		return;
1958 	}
1959 
1960 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno,
1961 	    NODEV64);
1962 
1963 	for (tsr = setrecords; tsr; tsr = tsr->sr_next) {
1964 		if (tsr == sr)
1965 			continue;
1966 
1967 		num_sets++;
1968 		if (MD_MNSET_REC(tsr))
1969 			num_mn_sets++;
1970 	}
1971 
1972 	if (num_mn_sets == 0)
1973 		(void) meta_smf_disable(META_SMF_MN_DISKSET, NULL);
1974 
1975 	/* The set we just deleted is the only one left */
1976 	if (num_sets == 0)
1977 		(void) meta_smf_disable(META_SMF_DISKSET, NULL);
1978 
1979 	sr_cache_del(sr->sr_selfid);
1980 	free_sr(sr);
1981 
1982 }
1983 
1984 void
s_delrec(mddb_recid_t recid,md_error_t * ep)1985 s_delrec(mddb_recid_t recid, md_error_t *ep)
1986 {
1987 	mddb_userreq_t		req;
1988 
1989 	(void) memset(&req, 0, sizeof (req));
1990 
1991 	METAD_SETUP_SR(MD_DB_DELETE, recid)
1992 
1993 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
1994 		(void) mdstealerror(ep, &req.ur_mde);
1995 }
1996 
1997 /*
1998  * resnarf the imported set
1999  */
2000 int
resnarf_set(set_t setno,md_error_t * ep)2001 resnarf_set(
2002 	set_t			setno,
2003 	md_error_t		*ep
2004 )
2005 {
2006 	md_set_record	*sr;
2007 	md_drive_record	*dr;
2008 	mddb_recid_t	id, *p;
2009 
2010 	if (meta_setup_db_locations(ep) != 0) {
2011 		if (! mdismddberror(ep, MDE_DB_STALE))
2012 			return (-1);
2013 		mdclrerror(ep);
2014 	}
2015 
2016 	setsnarfdone = 1;
2017 
2018 	id = 0;
2019 	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id,
2020 	    ep)) != NULL) {
2021 
2022 		if (sr->sr_setno != setno)
2023 			continue;
2024 
2025 		/* Don't allow resnarf of a multi-node diskset */
2026 		if (MD_MNSET_REC(sr))
2027 			goto out;
2028 
2029 		sr->sr_next = NULL;
2030 		sr->sr_drivechain = NULL;
2031 
2032 		if (md_in_daemon)
2033 			url_addl(&url_used, sr->sr_selfid);
2034 
2035 		sr->sr_flags |= MD_SR_CHECK;
2036 
2037 		sr_cache_add(sr);
2038 
2039 		if (sr->sr_driverec == 0)
2040 			break;
2041 
2042 		p = &sr->sr_driverec;
2043 		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
2044 		    MDDB_UR_DR, p, ep)) != NULL) {
2045 			dr->dr_next = NULL;
2046 
2047 			if (md_in_daemon)
2048 				url_addl(&url_used, dr->dr_selfid);
2049 
2050 			dr_cache_add(sr, dr);
2051 
2052 			if (dr->dr_nextrec == 0)
2053 				break;
2054 
2055 			p = &dr->dr_nextrec;
2056 		}
2057 		if (! mdisok(ep)) {
2058 			if (! mdissyserror(ep, ENOENT))
2059 				goto out;
2060 			mdclrerror(ep);
2061 			commitset(sr, FALSE, ep);
2062 			if (! mdisok(ep))
2063 				goto out;
2064 		}
2065 	}
2066 	if (! mdisok(ep)) {
2067 		if (! mdissyserror(ep, ENOENT))
2068 			goto out;
2069 		mdclrerror(ep);
2070 	}
2071 
2072 	setsnarfdone = 2;
2073 
2074 	url_freel(&url_used);
2075 	url_freel(&url_tode);
2076 	return (0);
2077 
2078 out:
2079 	url_freel(&url_used);
2080 	url_freel(&url_tode);
2081 
2082 	sr_cache_flush(1);
2083 
2084 	setsnarfdone = 0;
2085 
2086 	return (-1);
2087 }
2088