1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Just in case we're not in a build environment, make sure that
28 * TEXT_DOMAIN gets set to something.
29 */
30 #if !defined(TEXT_DOMAIN)
31 #define TEXT_DOMAIN "SYS_TEST"
32 #endif
33
34 /*
35 * interface between user land and the set records
36 */
37
38 #include <meta.h>
39 #include <metad.h>
40 #include <sdssc.h>
41 #include <syslog.h>
42 #include <sys/cladm.h>
43 #include "meta_set_prv.h"
44
45 #include <sys/sysevent/eventdefs.h>
46 #include <sys/sysevent/svm.h>
47
/*
 * In-memory cache of set records, chained through sr_next.
 */
static md_set_record *setrecords = NULL; /* head of cache linked list */
/*
 * Progress of the cache as used in this file:
 *	0 - nothing snarfed (initial state, a failed snarf, or the cache
 *	    became empty in sr_cache_del())
 *	1 - set_snarf() is in progress
 *	2 - set_snarf() completed
 *	3 - sr_validate() has validated the cached records
 */
static int setsnarfdone = 0;
50
/*
 * Singly linked list of (side, key) pairs.  sr_sidenms() builds one of
 * these to remember which side names are still referenced by drive records.
 */
typedef struct key_lst_t {
	side_t kl_side;			/* side number of the name */
	mdkey_t kl_key;			/* name key on that side */
	struct key_lst_t *kl_next;	/* next element, NULL at the tail */
} key_lst_t;
56
/*
 * Singly linked list of database record ids.
 */
typedef struct ur_recid_lst {
	mddb_recid_t url_recid;		/* record id held by this element */
	struct ur_recid_lst *url_nx;	/* next element, NULL at the tail */
} ur_recid_lst_t;
61
/*
 * Work lists used while snarfing in the daemon:
 *	url_used - record ids that belong to set, drive or node records
 *		   that were read for a set
 *	url_tode - record ids that are to be deleted at the end of the snarf
 * Both lists are released by set_snarf() before it returns.
 */
static ur_recid_lst_t *url_used = NULL;
static ur_recid_lst_t *url_tode = NULL;
64
65 static void
url_addl(ur_recid_lst_t ** urlpp,mddb_recid_t recid)66 url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid)
67 {
68 /* Run to the end of the list */
69 for (/* void */; (*urlpp != NULL); urlpp = &(*urlpp)->url_nx)
70 if ((*urlpp)->url_recid == recid)
71 return;
72
73 /* Add the new member */
74 *urlpp = Zalloc(sizeof (**urlpp));
75 if (*urlpp == NULL)
76 return;
77
78 (*urlpp)->url_recid = recid;
79 }
80
81 static int
url_findl(ur_recid_lst_t * urlp,mddb_recid_t recid)82 url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid)
83 {
84 while (urlp != NULL) {
85 if (urlp->url_recid == recid)
86 return (1);
87 urlp = urlp->url_nx;
88 }
89 return (0);
90 }
91
92 static void
url_freel(ur_recid_lst_t ** urlpp)93 url_freel(ur_recid_lst_t **urlpp)
94 {
95 ur_recid_lst_t *urlp;
96 ur_recid_lst_t *turlp;
97
98 for (turlp = *urlpp; turlp != NULL; turlp = urlp) {
99 urlp = turlp->url_nx;
100 Free(turlp);
101 }
102 *urlpp = (ur_recid_lst_t *)NULL;
103 }
104
105 static int
ckncvt_set_record(mddb_userreq_t * reqp,md_error_t * ep)106 ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep)
107 {
108 mddb_userreq_t req;
109 md_set_record *sr;
110 int recs[3];
111
112 if (reqp->ur_size == sizeof (*sr))
113 return (0);
114
115 if (! md_in_daemon) {
116 if (reqp->ur_size >= sizeof (*sr))
117 return (0);
118
119 reqp->ur_data = (uintptr_t)Realloc((void *)(uintptr_t)
120 reqp->ur_data, sizeof (*sr));
121 (void) memset(
122 ((char *)(uintptr_t)reqp->ur_data) + reqp->ur_size,
123 '\0', sizeof (*sr) - reqp->ur_size);
124 reqp->ur_size = sizeof (*sr);
125 return (0);
126 }
127
128 /*
129 * If here, then the daemon is calling, and so the automatic
130 * conversion will be performed.
131 */
132
133 /* shorthand */
134 req = *reqp; /* structure assignment */
135 sr = (md_set_record *)(uintptr_t)req.ur_data;
136
137 if (sr->sr_flags & MD_SR_CVT)
138 return (0);
139
140 /* Leave multi-node set records alone */
141 if (MD_MNSET_REC(sr)) {
142 return (0);
143 }
144
145 /* Mark the old record as converted */
146 sr->sr_flags |= MD_SR_CVT;
147
148 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
149
150 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
151 return (mdstealerror(ep, &req.ur_mde));
152
153 /* Create space for the new record */
154 METAD_SETUP_SR(MD_DB_CREATE, 0);
155 req.ur_size = sizeof (*sr);
156
157 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
158 return (mdstealerror(ep, &req.ur_mde));
159
160 /* Allocate the new record */
161 sr = Zalloc(sizeof (*sr));
162
163 /* copy all the data from the record being converted */
164 (void) memmove(sr, (void *)(uintptr_t)reqp->ur_data, reqp->ur_size);
165 sr->sr_flags &= ~MD_SR_CVT;
166
167 /* adjust the selfid to point to the new record */
168 sr->sr_selfid = req.ur_recid;
169
170 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
171 req.ur_size = sizeof (*sr);
172 req.ur_data = (uintptr_t)sr;
173
174 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
175 Free(sr);
176 return (mdstealerror(ep, &req.ur_mde));
177 }
178
179 /* Commit the old and the new */
180 recs[0] = ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid;
181 recs[1] = sr->sr_selfid;
182 recs[2] = 0;
183
184 METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
185 req.ur_size = sizeof (recs);
186 req.ur_data = (uintptr_t)recs;
187
188 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
189 Free(sr);
190 return (mdstealerror(ep, &req.ur_mde));
191 }
192
193 /* Add the the old record to the list of records to delete */
194 url_addl(&url_tode,
195 ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid);
196
197 /* Free the old records space */
198 Free((void *)(uintptr_t)reqp->ur_data);
199
200 /* Adjust the reqp structure to point to the new record and size */
201 reqp->ur_recid = sr->sr_selfid;
202 reqp->ur_size = sizeof (*sr);
203 reqp->ur_data = (uintptr_t)sr;
204
205 return (0);
206 }
207
208 mddb_userreq_t *
get_db_rec(md_ur_get_cmd_t cmd,set_t setno,mddb_type_t type,uint_t type2,mddb_recid_t * idp,md_error_t * ep)209 get_db_rec(
210 md_ur_get_cmd_t cmd,
211 set_t setno,
212 mddb_type_t type,
213 uint_t type2,
214 mddb_recid_t *idp,
215 md_error_t *ep
216 )
217 {
218 mddb_userreq_t *reqp = Zalloc(sizeof (*reqp));
219 mdsetname_t *sp;
220 md_set_desc *sd;
221 int ureq;
222
223 if ((sp = metasetnosetname(setno, ep)) == NULL) {
224 Free(reqp);
225 return (NULL);
226 }
227
228 if (metaislocalset(sp)) {
229 ureq = MD_DB_USERREQ;
230 } else {
231 if ((sd = metaget_setdesc(sp, ep)) == NULL) {
232 Free(reqp);
233 return (NULL);
234 }
235 ureq = MD_MNSET_DESC(sd) ? MD_MN_DB_USERREQ : MD_DB_USERREQ;
236 }
237
238 reqp->ur_setno = setno;
239 reqp->ur_type = type;
240 reqp->ur_type2 = type2;
241
242 switch (cmd) {
243 case MD_UR_GET_NEXT:
244 reqp->ur_cmd = MD_DB_GETNEXTREC;
245 reqp->ur_recid = *idp;
246 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
247 (void) mdstealerror(ep, &reqp->ur_mde);
248 Free(reqp);
249 return (NULL);
250 }
251 *idp = reqp->ur_recid;
252 break;
253 case MD_UR_GET_WKEY:
254 reqp->ur_recid = *idp;
255 break;
256 }
257
258 if (*idp <= 0) {
259 Free(reqp);
260 return (NULL);
261 }
262
263 reqp->ur_cmd = MD_DB_GETSIZE;
264 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
265 (void) mdstealerror(ep, &reqp->ur_mde);
266 Free(reqp);
267
268 *idp = 0;
269 return (NULL);
270 }
271
272 reqp->ur_cmd = MD_DB_GETDATA;
273 reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size);
274 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
275 (void) mdstealerror(ep, &reqp->ur_mde);
276 Free((void *)(uintptr_t)reqp->ur_data);
277 Free(reqp);
278 *idp = 0;
279 return (NULL);
280 }
281
282 switch (reqp->ur_type) {
283 case MDDB_USER:
284 switch (reqp->ur_type2) {
285 case MDDB_UR_SR:
286 if (ckncvt_set_record(reqp, ep)) {
287 Free((void *)(uintptr_t)reqp->ur_data);
288 Free(reqp);
289 return (NULL);
290 }
291 break;
292 }
293 break;
294 }
295
296 return (reqp);
297 }
298
299 void *
get_ur_rec(set_t setno,md_ur_get_cmd_t cmd,uint_t type2,mddb_recid_t * idp,md_error_t * ep)300 get_ur_rec(
301 set_t setno,
302 md_ur_get_cmd_t cmd,
303 uint_t type2,
304 mddb_recid_t *idp,
305 md_error_t *ep
306 )
307 {
308 mddb_userreq_t *reqp = NULL;
309 void *ret_val;
310
311 assert(idp != NULL);
312
313 reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep);
314 if (reqp == NULL)
315 return (NULL);
316
317 ret_val = (void *)(uintptr_t)reqp->ur_data;
318 Free(reqp);
319 return (ret_val);
320 }
321
322 /*
323 * Called by rpc.metad on startup of disksets to cleanup
324 * the host entries associated with a diskset. This is needed if
325 * a node failed or the metaset command was killed during the addition
326 * of a node to a diskset.
327 *
328 * This is called for all traditional disksets.
329 * This is only called for MNdisksets when in there is only one node
330 * in all of the MN disksets and this node is not running SunCluster.
331 * (Otherwise, the cleanup of the host entries is handled by a
332 * reconfig cycle that the SunCluster software calls).
333 */
334 static int
sr_hosts(md_set_record * sr)335 sr_hosts(md_set_record *sr)
336 {
337 int i;
338 int nid = 0;
339 int self_in_set = FALSE;
340 md_error_t xep = mdnullerror;
341 md_mnnode_record *nr;
342 md_mnset_record *mnsr;
343
344 if (MD_MNSET_REC(sr)) {
345 mnsr = (struct md_mnset_record *)sr;
346 nr = mnsr->sr_nodechain;
347 /*
348 * Already guaranteed to be only 1 node in set which
349 * is mynode (done in sr_validate).
350 * Now, check if node is in the OK state. If not in
351 * the OK state, leave self_in_set FALSE so that
352 * set will be removed.
353 */
354 if (nr->nr_flags & MD_MN_NODE_OK)
355 self_in_set = TRUE;
356 } else {
357 for (i = 0; i < MD_MAXSIDES; i++) {
358 /* Skip empty slots */
359 if (sr->sr_nodes[i][0] == '\0')
360 continue;
361
362 /* Make sure we are in the set and skip this node */
363 if (strcmp(sr->sr_nodes[i], mynode()) == 0) {
364 self_in_set = TRUE;
365 break;
366 }
367 }
368 }
369
370 if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr)))) {
371 /*
372 * Under some circumstances (/etc/cluster/nodeid file is
373 * missing) it is possible for the call to _cladm() to
374 * return 0 and a nid of 0. In this instance do not remove
375 * the set as it is Sun Cluster error that needs to be fixed.
376 */
377 if (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0 && nid > 0) {
378
379 /*
380 * See if we've got a node which has been booted in
381 * non-cluster mode. If true the nodeid will match
382 * one of the sr_nodes values because the conversion
383 * from nodeid to hostname failed to occur.
384 */
385 for (i = 0; i < MD_MAXSIDES; i++) {
386 if (sr->sr_nodes[i][0] == 0)
387 continue;
388 if (atoi(sr->sr_nodes[i]) == nid)
389 self_in_set = TRUE;
390 }
391
392 /* If we aren't in the set, delete the set */
393 if (self_in_set == FALSE) {
394 syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
395 "Removing set %s from database\n"),
396 sr->sr_setname);
397 s_delset(sr->sr_setname, &xep);
398 if (! mdisok(&xep))
399 mdclrerror(&xep);
400 return (1);
401 }
402 } else {
403 /*
404 * Send a message to syslog and return without
405 * deleting any sets
406 */
407 syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
408 "Call to _cladm failed for set %s nodeid %d\n"),
409 sr->sr_setname, nid);
410 return (1);
411 }
412 }
413 return (0);
414 }
415
416 void
sr_del_drv(md_set_record * sr,mddb_recid_t recid)417 sr_del_drv(md_set_record *sr, mddb_recid_t recid)
418 {
419 mddb_userreq_t req;
420 md_error_t xep = mdnullerror;
421
422 if (!s_ownset(sr->sr_setno, &xep)) {
423 if (! mdisok(&xep))
424 mdclrerror(&xep);
425 goto skip;
426 }
427
428 /* delete the replicas? */
429 /* release ownership of the drive? */
430 /* NOTE: We may not have a name, so both of the above are ugly! */
431
432 skip:
433 (void) memset(&req, 0, sizeof (req));
434 METAD_SETUP_DR(MD_DB_DELETE, recid)
435 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
436 mdclrerror(&req.ur_mde);
437
438 dr_cache_del(sr, recid);
439 }
440
441 static void
sr_drvs(md_set_record * sr)442 sr_drvs(md_set_record *sr)
443 {
444 md_drive_record *dr;
445 int i;
446 int modified = 0;
447 int sidesok;
448 mdnm_params_t nm;
449 static char device_name[MAXPATHLEN];
450 md_error_t xep = mdnullerror;
451 md_mnnode_record *nr;
452 md_mnset_record *mnsr;
453
454 for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
455 /* If we were mid-add, cleanup */
456 if ((dr->dr_flags & MD_DR_ADD)) {
457 sr_del_drv(sr, dr->dr_selfid);
458 modified++;
459 continue;
460 }
461
462 sidesok = TRUE;
463 if (MD_MNSET_REC(sr)) {
464 mnsr = (md_mnset_record *)sr;
465 nr = mnsr->sr_nodechain;
466 /*
467 * MultiNode disksets only have entries for
468 * their side in the local set. Verify
469 * that drive has a name associated with
470 * this node's side.
471 */
472 while (nr) {
473 /* Find my node */
474 if (strcmp(mynode(), nr->nr_nodename) != 0) {
475 nr = nr->nr_next;
476 continue;
477 }
478
479 (void) memset(&nm, '\0', sizeof (nm));
480 nm.setno = MD_LOCAL_SET;
481 nm.side = nr->nr_nodeid;
482 nm.key = dr->dr_key;
483 nm.devname = (uintptr_t)device_name;
484
485 if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
486 NULL) != 0) {
487 if (! mdissyserror(&nm.mde, ENOENT)) {
488 mdclrerror(&nm.mde);
489 return;
490 }
491 }
492
493 /*
494 * If entry is found for this node, then
495 * break out of loop walking through
496 * node list. For a multi-node diskset,
497 * there should only be an entry for
498 * this node.
499 */
500 if (nm.key != MD_KEYWILD &&
501 ! mdissyserror(&nm.mde, ENOENT)) {
502 break;
503 }
504
505 /*
506 * If entry is not found for this node,
507 * then delete the drive. No need to
508 * continue through the node loop since
509 * our node has already been found.
510 */
511 sidesok = FALSE;
512 mdclrerror(&nm.mde);
513
514 /* If we are missing a sidename, cleanup */
515 sr_del_drv(sr, dr->dr_selfid);
516 modified++;
517
518 break;
519 }
520 } else {
521 for (i = 0; i < MD_MAXSIDES; i++) {
522 /* Skip empty slots */
523 if (sr->sr_nodes[i][0] == '\0')
524 continue;
525
526 (void) memset(&nm, '\0', sizeof (nm));
527 nm.setno = MD_LOCAL_SET;
528 nm.side = i + SKEW;
529 nm.key = dr->dr_key;
530 nm.devname = (uintptr_t)device_name;
531
532 if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
533 NULL) != 0) {
534 if (! mdissyserror(&nm.mde, ENOENT)) {
535 mdclrerror(&nm.mde);
536 return;
537 }
538 }
539
540 if (nm.key != MD_KEYWILD &&
541 ! mdissyserror(&nm.mde, ENOENT))
542 continue;
543
544 sidesok = FALSE;
545 mdclrerror(&nm.mde);
546
547 /* If we are missing a sidename, cleanup */
548 sr_del_drv(sr, dr->dr_selfid);
549 modified++;
550
551 break;
552 }
553 }
554
555 if (sidesok == FALSE)
556 continue;
557
558 /*
559 * If we got this far, the drive record is either in the OK
560 * or DEL state, if it is in the DEL state and the sidenames
561 * all checked out, then we will make it OK.
562 */
563 if ((dr->dr_flags & MD_DR_OK))
564 continue;
565
566 dr->dr_flags = MD_DR_OK;
567
568 modified++;
569 }
570
571 if (modified) {
572 commitset(sr, FALSE, &xep);
573 if (! mdisok(&xep))
574 mdclrerror(&xep);
575 }
576 }
577
578 static void
add_key_to_lst(key_lst_t ** klpp,side_t side,mdkey_t key)579 add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key)
580 {
581 key_lst_t *klp;
582
583 assert(klpp != NULL);
584
585 for (/* void */; *klpp != NULL; klpp = &(*klpp)->kl_next)
586 /* void */;
587
588 /* allocate new list element */
589 klp = *klpp = Zalloc(sizeof (*klp));
590
591 klp->kl_side = side;
592 klp->kl_key = key;
593 }
594
#ifdef DUMPKEYLST
/*
 * Debugging aid: print tag followed by every (side, key) pair of the list.
 */
static void
pr_key_lst(char *tag, key_lst_t *klp)
{
	key_lst_t	*cur = klp;

	md_eprintf("Tag=%s\n", tag);
	while (cur != NULL) {
		md_eprintf("side=%d, key=%lu\n", cur->kl_side, cur->kl_key);
		cur = cur->kl_next;
	}
}
#endif	/* DUMPKEYLST */
606
607 static int
key_in_key_lst(key_lst_t * klp,side_t side,mdkey_t key)608 key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key)
609 {
610 key_lst_t *tklp;
611
612 for (tklp = klp; tklp != NULL; tklp = tklp->kl_next)
613 if (tklp->kl_side == side && tklp->kl_key == key)
614 return (1);
615
616 return (0);
617 }
618
619 static void
destroy_key_lst(key_lst_t ** klpp)620 destroy_key_lst(key_lst_t **klpp)
621 {
622 key_lst_t *tklp, *klp;
623
624 assert(klpp != NULL);
625
626 tklp = klp = *klpp;
627 while (klp != NULL) {
628 tklp = klp;
629 klp = klp->kl_next;
630 Free(tklp);
631 }
632 *klpp = NULL;
633 }
634
635 static void
sr_sidenms(void)636 sr_sidenms(void)
637 {
638 md_drive_record *dr;
639 md_set_record *sr;
640 key_lst_t *use = NULL;
641 mdnm_params_t nm;
642 int i;
643 md_mnset_record *mnsr;
644 md_mnnode_record *nr;
645 side_t myside = 0;
646
647 /*
648 * We now go through the list of set and drive records collecting
649 * the key/side pairs that are being used.
650 */
651 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
652 /*
653 * To handle the multi-node diskset case, get the sideno
654 * associated with this node. This sideno will be the
655 * same across all multi-node disksets.
656 */
657 if ((myside == 0) && (MD_MNSET_REC(sr))) {
658 mnsr = (struct md_mnset_record *)sr;
659 nr = mnsr->sr_nodechain;
660 while (nr) {
661 if (strcmp(mynode(), nr->nr_nodename) == 0) {
662 myside = nr->nr_nodeid;
663 break;
664 }
665 nr = nr->nr_next;
666 }
667 /*
668 * If this node is not in this MNset -
669 * then skip this set.
670 */
671 if (!nr) {
672 continue;
673 }
674 }
675
676 for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
677 if (MD_MNSET_REC(sr)) {
678 /*
679 * There are no non-local sidenames in the
680 * local set for a multi-node diskset.
681 */
682 add_key_to_lst(&use, myside, dr->dr_key);
683 } else {
684 for (i = 0; i < MD_MAXSIDES; i++) {
685 /* Skip empty slots */
686 if (sr->sr_nodes[i][0] == '\0')
687 continue;
688
689 add_key_to_lst(&use, i + SKEW,
690 dr->dr_key);
691 }
692 }
693 }
694 }
695
696 #ifdef DUMPKEYLST
697 pr_key_lst("use", use);
698 #endif /* DUMPKEYLST */
699
700 /*
701 * We take the list above and get all non-local sidenames, checking
702 * each to see if they are in use, if they are not used, we delete them.
703 * Do the check for myside to cover multinode disksets.
704 * Then do the check for MD_MAXSIDES to cover non-multinode disksets.
705 * If any multi-node disksets were present, myside would be non-zero.
706 * myside is the same for all multi-node disksets for this node.
707 */
708 if (myside) {
709 (void) memset(&nm, '\0', sizeof (nm));
710 nm.setno = MD_LOCAL_SET;
711 nm.side = myside;
712 nm.key = MD_KEYWILD;
713
714 /*CONSTCOND*/
715 while (1) {
716 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
717 NULL) != 0) {
718 mdclrerror(&nm.mde);
719 break;
720 }
721
722 if (nm.key == MD_KEYWILD)
723 break;
724
725 if (! key_in_key_lst(use, nm.side, nm.key)) {
726 if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
727 NULL) != 0) {
728 mdclrerror(&nm.mde);
729 continue;
730 }
731 }
732 }
733 }
734 /* Now handle the non-multinode disksets */
735 for (i = 0; i < MD_MAXSIDES; i++) {
736 (void) memset(&nm, '\0', sizeof (nm));
737 nm.setno = MD_LOCAL_SET;
738 nm.side = i + SKEW;
739 nm.key = MD_KEYWILD;
740
741 /*CONSTCOND*/
742 while (1) {
743 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
744 NULL) != 0) {
745 mdclrerror(&nm.mde);
746 break;
747 }
748
749 if (nm.key == MD_KEYWILD)
750 break;
751
752 if (! key_in_key_lst(use, nm.side, nm.key)) {
753 if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
754 NULL) != 0) {
755 mdclrerror(&nm.mde);
756 continue;
757 }
758 }
759 }
760 }
761
762 /* Cleanup */
763 destroy_key_lst(&use);
764 }
765
766 void
sr_validate(void)767 sr_validate(void)
768 {
769 md_set_record *sr;
770 md_error_t xep = mdnullerror;
771 int mnset_single_node;
772 md_mnnode_record *nr;
773 md_mnset_record *mnsr;
774
775 assert(setsnarfdone != 0);
776
777 /* We have validated the records already */
778 if (setsnarfdone == 3)
779 return;
780
781 /*
782 * Check if we are in a single node non-SC3.x environmemnt
783 */
784 mnset_single_node = meta_mn_singlenode();
785 /*
786 * If a possible single_node situation, verify that all
787 * MN disksets have only one node (which is mynode()).
788 */
789 if (mnset_single_node) {
790 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
791 if (MD_MNSET_REC(sr)) {
792 mnsr = (struct md_mnset_record *)sr;
793 nr = mnsr->sr_nodechain;
794 /*
795 * If next pointer is non-null (more than
796 * one node in list) or if the single node
797 * isn't my node - reset single node flag.
798 */
799 if ((nr->nr_next) ||
800 (strcmp(nr->nr_nodename, mynode()) != 0)) {
801 mnset_single_node = 0;
802 break;
803 }
804 }
805 }
806 }
807
808 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
809 /*
810 * If a MN diskset and not in the single node
811 * situation, then don't validate the MN set.
812 * This is done during a reconfig cycle since all
813 * nodes must take the same action.
814 */
815 if (MD_MNSET_REC(sr) && (mnset_single_node == 0))
816 continue;
817
818 /* Since we do "partial" snarf's, we only check new entries */
819 if (! (sr->sr_flags & MD_SR_CHECK))
820 continue;
821
822 /* If we were mid-add, cleanup */
823 if ((sr->sr_flags & MD_SR_ADD)) {
824 s_delset(sr->sr_setname, &xep);
825 if (! mdisok(&xep))
826 mdclrerror(&xep);
827 continue;
828 }
829
830 /* Make sure we are in the set. */
831 if (sr_hosts(sr))
832 continue;
833
834 /* Check has been done, clear the flag */
835 if ((sr->sr_flags & MD_SR_CHECK))
836 sr->sr_flags &= ~MD_SR_CHECK;
837
838 /*
839 * If we got here, we are in the set, make sure the flags make
840 * sense.
841 */
842 if (! (sr->sr_flags & MD_SR_OK)) {
843 sr->sr_flags &= ~MD_SR_STATE_FLAGS;
844 sr->sr_flags |= MD_SR_OK;
845 commitset(sr, FALSE, &xep);
846 if (! mdisok(&xep))
847 mdclrerror(&xep);
848 }
849
850 /* Make sure all the drives are in a stable state. */
851 sr_drvs(sr);
852 }
853
854 /* Cleanup any stray sidenames */
855 sr_sidenms();
856
857 setsnarfdone = 3;
858 }
859
860 static md_set_record *
sr_in_cache(mddb_recid_t recid)861 sr_in_cache(mddb_recid_t recid)
862 {
863 md_set_record *tsr;
864
865 for (tsr = setrecords; tsr != NULL; tsr = tsr->sr_next)
866 if (tsr->sr_selfid == recid)
867 return (tsr);
868 return ((md_set_record *)NULL);
869 }
870
871 int
set_snarf(md_error_t * ep)872 set_snarf(md_error_t *ep)
873 {
874 md_set_record *sr;
875 md_mnset_record *mnsr;
876 md_set_record *tsr;
877 md_drive_record *dr;
878 mddb_userreq_t *reqp;
879 ur_recid_lst_t *urlp;
880 mddb_recid_t id;
881 mddb_recid_t *p;
882 md_error_t xep = mdnullerror;
883 md_mnnode_record *nr;
884 mddb_set_node_params_t snp;
885 int nodecnt;
886 mndiskset_membershiplist_t *nl, *nl2;
887
888 /* We have done the snarf call */
889 if (setsnarfdone != 0)
890 return (0);
891
892 if (meta_setup_db_locations(ep) != 0) {
893 if (! mdismddberror(ep, MDE_DB_STALE))
894 return (-1);
895 mdclrerror(ep);
896 }
897
898 /*
899 * Get membershiplist from API routine.
900 * If there's an error, just use a NULL
901 * nodelist.
902 */
903 if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
904 nodecnt = 0; /* no nodes are alive */
905 nl = NULL;
906 mdclrerror(ep);
907 }
908
909 /* Let sr_cache_add and dr_cache_add know we are doing the snarf */
910 setsnarfdone = 1;
911
912 /* Go get the set records */
913 id = 0;
914 while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR,
915 &id, ep)) != NULL) {
916 sr->sr_next = NULL;
917 sr->sr_drivechain = NULL;
918
919 /*
920 * Cluster nodename support
921 * Convert nodeid -> nodename
922 * Don't do this for MN disksets since we've already stored
923 * both the nodeid and name.
924 */
925 if (!(MD_MNSET_REC(sr)))
926 sdssc_cm_sr_nid2nm(sr);
927
928 /* If we were mid-cvt, cleanup */
929 if (sr->sr_flags & MD_SR_CVT) {
930 /* If the daemon is calling, cleanup */
931 if (md_in_daemon)
932 url_addl(&url_tode, sr->sr_selfid);
933 continue;
934 }
935
936 if (md_in_daemon)
937 url_addl(&url_used, sr->sr_selfid);
938
939 /* Skip cached records */
940 tsr = sr_in_cache(sr->sr_selfid);
941 if (tsr != (md_set_record *)NULL) {
942 if (MD_MNSET_REC(sr)) {
943 mnsr = (struct md_mnset_record *)sr;
944 Free(mnsr);
945 } else {
946 Free(sr);
947 }
948 if (md_in_daemon)
949 for (dr = tsr->sr_drivechain;
950 dr != (md_drive_record *)NULL;
951 dr = dr->dr_next)
952 url_addl(&url_used, dr->dr_selfid);
953 continue;
954 }
955
956 /* Mark the record as one to be checked */
957 sr->sr_flags |= MD_SR_CHECK;
958
959 sr_cache_add(sr);
960
961 /* If MNdiskset, go get the node records */
962 if (MD_MNSET_REC(sr)) {
963 mnsr = (struct md_mnset_record *)sr;
964 mnsr->sr_nodechain = NULL;
965 p = &mnsr->sr_noderec;
966 while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
967 MDDB_UR_NR, p, ep)) != NULL) {
968 nr->nr_next = NULL;
969
970 if (md_in_daemon)
971 url_addl(&url_used, nr->nr_selfid);
972
973 /*
974 * Turn off ALIVE node flag based on member
975 * list.
976 * If ALIVE flag is not set, reset OWN flag.
977 * If this node is mynode, set the OWN flag
978 * to match the ownership of the diskset.
979 */
980 if (md_in_daemon) {
981 nr->nr_flags &= ~MD_MN_NODE_ALIVE;
982 nl2 = nl;
983 while (nl2) {
984 /*
985 * If in member list,
986 * set alive.
987 */
988 if (nl2->msl_node_id ==
989 nr->nr_nodeid) {
990 nr->nr_flags |=
991 MD_MN_NODE_ALIVE;
992 break;
993 }
994 nl2 = nl2->next;
995 }
996 /*
997 * If mynode is in member list, then
998 * check to see if set is snarfed.
999 * If set snarfed, set own flag;
1000 * otherwise reset it.
1001 * Don't change master even if
1002 * node isn't an owner node, since
1003 * node may be master, but hasn't
1004 * joined the set yet.
1005 */
1006 if (nr->nr_flags & MD_MN_NODE_ALIVE) {
1007 if (strcmp(nr->nr_nodename,
1008 mynode()) == 0) {
1009 if (s_ownset(
1010 mnsr->sr_setno, ep)) {
1011 nr->nr_flags |=
1012 MD_MN_NODE_OWN;
1013 } else {
1014 nr->nr_flags &=
1015 ~MD_MN_NODE_OWN;
1016 }
1017 }
1018 } else {
1019 if (strcmp(nr->nr_nodename,
1020 mynode()) == 0) {
1021 /*
1022 * If my node isn't in member
1023 * list then reset master.
1024 */
1025 mnsr = (struct
1026 md_mnset_record *)sr;
1027 mnsr->sr_master_nodeid =
1028 MD_MN_INVALID_NID;
1029 mnsr->sr_master_nodenm[0] =
1030 '\0';
1031 }
1032 nr->nr_flags &= ~MD_MN_NODE_OWN;
1033 }
1034 }
1035
1036 /*
1037 * Must grab nr_nextrec now since
1038 * mnnr_cache_add may change it
1039 * (mnnr_cache_add is storing the nodes in
1040 * an ascending nodeid order list in order
1041 * to support reconfig).
1042 */
1043 if (nr->nr_nextrec != 0)
1044 p = &nr->nr_nextrec;
1045 else
1046 p = NULL;
1047
1048 mnnr_cache_add((struct md_mnset_record *)sr,
1049 nr);
1050
1051 if ((md_in_daemon) &&
1052 (strcmp(nr->nr_nodename, mynode()) == 0)) {
1053 (void) memset(&snp, 0, sizeof (snp));
1054 snp.sn_nodeid = nr->nr_nodeid;
1055 snp.sn_setno = mnsr->sr_setno;
1056 if (metaioctl(MD_MN_SET_NODEID, &snp,
1057 &snp.sn_mde, NULL) != 0) {
1058 (void) mdstealerror(ep,
1059 &snp.sn_mde);
1060 }
1061 }
1062
1063 if (p == NULL)
1064 break;
1065 }
1066 if (! mdisok(ep)) {
1067 if (! mdissyserror(ep, ENOENT))
1068 goto out;
1069 mdclrerror(ep);
1070 }
1071 }
1072
1073 if (sr->sr_driverec == 0)
1074 continue;
1075
1076 /* Go get the drive records */
1077 p = &sr->sr_driverec;
1078 while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
1079 MDDB_UR_DR, p, ep)) != NULL) {
1080 dr->dr_next = NULL;
1081
1082 if (md_in_daemon)
1083 url_addl(&url_used, dr->dr_selfid);
1084
1085 dr_cache_add(sr, dr);
1086
1087 if (dr->dr_nextrec == 0)
1088 break;
1089
1090 p = &dr->dr_nextrec;
1091 }
1092 if (! mdisok(ep)) {
1093 if (! mdissyserror(ep, ENOENT))
1094 goto out;
1095 mdclrerror(ep);
1096 /*
1097 * If dr_nextrec was not valid, or we had some
1098 * problem getting the record, we end up here.
1099 * get_ur_rec() zeroes the recid we passed in,
1100 * if we had a failure getting a record using a key,
1101 * so we simply commit the set record and valid
1102 * drive records, if this fails, we hand an error
1103 * back to the caller.
1104 */
1105 commitset(sr, FALSE, ep);
1106 if (! mdisok(ep))
1107 goto out;
1108 }
1109 }
1110 if (! mdisok(ep)) {
1111 if (! mdissyserror(ep, ENOENT))
1112 goto out;
1113 mdclrerror(ep);
1114 }
1115
1116 /*
1117 * If the daemon called, go through the USER records and cleanup
1118 * any that are not used by valid sets.
1119 */
1120 if (md_in_daemon) {
1121 id = 0;
1122 /* Make a list of records to delete */
1123 while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET,
1124 MDDB_USER, 0, &id, ep)) != NULL) {
1125 if (reqp->ur_type2 != MDDB_UR_SR &&
1126 reqp->ur_type2 != MDDB_UR_DR) {
1127 Free((void *)(uintptr_t)reqp->ur_data);
1128 Free(reqp);
1129 continue;
1130 }
1131 if (! url_findl(url_used, reqp->ur_recid))
1132 url_addl(&url_tode, reqp->ur_recid);
1133 Free((void *)(uintptr_t)reqp->ur_data);
1134 Free(reqp);
1135 }
1136 if (! mdisok(ep)) {
1137 if (! mdissyserror(ep, ENOENT))
1138 goto out;
1139 mdclrerror(ep);
1140 }
1141
1142 /* Delete all the delete listed records */
1143 for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) {
1144 s_delrec(urlp->url_recid, &xep);
1145 if (! mdisok(&xep))
1146 mdclrerror(&xep);
1147 }
1148 }
1149
1150 url_freel(&url_used);
1151 url_freel(&url_tode);
1152
1153 if (nodecnt)
1154 meta_free_nodelist(nl);
1155
1156 /* Mark the snarf complete */
1157 setsnarfdone = 2;
1158 return (0);
1159
1160 out:
1161 url_freel(&url_used);
1162 url_freel(&url_tode);
1163
1164 sr_cache_flush(1);
1165
1166 if (nodecnt)
1167 meta_free_nodelist(nl);
1168
1169 /* Snarf failed, reset state */
1170 setsnarfdone = 0;
1171
1172 return (-1);
1173 }
1174
1175 void
sr_cache_add(md_set_record * sr)1176 sr_cache_add(md_set_record *sr)
1177 {
1178 md_set_record *tsr;
1179
1180 assert(setsnarfdone != 0);
1181
1182 if (setrecords == NULL) {
1183 setrecords = sr;
1184 return;
1185 }
1186
1187 for (tsr = setrecords; tsr->sr_next != NULL; tsr = tsr->sr_next)
1188 /* void */;
1189 tsr->sr_next = sr;
1190 }
1191
1192 void
sr_cache_del(mddb_recid_t recid)1193 sr_cache_del(mddb_recid_t recid)
1194 {
1195 md_set_record *sr, *tsr;
1196 md_mnset_record *mnsr;
1197
1198 assert(setsnarfdone != 0);
1199
1200 for (sr = tsr = setrecords; sr != NULL; tsr = sr, sr = sr->sr_next) {
1201 if (sr->sr_selfid != recid)
1202 continue;
1203 if (sr == setrecords)
1204 setrecords = sr->sr_next;
1205 else
1206 tsr->sr_next = sr->sr_next;
1207 if (MD_MNSET_REC(sr)) {
1208 mnsr = (struct md_mnset_record *)sr;
1209 Free(mnsr);
1210 } else {
1211 Free(sr);
1212 }
1213 break;
1214 }
1215 if (setrecords == NULL)
1216 setsnarfdone = 0;
1217 }
1218
1219 void
dr_cache_add(md_set_record * sr,md_drive_record * dr)1220 dr_cache_add(md_set_record *sr, md_drive_record *dr)
1221 {
1222 md_drive_record *tdr;
1223
1224 assert(setsnarfdone != 0);
1225
1226 assert(sr != NULL);
1227
1228 if (sr->sr_drivechain == NULL) {
1229 sr->sr_drivechain = dr;
1230 sr->sr_driverec = dr->dr_selfid;
1231 return;
1232 }
1233
1234 for (tdr = sr->sr_drivechain; tdr->dr_next != NULL; tdr = tdr->dr_next)
1235 /* void */;
1236
1237 tdr->dr_next = dr;
1238 tdr->dr_nextrec = dr->dr_selfid;
1239 }
1240
1241 void
dr_cache_del(md_set_record * sr,mddb_recid_t recid)1242 dr_cache_del(md_set_record *sr, mddb_recid_t recid)
1243 {
1244 md_drive_record *dr;
1245 md_drive_record *tdr;
1246
1247 assert(setsnarfdone != 0);
1248
1249 assert(sr != NULL);
1250
1251 for (dr = tdr = sr->sr_drivechain; dr != NULL;
1252 tdr = dr, dr = dr->dr_next) {
1253 if (dr->dr_selfid != recid)
1254 continue;
1255
1256 if (dr == sr->sr_drivechain) {
1257 sr->sr_drivechain = dr->dr_next;
1258 sr->sr_driverec = dr->dr_nextrec;
1259 } else {
1260 tdr->dr_next = dr->dr_next;
1261 tdr->dr_nextrec = dr->dr_nextrec;
1262 }
1263 Free(dr);
1264 break;
1265 }
1266 }
1267
1268 /*
1269 * Nodes must be kept in ascending node id order in order to
1270 * support reconfig.
1271 *
1272 * This routine may change nr->nr_next and nr->nr_nextrec.
1273 */
1274 void
mnnr_cache_add(md_mnset_record * mnsr,md_mnnode_record * nr)1275 mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr)
1276 {
1277 md_mnnode_record *tnr, *tnr_prev;
1278
1279 assert(mnsr != NULL);
1280
1281 if (mnsr->sr_nodechain == NULL) {
1282 mnsr->sr_nodechain = nr;
1283 mnsr->sr_noderec = nr->nr_selfid;
1284 return;
1285 }
1286
1287 /*
1288 * If new_record->nodeid < first_record->nodeid,
1289 * put new_record at beginning of list.
1290 */
1291 if (nr->nr_nodeid < mnsr->sr_nodechain->nr_nodeid) {
1292 nr->nr_next = mnsr->sr_nodechain;
1293 nr->nr_nextrec = mnsr->sr_noderec;
1294 mnsr->sr_nodechain = nr;
1295 mnsr->sr_noderec = nr->nr_selfid;
1296 return;
1297 }
1298
1299 /*
1300 * Walk list looking for place to insert record.
1301 */
1302
1303 tnr_prev = mnsr->sr_nodechain;
1304 tnr = tnr_prev->nr_next;
1305 while (tnr) {
1306 /* Insert new record between tnr_prev and tnr */
1307 if (nr->nr_nodeid < tnr->nr_nodeid) {
1308 nr->nr_next = tnr;
1309 nr->nr_nextrec = tnr->nr_selfid; /* tnr's recid */
1310 tnr_prev->nr_next = nr;
1311 tnr_prev->nr_nextrec = nr->nr_selfid;
1312 return;
1313 }
1314 tnr_prev = tnr;
1315 tnr = tnr->nr_next;
1316 }
1317
1318 /*
1319 * Add record to end of list.
1320 */
1321 tnr_prev->nr_next = nr;
1322 tnr_prev->nr_nextrec = nr->nr_selfid;
1323 }
1324
1325 void
mnnr_cache_del(md_mnset_record * mnsr,mddb_recid_t recid)1326 mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid)
1327 {
1328 md_mnnode_record *nr;
1329 md_mnnode_record *tnr;
1330
1331 assert(mnsr != NULL);
1332
1333 tnr = 0;
1334 nr = mnsr->sr_nodechain;
1335 while (nr) {
1336 if (nr->nr_selfid != recid) {
1337 tnr = nr;
1338 nr = nr->nr_next;
1339 continue;
1340 }
1341
1342 if (nr == mnsr->sr_nodechain) {
1343 mnsr->sr_nodechain = nr->nr_next;
1344 mnsr->sr_noderec = nr->nr_nextrec;
1345 } else {
1346 tnr->nr_next = nr->nr_next;
1347 tnr->nr_nextrec = nr->nr_nextrec;
1348 }
1349 Free(nr);
1350 break;
1351 }
1352 }
1353
1354 int
metad_isautotakebyname(char * setname)1355 metad_isautotakebyname(char *setname)
1356 {
1357 md_error_t error = mdnullerror;
1358 md_set_record *sr;
1359
1360 if (md_in_daemon) {
1361 assert(setsnarfdone != 0);
1362 } else if (set_snarf(&error)) {
1363 mdclrerror(&error);
1364 return (0);
1365 }
1366
1367 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1368 if (strcmp(setname, sr->sr_setname) == 0) {
1369 if (sr->sr_flags & MD_SR_AUTO_TAKE)
1370 return (1);
1371 return (0);
1372 }
1373 }
1374
1375 return (0);
1376 }
1377
1378 int
metad_isautotakebynum(set_t setno)1379 metad_isautotakebynum(set_t setno)
1380 {
1381 md_error_t error = mdnullerror;
1382 md_set_record *sr;
1383
1384 if (md_in_daemon) {
1385 assert(setsnarfdone != 0);
1386 } else if (set_snarf(&error)) {
1387 mdclrerror(&error);
1388 return (0);
1389 }
1390
1391 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1392 if (setno == sr->sr_setno) {
1393 if (sr->sr_flags & MD_SR_AUTO_TAKE)
1394 return (1);
1395 return (0);
1396 }
1397 }
1398
1399 return (0);
1400 }
1401
1402 md_set_record *
metad_getsetbyname(char * setname,md_error_t * ep)1403 metad_getsetbyname(char *setname, md_error_t *ep)
1404 {
1405 md_set_record *sr;
1406 char buf[100];
1407
1408 assert(setsnarfdone != 0);
1409
1410 for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1411 if (strcmp(setname, sr->sr_setname) == 0)
1412 return (sr);
1413
1414 (void) snprintf(buf, sizeof (buf), "setname \"%s\"", setname);
1415 (void) mderror(ep, MDE_NO_SET, buf);
1416 return (NULL);
1417 }
1418
1419 md_set_record *
metad_getsetbynum(set_t setno,md_error_t * ep)1420 metad_getsetbynum(set_t setno, md_error_t *ep)
1421 {
1422 md_set_record *sr;
1423 char buf[100];
1424
1425 if (md_in_daemon)
1426 assert(setsnarfdone != 0);
1427 else if (set_snarf(ep)) /* BYPASS DAEMON mode */
1428 return (NULL);
1429
1430 for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1431 if (setno == sr->sr_setno)
1432 return (sr);
1433
1434 (void) sprintf(buf, "setno %u", setno);
1435 (void) mderror(ep, MDE_NO_SET, buf);
1436 return (NULL);
1437 }
1438
1439
1440 /*
1441 * Commit the set record and all of its associated records
1442 * (drive records, node records for a MNset) to the local mddb.
1443 */
1444 void
commitset(md_set_record * sr,int inc_genid,md_error_t * ep)1445 commitset(md_set_record *sr, int inc_genid, md_error_t *ep)
1446 {
1447 int drc, nrc, rc;
1448 int *recs;
1449 uint_t size;
1450 md_drive_record *dr;
1451 mddb_userreq_t req;
1452 md_mnset_record *mnsr;
1453 md_mnnode_record *nr;
1454
1455 assert(setsnarfdone != 0);
1456
1457 /*
1458 * Cluster nodename support
1459 * Convert nodename -> nodeid
1460 * Don't do this for MN disksets since we've already stored
1461 * both the nodeid and name.
1462 */
1463 if (!(MD_MNSET_REC(sr)))
1464 sdssc_cm_sr_nm2nid(sr);
1465
1466 /* Send down to kernel the data in mddb USER set record */
1467 if (inc_genid)
1468 sr->sr_genid++;
1469 (void) memset(&req, 0, sizeof (req));
1470 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
1471 if (MD_MNSET_REC(sr)) {
1472 req.ur_size = sizeof (*mnsr);
1473 } else {
1474 req.ur_size = sizeof (*sr);
1475 }
1476 req.ur_data = (uintptr_t)sr;
1477 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1478 (void) mdstealerror(ep, &req.ur_mde);
1479 return;
1480 }
1481
1482 /*
1483 * Walk through the drive records associated with this set record
1484 * and send down to kernel the data in mddb USER drive record.
1485 */
1486 drc = 0;
1487 dr = sr->sr_drivechain;
1488 while (dr) {
1489 if (inc_genid)
1490 dr->dr_genid++;
1491 METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid)
1492 req.ur_size = sizeof (*dr);
1493 req.ur_data = (uintptr_t)dr;
1494 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1495 (void) mdstealerror(ep, &req.ur_mde);
1496 return;
1497 }
1498 drc++;
1499 dr = dr->dr_next;
1500 }
1501
1502
1503 /*
1504 * If this set is a multi-node set -
1505 * walk through the node records associated with this set record
1506 * and send down to kernel the data in mddb USER node record.
1507 */
1508 nrc = 0;
1509 if (MD_MNSET_REC(sr)) {
1510 mnsr = (struct md_mnset_record *)sr;
1511 nr = mnsr->sr_nodechain;
1512 while (nr) {
1513 if (inc_genid)
1514 nr->nr_genid++;
1515 METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid)
1516 req.ur_size = sizeof (*nr);
1517 req.ur_data = (uint64_t)(uintptr_t)nr;
1518 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL)
1519 != 0) {
1520 (void) mdstealerror(ep, &req.ur_mde);
1521 return;
1522 }
1523 nrc++;
1524 nr = nr->nr_next;
1525 }
1526 }
1527
1528 /*
1529 * Set up list of mddb USER recids containing set and drive records
1530 * and node records if a MNset.
1531 */
1532 rc = 0;
1533 size = (nrc + drc + 2) * sizeof (int);
1534 recs = Zalloc(size);
1535 /* First recid in list is the set record's id */
1536 recs[rc] = sr->sr_selfid;
1537 rc++;
1538 dr = sr->sr_drivechain;
1539 while (dr) {
1540 /* Now, fill in the drive record ids */
1541 recs[rc] = dr->dr_selfid;
1542 dr = dr->dr_next;
1543 rc++;
1544 }
1545 if (MD_MNSET_REC(sr)) {
1546 nr = mnsr->sr_nodechain;
1547 while (nr) {
1548 /* If a MNset, fill in the node record ids */
1549 recs[rc] = nr->nr_selfid;
1550 nr = nr->nr_next;
1551 rc++;
1552 }
1553 }
1554 /* Set last record to null recid */
1555 recs[rc] = 0;
1556
1557 /* Write out the set and drive and node records to the local mddb */
1558 METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
1559 req.ur_size = size;
1560 req.ur_data = (uintptr_t)recs;
1561 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1562 (void) mdstealerror(ep, &req.ur_mde);
1563 return;
1564 }
1565
1566 /*
1567 * Cluster nodename support
1568 * Convert nodeid -> nodename
1569 * Don't do this for MN disksets since we've already stored
1570 * both the nodeid and name.
1571 */
1572 if (!(MD_MNSET_REC(sr)))
1573 sdssc_cm_sr_nid2nm(sr);
1574
1575 Free(recs);
1576 }
1577
1578 /*
1579 * This routine only handles returns a md_set_record structure even
1580 * if the set record describes a MN set. This will allow pre-MN
1581 * SVM RPC code to access a MN set record and to display it.
1582 *
1583 * The MN SVM RPC code detects if the set record returned describes
1584 * a MN set and then will copy it using mnsetdup.
1585 */
1586 md_set_record *
setdup(md_set_record * sr)1587 setdup(md_set_record *sr)
1588 {
1589 md_set_record *tsr = NULL;
1590 md_drive_record **tdrpp = NULL;
1591
1592 if (sr && (tsr = Malloc(sizeof (*sr))) != NULL) {
1593 (void) memmove(tsr, sr, sizeof (*sr));
1594 tsr->sr_next = NULL;
1595 tdrpp = &tsr->sr_drivechain;
1596 while (*tdrpp) {
1597 *tdrpp = drdup(*tdrpp);
1598 tdrpp = &(*tdrpp)->dr_next;
1599 }
1600 }
1601 return (tsr);
1602 }
1603
1604 /*
1605 * This routine only copies MN set records. If a non-MN set
1606 * record was passed in NULL pointer will be returned.
1607 */
1608 md_mnset_record *
mnsetdup(md_mnset_record * mnsr)1609 mnsetdup(md_mnset_record *mnsr)
1610 {
1611 md_mnset_record *tmnsr = NULL;
1612 md_drive_record **tdrpp = NULL;
1613 md_mnnode_record **tnrpp = NULL;
1614
1615 if (!MD_MNSET_REC(mnsr)) {
1616 return (NULL);
1617 }
1618
1619 if (mnsr && (tmnsr = Malloc(sizeof (*mnsr))) != NULL) {
1620 (void) memmove(tmnsr, mnsr, sizeof (*mnsr));
1621 tmnsr->sr_next = NULL;
1622 tdrpp = &tmnsr->sr_drivechain;
1623 while (*tdrpp) {
1624 *tdrpp = drdup(*tdrpp);
1625 tdrpp = &(*tdrpp)->dr_next;
1626 }
1627 tnrpp = &tmnsr->sr_nodechain;
1628 while (*tnrpp) {
1629 *tnrpp = nrdup(*tnrpp);
1630 tnrpp = &(*tnrpp)->nr_next;
1631 }
1632 }
1633 return (tmnsr);
1634 }
1635
1636 md_drive_record *
drdup(md_drive_record * dr)1637 drdup(md_drive_record *dr)
1638 {
1639 md_drive_record *tdr = NULL;
1640
1641 if (dr && (tdr = Malloc(sizeof (*dr))) != NULL)
1642 (void) memmove(tdr, dr, sizeof (*dr));
1643 return (tdr);
1644 }
1645
1646 md_mnnode_record *
nrdup(md_mnnode_record * nr)1647 nrdup(md_mnnode_record *nr)
1648 {
1649 md_mnnode_record *tnr = NULL;
1650
1651 if (nr && (tnr = Malloc(sizeof (*nr))) != NULL)
1652 (void) memmove(tnr, nr, sizeof (*nr));
1653 return (tnr);
1654 }
1655
1656 /*
1657 * Duplicate parts of the drive decriptor list for this node.
1658 * Only duplicate the drive name string in the mddrivename structure, don't
1659 * need to copy any other pointers since only interested in the flags and
1660 * the drive name (i.e. other pointers will be set to NULL).
1661 * Returns NULL if failure due to Malloc failure.
1662 * Returns pointer (non-NULL) to dup'd list if successful.
1663 */
1664 md_drive_desc *
dd_list_dup(md_drive_desc * dd)1665 dd_list_dup(md_drive_desc *dd)
1666 {
1667 md_drive_desc *orig_dd;
1668 md_drive_desc *copy_dd = NULL, *copy_dd_prev = NULL;
1669 md_drive_desc *copy_dd_head = NULL;
1670 mddrivename_t *copy_dnp;
1671 char *copy_cname;
1672 char *copy_devid;
1673
1674 if (dd == NULL)
1675 return (NULL);
1676
1677 orig_dd = dd;
1678
1679 while (orig_dd) {
1680 copy_dd = Zalloc(sizeof (*copy_dd));
1681 copy_dnp = Zalloc(sizeof (mddrivename_t));
1682 copy_cname = Zalloc(sizeof (orig_dd->dd_dnp->cname));
1683 if (orig_dd->dd_dnp->devid) {
1684 copy_devid = Zalloc(sizeof (orig_dd->dd_dnp->devid));
1685 } else {
1686 copy_devid = NULL;
1687 }
1688 copy_dd->dd_next = NULL;
1689 if ((copy_dd == NULL) || (copy_dnp == NULL) ||
1690 (copy_cname == NULL)) {
1691 while (copy_dd_head) {
1692 copy_dd = copy_dd_head->dd_next;
1693 Free(copy_dd_head);
1694 copy_dd_head = copy_dd;
1695 }
1696 if (copy_dnp)
1697 Free(copy_dnp);
1698 if (copy_dd)
1699 Free(copy_dd);
1700 if (copy_cname)
1701 Free(copy_cname);
1702 if (copy_devid)
1703 Free(copy_devid);
1704 return (NULL);
1705 }
1706 (void) memmove(copy_dd, orig_dd, sizeof (*orig_dd));
1707 (void) strlcpy(copy_cname, orig_dd->dd_dnp->cname,
1708 sizeof (orig_dd->dd_dnp->cname));
1709 copy_dd->dd_next = NULL;
1710 copy_dd->dd_dnp = copy_dnp;
1711 copy_dd->dd_dnp->cname = copy_cname;
1712 if (copy_devid) {
1713 (void) strlcpy(copy_devid, orig_dd->dd_dnp->devid,
1714 sizeof (orig_dd->dd_dnp->devid));
1715 }
1716
1717 if (copy_dd_prev == NULL) {
1718 copy_dd_head = copy_dd;
1719 copy_dd_prev = copy_dd;
1720 } else {
1721 copy_dd_prev->dd_next = copy_dd;
1722 copy_dd_prev = copy_dd;
1723 }
1724 orig_dd = orig_dd->dd_next;
1725 }
1726 copy_dd->dd_next = NULL;
1727 return (copy_dd_head);
1728 }
1729
1730 void
sr_cache_flush(int flushnames)1731 sr_cache_flush(int flushnames)
1732 {
1733 md_set_record *sr, *tsr;
1734 md_mnset_record *mnsr;
1735 md_drive_record *dr, *tdr;
1736 md_mnnode_record *nr, *tnr;
1737
1738 sr = tsr = setrecords;
1739 while (sr != NULL) {
1740 dr = tdr = sr->sr_drivechain;
1741 while (dr != NULL) {
1742 tdr = dr;
1743 dr = dr->dr_next;
1744 Free(tdr);
1745 }
1746 tsr = sr;
1747 sr = sr->sr_next;
1748 if (MD_MNSET_REC(tsr)) {
1749 mnsr = (struct md_mnset_record *)tsr;
1750 nr = tnr = mnsr->sr_nodechain;
1751 while (nr != NULL) {
1752 tnr = nr;
1753 nr = nr->nr_next;
1754 Free(tnr);
1755 }
1756 Free(mnsr);
1757 } else {
1758 Free(tsr);
1759 }
1760 }
1761
1762 setrecords = NULL;
1763
1764 setsnarfdone = 0;
1765
1766 /* This will cause the other caches to be cleared */
1767 if (flushnames)
1768 metaflushnames(0);
1769 }
1770
1771 void
sr_cache_flush_setno(set_t setno)1772 sr_cache_flush_setno(set_t setno)
1773 {
1774 md_set_record *sr, *tsr;
1775 md_mnset_record *mnsr;
1776 md_drive_record *dr, *tdr;
1777
1778 assert(setsnarfdone != 0);
1779
1780 for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) {
1781 if (sr->sr_setno != setno)
1782 continue;
1783
1784 dr = tdr = sr->sr_drivechain;
1785 while (dr != NULL) {
1786 tdr = dr;
1787 dr = dr->dr_next;
1788 Free(tdr);
1789 }
1790 if (sr == setrecords)
1791 setrecords = sr->sr_next;
1792 else
1793 tsr->sr_next = sr->sr_next;
1794 if (MD_MNSET_REC(sr)) {
1795 mnsr = (struct md_mnset_record *)sr;
1796 Free(mnsr);
1797 } else {
1798 Free(sr);
1799 }
1800 break;
1801 }
1802
1803 setsnarfdone = 0;
1804
1805 /* This will cause the other caches to be cleared */
1806 metaflushnames(0);
1807 }
1808
1809 int
s_ownset(set_t setno,md_error_t * ep)1810 s_ownset(set_t setno, md_error_t *ep)
1811 {
1812 mddb_ownset_t ownset_arg;
1813
1814 ownset_arg.setno = setno;
1815 ownset_arg.owns_set = MD_SETOWNER_NONE;
1816
1817 if (metaioctl(MD_DB_OWNSET, &ownset_arg, ep, NULL) != 0)
1818 return (0);
1819
1820 return (ownset_arg.owns_set);
1821 }
1822
1823 void
s_delset(char * setname,md_error_t * ep)1824 s_delset(char *setname, md_error_t *ep)
1825 {
1826 md_set_record *sr;
1827 md_set_record *tsr;
1828 md_drive_record *dr;
1829 md_drive_record *tdr;
1830 md_mnnode_record *nr, *tnr;
1831 mddb_userreq_t req;
1832 char stringbuf[100];
1833 int i;
1834 mdsetname_t *sp = NULL;
1835 mddrivename_t *dn = NULL;
1836 mdname_t *np = NULL;
1837 md_dev64_t dev;
1838 side_t myside = MD_SIDEWILD;
1839 md_error_t xep = mdnullerror;
1840 md_mnset_record *mnsr;
1841 int num_sets = 0;
1842 int num_mn_sets = 0;
1843
1844 (void) memset(&req, 0, sizeof (mddb_userreq_t));
1845
1846 if ((sr = getsetbyname(setname, ep)) == NULL)
1847 return;
1848
1849 sp = metasetnosetname(sr->sr_setno, &xep);
1850 mdclrerror(&xep);
1851
1852 if (MD_MNSET_REC(sr)) {
1853 /*
1854 * If this node is a set owner, halt the set before
1855 * deleting the set records. Ignore any errors since
1856 * s_ownset and halt_set could fail if panic had occurred
1857 * during the add/delete of a node.
1858 */
1859 if (s_ownset(sr->sr_setno, &xep)) {
1860 mdclrerror(&xep);
1861 if (halt_set(sp, &xep))
1862 mdclrerror(&xep);
1863 }
1864 }
1865
1866 (void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname);
1867 (void) unlink(stringbuf);
1868 (void) unlink(meta_lock_name(sr->sr_setno));
1869
1870 if (MD_MNSET_REC(sr)) {
1871 mnsr = (struct md_mnset_record *)sr;
1872 nr = mnsr->sr_nodechain;
1873 while (nr) {
1874 /* Setting myside for later use */
1875 if (strcmp(mynode(), nr->nr_nodename) == 0)
1876 myside = nr->nr_nodeid;
1877
1878 (void) memset(&req, 0, sizeof (req));
1879 METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid)
1880 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde,
1881 NULL) != 0) {
1882 (void) mdstealerror(ep, &req.ur_mde);
1883 free_sr(sr);
1884 return;
1885 }
1886 tnr = nr;
1887 nr = nr->nr_next;
1888
1889 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
1890 sr->sr_setno, tnr->nr_nodeid);
1891
1892 mnnr_cache_del((struct md_mnset_record *)sr,
1893 tnr->nr_selfid);
1894 }
1895 } else {
1896 for (i = 0; i < MD_MAXSIDES; i++) {
1897 /* Skip empty slots */
1898 if (sr->sr_nodes[i][0] == '\0')
1899 continue;
1900
1901 if (strcmp(mynode(), sr->sr_nodes[i]) == 0)
1902 myside = i;
1903
1904 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
1905 sr->sr_setno, i);
1906 }
1907 }
1908
1909 dr = sr->sr_drivechain;
1910 while (dr) {
1911 (void) memset(&req, 0, sizeof (req));
1912 METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid)
1913 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1914 (void) mdstealerror(ep, &req.ur_mde);
1915 free_sr(sr);
1916 return;
1917 }
1918 tdr = dr;
1919 dr = dr->dr_next;
1920
1921 dev = NODEV64;
1922 if (myside != MD_SIDEWILD && sp != NULL) {
1923 dn = metadrivename_withdrkey(sp, myside,
1924 tdr->dr_key, MD_BASICNAME_OK, &xep);
1925 if (dn != NULL) {
1926 uint_t rep_slice;
1927
1928 np = NULL;
1929 if (meta_replicaslice(dn, &rep_slice,
1930 &xep) == 0) {
1931 np = metaslicename(dn, rep_slice, &xep);
1932 }
1933
1934 if (np != NULL)
1935 dev = np->dev;
1936 else
1937 mdclrerror(&xep);
1938 } else
1939 mdclrerror(&xep);
1940 } else
1941 mdclrerror(&xep);
1942
1943 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
1944 sr->sr_setno, dev);
1945 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
1946 MD_LOCAL_SET, dev);
1947
1948 dr_cache_del(sr, tdr->dr_selfid);
1949
1950 }
1951
1952 (void) memset(&req, 0, sizeof (req));
1953 METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid)
1954 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1955 (void) mdstealerror(ep, &req.ur_mde);
1956 free_sr(sr);
1957 return;
1958 }
1959
1960 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno,
1961 NODEV64);
1962
1963 for (tsr = setrecords; tsr; tsr = tsr->sr_next) {
1964 if (tsr == sr)
1965 continue;
1966
1967 num_sets++;
1968 if (MD_MNSET_REC(tsr))
1969 num_mn_sets++;
1970 }
1971
1972 if (num_mn_sets == 0)
1973 (void) meta_smf_disable(META_SMF_MN_DISKSET, NULL);
1974
1975 /* The set we just deleted is the only one left */
1976 if (num_sets == 0)
1977 (void) meta_smf_disable(META_SMF_DISKSET, NULL);
1978
1979 sr_cache_del(sr->sr_selfid);
1980 free_sr(sr);
1981
1982 }
1983
1984 void
s_delrec(mddb_recid_t recid,md_error_t * ep)1985 s_delrec(mddb_recid_t recid, md_error_t *ep)
1986 {
1987 mddb_userreq_t req;
1988
1989 (void) memset(&req, 0, sizeof (req));
1990
1991 METAD_SETUP_SR(MD_DB_DELETE, recid)
1992
1993 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
1994 (void) mdstealerror(ep, &req.ur_mde);
1995 }
1996
1997 /*
1998 * resnarf the imported set
1999 */
2000 int
resnarf_set(set_t setno,md_error_t * ep)2001 resnarf_set(
2002 set_t setno,
2003 md_error_t *ep
2004 )
2005 {
2006 md_set_record *sr;
2007 md_drive_record *dr;
2008 mddb_recid_t id, *p;
2009
2010 if (meta_setup_db_locations(ep) != 0) {
2011 if (! mdismddberror(ep, MDE_DB_STALE))
2012 return (-1);
2013 mdclrerror(ep);
2014 }
2015
2016 setsnarfdone = 1;
2017
2018 id = 0;
2019 while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id,
2020 ep)) != NULL) {
2021
2022 if (sr->sr_setno != setno)
2023 continue;
2024
2025 /* Don't allow resnarf of a multi-node diskset */
2026 if (MD_MNSET_REC(sr))
2027 goto out;
2028
2029 sr->sr_next = NULL;
2030 sr->sr_drivechain = NULL;
2031
2032 if (md_in_daemon)
2033 url_addl(&url_used, sr->sr_selfid);
2034
2035 sr->sr_flags |= MD_SR_CHECK;
2036
2037 sr_cache_add(sr);
2038
2039 if (sr->sr_driverec == 0)
2040 break;
2041
2042 p = &sr->sr_driverec;
2043 while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
2044 MDDB_UR_DR, p, ep)) != NULL) {
2045 dr->dr_next = NULL;
2046
2047 if (md_in_daemon)
2048 url_addl(&url_used, dr->dr_selfid);
2049
2050 dr_cache_add(sr, dr);
2051
2052 if (dr->dr_nextrec == 0)
2053 break;
2054
2055 p = &dr->dr_nextrec;
2056 }
2057 if (! mdisok(ep)) {
2058 if (! mdissyserror(ep, ENOENT))
2059 goto out;
2060 mdclrerror(ep);
2061 commitset(sr, FALSE, ep);
2062 if (! mdisok(ep))
2063 goto out;
2064 }
2065 }
2066 if (! mdisok(ep)) {
2067 if (! mdissyserror(ep, ENOENT))
2068 goto out;
2069 mdclrerror(ep);
2070 }
2071
2072 setsnarfdone = 2;
2073
2074 url_freel(&url_used);
2075 url_freel(&url_tode);
2076 return (0);
2077
2078 out:
2079 url_freel(&url_used);
2080 url_freel(&url_tode);
2081
2082 sr_cache_flush(1);
2083
2084 setsnarfdone = 0;
2085
2086 return (-1);
2087 }
2088