1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <libdevinfo.h>
31 #include <mdiox.h>
32 #include <meta.h>
33 #include "meta_repartition.h"
34 #include "meta_set_prv.h"
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <sys/lvm/md_mddb.h>
39 #include <sys/lvm/md_names.h>
40 #include <sys/lvm/md_crc.h>
41 #include <sys/lvm/md_convert.h>
42
43 typedef struct did_list {
44 void *rdid; /* real did if replicated set */
45 void *did; /* did stored in lb */
46 char *devname;
47 dev_t dev;
48 uint_t did_index;
49 char *minor_name;
50 char *driver_name;
51 int available;
52 struct did_list *next;
53 } did_list_t;
54
/*
 * One entry of the global replicated disk table: maps the device id a
 * replicated disk was recorded with (old_devid) to the device id that
 * replicated_list_lookup() hands back for it (new_devid).
 */
typedef struct replicated_disk replicated_disk_t;

struct replicated_disk {
	void			*old_devid;	/* lookup key */
	void			*new_devid;	/* lookup result */
	replicated_disk_t	*next;		/* next entry, same bucket */
};
60
61 /*
62 * The current implementation limits the max device id length to 256 bytes.
63 * Should the max device id length be increased, this definition would have to
64 * be bumped up accordingly
65 */
66 #define MAX_DEVID_LEN 256
67
68 /*
69 * We store a global list of all the replicated disks in the system. In
70 * order to prevent us from performing a linear search on this list, we
71 * store the disks in a two dimensional sparse array. The disks are bucketed
72 * based on the length of their device ids.
73 */
74 static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};
75
76 /*
77 * The list of replicated disks is built just once and this flag is set
78 * once it's done
79 */
80 int replicated_disk_list_built_pass1 = 0;
81 int replicated_disk_list_built_pass2 = 0;
82 int *replicated_disk_list_built;
83
84 static void free_did_list(did_list_t *did_listp);
85
86 /*
87 * Map logical blk to physical
88 *
89 * This is based on the routine of the same name in the md kernel module (see
90 * file md_mddb.c), with the following caveats:
91 *
92 * - The kernel routine works on in core master blocks, or mddb_mb_ic_t; this
93 * routine works instead on the mddb_mb_t read directly from the disk
94 */
95 daddr_t
getphysblk(mddb_block_t blk,mddb_mb_t * mbp)96 getphysblk(
97 mddb_block_t blk,
98 mddb_mb_t *mbp
99 )
100 {
101 /*
102 * Sanity check: is the block within range? If so, we then assume
103 * that the block range map in the master block is valid and
104 * consistent with the block count. Unfortunately, there is no
105 * reliable way to validate this assumption.
106 */
107 if (blk >= mbp->mb_blkcnt || blk >= mbp->mb_blkmap.m_consecutive)
108 return ((daddr_t)-1);
109
110 return (mbp->mb_blkmap.m_firstblk + blk);
111 }
112
113
114
115 /*
116 * drive_append()
117 *
118 * Append to tail of linked list of md_im_drive_info_t.
119 *
120 * Will allocate space for new node and copy args into new space.
121 *
122 * Returns pointer to new node.
123 */
124 static md_im_drive_info_t *
drive_append(md_im_drive_info_t ** midpp,mddrivename_t * dnp,did_list_t * nonrep_did_listp,minor_t mnum,md_timeval32_t timestamp,md_im_replica_info_t * mirp)125 drive_append(
126 md_im_drive_info_t **midpp,
127 mddrivename_t *dnp,
128 did_list_t *nonrep_did_listp,
129 minor_t mnum,
130 md_timeval32_t timestamp,
131 md_im_replica_info_t *mirp
132 )
133 {
134 md_im_drive_info_t *midp;
135 int o_devid_sz;
136 int devid_sz;
137
138 for (; (*midpp != NULL); midpp = &((*midpp)->mid_next))
139 ;
140
141 midp = *midpp = Zalloc(sizeof (md_im_drive_info_t));
142
143 midp->mid_dnp = dnp;
144
145 /*
146 * If rdid is not NULL then we know we are dealing with
147 * replicated diskset case. 'devid_sz' will always be the
148 * size of a valid devid which can be 'did' or 'rdid'
149 */
150
151 if (nonrep_did_listp->rdid) {
152 devid_sz = devid_sizeof(nonrep_did_listp->rdid);
153 midp->mid_devid = (void *)Malloc(devid_sz);
154 (void) memcpy(midp->mid_devid, nonrep_did_listp->rdid,
155 devid_sz);
156 /*
157 * Also need to store the 'other' devid
158 */
159 o_devid_sz = devid_sizeof((ddi_devid_t)(nonrep_did_listp->did));
160 midp->mid_o_devid = (void *)Malloc(o_devid_sz);
161 (void) memcpy(midp->mid_o_devid, nonrep_did_listp->did,
162 o_devid_sz);
163 midp->mid_o_devid_sz = o_devid_sz;
164 } else {
165 devid_sz = devid_sizeof(nonrep_did_listp->did);
166 midp->mid_devid = (void *)Malloc(devid_sz);
167 /*
168 * In the case of regular diskset, midp->mid_o_devid
169 * will be a NULL pointer
170 */
171 (void) memcpy(midp->mid_devid, nonrep_did_listp->did, devid_sz);
172 }
173
174 midp->mid_devid_sz = devid_sz;
175 midp->mid_setcreatetimestamp = timestamp;
176 midp->mid_available = nonrep_did_listp->available;
177 if (nonrep_did_listp->minor_name) {
178 (void) strlcpy(midp->mid_minor_name,
179 nonrep_did_listp->minor_name, MDDB_MINOR_NAME_MAX);
180 }
181 midp->mid_mnum = mnum;
182 if (nonrep_did_listp->driver_name)
183 midp->mid_driver_name = Strdup(nonrep_did_listp->driver_name);
184 midp->mid_replicas = mirp;
185 if (nonrep_did_listp->devname)
186 midp->mid_devname = Strdup(nonrep_did_listp->devname);
187 return (midp);
188 }
189
190
191
192 /*
193 * drive_append_wrapper()
194 *
195 * Constant time append wrapper; the append function will always walk the list,
196 * this will take a tail argument and use the append function on just the tail
197 * node, doing the appropriate old-tail-next-pointer bookkeeping.
198 */
199 static md_im_drive_info_t **
drive_append_wrapper(md_im_drive_info_t ** tailpp,mddrivename_t * dnp,did_list_t * nonrep_did_listp,minor_t mnum,md_timeval32_t timestamp,md_im_replica_info_t * mirp)200 drive_append_wrapper(
201 md_im_drive_info_t **tailpp,
202 mddrivename_t *dnp,
203 did_list_t *nonrep_did_listp,
204 minor_t mnum,
205 md_timeval32_t timestamp,
206 md_im_replica_info_t *mirp
207 )
208 {
209 (void) drive_append(tailpp, dnp, nonrep_did_listp, mnum, timestamp,
210 mirp);
211
212 if ((*tailpp)->mid_next == NULL)
213 return (tailpp);
214
215 return (&((*tailpp)->mid_next));
216 }
217
218
219
220 /*
221 * replica_append()
222 *
223 * Append to tail of linked list of md_im_replica_info_t.
224 *
225 * Will allocate space for new node and copy args into new space.
226 *
227 * Returns pointer to new node.
228 */
229 static md_im_replica_info_t *
replica_append(md_im_replica_info_t ** mirpp,int flags,daddr32_t offset,daddr32_t length,md_timeval32_t timestamp)230 replica_append(
231 md_im_replica_info_t **mirpp,
232 int flags,
233 daddr32_t offset,
234 daddr32_t length,
235 md_timeval32_t timestamp
236 )
237 {
238 md_im_replica_info_t *mirp;
239
240 for (; (*mirpp != NULL); mirpp = &((*mirpp)->mir_next))
241 ;
242
243 mirp = *mirpp = Zalloc(sizeof (md_im_replica_info_t));
244
245 mirp->mir_flags = flags;
246 mirp->mir_offset = offset;
247 mirp->mir_length = length;
248 mirp->mir_timestamp = timestamp;
249
250 return (mirp);
251
252 }
253
254
255
256 /*
257 * replica_append_wrapper()
258 *
259 * Constant time append wrapper; the append function will always walk the list,
260 * this will take a tail argument and use the append function on just the tail
261 * node, doing the appropriate old-tail-next-pointer bookkeeping.
262 */
263 static md_im_replica_info_t **
replica_append_wrapper(md_im_replica_info_t ** tailpp,int flags,daddr32_t offset,daddr32_t length,md_timeval32_t timestamp)264 replica_append_wrapper(
265 md_im_replica_info_t **tailpp,
266 int flags,
267 daddr32_t offset,
268 daddr32_t length,
269 md_timeval32_t timestamp
270 )
271 {
272 (void) replica_append(tailpp, flags, offset, length, timestamp);
273
274 if ((*tailpp)->mir_next == NULL)
275 return (tailpp);
276
277 return (&(*tailpp)->mir_next);
278 }
279
280 /*
281 * map_replica_disk()
282 *
283 * Searches the device id list for a specific
284 * disk based on the locator block device id array index.
285 *
286 * Returns a pointer to the did_list node if a match was
287 * found or NULL otherwise.
288 */
289 static did_list_t *
map_replica_disk(did_list_t * did_listp,int did_index)290 map_replica_disk(
291 did_list_t *did_listp,
292 int did_index
293 )
294 {
295 did_list_t *tailp = did_listp;
296
297 while (tailp != NULL) {
298 if (tailp->did_index == did_index)
299 return (tailp);
300 tailp = tailp->next;
301 }
302
303 /* not found, return failure */
304 return (NULL);
305 }
306
307 /*
308 * replicated_list_lookup()
309 *
310 * looks up a replicated disk entry in the global replicated disk list
311 * based upon the length of that disk's device id. returns the new device id
312 * for the disk.
313 * If you store the returned devid you must create a local copy.
314 */
315 void *
replicated_list_lookup(uint_t devid_len,void * old_devid)316 replicated_list_lookup(
317 uint_t devid_len,
318 void *old_devid
319 )
320 {
321 replicated_disk_t *head = NULL;
322
323 assert(devid_len <= MAX_DEVID_LEN);
324 head = replicated_disk_list[devid_len];
325
326 if (head == NULL)
327 return (NULL);
328
329 do {
330 if (devid_compare((ddi_devid_t)old_devid,
331 (ddi_devid_t)head->old_devid) == 0)
332 return (head->new_devid);
333 head = head->next;
334 } while (head != NULL);
335
336 return (NULL);
337 }
338
339 /*
340 * replicated_list_insert()
341 *
342 * inserts a replicated disk entry into the global replicated disk list
343 */
344 static void
replicated_list_insert(size_t old_devid_len,void * old_devid,void * new_devid)345 replicated_list_insert(
346 size_t old_devid_len,
347 void *old_devid,
348 void *new_devid
349 )
350 {
351 replicated_disk_t *repl_disk, **first_entry;
352 void *repl_old_devid = NULL;
353
354 assert(old_devid_len <= MAX_DEVID_LEN);
355
356 repl_disk = Zalloc(sizeof (replicated_disk_t));
357 repl_old_devid = Zalloc(old_devid_len);
358 (void) memcpy(repl_old_devid, (void *)old_devid, old_devid_len);
359
360 repl_disk->old_devid = repl_old_devid;
361 repl_disk->new_devid = new_devid;
362
363 first_entry = &replicated_disk_list[old_devid_len];
364
365 if (*first_entry == NULL) {
366 *first_entry = repl_disk;
367 return;
368 }
369
370 repl_disk->next = *first_entry;
371 replicated_disk_list[old_devid_len] = repl_disk;
372 }
373
374 /*
375 * get_replica_disks()
376 *
377 * Will step through the locator records in the supplied locator block, and add
378 * each one with an active replica to a supplied list of md_im_drive_info_t, and
379 * add the appropriate replicas to the md_im_replica_info_t contained therein.
380 */
381 static void
get_replica_disks(md_im_set_desc_t * misp,did_list_t * did_listp,mddb_mb_t * mb,mddb_lb_t * lbp,md_error_t * ep)382 get_replica_disks(
383 md_im_set_desc_t *misp,
384 did_list_t *did_listp,
385 mddb_mb_t *mb,
386 mddb_lb_t *lbp,
387 md_error_t *ep
388 )
389 {
390 mddrivename_t *dnp;
391 int indx, on_list;
392 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
393 int flags;
394 did_list_t *replica_disk;
395 daddr32_t offset;
396 daddr32_t length;
397 md_timeval32_t timestamp;
398 md_im_replica_info_t **mirpp = NULL;
399 md_im_drive_info_t **midpp = &misp->mis_drives;
400 md_im_drive_info_t *midp;
401
402 for (indx = 0; indx < lbp->lb_loccnt; indx++) {
403
404 on_list = 0;
405 if ((lbp->lb_locators[indx].l_flags == 0) ||
406 (lbp->lb_locators[indx].l_flags & MDDB_F_DELETED))
407 continue;
408
409 /*
410 * search the device id list for a
411 * specific ctds based on the locator
412 * block device id array index.
413 */
414 replica_disk = map_replica_disk(did_listp, indx);
415
416 assert(replica_disk != NULL);
417
418
419 /*
420 * metadrivename() can fail for a slice name
421 * if there is not an existing mddrivename_t.
422 * So we use metadiskname() to strip the slice
423 * number.
424 */
425 dnp = metadrivename(&sp, metadiskname(replica_disk->devname),
426 ep);
427
428 for (midp = misp->mis_drives; midp != NULL;
429 midp = midp->mid_next) {
430 if (dnp == midp->mid_dnp) {
431 /*
432 * You could get a dnp match, but if 1 disk
433 * is unavailable and the other isn't, they
434 * will have the same dnp due
435 * to the name being the same, but in fact
436 * are different disks.
437 */
438 if (midp->mid_available ==
439 replica_disk->available) {
440 on_list = 1;
441 mirpp = &midp->mid_replicas;
442 break;
443 }
444 }
445 }
446
447 /*
448 * New on the list so add it
449 */
450 if (!on_list) {
451 mddb_mb_t *mbp;
452 uint_t sliceno;
453 mdname_t *rsp;
454 int fd = -1;
455
456 mbp = Malloc(DEV_BSIZE);
457
458 /*
459 * If the disk isn't available, we don't
460 * want to try to read from it.
461 */
462 if (replica_disk->available == MD_IM_DISK_AVAILABLE) {
463 /* determine the replica slice */
464 if (meta_replicaslice(dnp, &sliceno,
465 ep) != 0) {
466 Free(mbp);
467 continue;
468 }
469
470 /*
471 * if the replica slice size is zero,
472 * don't bother opening
473 */
474 if (dnp->vtoc.parts[sliceno].size == 0) {
475 Free(mbp);
476 continue;
477 }
478
479 if ((rsp = metaslicename(dnp, sliceno,
480 ep)) == NULL) {
481 Free(mbp);
482 continue;
483 }
484
485 if ((fd = open(rsp->rname,
486 O_RDONLY| O_NDELAY)) < 0) {
487 Free(mbp);
488 continue;
489 }
490
491 /*
492 * a drive may not have a master block
493 */
494 if (read_master_block(ep, fd, mbp,
495 DEV_BSIZE) <= 0) {
496 mdclrerror(ep);
497 Free(mbp);
498 (void) close(fd);
499 continue;
500 }
501
502 (void) close(fd);
503 }
504 midpp = drive_append_wrapper(midpp, dnp,
505 replica_disk,
506 meta_getminor(replica_disk->dev),
507 mbp->mb_setcreatetime, NULL);
508 mirpp = &((*midpp)->mid_replicas);
509 Free(mbp);
510 }
511
512 /*
513 * For either of these assertions to fail, it implies
514 * a NULL return from metadrivename() above. Since
515 * the args came from a presumed valid locator block,
516 * that's Bad.
517 */
518 assert(midpp != NULL);
519 assert(mirpp != NULL);
520
521 /*
522 * Extract the parameters describing this replica.
523 *
524 * The magic "1" in the length calculation accounts
525 * for the length of the master block, in addition to
526 * the block count it describes. (The master block
527 * will always take up one block on the disk, and
528 * there will always only be one master block per
529 * replica, even though much of the code is structured
530 * to handle noncontiguous replicas.)
531 */
532 flags = lbp->lb_locators[indx].l_flags;
533 offset = lbp->lb_locators[indx].l_blkno;
534 length = mb->mb_blkcnt + 1;
535 timestamp = mb->mb_setcreatetime;
536
537 mirpp = replica_append_wrapper(mirpp, flags,
538 offset, length, timestamp);
539
540 /*
541 * If we're here it means -
542 *
543 * we've added the disk to the list of
544 * disks.
545 */
546
547 /*
548 * We need to bump up the number of active
549 * replica count for each such replica that is
550 * active so that it can be used later for replica
551 * quorum check.
552 */
553 if (flags & MDDB_F_ACTIVE) {
554 misp->mis_active_replicas++;
555 }
556 }
557 }
558
559
560 /*
561 * append_pnm_rec()
562 *
563 * Append pnm_rec_t entry to list of physical devices in the diskset. Entry
564 * contains a mapping of n_key in NM namespace(or min_key in DID_NM namespace)
565 * to name of the physical device. This list will be used to ensure that the
566 * correct names of the physical devices are printed in the metastat output--the
567 * NM namespace might have stale information about where the physical devices
568 * were previously located when the diskset was last active.
569 */
570 static void
append_pnm_rec(pnm_rec_t ** pnm,mdkey_t min_key,char * n_name)571 append_pnm_rec(
572 pnm_rec_t **pnm,
573 mdkey_t min_key,
574 char *n_name
575 )
576 {
577 pnm_rec_t *tmp_pnm;
578 char *p;
579 int len;
580
581 if ((p = strrchr(n_name, '/')) != NULL)
582 p++;
583
584 /*
585 * Allocates pnm_rec_t record for the physical
586 * device.
587 */
588 len = strlen(p) + 1; /* Length of name plus Null term */
589 tmp_pnm = Malloc(sizeof (pnm_rec_t) + len);
590 (void) strncpy(tmp_pnm->n_name, p, len);
591 tmp_pnm->n_key = min_key;
592
593 /*
594 * Adds new element to head of pnm_rec_t list.
595 */
596 if (*pnm == NULL) {
597 tmp_pnm->next = NULL;
598 *pnm = tmp_pnm;
599 } else {
600 tmp_pnm->next = *pnm;
601 *pnm = tmp_pnm;
602 }
603 }
604
605 /*
606 * free_pnm_rec_list()
607 *
608 * Freeing all pnm_rec_t entries on the list of physical devices in the
609 * diskset.
610 */
611 void
free_pnm_rec_list(pnm_rec_t ** pnm)612 free_pnm_rec_list(pnm_rec_t **pnm)
613 {
614 pnm_rec_t *tmp_pnm, *rm_pnm;
615
616 for (tmp_pnm = *pnm; tmp_pnm != NULL; ) {
617 rm_pnm = tmp_pnm;
618 tmp_pnm = tmp_pnm->next;
619 Free(rm_pnm);
620 }
621
622 *pnm = NULL;
623 }
624
625
626 /*
627 * get_disks_from_didnamespace()
628 * This function was origionally called: get_nonreplica_disks()
629 *
630 * Extracts the disks without replicas from the locator name space and adds them
631 * to the supplied list of md_im_drive_info_t.
632 * If the print verbose option was given then this function will also
633 * correct the nm namespace so that the n_name is the right ctd name
634 */
635 static void
get_disks_from_didnamespace(md_im_set_desc_t * misp,pnm_rec_t ** pnm,mddb_rb_t * nm,mddb_rb_t * shrnm,mddb_rb_t * did_nm,mddb_rb_t * did_shrnm,uint_t imp_flags,int replicated,md_error_t * ep)636 get_disks_from_didnamespace(
637 md_im_set_desc_t *misp,
638 pnm_rec_t **pnm,
639 mddb_rb_t *nm,
640 mddb_rb_t *shrnm,
641 mddb_rb_t *did_nm,
642 mddb_rb_t *did_shrnm,
643 uint_t imp_flags,
644 int replicated,
645 md_error_t *ep
646 )
647 {
648 char *search_path = "/dev";
649 devid_nmlist_t *nmlist;
650 md_im_drive_info_t *midp, **midpp = &misp->mis_drives;
651 mddrivename_t *dnp;
652 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
653 mddb_rb_t *rbp_did = did_nm;
654 mddb_rb_t *rbp_did_shr = did_shrnm;
655 mddb_rb_t *rbp_nm = nm;
656 mddb_rb_t *rbp_shr_nm = shrnm;
657 int on_list = 0;
658 struct devid_min_rec *did_rec;
659 struct devid_shr_rec *did_shr_rec;
660 struct nm_rec *namesp_rec;
661 struct nm_shr_rec *namesp_shr_rec;
662 struct did_shr_name *did;
663 struct did_min_name *min;
664 void *r_did; /* NULL if not a replicated diskset */
665 void *valid_did;
666 int avail = 0;
667 struct nm_name *nmp;
668 struct nm_shared_name *snmp;
669 mdkey_t drv_key, key, dev_key;
670 minor_t mnum = 0;
671 did_list_t *nonrep_did_listp;
672 size_t used_size, offset;
673
674 /*
675 * We got a pointer to an mddb record, which we expect to contain a
676 * name record; extract the pointer thereto.
677 */
678 /* LINTED */
679 did_rec = (struct devid_min_rec *)((caddr_t)(&rbp_did->rb_data));
680 /* LINTED */
681 did_shr_rec = (struct devid_shr_rec *)
682 ((caddr_t)(&rbp_did_shr->rb_data));
683 /* LINTED */
684 namesp_rec = (struct nm_rec *)((caddr_t)(&rbp_nm->rb_data));
685 /* LINTED */
686 namesp_shr_rec = (struct nm_shr_rec *)((caddr_t)(&rbp_shr_nm->rb_data));
687
688 /*
689 * Skip the nm_rec_hdr and iterate on the array of struct minor_name
690 * at the end of the devid_min_rec
691 */
692 for (min = &did_rec->minor_name[0]; min->min_devid_key != 0;
693 /* LINTED */
694 min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) {
695
696 on_list = 0;
697 r_did = NULL;
698 nonrep_did_listp = Zalloc(sizeof (struct did_list));
699
700 /*
701 * For a given DID_NM key, locate the corresponding device
702 * id from DID_NM_SHR
703 */
704 for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
705 /* LINTED */
706 did = (struct did_shr_name *)
707 ((char *)did + DID_SHR_NAMSIZ(did))) {
708 /*
709 * We got a match, this is the device id we're
710 * looking for
711 */
712 if (min->min_devid_key == did->did_key)
713 break;
714 }
715
716 if (did->did_key == 0) {
717 /* we didn't find a match */
718 assert(did->did_key != 0);
719 md_exit(NULL, 1);
720 }
721
722 /*
723 * If replicated diskset
724 */
725 if (replicated) {
726 size_t new_devid_len, old_devid_len;
727 char *temp;
728 /*
729 * In this case, did->did_devid will
730 * be invalid so lookup the real one
731 */
732 temp = replicated_list_lookup(did->did_size,
733 did->did_devid);
734 if (temp == NULL) {
735 /* we have a partial replicated set, fake it */
736 new_devid_len = did->did_size;
737 r_did = Zalloc(new_devid_len);
738 (void) memcpy(r_did, did->did_devid,
739 new_devid_len);
740 } else {
741 new_devid_len = devid_sizeof((ddi_devid_t)temp);
742 r_did = Zalloc(new_devid_len);
743 (void) memcpy(r_did, temp, new_devid_len);
744 }
745 valid_did = r_did;
746 nonrep_did_listp->rdid = Zalloc(new_devid_len);
747 (void) memcpy(nonrep_did_listp->rdid, r_did,
748 new_devid_len);
749 old_devid_len =
750 devid_sizeof((ddi_devid_t)did->did_devid);
751 nonrep_did_listp->did = Zalloc(old_devid_len);
752 (void) memcpy((void *)nonrep_did_listp->did,
753 (void *)did->did_devid, old_devid_len);
754 } else {
755 size_t new_devid_len;
756
757 valid_did = did->did_devid;
758 new_devid_len =
759 devid_sizeof((ddi_devid_t)did->did_devid);
760 nonrep_did_listp->did = Zalloc(new_devid_len);
761 (void) memcpy((void *)nonrep_did_listp->did,
762 (void *)did->did_devid, new_devid_len);
763 }
764
765 /*
766 * Get a ctds mapping for that device id.
767 * Since disk is being imported into this system,
768 * just use the first ctds in list.
769 */
770 if (meta_deviceid_to_nmlist(search_path,
771 (ddi_devid_t)valid_did,
772 &min->min_name[0], &nmlist) == 0) {
773 /*
774 * We know the disk is available. Use the
775 * device information in nmlist.
776 */
777 assert(nmlist[0].devname != NULL);
778 nonrep_did_listp->devname = Strdup(nmlist[0].devname);
779 nonrep_did_listp->available = MD_IM_DISK_AVAILABLE;
780 avail = 0;
781 mnum = meta_getminor(nmlist[0].dev);
782 devid_free_nmlist(nmlist);
783 } else {
784 /*
785 * The disk is not available. That means we need to
786 * use the (old) device information stored in the
787 * namespace.
788 */
789 /* search in nm space for a match */
790 offset = sizeof (struct nm_rec) -
791 sizeof (struct nm_name);
792 used_size = namesp_rec->r_rec_hdr.r_used_size - offset;
793 for (nmp = &namesp_rec->r_name[0]; nmp->n_key != 0;
794 /* LINTED */
795 nmp = (struct nm_name *)((char *)nmp +
796 NAMSIZ(nmp))) {
797 if (nmp->n_key == min->min_key)
798 break;
799 used_size -= NAMSIZ(nmp);
800 if ((int)used_size <= 0) {
801 md_exit(NULL, 1);
802 }
803 }
804
805 if (nmp->n_key == 0) {
806 assert(nmp->n_key != 0);
807 md_exit(NULL, 1);
808 }
809 dev_key = nmp->n_dir_key;
810 snmp = &namesp_shr_rec->sr_name[0];
811 key = snmp->sn_key;
812 /*
813 * Use the namespace n_dir_key to look in the
814 * shared namespace. When we find the matching
815 * key, that is the devname and minor number we
816 * want.
817 */
818 offset = sizeof (struct nm_shr_rec) -
819 sizeof (struct nm_shared_name);
820 used_size = namesp_shr_rec->sr_rec_hdr.r_used_size -
821 offset;
822 while (key != 0) {
823 if (dev_key == key) {
824 /*
825 * This complicated looking series
826 * of code creates a devname of the
827 * form <sn_name>/<n_name> which
828 * will look like /dev/dsk/c1t4d0s0.
829 */
830 nonrep_did_listp->devname =
831 Zalloc(strlen(nmp->n_name) +
832 strlen(snmp->sn_name) + 2);
833 (void) strlcpy(
834 nonrep_did_listp->devname,
835 snmp->sn_name,
836 strlen(snmp->sn_name));
837 (void) strlcat(
838 nonrep_did_listp->devname, "/",
839 strlen(nmp->n_name) +
840 strlen(snmp->sn_name) + 2);
841 (void) strlcat(
842 nonrep_did_listp->devname,
843 nmp->n_name,
844 strlen(nmp->n_name) +
845 strlen(snmp->sn_name) + 2);
846 mnum = nmp->n_minor;
847 break;
848 }
849 /* LINTED */
850 snmp = (struct nm_shared_name *)((char *)snmp +
851 SHR_NAMSIZ(snmp));
852 key = snmp->sn_key;
853 used_size -= SHR_NAMSIZ(snmp);
854 if ((int)used_size <= 0) {
855 md_exit(NULL, 1);
856 }
857 }
858 if (key == 0) {
859 nonrep_did_listp->devname = NULL;
860 mnum = 0;
861 }
862
863 nonrep_did_listp->available = MD_IM_DISK_NOT_AVAILABLE;
864 nonrep_did_listp->minor_name = Strdup(min->min_name);
865 avail = 1;
866 drv_key = nmp->n_drv_key;
867 snmp = &namesp_shr_rec->sr_name[0];
868 key = snmp->sn_key;
869 /*
870 * Use the namespace n_drv_key to look in the
871 * shared namespace. When we find the matching
872 * key, that is the driver name for the disk.
873 */
874 offset = sizeof (struct nm_shr_rec) -
875 sizeof (struct nm_shared_name);
876 used_size = namesp_shr_rec->sr_rec_hdr.r_used_size -
877 offset;
878 while (key != 0) {
879 if (drv_key == key) {
880 nonrep_did_listp->driver_name =
881 Strdup(snmp->sn_name);
882 break;
883 }
884 /* LINTED */
885 snmp = (struct nm_shared_name *)((char *)snmp +
886 SHR_NAMSIZ(snmp));
887 key = snmp->sn_key;
888 used_size -= SHR_NAMSIZ(snmp);
889 if ((int)used_size <= 0) {
890 md_exit(NULL, 1);
891 }
892 }
893 if (key == 0)
894 nonrep_did_listp->driver_name = NULL;
895 }
896 dnp = metadrivename(&sp,
897 metadiskname(nonrep_did_listp->devname), ep);
898 /*
899 * Add drive to pnm_rec_t list of physical devices for
900 * metastat output.
901 */
902 if (imp_flags & META_IMP_VERBOSE) {
903 append_pnm_rec(pnm, min->min_key,
904 nonrep_did_listp->devname);
905 }
906
907 assert(dnp != NULL);
908 /* Is it already on the list? */
909 for (midp = misp->mis_drives; midp != NULL;
910 midp = midp->mid_next) {
911 if (midp->mid_dnp == dnp) {
912 if (midp->mid_available ==
913 nonrep_did_listp->available) {
914 on_list = 1;
915 break;
916 }
917 }
918 }
919
920 if (!on_list) {
921 mddb_mb_t *mbp;
922 uint_t sliceno;
923 mdname_t *rsp;
924 int fd = -1;
925
926 mbp = Malloc(DEV_BSIZE);
927
928 if (!avail) {
929 /* determine the replica slice */
930 if (meta_replicaslice(dnp, &sliceno,
931 ep) != 0) {
932 Free(mbp);
933 free_did_list(nonrep_did_listp);
934 continue;
935 }
936
937 /*
938 * if the replica slice size is zero,
939 * don't bother opening
940 */
941 if (dnp->vtoc.parts[sliceno].size
942 == 0) {
943 Free(mbp);
944 free_did_list(nonrep_did_listp);
945 continue;
946 }
947
948 if ((rsp = metaslicename(dnp, sliceno,
949 ep)) == NULL) {
950 Free(mbp);
951 free_did_list(nonrep_did_listp);
952 continue;
953 }
954
955 if ((fd = open(rsp->rname,
956 O_RDONLY| O_NDELAY)) < 0) {
957 Free(mbp);
958 free_did_list(nonrep_did_listp);
959 continue;
960 }
961
962 /*
963 * a drive may not have a master block
964 */
965 if (read_master_block(ep, fd, mbp,
966 DEV_BSIZE) <= 0) {
967 mdclrerror(ep);
968 Free(mbp);
969 free_did_list(nonrep_did_listp);
970 (void) close(fd);
971 continue;
972 }
973
974 (void) close(fd);
975 }
976 /*
977 * If it is replicated diskset,
978 * r_did will be non-NULL.
979 * Passing the devname as NULL because field
980 * is not currently used for a non-replica disk.
981 */
982 midpp = drive_append_wrapper(midpp,
983 dnp, nonrep_did_listp,
984 mnum, mbp->mb_setcreatetime, NULL);
985 Free(mbp);
986 free_did_list(nonrep_did_listp);
987 }
988 free_did_list(nonrep_did_listp);
989 }
990 }
991
992 /*
993 * set_append()
994 *
995 * Append to tail of linked list of md_im_set_desc_t.
996 *
997 * Will allocate space for new node AND populate it by extracting disks with
998 * and without replicas from the locator blocks and locator namespace.
999 *
1000 * Returns pointer to new node.
1001 */
1002 static md_im_set_desc_t *
set_append(md_im_set_desc_t ** mispp,did_list_t * did_listp,mddb_mb_t * mb,mddb_lb_t * lbp,mddb_rb_t * nm,mddb_rb_t * shrnm,pnm_rec_t ** pnm,mddb_rb_t * did_nm,mddb_rb_t * did_shrnm,uint_t imp_flags,md_error_t * ep)1003 set_append(
1004 md_im_set_desc_t **mispp,
1005 did_list_t *did_listp,
1006 mddb_mb_t *mb,
1007 mddb_lb_t *lbp,
1008 mddb_rb_t *nm,
1009 mddb_rb_t *shrnm,
1010 pnm_rec_t **pnm,
1011 mddb_rb_t *did_nm,
1012 mddb_rb_t *did_shrnm,
1013 uint_t imp_flags,
1014 md_error_t *ep
1015 )
1016 {
1017
1018 md_im_set_desc_t *misp;
1019 set_t setno = mb->mb_setno;
1020 int partial = imp_flags & MD_IM_PARTIAL_DISKSET;
1021 int replicated = imp_flags & MD_IM_SET_REPLICATED;
1022
1023 /* run to end of list */
1024 for (; (*mispp != NULL); mispp = &((*mispp)->mis_next))
1025 ;
1026
1027 /* allocate new list element */
1028 misp = *mispp = Zalloc(sizeof (md_im_set_desc_t));
1029
1030 if (replicated)
1031 misp->mis_flags = MD_IM_SET_REPLICATED;
1032
1033 misp->mis_oldsetno = setno;
1034 misp->mis_partial = partial;
1035
1036 /* Get the disks with and without replicas */
1037 get_replica_disks(misp, did_listp, mb, lbp, ep);
1038
1039 if (nm != NULL && did_nm != NULL && did_shrnm != NULL) {
1040 get_disks_from_didnamespace(misp, pnm, nm, shrnm, did_nm,
1041 did_shrnm, imp_flags, replicated, ep);
1042 }
1043
1044 /*
1045 * An error in this struct could come from either of
1046 * the above routines;
1047 * in both cases, we want to pass it back on up.
1048 */
1049
1050 return (misp);
1051 }
1052
1053
1054 /*
1055 * add_disk_names()
1056 *
1057 * Iterator to walk the minor node tree of the device snapshot, adding only the
1058 * first non-block instance of each non-cdrom minor node to a list of disks.
1059 */
1060 static int
add_disk_names(di_node_t node,di_minor_t minor,void * args)1061 add_disk_names(di_node_t node, di_minor_t minor, void *args)
1062 {
1063 char *search_path = "/dev";
1064 ddi_devid_t devid = di_devid(node);
1065 devid_nmlist_t *nm;
1066 char *min = di_minor_name(minor);
1067 md_im_names_t *cnames = (md_im_names_t *)args;
1068 static di_node_t save_node = NULL;
1069
1070 /*
1071 * skip CD devices
1072 * If a device does not have a device id, we can't
1073 * do anything with it so just exclude it from our
1074 * list.
1075 *
1076 * This would also encompass CD devices and floppy
1077 * devices that don't have a device id.
1078 */
1079 if (devid == NULL) {
1080 return (DI_WALK_CONTINUE);
1081 }
1082
1083 /* char disk devices (as opposed to block) */
1084 if (di_minor_spectype(minor) == S_IFCHR) {
1085
1086 /* only first occurrence (slice 0) of each instance */
1087 if (save_node == NULL || node != save_node) {
1088 save_node = node;
1089 if (meta_deviceid_to_nmlist(search_path, devid,
1090 min, &nm) == 0) {
1091 int index = cnames->min_count++;
1092
1093 assert(nm->devname != NULL);
1094 cnames->min_names =
1095 Realloc(cnames->min_names,
1096 cnames->min_count *
1097 sizeof (char *));
1098
1099 assert(cnames->min_names != NULL);
1100 cnames->min_names[index] =
1101 metadiskname(nm->devname);
1102 devid_free_nmlist(nm);
1103 }
1104 }
1105 }
1106 return (DI_WALK_CONTINUE);
1107 }
1108
1109
1110
1111 /*
1112 * meta_list_disks()
1113 *
1114 * Snapshots the device tree and extracts disk devices from the snapshot.
1115 */
1116 int
meta_list_disks(md_error_t * ep,md_im_names_t * cnames)1117 meta_list_disks(md_error_t *ep, md_im_names_t *cnames)
1118 {
1119 di_node_t root_node;
1120
1121 assert(cnames != NULL);
1122 cnames->min_count = 0;
1123 cnames->min_names = NULL;
1124
1125 if ((root_node = di_init("/", DINFOCPYALL|DINFOFORCE))
1126 == DI_NODE_NIL) {
1127 return (mdsyserror(ep, errno, NULL));
1128 }
1129
1130 (void) di_walk_minor(root_node, DDI_NT_BLOCK, 0, cnames,
1131 add_disk_names);
1132
1133 di_fini(root_node);
1134 return (0);
1135 }
1136
1137 /*
1138 * meta_imp_drvused
1139 *
1140 * Checks if given drive is mounted, swapped, part of disk configuration
1141 * or in use by SVM. ep also has error code set up if drive is in use.
1142 *
1143 * Returns 1 if drive is in use.
1144 * Returns 0 if drive is not in use.
1145 */
1146 int
meta_imp_drvused(mdsetname_t * sp,mddrivename_t * dnp,md_error_t * ep)1147 meta_imp_drvused(
1148 mdsetname_t *sp,
1149 mddrivename_t *dnp,
1150 md_error_t *ep
1151 )
1152 {
1153 md_error_t status = mdnullerror;
1154 md_error_t *db_ep = &status;
1155
1156 /*
1157 * We pass in db_ep to meta_setup_db_locations
1158 * and never ever use the error contained therein
1159 * because all we're interested in is a check to
1160 * see whether any local metadbs are present.
1161 */
1162 if ((meta_check_drivemounted(sp, dnp, ep) != 0) ||
1163 (meta_check_driveswapped(sp, dnp, ep) != 0) ||
1164 (((meta_setup_db_locations(db_ep) == 0) &&
1165 ((meta_check_drive_inuse(sp, dnp, 1, ep) != 0) ||
1166 (meta_check_driveinset(sp, dnp, ep) != 0))))) {
1167 return (1);
1168 } else {
1169 return (0);
1170 }
1171 }
1172
1173 /*
1174 * meta_prune_cnames()
1175 *
1176 * Removes in-use disks from the list prior to further processing.
1177 *
1178 * Return value depends on err_on_prune flag: if set, and one or more disks
1179 * are pruned, the return list will be the pruned disks. If not set, or if no
1180 * disks are pruned, the return list will be the unpruned disks.
1181 */
1182 mddrivenamelist_t *
meta_prune_cnames(md_error_t * ep,md_im_names_t * cnames,int err_on_prune)1183 meta_prune_cnames(
1184 md_error_t *ep,
1185 md_im_names_t *cnames,
1186 int err_on_prune
1187 )
1188 {
1189 int d;
1190 int fcount = 0;
1191 mddrivenamelist_t *dnlp = NULL;
1192 mddrivenamelist_t **dnlpp = &dnlp;
1193 mddrivenamelist_t *fdnlp = NULL;
1194 mddrivenamelist_t **fdnlpp = &fdnlp;
1195 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep);
1196
1197 for (d = 0; d < cnames->min_count; ++d) {
1198 mddrivename_t *dnp;
1199
1200 dnp = metadrivename(&sp, cnames->min_names[d], ep);
1201 if (dnp == NULL) {
1202 /*
1203 * Assuming we're interested in knowing about
1204 * whatever error occurred, but not in stopping.
1205 */
1206 mde_perror(ep, cnames->min_names[d]);
1207 mdclrerror(ep);
1208
1209 continue;
1210 }
1211
1212 /*
1213 * Check if the drive is inuse.
1214 */
1215 if (meta_imp_drvused(sp, dnp, ep)) {
1216 fdnlpp = meta_drivenamelist_append_wrapper(fdnlpp, dnp);
1217 fcount++;
1218 mdclrerror(ep);
1219 } else {
1220 dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
1221 }
1222 }
1223
1224 if (fcount) {
1225 if (err_on_prune) {
1226 (void) mddserror(ep, MDE_DS_DRIVEINUSE, 0,
1227 NULL, fdnlp->drivenamep->cname, NULL);
1228 metafreedrivenamelist(dnlp);
1229 return (fdnlp);
1230 }
1231 metafreedrivenamelist(fdnlp);
1232 }
1233
1234 return (dnlp);
1235 }
1236
1237 /*
1238 * read_master_block()
1239 *
1240 * Returns:
1241 * < 0 for failure
1242 * 0 for no valid master block
1243 * 1 for valid master block
1244 *
1245 * The supplied buffer will be filled in for EITHER 0 or 1.
1246 */
1247 int
read_master_block(md_error_t * ep,int fd,void * bp,int bsize)1248 read_master_block(
1249 md_error_t *ep,
1250 int fd,
1251 void *bp,
1252 int bsize
1253 )
1254 {
1255 mddb_mb_t *mbp = bp;
1256 int rval = 1;
1257
1258 assert(bp != NULL);
1259
1260 if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0)
1261 return (mdsyserror(ep, errno, NULL));
1262
1263 if (read(fd, bp, bsize) != bsize)
1264 return (mdsyserror(ep, errno, NULL));
1265
1266 /*
1267 * The master block magic number can either be MDDB_MAGIC_MB in
1268 * the case of a real master block, or, it can be MDDB_MAGIC_DU
1269 * in the case of a dummy master block
1270 */
1271 if ((mbp->mb_magic != MDDB_MAGIC_MB) &&
1272 (mbp->mb_magic != MDDB_MAGIC_DU)) {
1273 rval = 0;
1274 (void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL);
1275 }
1276
1277 if (mbp->mb_revision != MDDB_REV_MB) {
1278 rval = 0;
1279 }
1280
1281 return (rval);
1282 }
1283
1284 /*
1285 * read_locator_block()
1286 *
1287 * Returns:
1288 * < 0 for failure
1289 * 0 for no valid locator block
1290 * 1 for valid locator block
1291 */
1292 int
read_locator_block(md_error_t * ep,int fd,mddb_mb_t * mbp,void * bp,int bsize)1293 read_locator_block(
1294 md_error_t *ep,
1295 int fd,
1296 mddb_mb_t *mbp,
1297 void *bp,
1298 int bsize
1299 )
1300 {
1301 mddb_lb_t *lbp = bp;
1302
1303 assert(bp != NULL);
1304
1305 if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0)
1306 return (mdsyserror(ep, errno, NULL));
1307
1308 if (read(fd, bp, bsize) != bsize)
1309 return (mdsyserror(ep, errno, NULL));
1310
1311 return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0);
1312 }
1313
1314 int
phys_read(md_error_t * ep,int fd,mddb_mb_t * mbp,daddr_t blk,void * bp,int bcount)1315 phys_read(
1316 md_error_t *ep,
1317 int fd,
1318 mddb_mb_t *mbp,
1319 daddr_t blk,
1320 void *bp,
1321 int bcount
1322 )
1323 {
1324 daddr_t pblk;
1325
1326 if ((pblk = getphysblk(blk, mbp)) < 0)
1327 return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32,
1328 MD_LOCAL_SET, blk, NULL));
1329
1330 if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0)
1331 return (mdsyserror(ep, errno, NULL));
1332
1333 if (read(fd, bp, bcount) != bcount)
1334 return (mdsyserror(ep, errno, NULL));
1335
1336 return (bcount);
1337 }
1338
1339 /*
1340 * read_locator_block_did()
1341 *
1342 * Returns:
1343 * < 0 for failure
1344 * 0 for no valid locator name struct
1345 * 1 for valid locator name struct
1346 */
1347 int
read_locator_block_did(md_error_t * ep,int fd,mddb_mb_t * mbp,mddb_lb_t * lbp,void * bp,int bsize)1348 read_locator_block_did(
1349 md_error_t *ep,
1350 int fd,
1351 mddb_mb_t *mbp,
1352 mddb_lb_t *lbp,
1353 void *bp,
1354 int bsize
1355 )
1356 {
1357 int lb_didfirstblk = lbp->lb_didfirstblk;
1358 mddb_did_blk_t *lbdidp = bp;
1359 int rval;
1360
1361 assert(bp != NULL);
1362
1363 if ((rval = phys_read(ep, fd, mbp, lb_didfirstblk, bp, bsize)) < 0)
1364 return (rval);
1365
1366 return ((lbdidp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
1367 }
1368
1369 /*
1370 * read_locator_names()
1371 *
1372 * Returns:
1373 * < 0 for failure
1374 * 0 for no valid locator name struct
1375 * 1 for valid locator name struct
1376 */
1377 int
read_locator_names(md_error_t * ep,int fd,mddb_mb_t * mbp,mddb_lb_t * lbp,void * bp,int bsize)1378 read_locator_names(
1379 md_error_t *ep,
1380 int fd,
1381 mddb_mb_t *mbp,
1382 mddb_lb_t *lbp,
1383 void *bp,
1384 int bsize
1385 )
1386 {
1387 int lnfirstblk = lbp->lb_lnfirstblk;
1388 mddb_ln_t *lnp = bp;
1389 int rval;
1390
1391 assert(bp != NULL);
1392
1393 if ((rval = phys_read(ep, fd, mbp, lnfirstblk, bp, bsize)) < 0)
1394 return (rval);
1395
1396 return ((lnp->ln_magic == MDDB_MAGIC_LN) ? 1 : 0);
1397 }
1398
1399
1400 int
read_database_block(md_error_t * ep,int fd,mddb_mb_t * mbp,int dbblk,void * bp,int bsize)1401 read_database_block(
1402 md_error_t *ep,
1403 int fd,
1404 mddb_mb_t *mbp,
1405 int dbblk,
1406 void *bp,
1407 int bsize
1408 )
1409 {
1410 mddb_db_t *dbp = bp;
1411 int rval;
1412
1413 assert(bp != NULL);
1414
1415 if ((rval = phys_read(ep, fd, mbp, dbblk, bp, bsize)) < 0)
1416 return (rval);
1417
1418 return ((dbp->db_magic == MDDB_MAGIC_DB) ? 1 : 0);
1419 }
1420
1421 int
read_loc_didblks(md_error_t * ep,int fd,mddb_mb_t * mbp,int didblk,void * bp,int bsize)1422 read_loc_didblks(
1423 md_error_t *ep,
1424 int fd,
1425 mddb_mb_t *mbp,
1426 int didblk,
1427 void *bp,
1428 int bsize
1429 )
1430 {
1431 mddb_did_blk_t *didbp = bp;
1432 int rval;
1433
1434 assert(bp != NULL);
1435
1436 if ((rval = phys_read(ep, fd, mbp, didblk, bp, bsize)) < 0)
1437 return (rval);
1438
1439 return ((didbp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
1440 }
1441
1442
1443 int
read_loc_didinfo(md_error_t * ep,int fd,mddb_mb_t * mbp,int infoblk,void * bp,int bsize)1444 read_loc_didinfo(
1445 md_error_t *ep,
1446 int fd,
1447 mddb_mb_t *mbp,
1448 int infoblk,
1449 void *bp,
1450 int bsize
1451 )
1452 {
1453 int rval = 1;
1454 mddb_did_info_t *infop = bp;
1455
1456 assert(bp != NULL);
1457
1458 if ((rval = phys_read(ep, fd, mbp, infoblk, bp, bsize)) < 0)
1459 return (rval);
1460
1461 return ((infop->info_flags & MDDB_DID_EXISTS) ? 1 : 0);
1462 }
1463
1464 /*
1465 * meta_nm_rec()
1466 *
1467 * Return the DE corresponding to the requested namespace record type.
1468 * Modifies dbp to have a firstentry if one isn't there.
1469 */
1470 static mddb_de_t *
meta_nm_rec(mddb_db_t * dbp,mddb_type_t rectype)1471 meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype)
1472 {
1473 mddb_de_t *dep;
1474 int desize;
1475
1476 if (dbp->db_firstentry != NULL) {
1477 /* LINTED */
1478 dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry)
1479 + sizeof (dbp->db_firstentry));
1480 dbp->db_firstentry = dep;
1481 while (dep && dep->de_next) {
1482 desize = sizeof (*dep) - sizeof (dep->de_blks) +
1483 sizeof (daddr_t) * dep->de_blkcount;
1484 /* LINTED */
1485 dep->de_next = (mddb_de_t *)
1486 ((caddr_t)dep + desize);
1487 dep = dep->de_next;
1488 }
1489 }
1490
1491 for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) {
1492 if (dep->de_type1 == rectype)
1493 break;
1494 }
1495 return (dep);
1496 }
1497
1498 /*
1499 * read_nm_rec()
1500 *
1501 * Reads the NM, NM_DID or NM_DID_SHR record in the mddb and stores the
1502 * configuration data in the buffer 'nm'
1503 *
1504 * Returns:
1505 * < 0 for failure
1506 * 0 for no valid NM/DID_NM/DID_NM_SHR record
1507 * 1 for valid NM/DID_NM/DID_NM_SHR record
1508 *
1509 */
1510 static int
read_nm_rec(md_error_t * ep,int fd,mddb_mb_t * mbp,mddb_lb_t * lbp,char ** nm,mddb_type_t rectype,char * diskname)1511 read_nm_rec(
1512 md_error_t *ep,
1513 int fd,
1514 mddb_mb_t *mbp,
1515 mddb_lb_t *lbp,
1516 char **nm,
1517 mddb_type_t rectype,
1518 char *diskname
1519 )
1520 {
1521 int cnt, dbblk, rval = 0;
1522 char db[DEV_BSIZE];
1523 mddb_de_t *dep;
1524 /*LINTED*/
1525 mddb_db_t *dbp = (mddb_db_t *)&db;
1526 char *tmpnm = NULL;
1527 daddr_t pblk;
1528
1529 for (dbblk = lbp->lb_dbfirstblk;
1530 dbblk != 0;
1531 dbblk = dbp->db_nextblk) {
1532
1533 if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp,
1534 sizeof (db))) <= 0)
1535 return (rval);
1536
1537 /*
1538 * Locate NM/DID_NM/DID_NM_SHR record. Normally there is
1539 * only one record per mddb. There is a rare case when we
1540 * can't expand the record. If this is the case then we
1541 * will have multiple NM/DID_NM/DID_NM_SHR records linked
1542 * with r_next_recid.
1543 *
1544 * For now assume the normal case and handle the extended
1545 * namespace in Phase 2.
1546 */
1547 if ((dep = meta_nm_rec(dbp, rectype)) != NULL)
1548 break;
1549 }
1550
1551 /* If meta_nm_rec() never succeeded, bail out */
1552 if (dep == NULL)
1553 return (0);
1554
1555 /* Read in the appropriate record and return configurations */
1556 tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount));
1557 *nm = tmpnm;
1558
1559 for (cnt = 0; cnt < dep->de_blkcount; cnt++) {
1560 if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) {
1561 rval = mdmddberror(ep, MDE_DB_BLKRANGE,
1562 NODEV32, MD_LOCAL_SET,
1563 dep->de_blks[cnt], diskname);
1564 return (rval);
1565 }
1566
1567 if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) {
1568 rval = mdsyserror(ep, errno, diskname);
1569 return (rval);
1570 }
1571
1572 if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) {
1573 rval = mdsyserror(ep, errno, diskname);
1574 return (rval);
1575 }
1576
1577 tmpnm += DEV_BSIZE;
1578 }
1579 return (1);
1580 }
1581
1582 /*
1583 * is_replicated
1584 *
1585 * Determines whether a disk has been replicated or not. It checks to see
1586 * if the device id stored in the master block is the same as the device id
1587 * registered for that disk on the current system. If the two device ids are
1588 * different, then we know that the disk has been replicated.
1589 *
1590 * If need_devid is set and the disk is replicated, fill in the new_devid.
1591 * Also, if need_devid is set, this routine allocates memory for the device
1592 * ids; the caller of this routine is responsible for free'ing up the memory.
1593 *
1594 * Returns:
1595 * MD_IM_SET_REPLICATED if it's a replicated disk
1596 * 0 if it's not a replicated disk
1597 */
1598 static int
is_replicated(int fd,mddb_mb_t * mbp,int need_devid,void ** new_devid)1599 is_replicated(
1600 int fd,
1601 mddb_mb_t *mbp,
1602 int need_devid,
1603 void **new_devid
1604 )
1605 {
1606 ddi_devid_t current_devid;
1607 int retval = 0;
1608 size_t new_devid_len;
1609
1610 if (mbp->mb_devid_magic != MDDB_MAGIC_DE)
1611 return (retval);
1612
1613 if (devid_get(fd, ¤t_devid) != 0)
1614 return (retval);
1615
1616 if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0)
1617 retval = MD_IM_SET_REPLICATED;
1618
1619 if (retval && need_devid) {
1620 new_devid_len = devid_sizeof(current_devid);
1621 *new_devid = Zalloc(new_devid_len);
1622 (void) memcpy(*new_devid, (void *)current_devid, new_devid_len);
1623 }
1624
1625 devid_free(current_devid);
1626 return (retval);
1627 }
1628
1629 /*
1630 * free_replicated_disks_list()
1631 *
1632 * this frees up all the memory allocated by build_replicated_disks_list
1633 */
1634 static void
free_replicated_disks_list()1635 free_replicated_disks_list()
1636 {
1637 replicated_disk_t **repl_disk, *temp;
1638 int index;
1639
1640 for (index = 0; index <= MAX_DEVID_LEN; index++) {
1641 repl_disk = &replicated_disk_list[index];
1642
1643 while (*repl_disk != NULL) {
1644 temp = *repl_disk;
1645 *repl_disk = (*repl_disk)->next;
1646
1647 Free(temp->old_devid);
1648 Free(temp->new_devid);
1649 Free(temp);
1650 }
1651 }
1652 }
1653
1654 /*
1655 * build_replicated_disks_list()
1656 *
1657 * Builds a list of disks that have been replicated using either a
1658 * remote replication or a point-in-time replication software. The
1659 * list is stored as a two dimensional sparse array.
1660 *
1661 * Returns
1662 * 1 on success
1663 * 0 on failure
1664 */
1665 int
build_replicated_disks_list(md_error_t * ep,mddrivenamelist_t * dnlp)1666 build_replicated_disks_list(
1667 md_error_t *ep,
1668 mddrivenamelist_t *dnlp
1669 )
1670 {
1671 uint_t sliceno;
1672 int fd = -1;
1673 mddrivenamelist_t *dp;
1674 mdname_t *rsp;
1675 mddb_mb_t *mbp;
1676
1677 mbp = Malloc(DEV_BSIZE);
1678
1679 for (dp = dnlp; dp != NULL; dp = dp->next) {
1680 mddrivename_t *dnp;
1681 void *new_devid;
1682
1683 dnp = dp->drivenamep;
1684 /* determine the replica slice */
1685 if (meta_replicaslice(dnp, &sliceno, ep) != 0)
1686 continue;
1687
1688 /*
1689 * if the replica slice size is zero, don't bother opening
1690 */
1691 if (dnp->vtoc.parts[sliceno].size == 0)
1692 continue;
1693
1694 if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
1695 continue;
1696
1697 if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0)
1698 return (mdsyserror(ep, errno, rsp->rname));
1699
1700 /* a drive may not have a master block so we just continue */
1701 if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
1702 (void) close(fd);
1703 mdclrerror(ep);
1704 continue;
1705 }
1706
1707 if (is_replicated(fd, mbp, 1, &new_devid)) {
1708 replicated_list_insert(mbp->mb_devid_len,
1709 mbp->mb_devid, new_devid);
1710 }
1711 (void) close(fd);
1712 }
1713 *replicated_disk_list_built = 1;
1714
1715 Free(mbp);
1716 return (1);
1717 }
1718
1719 /*
1720 * free_did_list()
1721 *
1722 * Frees the did_list allocated as part of build_did_list
1723 */
1724 static void
free_did_list(did_list_t * did_listp)1725 free_did_list(
1726 did_list_t *did_listp
1727 )
1728 {
1729 did_list_t *temp, *head;
1730
1731 head = did_listp;
1732
1733 while (head != NULL) {
1734 temp = head;
1735 head = head->next;
1736 if (temp->rdid)
1737 Free(temp->rdid);
1738 if (temp->did)
1739 Free(temp->did);
1740 if (temp->devname)
1741 Free(temp->devname);
1742 if (temp->minor_name)
1743 Free(temp->minor_name);
1744 if (temp->driver_name)
1745 Free(temp->driver_name);
1746 Free(temp);
1747 }
1748 }
1749
1750 /*
1751 * meta_free_im_replica_info
1752 *
1753 * Frees the md_im_replica_info list
1754 */
1755 static void
meta_free_im_replica_info(md_im_replica_info_t * mirp)1756 meta_free_im_replica_info(
1757 md_im_replica_info_t *mirp
1758 )
1759 {
1760 md_im_replica_info_t *r, *temp;
1761
1762 r = mirp;
1763
1764 while (r != NULL) {
1765 temp = r;
1766 r = r->mir_next;
1767
1768 Free(temp);
1769 }
1770 }
1771
1772 /*
1773 * meta_free_im_drive_info
1774 *
1775 * Frees the md_im_drive_info list
1776 */
1777 static void
meta_free_im_drive_info(md_im_drive_info_t * midp)1778 meta_free_im_drive_info(
1779 md_im_drive_info_t *midp
1780 )
1781 {
1782 md_im_drive_info_t *d, *temp;
1783
1784 d = midp;
1785
1786 while (d != NULL) {
1787 temp = d;
1788 d = d->mid_next;
1789
1790 if (temp->mid_available & MD_IM_DISK_NOT_AVAILABLE)
1791 /*
1792 * dnp is not on the drivenamelist and is a temp
1793 * dnp for metaimport if the disk is unavailable.
1794 * We need to specifically free it because of this.
1795 * If the disk is available, standard drivelist freeing
1796 * will kick in so we don't need to do it.
1797 */
1798 metafreedrivename(temp->mid_dnp);
1799 if (temp->mid_devid)
1800 Free(temp->mid_devid);
1801 if (temp->mid_o_devid)
1802 Free(temp->mid_o_devid);
1803 if (temp->mid_driver_name)
1804 Free(temp->mid_driver_name);
1805 if (temp->mid_devname)
1806 Free(temp->mid_devname);
1807 if (temp->mid_replicas) {
1808 meta_free_im_replica_info(temp->mid_replicas);
1809 temp->mid_replicas = NULL;
1810 }
1811 if (temp->overlap) {
1812 meta_free_im_drive_info(temp->overlap);
1813 temp->overlap = NULL;
1814 }
1815 Free(temp);
1816 }
1817 }
1818
1819 /*
1820 * meta_free_im_set_desc
1821 *
1822 * Frees the md_im_set_desc_t list
1823 */
1824 void
meta_free_im_set_desc(md_im_set_desc_t * misp)1825 meta_free_im_set_desc(
1826 md_im_set_desc_t *misp
1827 )
1828 {
1829 md_im_set_desc_t *s, *temp;
1830
1831 s = misp;
1832
1833 while (s != NULL) {
1834 temp = s;
1835 s = s->mis_next;
1836 if (temp->mis_drives) {
1837 meta_free_im_drive_info(temp->mis_drives);
1838 temp->mis_drives = NULL;
1839 }
1840 Free(temp);
1841 }
1842 }
1843
1844 /*
1845 * build_did_list()
1846 *
1847 * Build a list of device ids corresponding to disks in the locator block.
1848 * Memory is allocated here for the nodes in the did_list. The callers of
1849 * this routine must also call free_did_list to free up the memory after
1850 * they're done.
1851 *
1852 * Returns:
1853 * < 0 for failure
1854 * 0 for no valid locator block device id array
1855 * 1 for valid locator block device id array
1856 * ENOTSUP partial diskset, not all disks in a diskset on the
1857 * system where import is being executed
1858 */
1859 static int
build_did_list(md_error_t * ep,int fd,mddb_mb_t * mb,mddb_lb_t * lbp,mddb_did_blk_t * lbdidp,mddb_ln_t * lnp,did_list_t ** did_listp,int replicated)1860 build_did_list(
1861 md_error_t *ep,
1862 int fd,
1863 mddb_mb_t *mb,
1864 mddb_lb_t *lbp,
1865 mddb_did_blk_t *lbdidp,
1866 mddb_ln_t *lnp,
1867 did_list_t **did_listp,
1868 int replicated
1869 )
1870 {
1871 char *search_path = "/dev";
1872 char *minor_name;
1873 int rval, cnt;
1874 devid_nmlist_t *nm;
1875 uint_t did_info_length = 0;
1876 uint_t did_info_firstblk = 0;
1877 did_list_t *new, *head = NULL;
1878 char *bp = NULL, *temp;
1879 mddb_did_info_t *did_info = NULL;
1880 void *did = NULL;
1881 size_t new_devid_len;
1882 int partial = 0;
1883 int partial_replicated = 0;
1884
1885 for (cnt = 0; cnt < MDDB_NLB; cnt++) {
1886 partial_replicated = 0;
1887 did_info = &lbdidp->blk_info[cnt];
1888
1889 if (!(did_info->info_flags & MDDB_DID_EXISTS))
1890 continue;
1891
1892 new = Zalloc(sizeof (did_list_t));
1893 new->did = Zalloc(did_info->info_length);
1894
1895 /*
1896 * If we can re-use the buffer that has already been
1897 * read in then just use it. Otherwise free
1898 * the previous one and alloc a new one
1899 */
1900 if (did_info->info_firstblk != did_info_firstblk) {
1901
1902 did_info_length = dbtob(did_info->info_blkcnt);
1903 did_info_firstblk = did_info->info_firstblk;
1904
1905 if (bp)
1906 Free(bp);
1907 bp = temp = Zalloc(did_info_length);
1908
1909 if ((rval = phys_read(ep, fd, mb, did_info_firstblk,
1910 (void *)bp, did_info_length)) < 0)
1911 return (rval);
1912 } else {
1913 temp = bp;
1914 }
1915
1916 temp += did_info->info_offset;
1917 (void) memcpy(new->did, temp, did_info->info_length);
1918 new->did_index = cnt;
1919 minor_name = did_info->info_minor_name;
1920
1921 /*
1922 * If we are not able to find the ctd mapping corresponding
1923 * to a given device id, it probably means the device id in
1924 * question is not registered with the system.
1925 *
1926 * Highly likely that the only time this happens, we've hit
1927 * a case where not all the disks that are a part of the
1928 * diskset were moved before importing the diskset.
1929 *
1930 * If set is a replicated diskset, then the device id we get
1931 * from 'lb' will be the 'other' did and we need to lookup
1932 * the real one before we call this routine.
1933 */
1934 if (replicated) {
1935 temp = replicated_list_lookup(did_info->info_length,
1936 new->did);
1937 if (temp == NULL) {
1938 /* we have a partial replicated set, fake it */
1939 new_devid_len = devid_sizeof((ddi_devid_t)new->did);
1940 new->rdid = Zalloc(new_devid_len);
1941 (void) memcpy(new->rdid, new->did, new_devid_len);
1942 did = new->rdid;
1943 partial_replicated = 1;
1944 } else {
1945 new_devid_len = devid_sizeof((ddi_devid_t)temp);
1946 new->rdid = Zalloc(new_devid_len);
1947 (void) memcpy(new->rdid, temp, new_devid_len);
1948 did = new->rdid;
1949 }
1950 } else {
1951 did = new->did;
1952 }
1953
1954 if (devid_valid((ddi_devid_t)(did)) == 0) {
1955 return (-1);
1956 }
1957
1958 if (partial_replicated || meta_deviceid_to_nmlist(search_path,
1959 (ddi_devid_t)did, minor_name, &nm) != 0) {
1960 int len = 0;
1961
1962 /*
1963 * Partial diskset case. We'll need to get the
1964 * device information from the metadb instead
1965 * of the output (nm) of meta_deviceid_to_nmlist.
1966 */
1967 len = strlen(lnp->ln_prefixes[0].pre_data) +
1968 strlen(lnp->ln_suffixes[0][cnt].suf_data) + 2;
1969 new->devname = Zalloc(len);
1970 (void) strlcpy(new->devname,
1971 lnp->ln_prefixes[0].pre_data,
1972 strlen(lnp->ln_prefixes[0].pre_data) + 1);
1973 (void) strlcat(new->devname, "/", len);
1974 (void) strlcat(new->devname,
1975 lnp->ln_suffixes[0][cnt].suf_data, len);
1976 new->minor_name = Strdup(minor_name);
1977 new->next = head;
1978 new->available = MD_IM_DISK_NOT_AVAILABLE;
1979 new->driver_name = Strdup(lbp->lb_drvnm[0].dn_data);
1980 new->dev = lbp->lb_locators[cnt].l_dev;
1981 head = new;
1982 partial = ENOTSUP;
1983 continue;
1984 }
1985
1986 /*
1987 * Disk is there. Grab device information from nm structure.
1988 */
1989 assert(nm->devname != NULL);
1990 new->devname = Strdup(nm->devname);
1991 new->dev = nm->dev;
1992 new->minor_name = Strdup(minor_name);
1993 new->available = MD_IM_DISK_AVAILABLE;
1994
1995 devid_free_nmlist(nm);
1996
1997 new->next = head;
1998 head = new;
1999 }
2000
2001 /* Free the last bp */
2002 if (bp)
2003 Free(bp);
2004 *did_listp = head;
2005 if (partial)
2006 return (partial);
2007 return (1);
2008 }
2009 /*
2010 * check_nm_disks
2011 * Checks the disks listed in the shared did namespace to see if they
2012 * are accessable on the system. If not, return ENOTSUP error to
2013 * indicate we have a partial diskset.
2014 * Returns:
2015 * < 0 for failure
2016 * 0 success
2017 * ENOTSUP partial diskset, not all disks in a diskset on the
2018 * system where import is being executed
2019 */
2020 static int
check_nm_disks(struct devid_min_rec * did_nmp,struct devid_shr_rec * did_shrnmp)2021 check_nm_disks(
2022 struct devid_min_rec *did_nmp,
2023 struct devid_shr_rec *did_shrnmp
2024 )
2025 {
2026 char *search_path = "/dev";
2027 char *minor_name = NULL;
2028 uint_t used_size, min_used_size;
2029 ddi_devid_t did;
2030 devid_nmlist_t *nm;
2031 void *did_min_namep;
2032 void *did_shr_namep;
2033 size_t did_nsize, did_shr_nsize;
2034
2035 used_size = did_shrnmp->did_rec_hdr.r_used_size -
2036 sizeof (struct nm_rec_hdr);
2037 min_used_size = did_nmp->min_rec_hdr.r_used_size -
2038 sizeof (struct nm_rec_hdr);
2039 did_shr_namep = (void *)(&did_shrnmp->device_id[0]);
2040 while (used_size > (int)sizeof (struct did_shr_name)) {
2041 did_min_namep = (void *)(&did_nmp->minor_name[0]);
2042 /* grab device id and minor name from the shared spaces */
2043 did = (ddi_devid_t)(((struct did_shr_name *)
2044 did_shr_namep)->did_devid);
2045 if (devid_valid(did) == 0) {
2046 return (-1);
2047 }
2048
2049 /*
2050 * We need to check that the DID_NM and DID_SHR_NM are in
2051 * sync. It is possible that we took a panic between writing
2052 * the two areas to disk. This would be cleaned up on the
2053 * next snarf but we don't know for sure that snarf has even
2054 * happened since we're reading from disk.
2055 */
2056 while (((struct did_shr_name *)did_shr_namep)->did_key !=
2057 ((struct did_min_name *)did_min_namep)->min_devid_key) {
2058 did_nsize = DID_NAMSIZ((struct did_min_name *)
2059 did_min_namep);
2060 did_min_namep = ((void *)((char *)did_min_namep +
2061 did_nsize));
2062 min_used_size -= did_nsize;
2063 if (min_used_size < (int)sizeof (struct did_min_name))
2064 continue;
2065 }
2066 minor_name = ((struct did_min_name *)did_min_namep)->min_name;
2067
2068 /*
2069 * Try to find disk in the system. If we can't find the
2070 * disk, we have a partial diskset.
2071 */
2072 if ((meta_deviceid_to_nmlist(search_path,
2073 did, minor_name, &nm)) != 0) {
2074 /* Partial diskset detected */
2075 return (ENOTSUP);
2076 }
2077 devid_free_nmlist(nm);
2078 used_size -= DID_SHR_NAMSIZ((struct did_shr_name *)
2079 did_shr_namep);
2080 /* increment to next item in the shared spaces */
2081 did_shr_nsize = DID_SHR_NAMSIZ((struct did_shr_name *)
2082 did_shr_namep);
2083 did_shr_namep = ((void *)((char *)did_shr_namep +
2084 did_shr_nsize));
2085 }
2086 return (0);
2087 }
2088
2089
2090 /*
2091 * report_metadb_info()
2092 *
2093 * Generates metadb output for the diskset.
2094 *
2095 */
2096 static void
report_metadb_info(md_im_set_desc_t * misp,char * indent)2097 report_metadb_info(
2098 md_im_set_desc_t *misp,
2099 char *indent
2100 )
2101 {
2102 md_im_drive_info_t *d;
2103 md_im_replica_info_t *r;
2104 char *unk_str = "";
2105 int i;
2106
2107 (void) printf("%s\t%5.5s\t\t%9.9s\t%11.11s\n", indent, gettext("flags"),
2108 gettext("first blk"), gettext("block count"));
2109
2110 unk_str = gettext("unknown");
2111
2112 /*
2113 * Looping through all drives in the diskset to print
2114 * out information about the drive and if the verbose
2115 * option is set print out replica data.
2116 */
2117 for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
2118
2119 if (d->mid_replicas != NULL) {
2120 for (r = d->mid_replicas; r != NULL;
2121 r = r->mir_next) {
2122 (void) printf("%s", indent);
2123 for (i = 0; i < MDDB_FLAGS_LEN; i++) {
2124 if (r->mir_flags & (1 << i)) {
2125 (void) putchar(
2126 MDDB_FLAGS_STRING[i]);
2127 } else {
2128 (void) putchar(' ');
2129 }
2130 }
2131 if ((r->mir_offset == -1) && (r->mir_length
2132 == -1)) {
2133 (void) printf("%7.7s\t\t%7.7s\t",
2134 unk_str, unk_str);
2135 } else if (r->mir_length == -1) {
2136 (void) printf("%i\t\t%7.7s\t",
2137 r->mir_offset, unk_str);
2138 } else {
2139 (void) printf("%i\t\t%i\t",
2140 r->mir_offset, r->mir_length);
2141 }
2142 (void) printf("\t%s\n",
2143 d->mid_devname);
2144 }
2145 }
2146 }
2147 (void) printf("\n");
2148 }
2149
2150 /*
2151 * meta_replica_quorum will determine if the disks in the set to be
2152 * imported have enough valid replicas to have quorum.
2153 *
2154 * RETURN:
2155 * -1 Set doesn't have quorum
2156 * 0 Set does have quorum
2157 */
2158 int
meta_replica_quorum(md_im_set_desc_t * misp)2159 meta_replica_quorum(
2160 md_im_set_desc_t *misp
2161 )
2162 {
2163 md_im_drive_info_t *midp;
2164 md_im_replica_info_t *midr;
2165 int replica_count = 0;
2166
2167 for (midp = misp->mis_drives; midp != NULL;
2168 midp = midp->mid_next) {
2169
2170 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE)
2171 continue;
2172
2173 /*
2174 * The drive is okay. Now count its replicas
2175 */
2176 for (midr = midp->mid_replicas; midr != NULL;
2177 midr = midr->mir_next) {
2178 replica_count++;
2179 }
2180 }
2181
2182 if (misp->mis_active_replicas & 1) {
2183 /* odd number of replicas */
2184 if (replica_count < (misp->mis_active_replicas + 1)/2)
2185 return (-1);
2186 } else {
2187 /* even number of replicas */
2188 if (replica_count <= ((misp->mis_active_replicas + 1)/2))
2189 return (-1);
2190 }
2191
2192 return (0);
2193 }
2194
2195
2196 /*
2197 * Choose the best drive to use for the metaimport command.
2198 */
2199 md_im_drive_info_t *
pick_good_disk(md_im_set_desc_t * misp)2200 pick_good_disk(md_im_set_desc_t *misp)
2201 {
2202 md_timeval32_t *setcrtime; /* set creation time */
2203 md_im_drive_info_t *good_disk = NULL;
2204 md_im_drive_info_t *midp = NULL;
2205 md_im_replica_info_t *mirp;
2206
2207 setcrtime = &(misp->mis_drives->mid_replicas->mir_timestamp);
2208 for (midp = misp->mis_drives; (midp != NULL) && (good_disk == NULL);
2209 midp = midp->mid_next) {
2210 /* drive must be available */
2211 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
2212 continue;
2213 }
2214 for (mirp = midp->mid_replicas; mirp != NULL;
2215 mirp = mirp->mir_next) {
2216 /* replica must be active to be a good one */
2217 if (mirp->mir_flags & MDDB_F_ACTIVE) {
2218 if ((setcrtime->tv_sec ==
2219 midp-> mid_setcreatetimestamp.tv_sec) &&
2220 (setcrtime->tv_usec ==
2221 midp->mid_setcreatetimestamp.tv_usec)) {
2222 good_disk = midp;
2223 break;
2224 }
2225 }
2226 }
2227 }
2228 return (good_disk);
2229 }
2230
2231 /*
2232 * report_set_info()
2233 *
2234 * Returns:
2235 * < 0 for failure
2236 * 0 for success
2237 *
2238 */
2239 static int
report_set_info(md_im_set_desc_t * misp,mddb_mb_t * mb,mddb_lb_t * lbp,mddb_rb_t * nm,pnm_rec_t ** pnm,mdname_t * rsp,int fd,uint_t imp_flags,int set_count,int overlap,md_im_drive_info_t * overlap_disks,md_error_t * ep)2240 report_set_info(
2241 md_im_set_desc_t *misp,
2242 mddb_mb_t *mb,
2243 mddb_lb_t *lbp,
2244 mddb_rb_t *nm,
2245 pnm_rec_t **pnm,
2246 mdname_t *rsp,
2247 int fd,
2248 uint_t imp_flags,
2249 int set_count,
2250 int overlap,
2251 md_im_drive_info_t *overlap_disks,
2252 md_error_t *ep
2253 )
2254 {
2255 int rval = 0;
2256 md_im_drive_info_t *d;
2257 md_im_drive_info_t *good_disk = NULL;
2258 int i;
2259 int in = META_INDENT;
2260 char indent[MAXPATHLEN];
2261 md_timeval32_t lastaccess; /* stores last modified timestamp */
2262 int has_overlap = 0;
2263 int no_quorum = 0;
2264 int partial = 0;
2265
2266 /* Calculates the correct indentation. */
2267 indent[0] = 0;
2268 for (i = 0; i < in; i++)
2269 (void) strlcat(indent, " ", sizeof (indent));
2270
2271 /*
2272 * This will print before the information for the first diskset
2273 * if the verbose option was set.
2274 */
2275 if (set_count == 1) {
2276 if (imp_flags & META_IMP_REPORT) {
2277 (void) printf("\n%s:\n\n",
2278 gettext("Disksets eligible for import"));
2279 }
2280 }
2281
2282 partial = misp->mis_partial;
2283 good_disk = pick_good_disk(misp);
2284 if (good_disk == NULL) {
2285 return (rval);
2286 }
2287
2288 /*
2289 * Make the distinction between a regular diskset and
2290 * a replicated diskset. Also make the distinction
2291 * between a partial vs. full diskset.
2292 */
2293 if (partial == MD_IM_PARTIAL_DISKSET) {
2294 if (misp->mis_flags & MD_IM_SET_REPLICATED) {
2295 if (imp_flags & META_IMP_REPORT) {
2296 (void) printf("%i) %s:\n", set_count, gettext(
2297 "Found partial replicated diskset "
2298 "containing disks"));
2299 } else {
2300 (void) printf("\n%s:\n", gettext(
2301 "Importing partial replicated diskset "
2302 "containing disks"));
2303 }
2304 } else {
2305 if (imp_flags & META_IMP_REPORT) {
2306 (void) printf("%i) %s:\n", set_count, gettext(
2307 "Found partial regular diskset containing "
2308 "disks"));
2309 } else {
2310 (void) printf("\n%s:\n", gettext(
2311 "Importing partial regular diskset "
2312 "containing disks"));
2313 }
2314 }
2315 } else {
2316 if (misp->mis_flags & MD_IM_SET_REPLICATED) {
2317 if (imp_flags & META_IMP_REPORT) {
2318 (void) printf("%i) %s:\n", set_count, gettext(
2319 "Found replicated diskset containing "
2320 "disks"));
2321 } else {
2322 (void) printf("\n%s:\n", gettext(
2323 "Importing replicated diskset containing "
2324 "disks"));
2325 }
2326 } else {
2327 if (imp_flags & META_IMP_REPORT) {
2328 (void) printf("%i) %s:\n", set_count, gettext(
2329 "Found regular diskset containing disks"));
2330 } else {
2331 (void) printf("\n%s:\n", gettext(
2332 "Importing regular diskset containing "
2333 "disks"));
2334 }
2335 }
2336 }
2337
2338 /*
2339 * Check each drive in the set. If it's unavailable or
2340 * an overlap tell the user.
2341 */
2342 for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
2343 (void) fprintf(stdout, " %s", d->mid_dnp->cname);
2344 if (d->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
2345 (void) fprintf(stdout, " (UNAVAIL)");
2346 }
2347 if (overlap) {
2348 md_im_drive_info_t **chain;
2349 /*
2350 * There is the potential for an overlap, see if
2351 * this disk is one of the overlapped disks.
2352 */
2353 for (chain = &overlap_disks; *chain != NULL;
2354 chain = &(*chain)->overlap) {
2355 if (strcmp(d->mid_dnp->cname,
2356 (*chain)->mid_dnp->cname) == 0) {
2357 (void) fprintf(stdout, " (CONFLICT)");
2358 has_overlap = 1;
2359 break;
2360 }
2361 }
2362 }
2363 (void) fprintf(stdout, "\n");
2364 }
2365
2366 /*
2367 * This note explains the (UNAVAIL) that appears next to the
2368 * disks in the diskset that are not available.
2369 */
2370 if (partial) {
2371 (void) printf("%s%s\n%s%s\n\n", indent,
2372 gettext("(UNAVAIL) WARNING: This disk is unavailable on"
2373 " this system."), indent, gettext("Import may corrupt "
2374 "data in the diskset."));
2375 }
2376
2377 /*
2378 * This note explains the (CONFLICT) that appears next to the
2379 * disks whose lb_inittime timestamp does not
2380 * match the rest of the diskset.
2381 */
2382 if (has_overlap) {
2383 (void) printf("%s%s\n%s%s\n\n", indent,
2384 gettext("(CONFLICT) WARNING: This disk has been reused in "
2385 "another diskset or system configuration."), indent,
2386 gettext("Import may corrupt data in the diskset."));
2387 }
2388
2389 /*
2390 * If the verbose flag was given on the command line,
2391 * we will print out the metastat -c information , the
2392 * creation time, and last modified time for the diskset.
2393 */
2394 if (imp_flags & META_IMP_VERBOSE) {
2395 (void) printf("%s%s\n", indent,
2396 gettext("Metadatabase information:"));
2397 report_metadb_info(misp, indent);
2398
2399 /*
2400 * Printing creation time and last modified time.
2401 * Last modified: uses the global variable "lastaccess",
2402 * which is set to the last updated timestamp from all of
2403 * the database blocks(db_timestamp) or record blocks
2404 * (rb_timestamp).
2405 * Creation time is the locator block init time
2406 * (lb_inittime).
2407 */
2408 lastaccess = good_disk->mid_replicas->mir_timestamp;
2409
2410 (void) printf("%s%s\n", indent,
2411 gettext("Metadevice information:"));
2412 rval = report_metastat_info(mb, lbp, nm, pnm, rsp, fd,
2413 &lastaccess, ep);
2414 if (rval < 0) {
2415 return (rval);
2416 }
2417
2418 (void) printf("%s%s:\t%s\n", indent,
2419 gettext("Creation time"),
2420 meta_print_time(&good_disk->mid_replicas->mir_timestamp));
2421 (void) printf("%s%s:\t%s\n", indent,
2422 gettext("Last modified time"),
2423 meta_print_time(&lastaccess));
2424 } else {
2425 /*
2426 * Even if the verbose option is not set, we will print the
2427 * creation time for the diskset.
2428 */
2429 (void) printf("%s%s:\t%s\n", indent, gettext("Creation time"),
2430 meta_print_time(&good_disk->mid_replicas->mir_timestamp));
2431 }
2432
2433
2434 /*
2435 * If the diskset is not actually being imported, then we
2436 * print out extra information about how to import it.
2437 * If the verbose flag was not set, then we will also
2438 * print out information about how to obtain verbose output.
2439 */
2440 if (imp_flags & META_IMP_REPORT) {
2441 /*
2442 * TRANSLATION_NOTE
2443 *
2444 * The translation of the phrase "For more information
2445 * about this set" will be followed by a ":" and a
2446 * suggested command (untranslatable) that the user
2447 * may use to request additional information.
2448 */
2449 if (!(imp_flags & META_IMP_VERBOSE)) {
2450 (void) printf("%s%s:\n%s %s -r -v %s\n", indent,
2451 gettext("For more information about this diskset"),
2452 indent, myname, good_disk->mid_dnp->cname);
2453 }
2454
2455 if (meta_replica_quorum(misp) != 0)
2456 no_quorum = 1;
2457
2458 /*
2459 * TRANSLATION_NOTE
2460 *
2461 * The translation of the phrase "To import this set"
2462 * will be followed by a ":" and a suggested command
2463 * (untranslatable) that the user may use to import
2464 * the specified diskset.
2465 */
2466 if (partial || has_overlap || no_quorum) {
2467 (void) printf("%s%s:\n%s %s -f -s <newsetname> %s\n",
2468 indent, gettext("To import this diskset"), indent,
2469 myname, good_disk->mid_dnp->cname);
2470 } else {
2471 (void) printf("%s%s:\n%s %s -s <newsetname> %s\n",
2472 indent, gettext("To import this diskset"), indent,
2473 myname, good_disk->mid_dnp->cname);
2474 }
2475 }
2476 (void) printf("\n\n");
2477
2478 return (rval);
2479 }
2480
2481
2482 /*
2483 * meta_get_and_report_set_info
2484 *
2485 * Scans a given drive for set specific information. If the given drive
2486 * has a shared metadb, scans the shared metadb for information pertaining
2487 * to the set.
2488 * If imp_flags has META_IMP_PASS1 set don't report.
2489 *
2490 * Returns:
2491 * <0 for failure
2492 * 0 success but no replicas were found
2493 * 1 success and a replica was found
2494 */
2495 int
meta_get_and_report_set_info(mddrivenamelist_t * dp,md_im_set_desc_t ** mispp,int local_mb_ok,uint_t imp_flags,int * set_count,int overlap,md_im_drive_info_t * overlap_disks,md_error_t * ep)2496 meta_get_and_report_set_info(
2497 mddrivenamelist_t *dp,
2498 md_im_set_desc_t **mispp,
2499 int local_mb_ok,
2500 uint_t imp_flags,
2501 int *set_count,
2502 int overlap,
2503 md_im_drive_info_t *overlap_disks,
2504 md_error_t *ep
2505 )
2506 {
2507 uint_t s;
2508 mdname_t *rsp;
2509 int fd;
2510 char mb[DEV_BSIZE];
2511 /*LINTED*/
2512 mddb_mb_t *mbp = (mddb_mb_t *)mb;
2513 char lb[dbtob(MDDB_LBCNT)];
2514 /*LINTED*/
2515 mddb_lb_t *lbp = (mddb_lb_t *)lb;
2516 mddb_did_blk_t *lbdidp = NULL;
2517 mddb_ln_t *lnp = NULL;
2518 int lnsize, lbdid_size;
2519 int rval = 0;
2520 char db[DEV_BSIZE];
2521 /*LINTED*/
2522 mddb_db_t *dbp = (mddb_db_t *)db;
2523 did_list_t *did_listp = NULL;
2524 mddrivenamelist_t *dnlp;
2525 mddrivename_t *dnp;
2526 md_im_names_t cnames = { 0, NULL};
2527 char *nm = NULL, *shrnm = NULL;
2528 char *did_nm = NULL, *did_shrnm = NULL;
2529 struct nm_rec *nmp;
2530 struct nm_shr_rec *snmp;
2531 struct devid_shr_rec *did_shrnmp;
2532 struct devid_min_rec *did_nmp;
2533 int extended_namespace = 0;
2534 int replicated = 0;
2535 int partial = 0;
2536 pnm_rec_t *pnm = NULL; /* list of physical devs in set */
2537 md_im_set_desc_t *misp;
2538
2539 dnp = dp->drivenamep;
2540
2541 /*
2542 * Determine and open the replica slice
2543 */
2544 if (meta_replicaslice(dnp, &s, ep) != 0) {
2545 return (-1);
2546 }
2547
2548 /*
2549 * Test for the size of replica slice in question. If
2550 * the size is zero, we know that this is not a disk that was
2551 * part of a set and it should be silently ignored for import.
2552 */
2553 if (dnp->vtoc.parts[s].size == 0)
2554 return (0);
2555
2556 if ((rsp = metaslicename(dnp, s, ep)) == NULL) {
2557 return (-1);
2558 }
2559
2560 if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0)
2561 return (mdsyserror(ep, errno, rsp->cname));
2562
2563 /*
2564 * After the open() succeeds, we should return via the "out"
2565 * label to clean up after ourselves. (Up 'til now, we can
2566 * just return directly, because there are no resources to
2567 * give back.)
2568 */
2569
2570 if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0)
2571 goto out;
2572
2573 replicated = is_replicated(fd, mbp, 0, NULL);
2574
2575 if (!local_mb_ok && mbp->mb_setno == 0) {
2576 rval = 0;
2577 goto out;
2578 }
2579
2580 if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0)
2581 goto out;
2582
2583 /*
2584 * Once the locator block has been read, we need to
2585 * check if the locator block commit count is zero.
2586 * If it is zero, we know that the replica we're dealing
2587 * with is on a disk that was deleted from the disk set;
2588 * and, it potentially has stale data. We need to quit
2589 * in that case
2590 */
2591 if (lbp->lb_commitcnt == 0) {
2592 rval = 0;
2593 goto out;
2594 }
2595
2596 /*
2597 * Make sure that the disk being imported has device id
2598 * namespace present for disksets. If a disk doesn't have
2599 * device id namespace, we skip reading the replica on that disk
2600 */
2601 if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) {
2602 rval = 0;
2603 goto out;
2604 }
2605
2606 /*
2607 * Grab the locator block device id array. Allocate memory for the
2608 * array first.
2609 */
2610 lbdid_size = dbtob(lbp->lb_didblkcnt);
2611 lbdidp = Zalloc(lbdid_size);
2612
2613 if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp,
2614 lbdid_size)) <= 0)
2615 goto out;
2616
2617 /*
2618 * For a disk that has not been replicated, extract the device ids
2619 * stored in the locator block device id array and store them in
2620 * a list.
2621 *
2622 * If the disk has been replicated using replication software such
2623 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in
2624 * the locator block are invalid and we need to build a list of
2625 * replicated disks.
2626 */
2627 if (imp_flags & META_IMP_PASS1) {
2628 /*
2629 * We need to do this for both passes but
2630 * replicated_disk_list_built is global so we need some way
2631 * to determine which pass we're on. Set it to the appropriate
2632 * pass's flag.
2633 */
2634 replicated_disk_list_built = &replicated_disk_list_built_pass1;
2635 } else {
2636 replicated_disk_list_built = &replicated_disk_list_built_pass2;
2637 }
2638 if (replicated && !(*replicated_disk_list_built)) {
2639 /*
2640 * if there's a replicated diskset involved, we need to
2641 * scan the system one more time and build a list of all
2642 * candidate disks that might be part of that replicated set
2643 */
2644 if (meta_list_disks(ep, &cnames) != 0) {
2645 rval = 0;
2646 goto out;
2647 }
2648 dnlp = meta_prune_cnames(ep, &cnames, 0);
2649 rval = build_replicated_disks_list(ep, dnlp);
2650 if (rval == 0)
2651 goto out;
2652 }
2653
2654 /*
2655 * Until here, we've gotten away with fixed sizes for the
2656 * master block and locator block. The locator names,
2657 * however, are sized (and therefore allocated) dynamically
2658 * according to information in the locator block.
2659 */
2660 lnsize = dbtob(lbp->lb_lnblkcnt);
2661 lnp = Zalloc(lnsize);
2662
2663 if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
2664 goto out;
2665
2666 rval = build_did_list(ep, fd, mbp, lbp, lbdidp, lnp, &did_listp,
2667 replicated);
2668
2669 /*
2670 * An rval of ENOTSUP means we have a partial diskset. We'll want
2671 * to set the partial variable so we can pass this information
2672 * set_append_wrapper later for placing on the misp list.
2673 */
2674 if (rval == ENOTSUP)
2675 partial = MD_IM_PARTIAL_DISKSET;
2676
2677 if (rval < 0)
2678 goto out;
2679
2680 /*
2681 * Read in the NM record
2682 * If no NM record was found, it still is a valid configuration
2683 * but it also means that we won't find any corresponding DID_NM
2684 * or DID_SHR_NM.
2685 */
2686 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname))
2687 < 0)
2688 goto out;
2689 else if (rval == 0)
2690 goto append;
2691
2692 /*
2693 * At this point, we have read in all of the blocks that form
2694 * the nm_rec. We should at least detect the corner case
2695 * mentioned above, in which r_next_recid links to another
2696 * nm_rec. Extended namespace handling is left for Phase 2.
2697 *
2698 * What this should really be is a loop, each iteration of
2699 * which reads in a nm_rec and calls the set_append().
2700 */
2701 /*LINTED*/
2702 nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t));
2703 if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2704 extended_namespace = 1;
2705 rval = 0;
2706 goto out;
2707 }
2708
2709 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &shrnm, MDDB_SHR_NM,
2710 rsp->cname)) < 0)
2711 goto out;
2712 else if (rval == 0)
2713 goto append;
2714
2715 /*LINTED*/
2716 snmp = (struct nm_shr_rec *)(shrnm + sizeof (mddb_rb_t));
2717 if (snmp->sr_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2718 extended_namespace = 1;
2719 rval = 0;
2720 goto out;
2721 }
2722
2723 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
2724 MDDB_DID_NM, rsp->cname)) < 0)
2725 goto out;
2726 else if (rval == 0)
2727 goto append;
2728
2729 /*LINTED*/
2730 did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t) -
2731 sizeof (int));
2732 if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2733 extended_namespace = 1;
2734 rval = 0;
2735 goto out;
2736 }
2737
2738 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm,
2739 MDDB_DID_SHR_NM, rsp->cname)) < 0)
2740 goto out;
2741 else if (rval == 0)
2742 goto append;
2743
2744 /*LINTED*/
2745 did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t) -
2746 sizeof (int));
2747 if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2748 extended_namespace = 1;
2749 rval = 0;
2750 goto out;
2751 }
2752
2753 /*
2754 * We need to check if all of the disks listed in the namespace
2755 * are actually available. If they aren't we'll return with
2756 * an ENOTSUP error which indicates a partial diskset.
2757 */
2758 rval = check_nm_disks(did_nmp, did_shrnmp);
2759
2760 /*
2761 * An rval of ENOTSUP means we have a partial diskset. We'll want
2762 * to set the partial variable so we can pass this information
2763 * to set_append_wrapper later for placing on the misp list.
2764 */
2765 if (rval == ENOTSUP)
2766 partial = MD_IM_PARTIAL_DISKSET;
2767
2768 if (rval < 0)
2769 goto out;
2770
2771 append:
2772 /* Finally, we've got what we need to process this replica. */
2773 misp = set_append(mispp, did_listp, mbp, lbp,
2774 /*LINTED*/
2775 (mddb_rb_t *)nm, (mddb_rb_t *)shrnm, &pnm, (mddb_rb_t *)did_nm,
2776 /*LINTED*/
2777 (mddb_rb_t *)did_shrnm, (imp_flags | partial | replicated), ep);
2778
2779 if (!(imp_flags & META_IMP_PASS1)) {
2780 *set_count += 1;
2781 rval = report_set_info(misp, mbp, lbp,
2782 /*LINTED*/
2783 (mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count,
2784 overlap, overlap_disks, ep);
2785 if (rval < 0)
2786 goto out;
2787 }
2788
2789 /* Return the fact that we found at least one set */
2790 rval = 1;
2791
2792 out:
2793 if (fd >= 0)
2794 (void) close(fd);
2795 if (did_listp != NULL)
2796 free_did_list(did_listp);
2797 if (lnp != NULL)
2798 Free(lnp);
2799 if (nm != NULL)
2800 Free(nm);
2801 if (did_nm != NULL)
2802 Free(did_nm);
2803 if (did_shrnm != NULL)
2804 Free(did_shrnm);
2805 if (pnm != NULL)
2806 free_pnm_rec_list(&pnm);
2807
2808 /*
2809 * If we are at the end of the list, we must free up
2810 * the replicated list too
2811 */
2812 if (dp->next == NULL)
2813 free_replicated_disks_list();
2814
2815 if (extended_namespace)
2816 return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD,
2817 mynode(), NULL, NULL));
2818
2819 return (rval);
2820 }
2821
2822 /*
2823 * Return the minor name associated with a given disk slice
2824 */
2825 static char *
meta_getminor_name(char * devname,md_error_t * ep)2826 meta_getminor_name(
2827 char *devname,
2828 md_error_t *ep
2829 )
2830 {
2831 int fd = -1;
2832 char *minor_name = NULL;
2833 char *ret_minor_name = NULL;
2834
2835 if (devname == NULL)
2836 return (NULL);
2837
2838 if ((fd = open(devname, O_RDONLY|O_NDELAY, 0)) < 0) {
2839 (void) mdsyserror(ep, errno, devname);
2840 return (NULL);
2841 }
2842
2843 if (devid_get_minor_name(fd, &minor_name) == 0) {
2844 ret_minor_name = Strdup(minor_name);
2845 devid_str_free(minor_name);
2846 }
2847
2848 (void) close(fd);
2849 return (ret_minor_name);
2850 }
2851
2852 /*
2853 * meta_update_mb_did
2854 *
2855 * Update or create the master block with the new set number.
2856 * If a non-null devid pointer is given, the devid in the
2857 * master block will also be changed.
2858 *
2859 * This routine is called during the import of a diskset
2860 * (meta_imp_update_mb) and during the take of a diskset that has
2861 * some unresolved replicated drives (meta_unrslv_replicated_mb).
2862 *
2863 * Returns : nothing (void)
2864 */
2865 static void
meta_update_mb_did(mdsetname_t * sp,mddrivename_t * dnp,void * new_devid,int new_devid_len,void * old_devid,int replica_present,int offset,md_error_t * ep)2866 meta_update_mb_did(
2867 mdsetname_t *sp,
2868 mddrivename_t *dnp, /* raw name of drive with mb */
2869 void *new_devid, /* devid to be stored in mb */
2870 int new_devid_len,
2871 void *old_devid, /* old devid stored in mb */
2872 int replica_present, /* does replica follow mb? */
2873 int offset,
2874 md_error_t *ep
2875 )
2876 {
2877 int fd;
2878 struct mddb_mb *mbp;
2879 uint_t sliceno;
2880 mdname_t *rsp;
2881
2882 /* determine the replica slice */
2883 if (meta_replicaslice(dnp, &sliceno, ep) != 0) {
2884 return;
2885 }
2886
2887 /*
2888 * if the replica slice size is zero,
2889 * don't bother opening
2890 */
2891 if (dnp->vtoc.parts[sliceno].size == 0) {
2892 return;
2893 }
2894
2895 if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) {
2896 return;
2897 }
2898
2899 if ((fd = open(rsp->rname, O_RDWR | O_NDELAY)) < 0) {
2900 return;
2901 }
2902
2903 if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
2904 return;
2905
2906 mbp = Zalloc(DEV_BSIZE);
2907 if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE) {
2908 Free(mbp);
2909 return;
2910 }
2911
2912 /* If no replica on disk, check for dummy mb */
2913 if (replica_present == NULL) {
2914 /*
2915 * Check to see if there is a dummy there. If not
2916 * create one. This would happen if the set was
2917 * created before the master block dummy code was
2918 * implemented.
2919 */
2920 if ((mbp->mb_magic != MDDB_MAGIC_DU) ||
2921 (mbp->mb_revision != MDDB_REV_MB)) {
2922 meta_mkdummymaster(sp, fd, offset);
2923 Free(mbp);
2924 return;
2925 }
2926 }
2927
2928 mbp->mb_setno = sp->setno;
2929 if (meta_gettimeofday(&mbp->mb_timestamp) == -1) {
2930 Free(mbp);
2931 return;
2932 }
2933
2934 /*
2935 * If a old_devid is non-NULL then we're are dealing with a
2936 * replicated diskset and the devid needs to be updated.
2937 */
2938 if (old_devid) {
2939 if (mbp->mb_devid_magic == MDDB_MAGIC_DE) {
2940 if (mbp->mb_devid_len)
2941 (void) memset(mbp->mb_devid, 0,
2942 mbp->mb_devid_len);
2943 (void) memcpy(mbp->mb_devid,
2944 (char *)new_devid, new_devid_len);
2945 mbp->mb_devid_len = new_devid_len;
2946 }
2947 }
2948
2949 crcgen((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
2950 (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
2951
2952 /*
2953 * Now write out the changes to disk.
2954 * If an error occurs, just continue on.
2955 * Next take of set will register this drive as
2956 * an unresolved replicated drive and will attempt
2957 * to fix the master block again.
2958 */
2959 if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) {
2960 Free(mbp);
2961 return;
2962 }
2963 if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE) {
2964 Free(mbp);
2965 return;
2966 }
2967
2968 Free(mbp);
2969 (void) close(fd);
2970 }
2971
2972
2973 /*
2974 * meta_imp_update_mb
2975 *
2976 * Update the master block information during an import.
2977 * Takes an import set descriptor.
2978 *
2979 * Returns : nothing (void)
2980 */
2981 void
meta_imp_update_mb(mdsetname_t * sp,md_im_set_desc_t * misp,md_error_t * ep)2982 meta_imp_update_mb(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep)
2983 {
2984 md_im_drive_info_t *midp;
2985 mddrivename_t *dnp;
2986 int offset = 16; /* default mb offset is 16 */
2987
2988 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
2989 /*
2990 * If disk isn't available we can't update, so go to next
2991 */
2992 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
2993 continue;
2994 }
2995
2996 dnp = midp->mid_dnp;
2997
2998 if (midp->mid_replicas) {
2999 md_im_replica_info_t *mirp;
3000
3001 /*
3002 * If we have replicas on this disk we need to make
3003 * sure that we update the master block on every
3004 * replica on the disk.
3005 */
3006 for (mirp = midp->mid_replicas; mirp != NULL;
3007 mirp = mirp->mir_next) {
3008 offset = mirp->mir_offset;
3009 meta_update_mb_did(sp, dnp, midp->mid_devid,
3010 midp->mid_devid_sz, midp->mid_o_devid,
3011 1, offset, ep);
3012 }
3013 } else {
3014 /* No replicas, just update the one dummy mb */
3015 meta_update_mb_did(sp, dnp, midp->mid_devid,
3016 midp->mid_devid_sz, midp->mid_o_devid,
3017 0, offset, ep);
3018 }
3019 if (!mdisok(ep))
3020 return;
3021 }
3022 }
3023
3024 /*
3025 * meta_unrslv_replicated_common
3026 *
3027 * Given a drive_desc and a drivenamelist pointer,
3028 * return the devidp associated with the drive_desc,
3029 * the replicated (new) devidp associated with the drive_desc
3030 * and the specific mddrivename in the drivenamelist that
3031 * matches the replicated (new) devidp.
3032 *
3033 * Typically the drivenamelist pointer would be setup by
3034 * the meta_prune_cnames function.
3035 *
3036 * Calling function must free devidp using devid_free.
3037 *
3038 * Returns 0 - success, found new_devidp and dnp_new.
3039 * Returns 1 - failure, didn't find new devid info
3040 */
3041 static int
meta_unrslv_replicated_common(int myside,md_drive_desc * dd,mddrivenamelist_t * dnlp,ddi_devid_t * devidp,ddi_devid_t * new_devidp,mddrivename_t ** dnp_new,md_error_t * ep)3042 meta_unrslv_replicated_common(
3043 int myside,
3044 md_drive_desc *dd, /* drive list for diskset */
3045 mddrivenamelist_t *dnlp, /* list of drives on current system */
3046 ddi_devid_t *devidp, /* old devid */
3047 ddi_devid_t *new_devidp, /* replicated (new) devid */
3048 mddrivename_t **dnp_new, /* replicated drive name */
3049 md_error_t *ep
3050 )
3051 {
3052 mddrivename_t *dnp; /* drive name of old drive */
3053 mdsidenames_t *sn = NULL;
3054 uint_t rep_slice;
3055 mdname_t *np;
3056 char *minor_name = NULL;
3057 char *devid_str = NULL;
3058 size_t len;
3059 int devid_sz;
3060 mddrivenamelist_t *dp;
3061 ddi_devid_t old_devid; /* devid of old drive */
3062 ddi_devid_t new_devid; /* devid of new replicated drive */
3063 ddi_devid_t dnp_new_devid; /* devid derived from drive */
3064 /* name of replicated drive */
3065
3066 dnp = dd->dd_dnp;
3067
3068 /* Get old devid from drive record */
3069 (void) devid_str_decode(dd->dd_dnp->devid,
3070 &old_devid, NULL);
3071
3072 /* Look up replicated (new) devid */
3073 new_devid = replicated_list_lookup(
3074 devid_sizeof(old_devid), old_devid);
3075
3076 devid_free(old_devid);
3077
3078 if (new_devid == NULL)
3079 return (1);
3080
3081 /*
3082 * Using new_devid, find a drivename entry with a matching devid.
3083 * Use the passed in dnlp since it has the new (replicated) disknames
3084 * in it.
3085 */
3086 for (dp = dnlp; dp != NULL; dp = dp->next) {
3087 (void) devid_str_decode(dp->drivenamep->devid,
3088 &dnp_new_devid, NULL);
3089
3090 if (dnp_new_devid == NULL)
3091 continue;
3092
3093 if (devid_compare(new_devid, dnp_new_devid) == 0) {
3094 devid_free(dnp_new_devid);
3095 break;
3096 }
3097 devid_free(dnp_new_devid);
3098 }
3099
3100 /* If can't find new name for drive - nothing to update */
3101 if (dp == NULL)
3102 return (1);
3103
3104 /*
3105 * Setup returned value to be the drivename structure associated
3106 * with new (replicated) drive.
3107 */
3108 *dnp_new = dp->drivenamep;
3109
3110 /*
3111 * Need to return the new devid including the minor name.
3112 * Find the minor_name here using the sidename or by
3113 * looking in the namespace.
3114 */
3115 for (sn = dnp->side_names; sn != NULL; sn = sn->next) {
3116 if (sn->sideno == myside)
3117 break;
3118 }
3119
3120 /*
3121 * The disk has no side name information
3122 */
3123 if (sn == NULL) {
3124 if ((meta_replicaslice(*dnp_new, &rep_slice, ep) != 0) ||
3125 ((np = metaslicename(*dnp_new, rep_slice, ep))
3126 == NULL)) {
3127 mdclrerror(ep);
3128 return (1);
3129 }
3130
3131 if (np->dev == NODEV64)
3132 return (1);
3133
3134 /*
3135 * minor_name will be NULL if dnp->devid == NULL
3136 * - see metagetvtoc()
3137 */
3138 if (np->minor_name == NULL)
3139 return (1);
3140 else
3141 minor_name = Strdup(np->minor_name);
3142
3143 } else {
3144 minor_name = meta_getdidminorbykey(
3145 MD_LOCAL_SET, sn->sideno + SKEW,
3146 dnp->side_names_key, ep);
3147 if (!mdisok(ep))
3148 return (1);
3149 }
3150 /*
3151 * Now, use the old devid with minor name to lookup
3152 * the replicated (new) devid that will also contain
3153 * a minor name.
3154 */
3155 len = strlen(dnp->devid) + strlen(minor_name) + 2;
3156 devid_str = (char *)Malloc(len);
3157 (void) snprintf(devid_str, len, "%s/%s", dnp->devid,
3158 minor_name);
3159 (void) devid_str_decode(devid_str, devidp, NULL);
3160 Free(devid_str);
3161 devid_sz = devid_sizeof((ddi_devid_t)*devidp);
3162 *new_devidp = replicated_list_lookup(devid_sz, *devidp);
3163 return (0);
3164 }
3165
3166 /*
3167 * meta_unrslv_replicated_mb
3168 *
3169 * Update the master block information during a take.
3170 * Takes an md_drive_desc descriptor.
3171 *
3172 * Returns : nothing (void)
3173 */
3174 void
meta_unrslv_replicated_mb(mdsetname_t * sp,md_drive_desc * dd,mddrivenamelist_t * dnlp,md_error_t * ep)3175 meta_unrslv_replicated_mb(
3176 mdsetname_t *sp,
3177 md_drive_desc *dd, /* drive list for diskset */
3178 mddrivenamelist_t *dnlp, /* list of drives on current system */
3179 md_error_t *ep
3180 )
3181 {
3182 md_drive_desc *d = NULL, *d_save;
3183 mddrivename_t *dnp; /* dnp of old drive */
3184 mddrivename_t *dnp_new; /* dnp of new (replicated) drive */
3185 mddrivename_t *dnp_save; /* saved copy needed to restore */
3186 ddi_devid_t devidp, new_devidp;
3187 int myside;
3188
3189 if ((myside = getmyside(sp, ep)) == MD_SIDEWILD)
3190 return;
3191
3192 for (d = dd; d != NULL; d = d->dd_next) {
3193 dnp = d->dd_dnp;
3194 if (dnp == NULL)
3195 continue;
3196
3197 /* If don't need to update master block - skip it. */
3198 if (!(d->dd_flags & MD_DR_FIX_MB_DID))
3199 continue;
3200
3201 /*
3202 * Get old and replicated (new) devids associated with this
3203 * drive. Also, get the new (replicated) drivename structure.
3204 */
3205 if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp,
3206 &new_devidp, &dnp_new, ep) != 0) {
3207 mdclrerror(ep);
3208 continue;
3209 }
3210
3211 if (new_devidp) {
3212 int offset = 16; /* default mb offset is 16 */
3213 int dbcnt;
3214
3215 if (d->dd_dbcnt) {
3216 /*
3217 * Update each master block on the disk
3218 */
3219 for (dbcnt = d->dd_dbcnt; dbcnt != 0; dbcnt--) {
3220 meta_update_mb_did(sp, dnp_new,
3221 new_devidp,
3222 devid_sizeof(new_devidp), devidp,
3223 1, offset, ep);
3224 offset += d->dd_dbsize;
3225 }
3226 } else {
3227 /* update the one dummy mb */
3228 meta_update_mb_did(sp, dnp_new, new_devidp,
3229 devid_sizeof(new_devidp), devidp,
3230 0, offset, ep);
3231 }
3232 if (!mdisok(ep)) {
3233 devid_free(devidp);
3234 return;
3235 }
3236
3237 /* Set drive record flags to ok */
3238 /* Just update this one drive record. */
3239 d_save = d->dd_next;
3240 dnp_save = d->dd_dnp;
3241 d->dd_next = NULL;
3242 d->dd_dnp = dnp_new;
3243 /* Ignore failure since no bad effect. */
3244 (void) clnt_upd_dr_flags(mynode(), sp, d,
3245 MD_DR_OK, ep);
3246 d->dd_next = d_save;
3247 d->dd_dnp = dnp_save;
3248 }
3249 devid_free(devidp);
3250 }
3251 }
3252
3253 /*
3254 * meta_update_nm_rr_did
3255 *
3256 * Change a devid stored in the diskset namespace and in the local set
3257 * namespace with the new devid.
3258 *
3259 * This routine is called during the import of a diskset
3260 * (meta_imp_update_nn) and during the take of a diskset that has
3261 * some unresolved replicated drives (meta_unrslv_replicated_nm).
3262 *
3263 * Returns : nothing (void)
3264 */
3265 static void
meta_update_nm_rr_did(mdsetname_t * sp,void * old_devid,int old_devid_sz,void * new_devid,int new_devid_sz,int import_flag,md_error_t * ep)3266 meta_update_nm_rr_did(
3267 mdsetname_t *sp,
3268 void *old_devid, /* old devid being replaced */
3269 int old_devid_sz,
3270 void *new_devid, /* devid to be stored in nm */
3271 int new_devid_sz,
3272 int import_flag, /* called during import? */
3273 md_error_t *ep
3274 )
3275 {
3276 struct mddb_config c;
3277
3278 (void) memset(&c, 0, sizeof (c));
3279 c.c_setno = sp->setno;
3280
3281 /* During import to NOT update the local namespace. */
3282 if (import_flag)
3283 c.c_flags = MDDB_C_IMPORT;
3284
3285 c.c_locator.l_devid = (uintptr_t)Malloc(new_devid_sz);
3286 (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
3287 new_devid, new_devid_sz);
3288 c.c_locator.l_devid_sz = new_devid_sz;
3289 c.c_locator.l_devid_flags =
3290 MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
3291 c.c_locator.l_old_devid = (uint64_t)(uintptr_t)Malloc(old_devid_sz);
3292 (void) memcpy((void *)(uintptr_t)c.c_locator.l_old_devid,
3293 old_devid, old_devid_sz);
3294 c.c_locator.l_old_devid_sz = old_devid_sz;
3295 if (metaioctl(MD_IOCUPDATE_NM_RR_DID, &c, &c.c_mde, NULL) != 0) {
3296 (void) mdstealerror(ep, &c.c_mde);
3297 }
3298 Free((void *)(uintptr_t)c.c_locator.l_devid);
3299 Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3300 }
3301
3302 /*
3303 * meta_imp_update_nm
3304 *
3305 * Change a devid stored in the diskset namespace with the new devid.
3306 * This routine is called during the import of a remotely replicated diskset.
3307 *
3308 * Returns : nothing (void)
3309 */
3310 void
meta_imp_update_nm(mdsetname_t * sp,md_im_set_desc_t * misp,md_error_t * ep)3311 meta_imp_update_nm(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep)
3312 {
3313 md_im_drive_info_t *midp;
3314
3315 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
3316 /*
3317 * If disk isn't available we can't update, so go to next
3318 */
3319 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
3320 continue;
3321 }
3322
3323 meta_update_nm_rr_did(sp, midp->mid_o_devid,
3324 midp->mid_o_devid_sz, midp->mid_devid,
3325 midp->mid_devid_sz, 1, ep);
3326 if (!mdisok(ep))
3327 return;
3328 }
3329 }
3330
3331 /*
3332 * meta_unrslv_replicated_nm
3333 *
3334 * Change a devid stored in the diskset namespace and in the local set
3335 * namespace with the new devid.
3336 *
3337 * This routine is called during the take of a diskset that has
3338 * some unresolved replicated drives.
3339 *
3340 * Returns : nothing (void)
3341 */
3342 void
meta_unrslv_replicated_nm(mdsetname_t * sp,md_drive_desc * dd,mddrivenamelist_t * dnlp,md_error_t * ep)3343 meta_unrslv_replicated_nm(
3344 mdsetname_t *sp,
3345 md_drive_desc *dd, /* drive list for diskset */
3346 mddrivenamelist_t *dnlp, /* list of drives on current system */
3347 md_error_t *ep
3348 )
3349 {
3350 md_drive_desc *d = NULL;
3351 mddrivename_t *dnp; /* drive name of old drive */
3352 mddrivename_t *dnp_new; /* drive name of new (repl) drive */
3353 ddi_devid_t devidp, new_devidp;
3354 ddi_devid_t old_devid;
3355 char *devid_old_save;
3356 mdsetname_t *local_sp = NULL;
3357 int myside;
3358
3359 if ((myside = getmyside(sp, ep)) == MD_SIDEWILD)
3360 return;
3361
3362 for (d = dd; d != NULL; d = d->dd_next) {
3363 dnp = d->dd_dnp;
3364 if (dnp == NULL)
3365 continue;
3366
3367 /* If don't need to update namespace - skip it. */
3368 if (!(d->dd_flags & MD_DR_FIX_LB_NM_DID))
3369 continue;
3370
3371 /* Get old devid from drive record */
3372 (void) devid_str_decode(d->dd_dnp->devid,
3373 &old_devid, NULL);
3374
3375 /*
3376 * Get old and replicated (new) devids associated with this
3377 * drive. Also, get the new (replicated) drivename structure.
3378 */
3379 if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp,
3380 &new_devidp, &dnp_new, ep) != 0) {
3381 mdclrerror(ep);
3382 continue;
3383 }
3384
3385 if (new_devidp) {
3386 meta_update_nm_rr_did(sp, devidp,
3387 devid_sizeof(devidp), new_devidp,
3388 devid_sizeof(new_devidp), 0, ep);
3389 if (!mdisok(ep)) {
3390 devid_free(devidp);
3391 return;
3392 }
3393 }
3394 devid_free(devidp);
3395
3396 /*
3397 * Using the new devid, fix up the name.
3398 * If meta_upd_ctdnames fails, the next take will re-resolve
3399 * the name from the new devid.
3400 */
3401 local_sp = metasetname(MD_LOCAL_NAME, ep);
3402 devid_old_save = dnp->devid;
3403 dnp->devid = dnp_new->devid;
3404 (void) meta_upd_ctdnames(&local_sp, 0, (myside + SKEW),
3405 dnp, NULL, ep);
3406 mdclrerror(ep);
3407 dnp->devid = devid_old_save;
3408 }
3409 }
3410
3411 static set_t
meta_imp_setno(md_error_t * ep)3412 meta_imp_setno(
3413 md_error_t *ep
3414 )
3415 {
3416 set_t max_sets, setno;
3417 int bool;
3418
3419 if ((max_sets = get_max_sets(ep)) == 0) {
3420 return (MD_SET_BAD);
3421 }
3422
3423 /*
3424 * This code needs to be expanded when we run in SunCluster
3425 * environment SunCluster obtains setno internally
3426 */
3427 for (setno = 1; setno < max_sets; setno++) {
3428 if (clnt_setnumbusy(mynode(), setno,
3429 &bool, ep) == -1) {
3430 setno = MD_SET_BAD;
3431 break;
3432 }
3433 /*
3434 * found one available
3435 */
3436 if (bool == FALSE)
3437 break;
3438 }
3439
3440 if (setno == max_sets) {
3441 setno = MD_SET_BAD;
3442 }
3443
3444 return (setno);
3445 }
3446
3447 int
meta_imp_set(md_im_set_desc_t * misp,char * setname,int force,bool_t dry_run,md_error_t * ep)3448 meta_imp_set(
3449 md_im_set_desc_t *misp,
3450 char *setname,
3451 int force,
3452 bool_t dry_run,
3453 md_error_t *ep
3454 )
3455 {
3456 md_timeval32_t tp;
3457 md_im_drive_info_t *midp;
3458 uint_t rep_slice;
3459 mddrivename_t *dnp;
3460 struct mddb_config c;
3461 mdname_t *np;
3462 md_im_replica_info_t *mirp;
3463 set_t setno;
3464 mdcinfo_t *cinfo;
3465 mdsetname_t *sp;
3466 mddrivenamelist_t *dnlp = NULL;
3467 mddrivenamelist_t **dnlpp = &dnlp;
3468 char *minor_name = NULL;
3469 int stale_flag = 0;
3470 md_set_desc *sd;
3471 int partial_replicated_flag = 0;
3472 md_error_t xep = mdnullerror;
3473 md_setkey_t *cl_sk;
3474
3475 (void) memset(&c, 0, sizeof (c));
3476 (void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
3477 c.c_sideno = 0;
3478 c.c_flags = MDDB_C_IMPORT;
3479
3480 /*
3481 * Check to see if the setname that the set is being imported into,
3482 * already exists.
3483 */
3484 if (getsetbyname(c.c_setname, ep) != NULL) {
3485 return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD,
3486 mynode(), NULL, c.c_setname));
3487 }
3488
3489 /*
3490 * Find the next available set number
3491 */
3492 if ((setno = meta_imp_setno(ep)) == MD_SET_BAD) {
3493 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
3494 mynode(), NULL, c.c_setname));
3495 }
3496
3497 c.c_setno = setno;
3498 if (meta_gettimeofday(&tp) == -1) {
3499 return (mdsyserror(ep, errno, NULL));
3500 }
3501 c.c_timestamp = tp;
3502
3503 /* Check to see if replica quorum requirement is fulfilled */
3504 if (meta_replica_quorum(misp) == -1) {
3505 if (!force) {
3506 return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
3507 mynode(), NULL, c.c_setname));
3508 } else {
3509 stale_flag = MD_IMP_STALE_SET;
3510 /*
3511 * If we have a stale diskset, the kernel will
3512 * delete the replicas on the unavailable disks.
3513 * To be consistent, we'll zero out the mirp on those
3514 * disks here.
3515 */
3516 for (midp = misp->mis_drives; midp != NULL;
3517 midp = midp->mid_next) {
3518 if (midp->mid_available ==
3519 MD_IM_DISK_NOT_AVAILABLE) {
3520 midp->mid_replicas = NULL;
3521 }
3522 }
3523 }
3524 }
3525
3526 for (midp = misp->mis_drives; midp != NULL;
3527 midp = midp->mid_next) {
3528
3529 if ((misp->mis_flags & MD_IM_SET_REPLICATED) &&
3530 (partial_replicated_flag == 0) &&
3531 (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE))
3532 partial_replicated_flag = MD_SR_UNRSLV_REPLICATED;
3533
3534 /*
3535 * We pass the list of the drives in the
3536 * set with replicas on them down to the kernel.
3537 */
3538 dnp = midp->mid_dnp;
3539 mirp = midp->mid_replicas;
3540 if (!mirp) {
3541 /*
3542 * No replicas on this disk, go to next disk.
3543 */
3544 continue;
3545 }
3546
3547 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
3548 /*
3549 * The disk isn't there. We'll need to get the
3550 * disk information from the midp list instead
3551 * of going and looking for it. This means it
3552 * will be information relative to the old
3553 * system.
3554 */
3555 minor_name = Strdup(midp->mid_minor_name);
3556 (void) strncpy(c.c_locator.l_driver,
3557 midp->mid_driver_name,
3558 sizeof (c.c_locator.l_driver));
3559 (void) strcpy(c.c_locator.l_devname, midp->mid_devname);
3560 c.c_locator.l_mnum = midp->mid_mnum;
3561
3562 } else {
3563 if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
3564 ((np = metaslicename(dnp, rep_slice, ep))
3565 == NULL)) {
3566 mdclrerror(ep);
3567 continue;
3568 }
3569 (void) strcpy(c.c_locator.l_devname, np->bname);
3570 c.c_locator.l_dev = meta_cmpldev(np->dev);
3571 c.c_locator.l_mnum = meta_getminor(np->dev);
3572 minor_name = meta_getminor_name(np->bname, ep);
3573 if ((cinfo = metagetcinfo(np, ep)) == NULL) {
3574 mdclrerror(ep);
3575 continue;
3576 }
3577
3578 if (cinfo->dname) {
3579 (void) strncpy(c.c_locator.l_driver,
3580 cinfo->dname,
3581 sizeof (c.c_locator.l_driver));
3582 }
3583 }
3584
3585 c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
3586 (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
3587 midp->mid_devid, midp->mid_devid_sz);
3588 c.c_locator.l_devid_sz = midp->mid_devid_sz;
3589 c.c_locator.l_devid_flags =
3590 MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
3591 if (midp->mid_o_devid) {
3592 c.c_locator.l_old_devid =
3593 (uint64_t)(uintptr_t)Malloc(midp->mid_o_devid_sz);
3594 (void) memcpy((void *)(uintptr_t)
3595 c.c_locator.l_old_devid,
3596 midp->mid_o_devid, midp->mid_o_devid_sz);
3597 c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
3598 }
3599 if (minor_name) {
3600 (void) strncpy(c.c_locator.l_minor_name, minor_name,
3601 sizeof (c.c_locator.l_minor_name));
3602 }
3603
3604 do {
3605 c.c_locator.l_flags = 0;
3606 c.c_locator.l_blkno = mirp->mir_offset;
3607 if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
3608 Free((void *)(uintptr_t)c.c_locator.l_devid);
3609 if (c.c_locator.l_old_devid)
3610 Free((void *)(uintptr_t)
3611 c.c_locator.l_old_devid);
3612 return (mdstealerror(ep, &c.c_mde));
3613 }
3614 mirp = mirp->mir_next;
3615 } while (mirp != NULL);
3616 }
3617
3618 /*
3619 * If the dry run option was specified, flag success
3620 * and exit out
3621 */
3622 if (dry_run == 1) {
3623 md_eprintf("%s\n", dgettext(TEXT_DOMAIN,
3624 "import should be successful"));
3625 Free((void *)(uintptr_t)c.c_locator.l_devid);
3626 if (c.c_locator.l_old_devid)
3627 Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3628 return (0);
3629 }
3630
3631 /*
3632 * Now the kernel should have all the information
3633 * regarding the import diskset replica.
3634 * Tell the kernel to load them up and import the set
3635 */
3636 (void) memset(&c, 0, sizeof (c));
3637 c.c_flags = stale_flag;
3638 c.c_setno = setno;
3639 if (metaioctl(MD_IOCIMP_LOAD, &c, &c.c_mde, NULL) != 0) {
3640 Free((void *)(uintptr_t)c.c_locator.l_devid);
3641 if (c.c_locator.l_old_devid)
3642 Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3643 return (mdstealerror(ep, &c.c_mde));
3644 }
3645
3646 (void) meta_smf_enable(META_SMF_DISKSET, NULL);
3647
3648 /*
3649 * Create a set name for the set.
3650 */
3651 sp = Zalloc(sizeof (*sp));
3652 sp->setname = Strdup(setname);
3653 sp->lockfd = MD_NO_LOCK;
3654 sp->setno = setno;
3655 sd = Zalloc(sizeof (*sd));
3656 (void) strcpy(sd->sd_nodes[0], mynode());
3657 sd->sd_ctime = tp;
3658 sd->sd_genid = 0;
3659
3660 if (misp->mis_flags & MD_IM_SET_REPLICATED) {
3661 /* Update the diskset namespace */
3662 meta_imp_update_nm(sp, misp, ep);
3663
3664 /* Release the diskset - even if update_nm failed */
3665 (void) memset(&c, 0, sizeof (c));
3666 c.c_setno = setno;
3667 /* Don't need device id information from this ioctl */
3668 c.c_locator.l_devid = (uint64_t)0;
3669 c.c_locator.l_devid_flags = 0;
3670 if (metaioctl(MD_RELEASE_SET, &c, &c.c_mde, NULL) != 0) {
3671 if (mdisok(ep))
3672 (void) mdstealerror(ep, &c.c_mde);
3673 Free(sd);
3674 Free(sp);
3675 return (-1);
3676 }
3677
3678 /* If update_nm failed, then fail the import. */
3679 if (!mdisok(ep)) {
3680 Free(sd);
3681 Free(sp);
3682 return (-1);
3683 }
3684 }
3685
3686 /*
3687 * We'll need to update information in the master block due
3688 * to the set number changing and if the case of a replicated
3689 * diskset, the device id changing. May also need to create a
3690 * dummy master block if it's not there.
3691 */
3692 meta_imp_update_mb(sp, misp, ep);
3693 if (!mdisok(ep)) {
3694 Free(sd);
3695 Free(sp);
3696 return (-1);
3697 }
3698
3699 /*
3700 * Create set record for diskset, but record is left in
3701 * MD_SR_ADD state until after drives are added to set.
3702 */
3703 if (clnt_lock_set(mynode(), sp, ep)) {
3704 Free(sd);
3705 Free(sp);
3706 return (-1);
3707 }
3708
3709 if (clnt_createset(mynode(), sp, sd->sd_nodes,
3710 sd->sd_ctime, sd->sd_genid, ep)) {
3711 cl_sk = cl_get_setkey(sp->setno, sp->setname);
3712 (void) clnt_unlock_set(mynode(), cl_sk, &xep);
3713 Free(sd);
3714 Free(sp);
3715 return (-1);
3716 }
3717
3718 Free(sd);
3719
3720 /*
3721 * Create drive records for the disks in the set.
3722 */
3723 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
3724 dnp = midp->mid_dnp;
3725 if (midp->mid_available & MD_IM_DISK_NOT_AVAILABLE) {
3726 /*
3727 * If the disk isn't available, the dnp->devid is
3728 * no good. It is either blank for the case where
3729 * there is no disk with that devname, or it
3730 * contains the devid for the real disk in the system
3731 * with that name. The problem is, if the disk is
3732 * unavailable, then the devid should be the devid
3733 * of the missing disk. So we're faking a dnp for
3734 * the import. This is needed for creating drive
3735 * records.
3736 */
3737 dnp = Zalloc(sizeof (mddrivename_t));
3738 dnp->side_names_key = midp->mid_dnp->side_names_key;
3739 dnp->type = midp->mid_dnp->type;
3740 dnp->cname = Strdup(midp->mid_dnp->cname);
3741 dnp->rname = Strdup(midp->mid_dnp->rname);
3742 dnp->devid = devid_str_encode(midp->mid_devid,
3743 NULL);
3744 midp->mid_dnp = dnp;
3745 }
3746 dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
3747 }
3748
3749 if (meta_imp_set_adddrives(sp, dnlp, misp, ep)) {
3750 Free(sp);
3751 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
3752 mynode(), NULL, c.c_setname));
3753 }
3754
3755 /* If drives were added without error, set set_record to OK */
3756 if (clnt_upd_sr_flags(mynode(), sp,
3757 (partial_replicated_flag | MD_SR_OK | MD_SR_MB_DEVID), ep)) {
3758 Free(sp);
3759 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
3760 mynode(), NULL, c.c_setname));
3761 }
3762
3763 Free(sp);
3764
3765 cl_sk = cl_get_setkey(sp->setno, sp->setname);
3766 if (clnt_unlock_set(mynode(), cl_sk, ep)) {
3767 return (-1);
3768 }
3769 cl_set_setkey(NULL);
3770
3771 Free((void *)(uintptr_t)c.c_locator.l_devid);
3772 if (c.c_locator.l_old_devid)
3773 Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3774 return (0);
3775 }
3776