xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_import.c (revision a6c8042a1adf72d8443c3e91071943bd00e0923c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <assert.h>
29 #include <ctype.h>
30 #include <libdevinfo.h>
31 #include <mdiox.h>
32 #include <meta.h>
33 #include "meta_repartition.h"
34 #include "meta_set_prv.h"
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <sys/lvm/md_mddb.h>
39 #include <sys/lvm/md_names.h>
40 #include <sys/lvm/md_crc.h>
41 #include <sys/lvm/md_convert.h>
42 
/*
 * One node of a singly linked list of device ids gathered while a diskset
 * is examined for import.
 */
typedef struct did_list {
	void		*rdid;	/* real did if replicated set */
	void		*did;	/* did stored in lb */
	char		*devname;	/* device path name; may be NULL */
	dev_t		dev;
	uint_t		did_index;	/* locator block devid array index */
	char		*minor_name;
	char		*driver_name;
	int		available;	/* MD_IM_DISK_[NOT_]AVAILABLE */
	struct did_list	*next;
} did_list_t;
54 
/*
 * One entry of the global replicated disk list: associates the device id
 * a replicated disk is looked up by (old_devid) with the device id that is
 * returned for it (new_devid).  Entries of one bucket are chained via next.
 */
typedef struct replicated_disk {
	void			*old_devid;	/* private copy, lookup key */
	void			*new_devid;	/* pointer stored as supplied */
	struct replicated_disk	*next;
} replicated_disk_t;
60 
/*
 * The current implementation limits the max device id length to 256 bytes.
 * Should the max device id length be increased, this definition would have to
 * be bumped up accordingly.
 */
#define	MAX_DEVID_LEN		256

/*
 * We store a global list of all the replicated disks in the system. In
 * order to avoid a linear search over every disk, the entries are bucketed
 * by the length of their device ids: slot N of this array heads a singly
 * linked list of the entries whose old device id is N bytes long.
 */
static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};

/*
 * The list of replicated disks is built just once; these flags record that
 * it has been done.
 * NOTE(review): presumably there is one flag per build pass and
 * replicated_disk_list_built points at the flag of the pass in effect --
 * confirm against the code that builds the list.
 */
int replicated_disk_list_built_pass1 = 0;
int replicated_disk_list_built_pass2 = 0;
int *replicated_disk_list_built;

/* Forward declaration; used before its definition. */
static void free_did_list(did_list_t *did_listp);
85 
86 /*
87  * Map logical blk to physical
88  *
89  * This is based on the routine of the same name in the md kernel module (see
90  * file md_mddb.c), with the following caveats:
91  *
92  * - The kernel routine works on in core master blocks, or mddb_mb_ic_t; this
93  * routine works instead on the mddb_mb_t read directly from the disk
94  */
95 daddr_t
getphysblk(mddb_block_t blk,mddb_mb_t * mbp)96 getphysblk(
97 	mddb_block_t	blk,
98 	mddb_mb_t	*mbp
99 )
100 {
101 	/*
102 	 * Sanity check: is the block within range?  If so, we then assume
103 	 * that the block range map in the master block is valid and
104 	 * consistent with the block count.  Unfortunately, there is no
105 	 * reliable way to validate this assumption.
106 	 */
107 	if (blk >= mbp->mb_blkcnt || blk >= mbp->mb_blkmap.m_consecutive)
108 		return ((daddr_t)-1);
109 
110 	return (mbp->mb_blkmap.m_firstblk + blk);
111 }
112 
113 
114 
115 /*
116  * drive_append()
117  *
118  * Append to tail of linked list of md_im_drive_info_t.
119  *
120  * Will allocate space for new node and copy args into new space.
121  *
122  * Returns pointer to new node.
123  */
124 static md_im_drive_info_t *
drive_append(md_im_drive_info_t ** midpp,mddrivename_t * dnp,did_list_t * nonrep_did_listp,minor_t mnum,md_timeval32_t timestamp,md_im_replica_info_t * mirp)125 drive_append(
126 	md_im_drive_info_t	**midpp,
127 	mddrivename_t		*dnp,
128 	did_list_t		*nonrep_did_listp,
129 	minor_t			mnum,
130 	md_timeval32_t		timestamp,
131 	md_im_replica_info_t	*mirp
132 )
133 {
134 	md_im_drive_info_t	*midp;
135 	int			o_devid_sz;
136 	int			devid_sz;
137 
138 	for (; (*midpp != NULL); midpp = &((*midpp)->mid_next))
139 		;
140 
141 	midp = *midpp = Zalloc(sizeof (md_im_drive_info_t));
142 
143 	midp->mid_dnp = dnp;
144 
145 	/*
146 	 * If rdid is not NULL then we know we are dealing with
147 	 * replicated diskset case. 'devid_sz' will always be the
148 	 * size of a valid devid which can be 'did' or 'rdid'
149 	 */
150 
151 	if (nonrep_did_listp->rdid) {
152 		devid_sz = devid_sizeof(nonrep_did_listp->rdid);
153 		midp->mid_devid = (void *)Malloc(devid_sz);
154 		(void) memcpy(midp->mid_devid, nonrep_did_listp->rdid,
155 		    devid_sz);
156 		/*
157 		 * Also need to store the 'other' devid
158 		 */
159 		o_devid_sz = devid_sizeof((ddi_devid_t)(nonrep_did_listp->did));
160 		midp->mid_o_devid = (void *)Malloc(o_devid_sz);
161 		(void) memcpy(midp->mid_o_devid, nonrep_did_listp->did,
162 		    o_devid_sz);
163 		midp->mid_o_devid_sz = o_devid_sz;
164 	} else {
165 		devid_sz = devid_sizeof(nonrep_did_listp->did);
166 		midp->mid_devid = (void *)Malloc(devid_sz);
167 		/*
168 		 * In the case of regular diskset, midp->mid_o_devid
169 		 * will be a NULL pointer
170 		 */
171 		(void) memcpy(midp->mid_devid, nonrep_did_listp->did, devid_sz);
172 	}
173 
174 	midp->mid_devid_sz = devid_sz;
175 	midp->mid_setcreatetimestamp = timestamp;
176 	midp->mid_available = nonrep_did_listp->available;
177 	if (nonrep_did_listp->minor_name) {
178 		(void) strlcpy(midp->mid_minor_name,
179 		    nonrep_did_listp->minor_name, MDDB_MINOR_NAME_MAX);
180 	}
181 	midp->mid_mnum = mnum;
182 	if (nonrep_did_listp->driver_name)
183 		midp->mid_driver_name = Strdup(nonrep_did_listp->driver_name);
184 	midp->mid_replicas = mirp;
185 	if (nonrep_did_listp->devname)
186 		midp->mid_devname = Strdup(nonrep_did_listp->devname);
187 	return (midp);
188 }
189 
190 
191 
192 /*
193  * drive_append_wrapper()
194  *
195  * Constant time append wrapper; the append function will always walk the list,
196  * this will take a tail argument and use the append function on just the tail
197  * node, doing the appropriate old-tail-next-pointer bookkeeping.
198  */
199 static md_im_drive_info_t **
drive_append_wrapper(md_im_drive_info_t ** tailpp,mddrivename_t * dnp,did_list_t * nonrep_did_listp,minor_t mnum,md_timeval32_t timestamp,md_im_replica_info_t * mirp)200 drive_append_wrapper(
201 	md_im_drive_info_t	**tailpp,
202 	mddrivename_t		*dnp,
203 	did_list_t		*nonrep_did_listp,
204 	minor_t			mnum,
205 	md_timeval32_t		timestamp,
206 	md_im_replica_info_t	*mirp
207 )
208 {
209 	(void) drive_append(tailpp, dnp, nonrep_did_listp, mnum, timestamp,
210 	    mirp);
211 
212 	if ((*tailpp)->mid_next == NULL)
213 		return (tailpp);
214 
215 	return (&((*tailpp)->mid_next));
216 }
217 
218 
219 
220 /*
221  * replica_append()
222  *
223  * Append to tail of linked list of md_im_replica_info_t.
224  *
225  * Will allocate space for new node and copy args into new space.
226  *
227  * Returns pointer to new node.
228  */
229 static md_im_replica_info_t *
replica_append(md_im_replica_info_t ** mirpp,int flags,daddr32_t offset,daddr32_t length,md_timeval32_t timestamp)230 replica_append(
231 	md_im_replica_info_t	**mirpp,
232 	int			flags,
233 	daddr32_t		offset,
234 	daddr32_t		length,
235 	md_timeval32_t		timestamp
236 )
237 {
238 	md_im_replica_info_t	*mirp;
239 
240 	for (; (*mirpp != NULL); mirpp = &((*mirpp)->mir_next))
241 		;
242 
243 	mirp = *mirpp = Zalloc(sizeof (md_im_replica_info_t));
244 
245 	mirp->mir_flags = flags;
246 	mirp->mir_offset = offset;
247 	mirp->mir_length = length;
248 	mirp->mir_timestamp = timestamp;
249 
250 	return (mirp);
251 
252 }
253 
254 
255 
256 /*
257  * replica_append_wrapper()
258  *
259  * Constant time append wrapper; the append function will always walk the list,
260  * this will take a tail argument and use the append function on just the tail
261  * node, doing the appropriate old-tail-next-pointer bookkeeping.
262  */
263 static md_im_replica_info_t **
replica_append_wrapper(md_im_replica_info_t ** tailpp,int flags,daddr32_t offset,daddr32_t length,md_timeval32_t timestamp)264 replica_append_wrapper(
265 	md_im_replica_info_t	**tailpp,
266 	int			flags,
267 	daddr32_t		offset,
268 	daddr32_t		length,
269 	md_timeval32_t		timestamp
270 )
271 {
272 	(void) replica_append(tailpp, flags, offset, length, timestamp);
273 
274 	if ((*tailpp)->mir_next == NULL)
275 		return (tailpp);
276 
277 	return (&(*tailpp)->mir_next);
278 }
279 
280 /*
281  * map_replica_disk()
282  *
283  * Searches the device id list for a specific
284  * disk based on the locator block device id array index.
285  *
286  * Returns a pointer to the did_list node if a match was
287  * found or NULL otherwise.
288  */
289 static did_list_t *
map_replica_disk(did_list_t * did_listp,int did_index)290 map_replica_disk(
291 	did_list_t	*did_listp,
292 	int		did_index
293 )
294 {
295 	did_list_t	*tailp = did_listp;
296 
297 	while (tailp != NULL) {
298 		if (tailp->did_index == did_index)
299 			return (tailp);
300 		tailp = tailp->next;
301 	}
302 
303 	/* not found, return failure */
304 	return (NULL);
305 }
306 
307 /*
308  * replicated_list_lookup()
309  *
310  * looks up a replicated disk entry in the global replicated disk list
311  * based upon the length of that disk's device id. returns the new device id
312  * for the disk.
313  * If you store the returned devid you must create a local copy.
314  */
315 void *
replicated_list_lookup(uint_t devid_len,void * old_devid)316 replicated_list_lookup(
317 	uint_t	devid_len,
318 	void	*old_devid
319 )
320 {
321 	replicated_disk_t *head = NULL;
322 
323 	assert(devid_len <= MAX_DEVID_LEN);
324 	head = replicated_disk_list[devid_len];
325 
326 	if (head == NULL)
327 		return (NULL);
328 
329 	do {
330 		if (devid_compare((ddi_devid_t)old_devid,
331 			(ddi_devid_t)head->old_devid) == 0)
332 			return (head->new_devid);
333 		head = head->next;
334 	} while (head != NULL);
335 
336 	return (NULL);
337 }
338 
339 /*
340  * replicated_list_insert()
341  *
342  * inserts a replicated disk entry into the global replicated disk list
343  */
344 static void
replicated_list_insert(size_t old_devid_len,void * old_devid,void * new_devid)345 replicated_list_insert(
346 	size_t	old_devid_len,
347 	void	*old_devid,
348 	void	*new_devid
349 )
350 {
351 	replicated_disk_t	*repl_disk, **first_entry;
352 	void			*repl_old_devid = NULL;
353 
354 	assert(old_devid_len <= MAX_DEVID_LEN);
355 
356 	repl_disk = Zalloc(sizeof (replicated_disk_t));
357 	repl_old_devid = Zalloc(old_devid_len);
358 	(void) memcpy(repl_old_devid, (void *)old_devid, old_devid_len);
359 
360 	repl_disk->old_devid = repl_old_devid;
361 	repl_disk->new_devid = new_devid;
362 
363 	first_entry = &replicated_disk_list[old_devid_len];
364 
365 	if (*first_entry == NULL) {
366 		*first_entry = repl_disk;
367 		return;
368 	}
369 
370 	repl_disk->next = *first_entry;
371 	replicated_disk_list[old_devid_len] = repl_disk;
372 }
373 
374 /*
375  * get_replica_disks()
376  *
377  * Will step through the locator records in the supplied locator block, and add
378  * each one with an active replica to a supplied list of md_im_drive_info_t, and
379  * add the appropriate replicas to the md_im_replica_info_t contained therein.
380  */
381 static void
get_replica_disks(md_im_set_desc_t * misp,did_list_t * did_listp,mddb_mb_t * mb,mddb_lb_t * lbp,md_error_t * ep)382 get_replica_disks(
383 	md_im_set_desc_t	*misp,
384 	did_list_t		*did_listp,
385 	mddb_mb_t		*mb,
386 	mddb_lb_t		*lbp,
387 	md_error_t		*ep
388 )
389 {
390 	mddrivename_t		*dnp;
391 	int			indx, on_list;
392 	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
393 	int			flags;
394 	did_list_t		*replica_disk;
395 	daddr32_t		offset;
396 	daddr32_t		length;
397 	md_timeval32_t		timestamp;
398 	md_im_replica_info_t	**mirpp = NULL;
399 	md_im_drive_info_t	**midpp = &misp->mis_drives;
400 	md_im_drive_info_t	*midp;
401 
402 	for (indx = 0; indx < lbp->lb_loccnt; indx++) {
403 
404 		on_list = 0;
405 		if ((lbp->lb_locators[indx].l_flags == 0) ||
406 		    (lbp->lb_locators[indx].l_flags & MDDB_F_DELETED))
407 			continue;
408 
409 		/*
410 		 * search the device id list for a
411 		 * specific ctds based on the locator
412 		 * block device id array index.
413 		 */
414 		replica_disk = map_replica_disk(did_listp, indx);
415 
416 		assert(replica_disk != NULL);
417 
418 
419 		/*
420 		 * metadrivename() can fail for a slice name
421 		 * if there is not an existing mddrivename_t.
422 		 * So we use metadiskname() to strip the slice
423 		 * number.
424 		 */
425 		dnp = metadrivename(&sp, metadiskname(replica_disk->devname),
426 		    ep);
427 
428 		for (midp = misp->mis_drives; midp != NULL;
429 			midp = midp->mid_next) {
430 			if (dnp == midp->mid_dnp) {
431 				/*
432 				 * You could get a dnp match, but if 1 disk
433 				 * is unavailable and the other isn't, they
434 				 * will have the same dnp due
435 				 * to the name being the same, but in fact
436 				 * are different disks.
437 				 */
438 				if (midp->mid_available ==
439 				    replica_disk->available) {
440 					on_list = 1;
441 					mirpp = &midp->mid_replicas;
442 					break;
443 				}
444 			}
445 		}
446 
447 		/*
448 		 * New on the list so add it
449 		 */
450 		if (!on_list) {
451 			mddb_mb_t	*mbp;
452 			uint_t		sliceno;
453 			mdname_t	*rsp;
454 			int		fd = -1;
455 
456 			mbp = Malloc(DEV_BSIZE);
457 
458 			/*
459 			 * If the disk isn't available, we don't
460 			 * want to try to read from it.
461 			 */
462 			if (replica_disk->available == MD_IM_DISK_AVAILABLE) {
463 				/* determine the replica slice */
464 				if (meta_replicaslice(dnp, &sliceno,
465 				    ep) != 0) {
466 					Free(mbp);
467 					continue;
468 				}
469 
470 				/*
471 				 * if the replica slice size is zero,
472 				 * don't bother opening
473 				 */
474 				if (dnp->vtoc.parts[sliceno].size == 0) {
475 					Free(mbp);
476 					continue;
477 				}
478 
479 				if ((rsp = metaslicename(dnp, sliceno,
480 				    ep)) == NULL) {
481 					Free(mbp);
482 					continue;
483 				}
484 
485 				if ((fd = open(rsp->rname,
486 				    O_RDONLY| O_NDELAY)) < 0) {
487 					Free(mbp);
488 					continue;
489 				}
490 
491 				/*
492 				 * a drive may not have a master block
493 				 */
494 				if (read_master_block(ep, fd, mbp,
495 				    DEV_BSIZE) <= 0) {
496 					mdclrerror(ep);
497 					Free(mbp);
498 					(void) close(fd);
499 					continue;
500 				}
501 
502 				(void) close(fd);
503 			}
504 			midpp = drive_append_wrapper(midpp, dnp,
505 			    replica_disk,
506 			    meta_getminor(replica_disk->dev),
507 			    mbp->mb_setcreatetime, NULL);
508 			mirpp = &((*midpp)->mid_replicas);
509 			Free(mbp);
510 		}
511 
512 		/*
513 		 * For either of these assertions to fail, it implies
514 		 * a NULL return from metadrivename() above.  Since
515 		 * the args came from a presumed valid locator block,
516 		 * that's Bad.
517 		 */
518 		assert(midpp != NULL);
519 		assert(mirpp != NULL);
520 
521 		/*
522 		 * Extract the parameters describing this replica.
523 		 *
524 		 * The magic "1" in the length calculation accounts
525 		 * for the length of the master block, in addition to
526 		 * the block count it describes.  (The master block
527 		 * will always take up one block on the disk, and
528 		 * there will always only be one master block per
529 		 * replica, even though much of the code is structured
530 		 * to handle noncontiguous replicas.)
531 		 */
532 		flags = lbp->lb_locators[indx].l_flags;
533 		offset = lbp->lb_locators[indx].l_blkno;
534 		length = mb->mb_blkcnt + 1;
535 		timestamp = mb->mb_setcreatetime;
536 
537 		mirpp = replica_append_wrapper(mirpp, flags,
538 			offset, length, timestamp);
539 
540 		/*
541 		 * If we're here it means -
542 		 *
543 		 * we've added the disk to the list of
544 		 *    disks.
545 		 */
546 
547 		/*
548 		 * We need to bump up the number of active
549 		 * replica count for each such replica that is
550 		 * active so that it can be used later for replica
551 		 * quorum check.
552 		 */
553 		if (flags & MDDB_F_ACTIVE) {
554 			misp->mis_active_replicas++;
555 		}
556 	}
557 }
558 
559 
560 /*
561  * append_pnm_rec()
562  *
563  * Append pnm_rec_t entry to list of physical devices in the diskset.  Entry
564  * contains a mapping of n_key in NM namespace(or min_key in DID_NM namespace)
565  * to name of the physical device.  This list will be used to ensure that the
566  * correct names of the physical devices are printed in the metastat output--the
567  * NM namespace might have stale information about where the physical devices
568  * were previously located when the diskset was last active.
569  */
570 static void
append_pnm_rec(pnm_rec_t ** pnm,mdkey_t min_key,char * n_name)571 append_pnm_rec(
572 	pnm_rec_t	**pnm,
573 	mdkey_t		min_key,
574 	char		*n_name
575 )
576 {
577 	pnm_rec_t 	*tmp_pnm;
578 	char 		*p;
579 	int 		len;
580 
581 	if ((p = strrchr(n_name, '/')) != NULL)
582 		p++;
583 
584 	/*
585 	 * Allocates pnm_rec_t record for the physical
586 	 * device.
587 	 */
588 	len = strlen(p) + 1; /* Length of name plus Null term */
589 	tmp_pnm  = Malloc(sizeof (pnm_rec_t) + len);
590 	(void) strncpy(tmp_pnm->n_name, p, len);
591 	tmp_pnm->n_key = min_key;
592 
593 	/*
594 	 * Adds new element to head of pnm_rec_t list.
595 	 */
596 	if (*pnm == NULL) {
597 		tmp_pnm->next = NULL;
598 		*pnm = tmp_pnm;
599 	} else {
600 		tmp_pnm->next = *pnm;
601 		*pnm = tmp_pnm;
602 	}
603 }
604 
605 /*
606  * free_pnm_rec_list()
607  *
608  * Freeing all pnm_rec_t entries on the list of physical devices in the
609  * diskset.
610  */
611 void
free_pnm_rec_list(pnm_rec_t ** pnm)612 free_pnm_rec_list(pnm_rec_t **pnm)
613 {
614 	pnm_rec_t	*tmp_pnm, *rm_pnm;
615 
616 	for (tmp_pnm = *pnm; tmp_pnm != NULL; ) {
617 		rm_pnm = tmp_pnm;
618 		tmp_pnm = tmp_pnm->next;
619 		Free(rm_pnm);
620 	}
621 
622 	*pnm = NULL;
623 }
624 
625 
626 /*
627  * get_disks_from_didnamespace()
628  * This function was origionally called: get_nonreplica_disks()
629  *
630  * Extracts the disks without replicas from the locator name space and adds them
631  * to the supplied list of md_im_drive_info_t.
632  * If the print verbose option was given then this function will also
633  * correct the nm namespace so that the n_name is the right ctd name
634  */
635 static void
get_disks_from_didnamespace(md_im_set_desc_t * misp,pnm_rec_t ** pnm,mddb_rb_t * nm,mddb_rb_t * shrnm,mddb_rb_t * did_nm,mddb_rb_t * did_shrnm,uint_t imp_flags,int replicated,md_error_t * ep)636 get_disks_from_didnamespace(
637 	md_im_set_desc_t	*misp,
638 	pnm_rec_t		**pnm,
639 	mddb_rb_t		*nm,
640 	mddb_rb_t		*shrnm,
641 	mddb_rb_t		*did_nm,
642 	mddb_rb_t		*did_shrnm,
643 	uint_t 			imp_flags,
644 	int			replicated,
645 	md_error_t		*ep
646 )
647 {
648 	char			*search_path = "/dev";
649 	devid_nmlist_t		*nmlist;
650 	md_im_drive_info_t	*midp, **midpp = &misp->mis_drives;
651 	mddrivename_t		*dnp;
652 	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
653 	mddb_rb_t		*rbp_did = did_nm;
654 	mddb_rb_t		*rbp_did_shr = did_shrnm;
655 	mddb_rb_t		*rbp_nm = nm;
656 	mddb_rb_t		*rbp_shr_nm = shrnm;
657 	int			on_list = 0;
658 	struct devid_min_rec	*did_rec;
659 	struct devid_shr_rec	*did_shr_rec;
660 	struct nm_rec		*namesp_rec;
661 	struct nm_shr_rec	*namesp_shr_rec;
662 	struct did_shr_name	*did;
663 	struct did_min_name	*min;
664 	void			*r_did;	/* NULL if not a replicated diskset */
665 	void			*valid_did;
666 	int			avail = 0;
667 	struct nm_name		*nmp;
668 	struct nm_shared_name	*snmp;
669 	mdkey_t			drv_key, key, dev_key;
670 	minor_t			mnum = 0;
671 	did_list_t		*nonrep_did_listp;
672 	size_t			used_size, offset;
673 
674 	/*
675 	 * We got a pointer to an mddb record, which we expect to contain a
676 	 * name record; extract the pointer thereto.
677 	 */
678 	/* LINTED */
679 	did_rec = (struct devid_min_rec *)((caddr_t)(&rbp_did->rb_data));
680 	/* LINTED */
681 	did_shr_rec = (struct devid_shr_rec *)
682 	    ((caddr_t)(&rbp_did_shr->rb_data));
683 	/* LINTED */
684 	namesp_rec = (struct nm_rec *)((caddr_t)(&rbp_nm->rb_data));
685 	/* LINTED */
686 	namesp_shr_rec = (struct nm_shr_rec *)((caddr_t)(&rbp_shr_nm->rb_data));
687 
688 	/*
689 	 * Skip the nm_rec_hdr and iterate on the array of struct minor_name
690 	 * at the end of the devid_min_rec
691 	 */
692 	for (min = &did_rec->minor_name[0]; min->min_devid_key != 0;
693 	    /* LINTED */
694 	    min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) {
695 
696 		on_list = 0;
697 		r_did = NULL;
698 		nonrep_did_listp = Zalloc(sizeof (struct did_list));
699 
700 		/*
701 		 * For a given DID_NM key, locate the corresponding device
702 		 * id from DID_NM_SHR
703 		 */
704 		for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
705 		    /* LINTED */
706 		    did = (struct did_shr_name *)
707 		    ((char *)did + DID_SHR_NAMSIZ(did))) {
708 			/*
709 			 * We got a match, this is the device id we're
710 			 * looking for
711 			 */
712 			if (min->min_devid_key == did->did_key)
713 				break;
714 		}
715 
716 		if (did->did_key == 0) {
717 			/* we didn't find a match */
718 			assert(did->did_key != 0);
719 			md_exit(NULL, 1);
720 		}
721 
722 		/*
723 		 * If replicated diskset
724 		 */
725 		if (replicated) {
726 			size_t		new_devid_len, old_devid_len;
727 			char		*temp;
728 			/*
729 			 * In this case, did->did_devid will
730 			 * be invalid so lookup the real one
731 			 */
732 			temp = replicated_list_lookup(did->did_size,
733 			    did->did_devid);
734 			if (temp == NULL) {
735 				/* we have a partial replicated set, fake it */
736 				new_devid_len = did->did_size;
737 				r_did = Zalloc(new_devid_len);
738 				(void) memcpy(r_did, did->did_devid,
739 				    new_devid_len);
740 			} else {
741 				new_devid_len = devid_sizeof((ddi_devid_t)temp);
742 				r_did = Zalloc(new_devid_len);
743 				(void) memcpy(r_did, temp, new_devid_len);
744 			}
745 			valid_did = r_did;
746 			nonrep_did_listp->rdid = Zalloc(new_devid_len);
747 			(void) memcpy(nonrep_did_listp->rdid, r_did,
748 			    new_devid_len);
749 			old_devid_len =
750 			    devid_sizeof((ddi_devid_t)did->did_devid);
751 			nonrep_did_listp->did = Zalloc(old_devid_len);
752 			(void) memcpy((void *)nonrep_did_listp->did,
753 			    (void *)did->did_devid, old_devid_len);
754 		} else {
755 			size_t		new_devid_len;
756 
757 			valid_did = did->did_devid;
758 			new_devid_len =
759 			    devid_sizeof((ddi_devid_t)did->did_devid);
760 			nonrep_did_listp->did = Zalloc(new_devid_len);
761 			(void) memcpy((void *)nonrep_did_listp->did,
762 			    (void *)did->did_devid, new_devid_len);
763 		}
764 
765 		/*
766 		 * Get a ctds mapping for that device id.
767 		 * Since disk is being imported into this system,
768 		 * just use the first ctds in list.
769 		 */
770 		if (meta_deviceid_to_nmlist(search_path,
771 		    (ddi_devid_t)valid_did,
772 		    &min->min_name[0], &nmlist) == 0) {
773 			/*
774 			 * We know the disk is available. Use the
775 			 * device information in nmlist.
776 			 */
777 			assert(nmlist[0].devname != NULL);
778 			nonrep_did_listp->devname = Strdup(nmlist[0].devname);
779 			nonrep_did_listp->available = MD_IM_DISK_AVAILABLE;
780 			avail = 0;
781 			mnum = meta_getminor(nmlist[0].dev);
782 			devid_free_nmlist(nmlist);
783 		} else {
784 			/*
785 			 * The disk is not available. That means we need to
786 			 * use the (old) device information stored in the
787 			 * namespace.
788 			 */
789 			/* search in nm space for a match */
790 			offset = sizeof (struct nm_rec) -
791 			    sizeof (struct nm_name);
792 			used_size =  namesp_rec->r_rec_hdr.r_used_size - offset;
793 			for (nmp = &namesp_rec->r_name[0]; nmp->n_key != 0;
794 			    /* LINTED */
795 			    nmp = (struct nm_name *)((char *)nmp +
796 			    NAMSIZ(nmp))) {
797 				if (nmp->n_key == min->min_key)
798 					break;
799 			    used_size -=  NAMSIZ(nmp);
800 			    if ((int)used_size <= 0) {
801 				md_exit(NULL, 1);
802 			    }
803 			}
804 
805 			if (nmp->n_key == 0) {
806 				assert(nmp->n_key != 0);
807 				md_exit(NULL, 1);
808 			}
809 			dev_key = nmp->n_dir_key;
810 			snmp = &namesp_shr_rec->sr_name[0];
811 			key = snmp->sn_key;
812 			/*
813 			 * Use the namespace n_dir_key to look in the
814 			 * shared namespace. When we find the matching
815 			 * key, that is the devname and minor number we
816 			 * want.
817 			 */
818 			offset = sizeof (struct nm_shr_rec) -
819 			    sizeof (struct nm_shared_name);
820 			used_size = namesp_shr_rec->sr_rec_hdr.r_used_size -
821 			    offset;
822 			while (key != 0) {
823 				if (dev_key == key) {
824 					/*
825 					 * This complicated looking series
826 					 * of code creates a devname of the
827 					 * form  <sn_name>/<n_name> which
828 					 * will look like /dev/dsk/c1t4d0s0.
829 					 */
830 					nonrep_did_listp->devname =
831 					    Zalloc(strlen(nmp->n_name) +
832 					    strlen(snmp->sn_name) + 2);
833 					(void) strlcpy(
834 					    nonrep_did_listp->devname,
835 					    snmp->sn_name,
836 					    strlen(snmp->sn_name));
837 					(void) strlcat(
838 					    nonrep_did_listp->devname, "/",
839 					    strlen(nmp->n_name) +
840 					    strlen(snmp->sn_name) + 2);
841 					(void) strlcat(
842 					    nonrep_did_listp->devname,
843 					    nmp->n_name,
844 					    strlen(nmp->n_name) +
845 					    strlen(snmp->sn_name) + 2);
846 					mnum = nmp->n_minor;
847 					break;
848 				}
849 				/* LINTED */
850 				snmp = (struct nm_shared_name *)((char *)snmp +
851 				    SHR_NAMSIZ(snmp));
852 				key = snmp->sn_key;
853 				used_size -= SHR_NAMSIZ(snmp);
854 				if ((int)used_size <= 0) {
855 					md_exit(NULL, 1);
856 				}
857 			}
858 			if (key == 0) {
859 				nonrep_did_listp->devname = NULL;
860 				mnum = 0;
861 			}
862 
863 			nonrep_did_listp->available = MD_IM_DISK_NOT_AVAILABLE;
864 			nonrep_did_listp->minor_name = Strdup(min->min_name);
865 			avail = 1;
866 			drv_key = nmp->n_drv_key;
867 			snmp = &namesp_shr_rec->sr_name[0];
868 			key = snmp->sn_key;
869 			/*
870 			 * Use the namespace n_drv_key to look in the
871 			 * shared namespace. When we find the matching
872 			 * key, that is the driver name for the disk.
873 			 */
874 			offset = sizeof (struct nm_shr_rec) -
875 			    sizeof (struct nm_shared_name);
876 			used_size = namesp_shr_rec->sr_rec_hdr.r_used_size -
877 			    offset;
878 			while (key != 0) {
879 				if (drv_key == key) {
880 					nonrep_did_listp->driver_name =
881 					    Strdup(snmp->sn_name);
882 					break;
883 				}
884 				/* LINTED */
885 				snmp = (struct nm_shared_name *)((char *)snmp +
886 				    SHR_NAMSIZ(snmp));
887 				key = snmp->sn_key;
888 				used_size -= SHR_NAMSIZ(snmp);
889 				if ((int)used_size <= 0) {
890 					md_exit(NULL, 1);
891 				}
892 			}
893 			if (key == 0)
894 				nonrep_did_listp->driver_name = NULL;
895 		}
896 		dnp = metadrivename(&sp,
897 		    metadiskname(nonrep_did_listp->devname), ep);
898 		/*
899 		 * Add drive to pnm_rec_t list of physical devices for
900 		 * metastat output.
901 		 */
902 		if (imp_flags & META_IMP_VERBOSE) {
903 			append_pnm_rec(pnm, min->min_key,
904 			    nonrep_did_listp->devname);
905 		}
906 
907 		assert(dnp != NULL);
908 		/* Is it already on the list? */
909 		for (midp = misp->mis_drives; midp != NULL;
910 		    midp = midp->mid_next) {
911 			if (midp->mid_dnp == dnp) {
912 				if (midp->mid_available ==
913 				    nonrep_did_listp->available) {
914 					on_list = 1;
915 					break;
916 				}
917 			}
918 		}
919 
920 		if (!on_list) {
921 			mddb_mb_t	*mbp;
922 			uint_t		sliceno;
923 			mdname_t	*rsp;
924 			int		fd = -1;
925 
926 			mbp = Malloc(DEV_BSIZE);
927 
928 			if (!avail) {
929 				/* determine the replica slice */
930 				if (meta_replicaslice(dnp, &sliceno,
931 				    ep) != 0) {
932 					Free(mbp);
933 					free_did_list(nonrep_did_listp);
934 					continue;
935 				}
936 
937 				/*
938 				 * if the replica slice size is zero,
939 				 * don't bother opening
940 				 */
941 				if (dnp->vtoc.parts[sliceno].size
942 				    == 0) {
943 					Free(mbp);
944 					free_did_list(nonrep_did_listp);
945 					continue;
946 				}
947 
948 				if ((rsp = metaslicename(dnp, sliceno,
949 				    ep)) == NULL) {
950 					Free(mbp);
951 					free_did_list(nonrep_did_listp);
952 					continue;
953 				}
954 
955 				if ((fd = open(rsp->rname,
956 				    O_RDONLY| O_NDELAY)) < 0) {
957 					Free(mbp);
958 					free_did_list(nonrep_did_listp);
959 					continue;
960 				}
961 
962 				/*
963 				 * a drive may not have a master block
964 				 */
965 				if (read_master_block(ep, fd, mbp,
966 				    DEV_BSIZE) <= 0) {
967 					mdclrerror(ep);
968 					Free(mbp);
969 					free_did_list(nonrep_did_listp);
970 					(void) close(fd);
971 					continue;
972 				}
973 
974 				(void) close(fd);
975 			}
976 			/*
977 			 * If it is replicated diskset,
978 			 * r_did will be non-NULL.
979 			 * Passing the devname as NULL because field
980 			 * is not currently used for a non-replica disk.
981 			 */
982 			midpp = drive_append_wrapper(midpp,
983 			    dnp, nonrep_did_listp,
984 			    mnum, mbp->mb_setcreatetime, NULL);
985 			Free(mbp);
986 			free_did_list(nonrep_did_listp);
987 		}
988 	free_did_list(nonrep_did_listp);
989 	}
990 }
991 
992 /*
993  * set_append()
994  *
995  * Append to tail of linked list of md_im_set_desc_t.
996  *
997  * Will allocate space for new node AND populate it by extracting disks with
998  * and without replicas from the locator blocks and locator namespace.
999  *
1000  * Returns pointer to new node.
1001  */
1002 static md_im_set_desc_t *
set_append(md_im_set_desc_t ** mispp,did_list_t * did_listp,mddb_mb_t * mb,mddb_lb_t * lbp,mddb_rb_t * nm,mddb_rb_t * shrnm,pnm_rec_t ** pnm,mddb_rb_t * did_nm,mddb_rb_t * did_shrnm,uint_t imp_flags,md_error_t * ep)1003 set_append(
1004 	md_im_set_desc_t	**mispp,
1005 	did_list_t		*did_listp,
1006 	mddb_mb_t		*mb,
1007 	mddb_lb_t		*lbp,
1008 	mddb_rb_t		*nm,
1009 	mddb_rb_t		*shrnm,
1010 	pnm_rec_t		**pnm,
1011 	mddb_rb_t		*did_nm,
1012 	mddb_rb_t		*did_shrnm,
1013 	uint_t 			imp_flags,
1014 	md_error_t		*ep
1015 )
1016 {
1017 
1018 	md_im_set_desc_t	*misp;
1019 	set_t			setno = mb->mb_setno;
1020 	int			partial = imp_flags & MD_IM_PARTIAL_DISKSET;
1021 	int			replicated = imp_flags & MD_IM_SET_REPLICATED;
1022 
1023 	/* run to end of list */
1024 	for (; (*mispp != NULL); mispp = &((*mispp)->mis_next))
1025 		;
1026 
1027 	/* allocate new list element */
1028 	misp = *mispp = Zalloc(sizeof (md_im_set_desc_t));
1029 
1030 	if (replicated)
1031 		misp->mis_flags = MD_IM_SET_REPLICATED;
1032 
1033 	misp->mis_oldsetno = setno;
1034 	misp->mis_partial = partial;
1035 
1036 	/* Get the disks with and without replicas */
1037 	get_replica_disks(misp, did_listp, mb, lbp, ep);
1038 
1039 	if (nm != NULL && did_nm != NULL && did_shrnm != NULL) {
1040 		get_disks_from_didnamespace(misp, pnm, nm, shrnm, did_nm,
1041 		    did_shrnm, imp_flags, replicated, ep);
1042 	}
1043 
1044 	/*
1045 	 * An error in this struct could come from either of
1046 	 * the above routines;
1047 	 * in both cases, we want to pass it back on up.
1048 	 */
1049 
1050 	return (misp);
1051 }
1052 
1053 
1054 /*
1055  * add_disk_names()
1056  *
1057  * Iterator to walk the minor node tree of the device snapshot, adding only the
1058  * first non-block instance of each non-cdrom minor node to a list of disks.
1059  */
1060 static int
add_disk_names(di_node_t node,di_minor_t minor,void * args)1061 add_disk_names(di_node_t node, di_minor_t minor, void *args)
1062 {
1063 	char			*search_path = "/dev";
1064 	ddi_devid_t		devid = di_devid(node);
1065 	devid_nmlist_t		*nm;
1066 	char			*min = di_minor_name(minor);
1067 	md_im_names_t		*cnames = (md_im_names_t *)args;
1068 	static di_node_t	save_node = NULL;
1069 
1070 	/*
1071 	 * skip CD devices
1072 	 * If a device does not have a device id, we can't
1073 	 * do anything with it so just exclude it from our
1074 	 * list.
1075 	 *
1076 	 * This would also encompass CD devices and floppy
1077 	 * devices that don't have a device id.
1078 	 */
1079 	if (devid == NULL) {
1080 		return (DI_WALK_CONTINUE);
1081 	}
1082 
1083 	/* char disk devices (as opposed to block) */
1084 	if (di_minor_spectype(minor) == S_IFCHR) {
1085 
1086 		/* only first occurrence (slice 0) of each instance */
1087 		if (save_node == NULL || node != save_node) {
1088 			save_node = node;
1089 			if (meta_deviceid_to_nmlist(search_path, devid,
1090 			    min, &nm) == 0) {
1091 				int	index = cnames->min_count++;
1092 
1093 				assert(nm->devname != NULL);
1094 				cnames->min_names =
1095 					Realloc(cnames->min_names,
1096 						cnames->min_count *
1097 						sizeof (char *));
1098 
1099 				assert(cnames->min_names != NULL);
1100 				cnames->min_names[index] =
1101 					metadiskname(nm->devname);
1102 				devid_free_nmlist(nm);
1103 			}
1104 		}
1105 	}
1106 	return (DI_WALK_CONTINUE);
1107 }
1108 
1109 
1110 
1111 /*
1112  * meta_list_disks()
1113  *
1114  * Snapshots the device tree and extracts disk devices from the snapshot.
1115  */
1116 int
meta_list_disks(md_error_t * ep,md_im_names_t * cnames)1117 meta_list_disks(md_error_t *ep, md_im_names_t *cnames)
1118 {
1119 	di_node_t root_node;
1120 
1121 	assert(cnames != NULL);
1122 	cnames->min_count = 0;
1123 	cnames->min_names = NULL;
1124 
1125 	if ((root_node = di_init("/", DINFOCPYALL|DINFOFORCE))
1126 	    == DI_NODE_NIL) {
1127 		return (mdsyserror(ep, errno, NULL));
1128 	}
1129 
1130 	(void) di_walk_minor(root_node, DDI_NT_BLOCK, 0, cnames,
1131 	    add_disk_names);
1132 
1133 	di_fini(root_node);
1134 	return (0);
1135 }
1136 
1137 /*
1138  * meta_imp_drvused
1139  *
1140  * Checks if given drive is mounted, swapped, part of disk configuration
1141  * or in use by SVM.  ep also has error code set up if drive is in use.
1142  *
1143  * Returns 1 if drive is in use.
1144  * Returns 0 if drive is not in use.
1145  */
1146 int
meta_imp_drvused(mdsetname_t * sp,mddrivename_t * dnp,md_error_t * ep)1147 meta_imp_drvused(
1148 	mdsetname_t		*sp,
1149 	mddrivename_t		*dnp,
1150 	md_error_t		*ep
1151 )
1152 {
1153 	md_error_t		status = mdnullerror;
1154 	md_error_t		*db_ep = &status;
1155 
1156 	/*
1157 	 * We pass in db_ep to meta_setup_db_locations
1158 	 * and never ever use the error contained therein
1159 	 * because all we're interested in is a check to
1160 	 * see whether any local metadbs are present.
1161 	 */
1162 	if ((meta_check_drivemounted(sp, dnp, ep) != 0) ||
1163 	    (meta_check_driveswapped(sp, dnp, ep) != 0) ||
1164 	    (((meta_setup_db_locations(db_ep) == 0) &&
1165 	    ((meta_check_drive_inuse(sp, dnp, 1, ep) != 0) ||
1166 	    (meta_check_driveinset(sp, dnp, ep) != 0))))) {
1167 		return (1);
1168 	} else {
1169 		return (0);
1170 	}
1171 }
1172 
1173 /*
1174  * meta_prune_cnames()
1175  *
1176  * Removes in-use disks from the list prior to further processing.
1177  *
1178  * Return value depends on err_on_prune flag: if set, and one or more disks
1179  * are pruned, the return list will be the pruned disks.  If not set, or if no
1180  * disks are pruned, the return list will be the unpruned disks.
1181  */
1182 mddrivenamelist_t *
meta_prune_cnames(md_error_t * ep,md_im_names_t * cnames,int err_on_prune)1183 meta_prune_cnames(
1184 	md_error_t *ep,
1185 	md_im_names_t *cnames,
1186 	int err_on_prune
1187 )
1188 {
1189 	int			d;
1190 	int			fcount = 0;
1191 	mddrivenamelist_t	*dnlp = NULL;
1192 	mddrivenamelist_t	**dnlpp = &dnlp;
1193 	mddrivenamelist_t	*fdnlp = NULL;
1194 	mddrivenamelist_t	**fdnlpp = &fdnlp;
1195 	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
1196 
1197 	for (d = 0; d < cnames->min_count; ++d) {
1198 		mddrivename_t	*dnp;
1199 
1200 		dnp = metadrivename(&sp, cnames->min_names[d], ep);
1201 		if (dnp == NULL) {
1202 			/*
1203 			 * Assuming we're interested in knowing about
1204 			 * whatever error occurred, but not in stopping.
1205 			 */
1206 			mde_perror(ep, cnames->min_names[d]);
1207 			mdclrerror(ep);
1208 
1209 			continue;
1210 		}
1211 
1212 		/*
1213 		 * Check if the drive is inuse.
1214 		 */
1215 		if (meta_imp_drvused(sp, dnp, ep)) {
1216 			fdnlpp = meta_drivenamelist_append_wrapper(fdnlpp, dnp);
1217 			fcount++;
1218 			mdclrerror(ep);
1219 		} else {
1220 			dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
1221 		}
1222 	}
1223 
1224 	if (fcount) {
1225 		if (err_on_prune) {
1226 			(void) mddserror(ep, MDE_DS_DRIVEINUSE, 0,
1227 			    NULL, fdnlp->drivenamep->cname, NULL);
1228 			metafreedrivenamelist(dnlp);
1229 			return (fdnlp);
1230 		}
1231 		metafreedrivenamelist(fdnlp);
1232 	}
1233 
1234 	return (dnlp);
1235 }
1236 
1237 /*
1238  * read_master_block()
1239  *
1240  * Returns:
1241  *	< 0 for failure
1242  *	  0 for no valid master block
1243  *	  1 for valid master block
1244  *
1245  * The supplied buffer will be filled in for EITHER 0 or 1.
1246  */
1247 int
read_master_block(md_error_t * ep,int fd,void * bp,int bsize)1248 read_master_block(
1249 	md_error_t	*ep,
1250 	int		fd,
1251 	void		*bp,
1252 	int		bsize
1253 )
1254 {
1255 	mddb_mb_t	*mbp = bp;
1256 	int		rval = 1;
1257 
1258 	assert(bp != NULL);
1259 
1260 	if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0)
1261 		return (mdsyserror(ep, errno, NULL));
1262 
1263 	if (read(fd, bp, bsize) != bsize)
1264 		return (mdsyserror(ep, errno, NULL));
1265 
1266 	/*
1267 	 * The master block magic number can either be MDDB_MAGIC_MB in
1268 	 * the case of a real master block, or, it can be MDDB_MAGIC_DU
1269 	 * in the case of a dummy master block
1270 	 */
1271 	if ((mbp->mb_magic != MDDB_MAGIC_MB) &&
1272 	    (mbp->mb_magic != MDDB_MAGIC_DU)) {
1273 		rval = 0;
1274 		(void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL);
1275 	}
1276 
1277 	if (mbp->mb_revision != MDDB_REV_MB) {
1278 		rval = 0;
1279 	}
1280 
1281 	return (rval);
1282 }
1283 
1284 /*
1285  * read_locator_block()
1286  *
1287  * Returns:
1288  *	< 0 for failure
1289  *	  0 for no valid locator block
1290  *	  1 for valid locator block
1291  */
1292 int
read_locator_block(md_error_t * ep,int fd,mddb_mb_t * mbp,void * bp,int bsize)1293 read_locator_block(
1294 	md_error_t	*ep,
1295 	int		fd,
1296 	mddb_mb_t	*mbp,
1297 	void		*bp,
1298 	int		bsize
1299 )
1300 {
1301 	mddb_lb_t	*lbp = bp;
1302 
1303 	assert(bp != NULL);
1304 
1305 	if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0)
1306 		return (mdsyserror(ep, errno, NULL));
1307 
1308 	if (read(fd, bp, bsize) != bsize)
1309 		return (mdsyserror(ep, errno, NULL));
1310 
1311 	return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0);
1312 }
1313 
1314 int
phys_read(md_error_t * ep,int fd,mddb_mb_t * mbp,daddr_t blk,void * bp,int bcount)1315 phys_read(
1316 	md_error_t	*ep,
1317 	int		fd,
1318 	mddb_mb_t	*mbp,
1319 	daddr_t		blk,
1320 	void		*bp,
1321 	int		bcount
1322 )
1323 {
1324 	daddr_t		pblk;
1325 
1326 	if ((pblk = getphysblk(blk, mbp)) < 0)
1327 		return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32,
1328 			MD_LOCAL_SET, blk, NULL));
1329 
1330 	if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0)
1331 		return (mdsyserror(ep, errno, NULL));
1332 
1333 	if (read(fd, bp, bcount) != bcount)
1334 		return (mdsyserror(ep, errno, NULL));
1335 
1336 	return (bcount);
1337 }
1338 
1339 /*
1340  * read_locator_block_did()
1341  *
1342  * Returns:
1343  * 	< 0 for failure
1344  *	  0 for no valid locator name struct
1345  *	  1 for valid locator name struct
1346  */
1347 int
read_locator_block_did(md_error_t * ep,int fd,mddb_mb_t * mbp,mddb_lb_t * lbp,void * bp,int bsize)1348 read_locator_block_did(
1349 	md_error_t	*ep,
1350 	int		fd,
1351 	mddb_mb_t	*mbp,
1352 	mddb_lb_t	*lbp,
1353 	void		*bp,
1354 	int		bsize
1355 )
1356 {
1357 	int		lb_didfirstblk = lbp->lb_didfirstblk;
1358 	mddb_did_blk_t	*lbdidp = bp;
1359 	int		rval;
1360 
1361 	assert(bp != NULL);
1362 
1363 	if ((rval = phys_read(ep, fd, mbp, lb_didfirstblk, bp, bsize)) < 0)
1364 		return (rval);
1365 
1366 	return ((lbdidp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
1367 }
1368 
1369 /*
1370  * read_locator_names()
1371  *
1372  * Returns:
1373  *	< 0 for failure
1374  *	  0 for no valid locator name struct
1375  *	  1 for valid locator name struct
1376  */
1377 int
read_locator_names(md_error_t * ep,int fd,mddb_mb_t * mbp,mddb_lb_t * lbp,void * bp,int bsize)1378 read_locator_names(
1379 	md_error_t	*ep,
1380 	int		fd,
1381 	mddb_mb_t	*mbp,
1382 	mddb_lb_t	*lbp,
1383 	void		*bp,
1384 	int		bsize
1385 )
1386 {
1387 	int		lnfirstblk = lbp->lb_lnfirstblk;
1388 	mddb_ln_t	*lnp = bp;
1389 	int		rval;
1390 
1391 	assert(bp != NULL);
1392 
1393 	if ((rval = phys_read(ep, fd, mbp, lnfirstblk, bp, bsize)) < 0)
1394 		return (rval);
1395 
1396 	return ((lnp->ln_magic == MDDB_MAGIC_LN) ? 1 : 0);
1397 }
1398 
1399 
1400 int
read_database_block(md_error_t * ep,int fd,mddb_mb_t * mbp,int dbblk,void * bp,int bsize)1401 read_database_block(
1402 	md_error_t	*ep,
1403 	int		fd,
1404 	mddb_mb_t	*mbp,
1405 	int		dbblk,
1406 	void		*bp,
1407 	int		bsize
1408 )
1409 {
1410 	mddb_db_t	*dbp = bp;
1411 	int		rval;
1412 
1413 	assert(bp != NULL);
1414 
1415 	if ((rval = phys_read(ep, fd, mbp, dbblk, bp, bsize)) < 0)
1416 		return (rval);
1417 
1418 	return ((dbp->db_magic == MDDB_MAGIC_DB) ? 1 : 0);
1419 }
1420 
1421 int
read_loc_didblks(md_error_t * ep,int fd,mddb_mb_t * mbp,int didblk,void * bp,int bsize)1422 read_loc_didblks(
1423 	md_error_t	*ep,
1424 	int		fd,
1425 	mddb_mb_t	*mbp,
1426 	int		didblk,
1427 	void		*bp,
1428 	int		bsize
1429 )
1430 {
1431 	mddb_did_blk_t	*didbp = bp;
1432 	int		rval;
1433 
1434 	assert(bp != NULL);
1435 
1436 	if ((rval = phys_read(ep, fd, mbp, didblk, bp, bsize)) < 0)
1437 		return (rval);
1438 
1439 	return ((didbp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
1440 }
1441 
1442 
1443 int
read_loc_didinfo(md_error_t * ep,int fd,mddb_mb_t * mbp,int infoblk,void * bp,int bsize)1444 read_loc_didinfo(
1445 	md_error_t	*ep,
1446 	int		fd,
1447 	mddb_mb_t	*mbp,
1448 	int		infoblk,
1449 	void		*bp,
1450 	int		bsize
1451 )
1452 {
1453 	int		rval = 1;
1454 	mddb_did_info_t	*infop = bp;
1455 
1456 	assert(bp != NULL);
1457 
1458 	if ((rval = phys_read(ep, fd, mbp, infoblk, bp, bsize)) < 0)
1459 		return (rval);
1460 
1461 	return ((infop->info_flags & MDDB_DID_EXISTS) ? 1 : 0);
1462 }
1463 
1464 /*
1465  * meta_nm_rec()
1466  *
1467  * Return the DE corresponding to the requested namespace record type.
1468  * Modifies dbp to have a firstentry if one isn't there.
1469  */
1470 static mddb_de_t *
meta_nm_rec(mddb_db_t * dbp,mddb_type_t rectype)1471 meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype)
1472 {
1473 	mddb_de_t *dep;
1474 	int	desize;
1475 
1476 	if (dbp->db_firstentry != NULL) {
1477 		/* LINTED */
1478 		dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry)
1479 				    + sizeof (dbp->db_firstentry));
1480 		dbp->db_firstentry = dep;
1481 		while (dep && dep->de_next) {
1482 			desize = sizeof (*dep) - sizeof (dep->de_blks) +
1483 				sizeof (daddr_t) * dep->de_blkcount;
1484 			/* LINTED */
1485 			dep->de_next = (mddb_de_t *)
1486 				((caddr_t)dep + desize);
1487 			dep = dep->de_next;
1488 		}
1489 	}
1490 
1491 	for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) {
1492 		if (dep->de_type1 == rectype)
1493 			break;
1494 	}
1495 	return (dep);
1496 }
1497 
1498 /*
1499  * read_nm_rec()
1500  *
1501  * Reads the NM, NM_DID or NM_DID_SHR record in the mddb and stores the
1502  * configuration data in the buffer 'nm'
1503  *
1504  * Returns:
1505  *	< 0 for failure
1506  *	  0 for no valid NM/DID_NM/DID_NM_SHR record
1507  *	  1 for valid NM/DID_NM/DID_NM_SHR record
1508  *
1509  */
1510 static int
read_nm_rec(md_error_t * ep,int fd,mddb_mb_t * mbp,mddb_lb_t * lbp,char ** nm,mddb_type_t rectype,char * diskname)1511 read_nm_rec(
1512 	md_error_t 	*ep,
1513 	int 		fd,
1514 	mddb_mb_t	*mbp,
1515 	mddb_lb_t	*lbp,
1516 	char		**nm,
1517 	mddb_type_t	rectype,
1518 	char		*diskname
1519 )
1520 {
1521 	int		cnt, dbblk, rval = 0;
1522 	char		db[DEV_BSIZE];
1523 	mddb_de_t	*dep;
1524 	/*LINTED*/
1525 	mddb_db_t	*dbp = (mddb_db_t *)&db;
1526 	char 		*tmpnm = NULL;
1527 	daddr_t		pblk;
1528 
1529 	for (dbblk = lbp->lb_dbfirstblk;
1530 	    dbblk != 0;
1531 	    dbblk = dbp->db_nextblk) {
1532 
1533 		if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp,
1534 		    sizeof (db))) <= 0)
1535 			return (rval);
1536 
1537 		/*
1538 		 * Locate NM/DID_NM/DID_NM_SHR record. Normally there is
1539 		 * only one record per mddb. There is a rare case when we
1540 		 * can't expand the record. If this is the case then we
1541 		 * will have multiple NM/DID_NM/DID_NM_SHR records linked
1542 		 * with r_next_recid.
1543 		 *
1544 		 * For now assume the normal case and handle the extended
1545 		 * namespace in Phase 2.
1546 		 */
1547 		if ((dep = meta_nm_rec(dbp, rectype)) != NULL)
1548 			break;
1549 	}
1550 
1551 	/* If meta_nm_rec() never succeeded, bail out */
1552 	if (dep == NULL)
1553 		return (0);
1554 
1555 	/* Read in the appropriate record and return configurations */
1556 	tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount));
1557 	*nm = tmpnm;
1558 
1559 	for (cnt = 0; cnt < dep->de_blkcount; cnt++) {
1560 		if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) {
1561 			rval = mdmddberror(ep, MDE_DB_BLKRANGE,
1562 			    NODEV32, MD_LOCAL_SET,
1563 			    dep->de_blks[cnt], diskname);
1564 			return (rval);
1565 		}
1566 
1567 		if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) {
1568 			rval = mdsyserror(ep, errno, diskname);
1569 			return (rval);
1570 		}
1571 
1572 		if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) {
1573 			rval = mdsyserror(ep, errno, diskname);
1574 			return (rval);
1575 		}
1576 
1577 		tmpnm += DEV_BSIZE;
1578 	}
1579 	return (1);
1580 }
1581 
1582 /*
1583  * is_replicated
1584  *
1585  * Determines whether a disk has been replicated or not. It checks to see
1586  * if the device id stored in the master block is the same as the device id
1587  * registered for that disk on the current system. If the two device ids are
1588  * different, then we know that the disk has been replicated.
1589  *
1590  * If need_devid is set and the disk is replicated, fill in the new_devid.
1591  * Also, if need_devid is set, this routine allocates memory for the device
1592  * ids; the caller of this routine is responsible for free'ing up the memory.
1593  *
1594  * Returns:
1595  * 	MD_IM_SET_REPLICATED	if it's a replicated disk
1596  * 	0 			if it's not a replicated disk
1597  */
1598 static int
is_replicated(int fd,mddb_mb_t * mbp,int need_devid,void ** new_devid)1599 is_replicated(
1600 	int fd,
1601 	mddb_mb_t *mbp,
1602 	int need_devid,
1603 	void **new_devid
1604 )
1605 {
1606 	ddi_devid_t	current_devid;
1607 	int		retval = 0;
1608 	size_t		new_devid_len;
1609 
1610 	if (mbp->mb_devid_magic != MDDB_MAGIC_DE)
1611 		return (retval);
1612 
1613 	if (devid_get(fd, &current_devid) != 0)
1614 		return (retval);
1615 
1616 	if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0)
1617 		retval = MD_IM_SET_REPLICATED;
1618 
1619 	if (retval && need_devid) {
1620 		new_devid_len = devid_sizeof(current_devid);
1621 		*new_devid = Zalloc(new_devid_len);
1622 		(void) memcpy(*new_devid, (void *)current_devid, new_devid_len);
1623 	}
1624 
1625 	devid_free(current_devid);
1626 	return (retval);
1627 }
1628 
1629 /*
1630  * free_replicated_disks_list()
1631  *
1632  * this frees up all the memory allocated by build_replicated_disks_list
1633  */
1634 static void
free_replicated_disks_list()1635 free_replicated_disks_list()
1636 {
1637 	replicated_disk_t 	**repl_disk, *temp;
1638 	int 			index;
1639 
1640 	for (index = 0; index <= MAX_DEVID_LEN; index++) {
1641 		repl_disk = &replicated_disk_list[index];
1642 
1643 		while (*repl_disk != NULL) {
1644 			temp = *repl_disk;
1645 			*repl_disk = (*repl_disk)->next;
1646 
1647 			Free(temp->old_devid);
1648 			Free(temp->new_devid);
1649 			Free(temp);
1650 		}
1651 	}
1652 }
1653 
1654 /*
1655  * build_replicated_disks_list()
1656  *
1657  * Builds a list of disks that have been replicated using either a
1658  * remote replication or a point-in-time replication software. The
1659  * list is stored as a two dimensional sparse array.
1660  *
1661  * Returns
1662  * 	1	on success
1663  * 	0 	on failure
1664  */
1665 int
build_replicated_disks_list(md_error_t * ep,mddrivenamelist_t * dnlp)1666 build_replicated_disks_list(
1667 	md_error_t *ep,
1668 	mddrivenamelist_t *dnlp
1669 )
1670 {
1671 	uint_t			sliceno;
1672 	int			fd = -1;
1673 	mddrivenamelist_t	*dp;
1674 	mdname_t		*rsp;
1675 	mddb_mb_t		*mbp;
1676 
1677 	mbp = Malloc(DEV_BSIZE);
1678 
1679 	for (dp = dnlp; dp != NULL; dp = dp->next) {
1680 		mddrivename_t *dnp;
1681 		void *new_devid;
1682 
1683 		dnp = dp->drivenamep;
1684 		/* determine the replica slice */
1685 		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
1686 			continue;
1687 
1688 		/*
1689 		 * if the replica slice size is zero, don't bother opening
1690 		 */
1691 		if (dnp->vtoc.parts[sliceno].size == 0)
1692 			continue;
1693 
1694 		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
1695 			continue;
1696 
1697 		if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0)
1698 			return (mdsyserror(ep, errno, rsp->rname));
1699 
1700 		/* a drive may not have a master block so we just continue */
1701 		if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
1702 			(void) close(fd);
1703 			mdclrerror(ep);
1704 			continue;
1705 		}
1706 
1707 		if (is_replicated(fd, mbp, 1, &new_devid)) {
1708 			replicated_list_insert(mbp->mb_devid_len,
1709 			    mbp->mb_devid, new_devid);
1710 		}
1711 		(void) close(fd);
1712 	}
1713 	*replicated_disk_list_built = 1;
1714 
1715 	Free(mbp);
1716 	return (1);
1717 }
1718 
1719 /*
1720  * free_did_list()
1721  *
1722  * Frees the did_list allocated as part of build_did_list
1723  */
1724 static void
free_did_list(did_list_t * did_listp)1725 free_did_list(
1726 	did_list_t	*did_listp
1727 )
1728 {
1729 	did_list_t	*temp, *head;
1730 
1731 	head = did_listp;
1732 
1733 	while (head != NULL) {
1734 		temp = head;
1735 		head = head->next;
1736 		if (temp->rdid)
1737 			Free(temp->rdid);
1738 		if (temp->did)
1739 			Free(temp->did);
1740 		if (temp->devname)
1741 			Free(temp->devname);
1742 		if (temp->minor_name)
1743 			Free(temp->minor_name);
1744 		if (temp->driver_name)
1745 			Free(temp->driver_name);
1746 		Free(temp);
1747 	}
1748 }
1749 
1750 /*
1751  * meta_free_im_replica_info
1752  *
1753  * Frees the md_im_replica_info list
1754  */
1755 static void
meta_free_im_replica_info(md_im_replica_info_t * mirp)1756 meta_free_im_replica_info(
1757 	md_im_replica_info_t	*mirp
1758 )
1759 {
1760 	md_im_replica_info_t	*r, *temp;
1761 
1762 	r = mirp;
1763 
1764 	while (r != NULL) {
1765 		temp = r;
1766 		r = r->mir_next;
1767 
1768 		Free(temp);
1769 	}
1770 }
1771 
1772 /*
1773  * meta_free_im_drive_info
1774  *
1775  * Frees the md_im_drive_info list
1776  */
1777 static void
meta_free_im_drive_info(md_im_drive_info_t * midp)1778 meta_free_im_drive_info(
1779 	md_im_drive_info_t	*midp
1780 )
1781 {
1782 	md_im_drive_info_t	*d, *temp;
1783 
1784 	d = midp;
1785 
1786 	while (d != NULL) {
1787 		temp = d;
1788 		d = d->mid_next;
1789 
1790 		if (temp->mid_available & MD_IM_DISK_NOT_AVAILABLE)
1791 			/*
1792 			 * dnp is not on the drivenamelist and is a temp
1793 			 * dnp for metaimport if the disk is unavailable.
1794 			 * We need to specifically free it because of this.
1795 			 * If the disk is available, standard drivelist freeing
1796 			 * will kick in so we don't need to do it.
1797 			 */
1798 			metafreedrivename(temp->mid_dnp);
1799 		if (temp->mid_devid)
1800 			Free(temp->mid_devid);
1801 		if (temp->mid_o_devid)
1802 			Free(temp->mid_o_devid);
1803 		if (temp->mid_driver_name)
1804 			Free(temp->mid_driver_name);
1805 		if (temp->mid_devname)
1806 			Free(temp->mid_devname);
1807 		if (temp->mid_replicas) {
1808 			meta_free_im_replica_info(temp->mid_replicas);
1809 			temp->mid_replicas = NULL;
1810 		}
1811 		if (temp->overlap) {
1812 			meta_free_im_drive_info(temp->overlap);
1813 			temp->overlap = NULL;
1814 		}
1815 		Free(temp);
1816 	}
1817 }
1818 
1819 /*
1820  * meta_free_im_set_desc
1821  *
1822  * Frees the md_im_set_desc_t list
1823  */
1824 void
meta_free_im_set_desc(md_im_set_desc_t * misp)1825 meta_free_im_set_desc(
1826 	md_im_set_desc_t	*misp
1827 )
1828 {
1829 	md_im_set_desc_t	*s, *temp;
1830 
1831 	s = misp;
1832 
1833 	while (s != NULL) {
1834 		temp = s;
1835 		s = s->mis_next;
1836 		if (temp->mis_drives) {
1837 			meta_free_im_drive_info(temp->mis_drives);
1838 			temp->mis_drives = NULL;
1839 		}
1840 		Free(temp);
1841 	}
1842 }
1843 
1844 /*
1845  * build_did_list()
1846  *
1847  * Build a list of device ids corresponding to disks in the locator block.
1848  * Memory is allocated here for the nodes in the did_list. The callers of
1849  * this routine must also call free_did_list to free up the memory after
1850  * they're done.
1851  *
1852  * Returns:
1853  *	< 0 		for failure
1854  *	  0 		for no valid locator block device id array
1855  *	  1 		for valid locator block device id array
1856  *	  ENOTSUP	partial diskset, not all disks in a diskset on the
1857  *			system where import is being executed
1858  */
1859 static int
build_did_list(md_error_t * ep,int fd,mddb_mb_t * mb,mddb_lb_t * lbp,mddb_did_blk_t * lbdidp,mddb_ln_t * lnp,did_list_t ** did_listp,int replicated)1860 build_did_list(
1861 	md_error_t	*ep,
1862 	int		fd,
1863 	mddb_mb_t	*mb,
1864 	mddb_lb_t	*lbp,
1865 	mddb_did_blk_t	*lbdidp,
1866 	mddb_ln_t	*lnp,
1867 	did_list_t	**did_listp,
1868 	int		replicated
1869 )
1870 {
1871 	char 		*search_path = "/dev";
1872 	char		*minor_name;
1873 	int		rval, cnt;
1874 	devid_nmlist_t	*nm;
1875 	uint_t		did_info_length = 0;
1876 	uint_t		did_info_firstblk = 0;
1877 	did_list_t	*new, *head = NULL;
1878 	char		*bp = NULL, *temp;
1879 	mddb_did_info_t	*did_info = NULL;
1880 	void		*did = NULL;
1881 	size_t		new_devid_len;
1882 	int		partial = 0;
1883 	int		partial_replicated = 0;
1884 
1885 	for (cnt = 0; cnt < MDDB_NLB; cnt++) {
1886 		partial_replicated = 0;
1887 		did_info = &lbdidp->blk_info[cnt];
1888 
1889 		if (!(did_info->info_flags & MDDB_DID_EXISTS))
1890 			continue;
1891 
1892 		new = Zalloc(sizeof (did_list_t));
1893 		new->did = Zalloc(did_info->info_length);
1894 
1895 		/*
1896 		 * If we can re-use the buffer that has already been
1897 		 * read in then just use it.  Otherwise free
1898 		 * the previous one and alloc a new one
1899 		 */
1900 		if (did_info->info_firstblk != did_info_firstblk) {
1901 
1902 			did_info_length = dbtob(did_info->info_blkcnt);
1903 			did_info_firstblk = did_info->info_firstblk;
1904 
1905 			if (bp)
1906 				Free(bp);
1907 			bp = temp = Zalloc(did_info_length);
1908 
1909 			if ((rval = phys_read(ep, fd, mb, did_info_firstblk,
1910 			    (void *)bp, did_info_length)) < 0)
1911 				return (rval);
1912 		} else {
1913 			temp = bp;
1914 		}
1915 
1916 		temp += did_info->info_offset;
1917 		(void) memcpy(new->did, temp, did_info->info_length);
1918 		new->did_index = cnt;
1919 		minor_name = did_info->info_minor_name;
1920 
1921 		/*
1922 		 * If we are not able to find the ctd mapping corresponding
1923 		 * to a given device id, it probably means the device id in
1924 		 * question is not registered with the system.
1925 		 *
1926 		 * Highly likely that the only time this happens, we've hit
1927 		 * a case where not all the disks that are a part of the
1928 		 * diskset were moved before importing the diskset.
1929 		 *
1930 		 * If set is a replicated diskset, then the device id we get
1931 		 * from 'lb' will be the 'other' did and we need to lookup
1932 		 * the real one before we call this routine.
1933 		 */
1934 		if (replicated) {
1935 		    temp = replicated_list_lookup(did_info->info_length,
1936 			new->did);
1937 		    if (temp == NULL) {
1938 			/* we have a partial replicated set, fake it */
1939 			new_devid_len = devid_sizeof((ddi_devid_t)new->did);
1940 			new->rdid = Zalloc(new_devid_len);
1941 			(void) memcpy(new->rdid, new->did, new_devid_len);
1942 			did = new->rdid;
1943 			partial_replicated = 1;
1944 		    } else {
1945 			new_devid_len = devid_sizeof((ddi_devid_t)temp);
1946 			new->rdid = Zalloc(new_devid_len);
1947 			(void) memcpy(new->rdid, temp, new_devid_len);
1948 			did = new->rdid;
1949 		    }
1950 		} else {
1951 		    did = new->did;
1952 		}
1953 
1954 		if (devid_valid((ddi_devid_t)(did)) == 0) {
1955 			return (-1);
1956 		}
1957 
1958 		if (partial_replicated || meta_deviceid_to_nmlist(search_path,
1959 		    (ddi_devid_t)did, minor_name, &nm) != 0) {
1960 			int	len = 0;
1961 
1962 			/*
1963 			 * Partial diskset case. We'll need to get the
1964 			 * device information from the metadb instead
1965 			 * of the output (nm) of meta_deviceid_to_nmlist.
1966 			 */
1967 			len = strlen(lnp->ln_prefixes[0].pre_data) +
1968 			    strlen(lnp->ln_suffixes[0][cnt].suf_data) + 2;
1969 			new->devname = Zalloc(len);
1970 			(void) strlcpy(new->devname,
1971 			    lnp->ln_prefixes[0].pre_data,
1972 			    strlen(lnp->ln_prefixes[0].pre_data) + 1);
1973 			(void) strlcat(new->devname, "/", len);
1974 			(void) strlcat(new->devname,
1975 			    lnp->ln_suffixes[0][cnt].suf_data, len);
1976 			new->minor_name = Strdup(minor_name);
1977 			new->next = head;
1978 			new->available = MD_IM_DISK_NOT_AVAILABLE;
1979 			new->driver_name = Strdup(lbp->lb_drvnm[0].dn_data);
1980 			new->dev = lbp->lb_locators[cnt].l_dev;
1981 			head = new;
1982 			partial = ENOTSUP;
1983 			continue;
1984 		}
1985 
1986 		/*
1987 		 * Disk is there. Grab device information from nm structure.
1988 		 */
1989 		assert(nm->devname != NULL);
1990 		new->devname = Strdup(nm->devname);
1991 		new->dev = nm->dev;
1992 		new->minor_name = Strdup(minor_name);
1993 		new->available = MD_IM_DISK_AVAILABLE;
1994 
1995 		devid_free_nmlist(nm);
1996 
1997 		new->next = head;
1998 		head = new;
1999 	}
2000 
2001 	/* Free the last bp */
2002 	if (bp)
2003 		Free(bp);
2004 	*did_listp = head;
2005 	if (partial)
2006 		return (partial);
2007 	return (1);
2008 }
2009 /*
2010  * check_nm_disks
2011  *	Checks the disks listed in the shared did namespace to see if they
2012  *	are accessable on the system. If not, return ENOTSUP error to
2013  *	indicate we have a partial diskset.
2014  * Returns:
2015  *	< 0 		for failure
2016  *	  0		success
2017  *	  ENOTSUP	partial diskset, not all disks in a diskset on the
2018  *			system where import is being executed
2019  */
2020 static int
check_nm_disks(struct devid_min_rec * did_nmp,struct devid_shr_rec * did_shrnmp)2021 check_nm_disks(
2022 	struct devid_min_rec	*did_nmp,
2023 	struct devid_shr_rec	*did_shrnmp
2024 )
2025 {
2026 	char 		*search_path = "/dev";
2027 	char		*minor_name = NULL;
2028 	uint_t		used_size, min_used_size;
2029 	ddi_devid_t	did;
2030 	devid_nmlist_t	*nm;
2031 	void		*did_min_namep;
2032 	void		*did_shr_namep;
2033 	size_t		did_nsize, did_shr_nsize;
2034 
2035 	used_size = did_shrnmp->did_rec_hdr.r_used_size -
2036 	    sizeof (struct nm_rec_hdr);
2037 	min_used_size = did_nmp->min_rec_hdr.r_used_size -
2038 	    sizeof (struct nm_rec_hdr);
2039 	did_shr_namep = (void *)(&did_shrnmp->device_id[0]);
2040 	while (used_size > (int)sizeof (struct did_shr_name)) {
2041 		did_min_namep = (void *)(&did_nmp->minor_name[0]);
2042 		/* grab device id and minor name from the shared spaces */
2043 		did = (ddi_devid_t)(((struct did_shr_name *)
2044 		    did_shr_namep)->did_devid);
2045 		if (devid_valid(did) == 0) {
2046 			return (-1);
2047 		}
2048 
2049 		/*
2050 		 * We need to check that the DID_NM and DID_SHR_NM are in
2051 		 * sync. It is possible that we took a panic between writing
2052 		 * the two areas to disk. This would be cleaned up on the
2053 		 * next snarf but we don't know for sure that snarf has even
2054 		 * happened since we're reading from disk.
2055 		 */
2056 		while (((struct did_shr_name *)did_shr_namep)->did_key !=
2057 		    ((struct did_min_name *)did_min_namep)->min_devid_key) {
2058 			did_nsize = DID_NAMSIZ((struct did_min_name *)
2059 			    did_min_namep);
2060 			did_min_namep = ((void *)((char *)did_min_namep +
2061 			    did_nsize));
2062 			min_used_size -= did_nsize;
2063 			if (min_used_size < (int)sizeof (struct did_min_name))
2064 				continue;
2065 		}
2066 		minor_name = ((struct did_min_name *)did_min_namep)->min_name;
2067 
2068 		/*
2069 		 * Try to find disk in the system. If we can't find the
2070 		 * disk, we have a partial diskset.
2071 		 */
2072 		if ((meta_deviceid_to_nmlist(search_path,
2073 		    did, minor_name, &nm)) != 0) {
2074 			/* Partial diskset detected */
2075 			return (ENOTSUP);
2076 		}
2077 		devid_free_nmlist(nm);
2078 		used_size -= DID_SHR_NAMSIZ((struct did_shr_name *)
2079 		    did_shr_namep);
2080 		/* increment to next item in the shared spaces */
2081 		did_shr_nsize = DID_SHR_NAMSIZ((struct did_shr_name *)
2082 		    did_shr_namep);
2083 		did_shr_namep = ((void *)((char *)did_shr_namep +
2084 		    did_shr_nsize));
2085 	}
2086 	return (0);
2087 }
2088 
2089 
2090 /*
2091  * report_metadb_info()
2092  *
2093  * Generates metadb output for the diskset.
2094  *
2095  */
2096 static void
report_metadb_info(md_im_set_desc_t * misp,char * indent)2097 report_metadb_info(
2098 	md_im_set_desc_t	*misp,
2099 	char			*indent
2100 )
2101 {
2102 	md_im_drive_info_t	*d;
2103 	md_im_replica_info_t	*r;
2104 	char			*unk_str = "";
2105 	int			i;
2106 
2107 	(void) printf("%s\t%5.5s\t\t%9.9s\t%11.11s\n", indent, gettext("flags"),
2108 	    gettext("first blk"), gettext("block count"));
2109 
2110 	unk_str = gettext("unknown");
2111 
2112 	/*
2113 	 * Looping through all drives in the diskset to print
2114 	 * out information about the drive and if the verbose
2115 	 * option is set print out replica data.
2116 	 */
2117 	for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
2118 
2119 		if (d->mid_replicas != NULL) {
2120 			for (r = d->mid_replicas; r != NULL;
2121 			    r = r->mir_next) {
2122 				(void) printf("%s", indent);
2123 				for (i = 0; i < MDDB_FLAGS_LEN; i++) {
2124 					if (r->mir_flags & (1 << i)) {
2125 						(void) putchar(
2126 						    MDDB_FLAGS_STRING[i]);
2127 					} else {
2128 						(void) putchar(' ');
2129 					}
2130 				}
2131 				if ((r->mir_offset == -1) && (r->mir_length
2132 				    == -1)) {
2133 					(void) printf("%7.7s\t\t%7.7s\t",
2134 					    unk_str, unk_str);
2135 				} else if (r->mir_length == -1) {
2136 					(void) printf("%i\t\t%7.7s\t",
2137 					    r->mir_offset, unk_str);
2138 				} else {
2139 					(void) printf("%i\t\t%i\t",
2140 					    r->mir_offset, r->mir_length);
2141 				}
2142 				(void) printf("\t%s\n",
2143 				    d->mid_devname);
2144 			}
2145 		}
2146 	}
2147 	(void) printf("\n");
2148 }
2149 
2150 /*
2151  * meta_replica_quorum will determine if the disks in the set to be
2152  * imported have enough valid replicas to have quorum.
2153  *
2154  * RETURN:
2155  *	-1	Set doesn't have quorum
2156  *	0	Set does have quorum
2157  */
2158 int
meta_replica_quorum(md_im_set_desc_t * misp)2159 meta_replica_quorum(
2160 	md_im_set_desc_t *misp
2161 )
2162 {
2163 	md_im_drive_info_t	*midp;
2164 	md_im_replica_info_t    *midr;
2165 	int			replica_count = 0;
2166 
2167 	for (midp = misp->mis_drives; midp != NULL;
2168 		midp = midp->mid_next) {
2169 
2170 		if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE)
2171 			continue;
2172 
2173 		/*
2174 		 * The drive is okay. Now count its replicas
2175 		 */
2176 		for (midr = midp->mid_replicas; midr != NULL;
2177 			midr = midr->mir_next) {
2178 			replica_count++;
2179 		}
2180 	}
2181 
2182 	if (misp->mis_active_replicas & 1) {
2183 		/* odd number of replicas */
2184 		if (replica_count < (misp->mis_active_replicas + 1)/2)
2185 			return (-1);
2186 	} else {
2187 		/* even number of replicas */
2188 		if (replica_count <= ((misp->mis_active_replicas + 1)/2))
2189 			return (-1);
2190 	}
2191 
2192 	return (0);
2193 }
2194 
2195 
2196 /*
2197  * Choose the best drive to use for the metaimport command.
2198  */
2199 md_im_drive_info_t *
pick_good_disk(md_im_set_desc_t * misp)2200 pick_good_disk(md_im_set_desc_t *misp)
2201 {
2202 	md_timeval32_t		*setcrtime; /* set creation time */
2203 	md_im_drive_info_t	*good_disk = NULL;
2204 	md_im_drive_info_t	*midp = NULL;
2205 	md_im_replica_info_t	*mirp;
2206 
2207 	setcrtime = &(misp->mis_drives->mid_replicas->mir_timestamp);
2208 	for (midp = misp->mis_drives; (midp != NULL) && (good_disk == NULL);
2209 	    midp = midp->mid_next) {
2210 		/* drive must be available */
2211 		if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
2212 			continue;
2213 		}
2214 		for (mirp = midp->mid_replicas; mirp != NULL;
2215 		    mirp = mirp->mir_next) {
2216 			/* replica must be active to be a good one */
2217 			if (mirp->mir_flags & MDDB_F_ACTIVE) {
2218 				if ((setcrtime->tv_sec ==
2219 				    midp-> mid_setcreatetimestamp.tv_sec) &&
2220 				    (setcrtime->tv_usec ==
2221 				    midp->mid_setcreatetimestamp.tv_usec)) {
2222 					good_disk = midp;
2223 					break;
2224 				}
2225 			}
2226 		}
2227 	}
2228 	return (good_disk);
2229 }
2230 
2231 /*
2232  * report_set_info()
2233  *
2234  * Returns:
2235  *	< 0 for failure
2236  *	  0 for success
2237  *
2238  */
2239 static int
report_set_info(md_im_set_desc_t * misp,mddb_mb_t * mb,mddb_lb_t * lbp,mddb_rb_t * nm,pnm_rec_t ** pnm,mdname_t * rsp,int fd,uint_t imp_flags,int set_count,int overlap,md_im_drive_info_t * overlap_disks,md_error_t * ep)2240 report_set_info(
2241 	md_im_set_desc_t	*misp,
2242 	mddb_mb_t		*mb,
2243 	mddb_lb_t		*lbp,
2244 	mddb_rb_t		*nm,
2245 	pnm_rec_t		**pnm,
2246 	mdname_t		*rsp,
2247 	int			fd,
2248 	uint_t			imp_flags,
2249 	int			set_count,
2250 	int			overlap,
2251 	md_im_drive_info_t	*overlap_disks,
2252 	md_error_t		*ep
2253 )
2254 {
2255 	int 			rval = 0;
2256 	md_im_drive_info_t	*d;
2257 	md_im_drive_info_t	*good_disk = NULL;
2258 	int			i;
2259 	int			in = META_INDENT;
2260 	char			indent[MAXPATHLEN];
2261 	md_timeval32_t		lastaccess; /* stores last modified timestamp */
2262 	int			has_overlap = 0;
2263 	int			no_quorum = 0;
2264 	int			partial = 0;
2265 
2266 	/* Calculates the correct indentation. */
2267 	indent[0] = 0;
2268 	for (i = 0; i < in; i++)
2269 		(void) strlcat(indent, " ", sizeof (indent));
2270 
2271 	/*
2272 	 * This will print before the information for the first diskset
2273 	 * if the verbose option was set.
2274 	 */
2275 	if (set_count == 1) {
2276 		if (imp_flags & META_IMP_REPORT) {
2277 			(void) printf("\n%s:\n\n",
2278 			    gettext("Disksets eligible for import"));
2279 		}
2280 	}
2281 
2282 	partial = misp->mis_partial;
2283 	good_disk = pick_good_disk(misp);
2284 	if (good_disk == NULL) {
2285 		return (rval);
2286 	}
2287 
2288 	/*
2289 	 * Make the distinction between a regular diskset and
2290 	 * a replicated diskset.  Also make the distinction
2291 	 * between a partial vs. full diskset.
2292 	 */
2293 	if (partial == MD_IM_PARTIAL_DISKSET) {
2294 		if (misp->mis_flags & MD_IM_SET_REPLICATED) {
2295 			if (imp_flags & META_IMP_REPORT) {
2296 				(void) printf("%i)  %s:\n", set_count, gettext(
2297 				    "Found partial replicated diskset "
2298 				    "containing disks"));
2299 			} else {
2300 				(void) printf("\n%s:\n", gettext(
2301 				    "Importing partial replicated diskset "
2302 				    "containing disks"));
2303 			}
2304 		} else {
2305 			if (imp_flags & META_IMP_REPORT) {
2306 				(void) printf("%i)  %s:\n", set_count, gettext(
2307 				    "Found partial regular diskset containing "
2308 				    "disks"));
2309 			} else {
2310 				(void) printf("\n%s:\n", gettext(
2311 				    "Importing partial regular diskset "
2312 				    "containing disks"));
2313 			}
2314 		}
2315 	} else {
2316 		if (misp->mis_flags & MD_IM_SET_REPLICATED) {
2317 			if (imp_flags & META_IMP_REPORT) {
2318 				(void) printf("%i)  %s:\n", set_count, gettext(
2319 				    "Found replicated diskset containing "
2320 				    "disks"));
2321 			} else {
2322 				(void) printf("\n%s:\n", gettext(
2323 				    "Importing replicated diskset containing "
2324 				    "disks"));
2325 			}
2326 		} else {
2327 			if (imp_flags & META_IMP_REPORT) {
2328 				(void) printf("%i)  %s:\n", set_count, gettext(
2329 				    "Found regular diskset containing disks"));
2330 			} else {
2331 				(void) printf("\n%s:\n", gettext(
2332 				    "Importing regular diskset containing "
2333 				    "disks"));
2334 			}
2335 		}
2336 	}
2337 
2338 	/*
2339 	 * Check each drive in the set. If it's unavailable or
2340 	 * an overlap tell the user.
2341 	 */
2342 	for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
2343 		(void) fprintf(stdout, "  %s", d->mid_dnp->cname);
2344 		if (d->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
2345 			(void) fprintf(stdout, " (UNAVAIL)");
2346 		}
2347 		if (overlap) {
2348 			md_im_drive_info_t	**chain;
2349 			/*
2350 			 * There is the potential for an overlap, see if
2351 			 * this disk is one of the overlapped disks.
2352 			 */
2353 			for (chain = &overlap_disks; *chain != NULL;
2354 			    chain = &(*chain)->overlap) {
2355 				if (strcmp(d->mid_dnp->cname,
2356 				    (*chain)->mid_dnp->cname) == 0) {
2357 					(void) fprintf(stdout, " (CONFLICT)");
2358 					has_overlap = 1;
2359 					break;
2360 				}
2361 			}
2362 		}
2363 		(void) fprintf(stdout, "\n");
2364 	}
2365 
2366 	/*
2367 	 * This note explains the (UNAVAIL) that appears next to the
2368 	 * disks in the diskset that are not available.
2369 	 */
2370 	if (partial) {
2371 		(void) printf("%s%s\n%s%s\n\n", indent,
2372 		    gettext("(UNAVAIL) WARNING: This disk is unavailable on"
2373 		    " this system."), indent, gettext("Import may corrupt "
2374 		    "data in the diskset."));
2375 	}
2376 
2377 	/*
2378 	 * This note explains the (CONFLICT) that appears next to the
2379 	 * disks whose lb_inittime timestamp does not
2380 	 * match the rest of the diskset.
2381 	 */
2382 	if (has_overlap) {
2383 		(void) printf("%s%s\n%s%s\n\n", indent,
2384 		    gettext("(CONFLICT) WARNING: This disk has been reused in "
2385 		    "another diskset or system configuration."), indent,
2386 		    gettext("Import may corrupt data in the diskset."));
2387 	}
2388 
2389 	/*
2390 	 * If the verbose flag was given on the command line,
2391 	 * we will print out the metastat -c information , the
2392 	 * creation time, and last modified time for the diskset.
2393 	 */
2394 	if (imp_flags & META_IMP_VERBOSE) {
2395 		(void) printf("%s%s\n", indent,
2396 		    gettext("Metadatabase information:"));
2397 		report_metadb_info(misp, indent);
2398 
2399 		/*
2400 		 * Printing creation time and last modified time.
2401 		 * Last modified: uses the global variable "lastaccess",
2402 		 * which is set to the last updated timestamp from all of
2403 		 * the database blocks(db_timestamp) or record blocks
2404 		 * (rb_timestamp).
2405 		 * Creation time is the locator block init time
2406 		 * (lb_inittime).
2407 		 */
2408 		lastaccess = good_disk->mid_replicas->mir_timestamp;
2409 
2410 		(void) printf("%s%s\n", indent,
2411 		    gettext("Metadevice information:"));
2412 		rval = report_metastat_info(mb, lbp, nm, pnm, rsp, fd,
2413 		    &lastaccess, ep);
2414 		if (rval < 0) {
2415 			return (rval);
2416 		}
2417 
2418 		(void) printf("%s%s:\t%s\n", indent,
2419 		    gettext("Creation time"),
2420 		    meta_print_time(&good_disk->mid_replicas->mir_timestamp));
2421 		(void) printf("%s%s:\t%s\n", indent,
2422 		    gettext("Last modified time"),
2423 		    meta_print_time(&lastaccess));
2424 	} else {
2425 		/*
2426 		 * Even if the verbose option is not set, we will print the
2427 		 * creation time for the diskset.
2428 		 */
2429 		(void) printf("%s%s:\t%s\n", indent, gettext("Creation time"),
2430 		    meta_print_time(&good_disk->mid_replicas->mir_timestamp));
2431 	}
2432 
2433 
2434 	/*
2435 	 * If the diskset is not actually being imported, then we
2436 	 * print out extra information about how to import it.
2437 	 * If the verbose flag was not set, then we will also
2438 	 * print out information about how to obtain verbose output.
2439 	 */
2440 	if (imp_flags & META_IMP_REPORT) {
2441 		/*
2442 		 * TRANSLATION_NOTE
2443 		 *
2444 		 * The translation of the phrase "For more information
2445 		 * about this set" will be followed by a ":" and a
2446 		 * suggested command (untranslatable) that the user
2447 		 * may use to request additional information.
2448 		 */
2449 		if (!(imp_flags & META_IMP_VERBOSE)) {
2450 		(void) printf("%s%s:\n%s  %s -r -v %s\n", indent,
2451 		    gettext("For more information about this diskset"),
2452 		    indent, myname, good_disk->mid_dnp->cname);
2453 		}
2454 
2455 		if (meta_replica_quorum(misp) != 0)
2456 			no_quorum = 1;
2457 
2458 		/*
2459 		 * TRANSLATION_NOTE
2460 		 *
2461 		 * The translation of the phrase "To import this set"
2462 		 * will be followed by a ":" and a suggested command
2463 		 * (untranslatable) that the user may use to import
2464 		 * the specified diskset.
2465 		 */
2466 		if (partial || has_overlap || no_quorum) {
2467 			(void) printf("%s%s:\n%s  %s -f -s <newsetname> %s\n",
2468 			    indent, gettext("To import this diskset"), indent,
2469 			    myname, good_disk->mid_dnp->cname);
2470 		} else {
2471 			(void) printf("%s%s:\n%s  %s -s <newsetname> %s\n",
2472 			    indent, gettext("To import this diskset"), indent,
2473 			    myname, good_disk->mid_dnp->cname);
2474 		}
2475 	}
2476 	(void) printf("\n\n");
2477 
2478 	return (rval);
2479 }
2480 
2481 
2482 /*
2483  * meta_get_and_report_set_info
2484  *
2485  * Scans a given drive for set specific information. If the given drive
2486  * has a shared metadb, scans the shared metadb for information pertaining
2487  * to the set.
2488  * If imp_flags has META_IMP_PASS1 set don't report.
2489  *
2490  * Returns:
2491  * 	<0 	for failure
2492  *	0	success but no replicas were found
2493  *	1	success and a replica was found
2494  */
2495 int
meta_get_and_report_set_info(mddrivenamelist_t * dp,md_im_set_desc_t ** mispp,int local_mb_ok,uint_t imp_flags,int * set_count,int overlap,md_im_drive_info_t * overlap_disks,md_error_t * ep)2496 meta_get_and_report_set_info(
2497 	mddrivenamelist_t	*dp,
2498 	md_im_set_desc_t	**mispp,
2499 	int			local_mb_ok,
2500 	uint_t			imp_flags,
2501 	int			*set_count,
2502 	int			overlap,
2503 	md_im_drive_info_t	*overlap_disks,
2504 	md_error_t 		*ep
2505 )
2506 {
2507 	uint_t			s;
2508 	mdname_t		*rsp;
2509 	int			fd;
2510 	char			mb[DEV_BSIZE];
2511 				/*LINTED*/
2512 	mddb_mb_t		*mbp = (mddb_mb_t *)mb;
2513 	char			lb[dbtob(MDDB_LBCNT)];
2514 				/*LINTED*/
2515 	mddb_lb_t		*lbp = (mddb_lb_t *)lb;
2516 	mddb_did_blk_t		*lbdidp = NULL;
2517 	mddb_ln_t		*lnp = NULL;
2518 	int			lnsize, lbdid_size;
2519 	int			rval = 0;
2520 	char			db[DEV_BSIZE];
2521 				/*LINTED*/
2522 	mddb_db_t		*dbp = (mddb_db_t *)db;
2523 	did_list_t		*did_listp = NULL;
2524 	mddrivenamelist_t	*dnlp;
2525 	mddrivename_t 		*dnp;
2526 	md_im_names_t		cnames = { 0, NULL};
2527 	char			*nm = NULL, *shrnm = NULL;
2528 	char			*did_nm = NULL, *did_shrnm = NULL;
2529 	struct nm_rec		*nmp;
2530 	struct nm_shr_rec	*snmp;
2531 	struct devid_shr_rec	*did_shrnmp;
2532 	struct devid_min_rec	*did_nmp;
2533 	int			extended_namespace = 0;
2534 	int			replicated = 0;
2535 	int			partial = 0;
2536 	pnm_rec_t		*pnm = NULL; /* list of physical devs in set */
2537 	md_im_set_desc_t	*misp;
2538 
2539 	dnp = dp->drivenamep;
2540 
2541 	/*
2542 	 * Determine and open the replica slice
2543 	 */
2544 	if (meta_replicaslice(dnp, &s, ep) != 0) {
2545 		return (-1);
2546 	}
2547 
2548 	/*
2549 	 * Test for the size of replica slice in question. If
2550 	 * the size is zero, we know that this is not a disk that was
2551 	 * part of a set and it should be silently ignored for import.
2552 	 */
2553 	if (dnp->vtoc.parts[s].size == 0)
2554 		return (0);
2555 
2556 	if ((rsp = metaslicename(dnp, s, ep)) == NULL) {
2557 		return (-1);
2558 	}
2559 
2560 	if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0)
2561 		return (mdsyserror(ep, errno, rsp->cname));
2562 
2563 	/*
2564 	 * After the open() succeeds, we should return via the "out"
2565 	 * label to clean up after ourselves.  (Up 'til now, we can
2566 	 * just return directly, because there are no resources to
2567 	 * give back.)
2568 	 */
2569 
2570 	if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0)
2571 		goto out;
2572 
2573 	replicated = is_replicated(fd, mbp, 0, NULL);
2574 
2575 	if (!local_mb_ok && mbp->mb_setno == 0) {
2576 		rval = 0;
2577 		goto out;
2578 	}
2579 
2580 	if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0)
2581 		goto out;
2582 
2583 	/*
2584 	 * Once the locator block has been read, we need to
2585 	 * check if the locator block commit count is zero.
2586 	 * If it is zero, we know that the replica we're dealing
2587 	 * with is on a disk that was deleted from the disk set;
2588 	 * and, it potentially has stale data. We need to quit
2589 	 * in that case
2590 	 */
2591 	if (lbp->lb_commitcnt == 0) {
2592 		rval = 0;
2593 		goto out;
2594 	}
2595 
2596 	/*
2597 	 * Make sure that the disk being imported has device id
2598 	 * namespace present for disksets. If a disk doesn't have
2599 	 * device id namespace, we skip reading the replica on that disk
2600 	 */
2601 	if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) {
2602 		rval = 0;
2603 		goto out;
2604 	}
2605 
2606 	/*
2607 	 * Grab the locator block device id array. Allocate memory for the
2608 	 * array first.
2609 	 */
2610 	lbdid_size = dbtob(lbp->lb_didblkcnt);
2611 	lbdidp = Zalloc(lbdid_size);
2612 
2613 	if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp,
2614 	    lbdid_size)) <= 0)
2615 		goto out;
2616 
2617 	/*
2618 	 * For a disk that has not been replicated, extract the device ids
2619 	 * stored in the locator block device id array and store them in
2620 	 * a list.
2621 	 *
2622 	 * If the disk has been replicated using replication software such
2623 	 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in
2624 	 * the locator block are invalid and we need to build a list of
2625 	 * replicated disks.
2626 	 */
2627 	if (imp_flags & META_IMP_PASS1) {
2628 		/*
2629 		 * We need to do this for both passes but
2630 		 * replicated_disk_list_built is global so we need some way
2631 		 * to determine which pass we're on. Set it to the appropriate
2632 		 * pass's flag.
2633 		 */
2634 		replicated_disk_list_built = &replicated_disk_list_built_pass1;
2635 	} else {
2636 		replicated_disk_list_built = &replicated_disk_list_built_pass2;
2637 	}
2638 	if (replicated && !(*replicated_disk_list_built)) {
2639 		/*
2640 		 * if there's a replicated diskset involved, we need to
2641 		 * scan the system one more time and build a list of all
2642 		 * candidate disks that might be part of that replicated set
2643 		 */
2644 		if (meta_list_disks(ep, &cnames) != 0) {
2645 			rval = 0;
2646 			goto out;
2647 		}
2648 		dnlp = meta_prune_cnames(ep, &cnames, 0);
2649 		rval = build_replicated_disks_list(ep, dnlp);
2650 		if (rval == 0)
2651 			goto out;
2652 	}
2653 
2654 	/*
2655 	 * Until here, we've gotten away with fixed sizes for the
2656 	 * master block and locator block.  The locator names,
2657 	 * however, are sized (and therefore allocated) dynamically
2658 	 * according to information in the locator block.
2659 	 */
2660 	lnsize = dbtob(lbp->lb_lnblkcnt);
2661 	lnp = Zalloc(lnsize);
2662 
2663 	if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
2664 		goto out;
2665 
2666 	rval = build_did_list(ep, fd, mbp, lbp, lbdidp, lnp, &did_listp,
2667 	    replicated);
2668 
2669 	/*
2670 	 * An rval of ENOTSUP means we have a partial diskset. We'll want
2671 	 * to set the partial variable so we can pass this information
2672 	 * set_append_wrapper later for placing on the misp list.
2673 	 */
2674 	if (rval == ENOTSUP)
2675 		partial = MD_IM_PARTIAL_DISKSET;
2676 
2677 	if (rval < 0)
2678 		goto out;
2679 
2680 	/*
2681 	 * Read in the NM record
2682 	 * If no NM record was found, it still is a valid configuration
2683 	 * but it also means that we won't find any corresponding DID_NM
2684 	 * or DID_SHR_NM.
2685 	 */
2686 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname))
2687 	    < 0)
2688 		goto out;
2689 	else if (rval == 0)
2690 		goto append;
2691 
2692 	/*
2693 	 * At this point, we have read in all of the blocks that form
2694 	 * the nm_rec.  We should at least detect the corner case
2695 	 * mentioned above, in which r_next_recid links to another
2696 	 * nm_rec. Extended namespace handling is left for Phase 2.
2697 	 *
2698 	 * What this should really be is a loop, each iteration of
2699 	 * which reads in a nm_rec and calls the set_append().
2700 	 */
2701 	/*LINTED*/
2702 	nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t));
2703 	if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2704 		extended_namespace = 1;
2705 		rval = 0;
2706 		goto out;
2707 	}
2708 
2709 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &shrnm, MDDB_SHR_NM,
2710 	    rsp->cname)) < 0)
2711 		goto out;
2712 	else if (rval == 0)
2713 		goto append;
2714 
2715 	/*LINTED*/
2716 	snmp = (struct nm_shr_rec *)(shrnm + sizeof (mddb_rb_t));
2717 	if (snmp->sr_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2718 		extended_namespace = 1;
2719 		rval = 0;
2720 		goto out;
2721 	}
2722 
2723 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
2724 	    MDDB_DID_NM, rsp->cname)) < 0)
2725 		goto out;
2726 	else if (rval == 0)
2727 		goto append;
2728 
2729 	/*LINTED*/
2730 	did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t) -
2731 	    sizeof (int));
2732 	if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2733 		extended_namespace = 1;
2734 		rval = 0;
2735 		goto out;
2736 	}
2737 
2738 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm,
2739 	    MDDB_DID_SHR_NM, rsp->cname)) < 0)
2740 		goto out;
2741 	else if (rval == 0)
2742 		goto append;
2743 
2744 	/*LINTED*/
2745 	did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t) -
2746 	    sizeof (int));
2747 	if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2748 		extended_namespace = 1;
2749 		rval = 0;
2750 		goto out;
2751 	}
2752 
2753 	/*
2754 	 * We need to check if all of the disks listed in the namespace
2755 	 * are actually available. If they aren't we'll return with
2756 	 * an ENOTSUP error which indicates a partial diskset.
2757 	 */
2758 	rval = check_nm_disks(did_nmp, did_shrnmp);
2759 
2760 	/*
2761 	 * An rval of ENOTSUP means we have a partial diskset. We'll want
2762 	 * to set the partial variable so we can pass this information
2763 	 * to set_append_wrapper later for placing on the misp list.
2764 	 */
2765 	if (rval == ENOTSUP)
2766 		partial = MD_IM_PARTIAL_DISKSET;
2767 
2768 	if (rval < 0)
2769 		goto out;
2770 
2771 append:
2772 	/* Finally, we've got what we need to process this replica. */
2773 	misp = set_append(mispp, did_listp, mbp, lbp,
2774 	    /*LINTED*/
2775 	    (mddb_rb_t *)nm, (mddb_rb_t *)shrnm, &pnm, (mddb_rb_t *)did_nm,
2776 	    /*LINTED*/
2777 	    (mddb_rb_t *)did_shrnm, (imp_flags | partial | replicated), ep);
2778 
2779 	if (!(imp_flags & META_IMP_PASS1)) {
2780 		*set_count += 1;
2781 		rval = report_set_info(misp, mbp, lbp,
2782 		    /*LINTED*/
2783 		    (mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count,
2784 		    overlap, overlap_disks, ep);
2785 		if (rval < 0)
2786 			goto out;
2787 	}
2788 
2789 	/* Return the fact that we found at least one set */
2790 	rval = 1;
2791 
2792 out:
2793 	if (fd >= 0)
2794 		(void) close(fd);
2795 	if (did_listp != NULL)
2796 		free_did_list(did_listp);
2797 	if (lnp != NULL)
2798 		Free(lnp);
2799 	if (nm != NULL)
2800 		Free(nm);
2801 	if (did_nm != NULL)
2802 		Free(did_nm);
2803 	if (did_shrnm != NULL)
2804 		Free(did_shrnm);
2805 	if (pnm != NULL)
2806 		free_pnm_rec_list(&pnm);
2807 
2808 	/*
2809 	 * If we are at the end of the list, we must free up
2810 	 * the replicated list too
2811 	 */
2812 	if (dp->next == NULL)
2813 		free_replicated_disks_list();
2814 
2815 	if (extended_namespace)
2816 		return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD,
2817 		    mynode(), NULL, NULL));
2818 
2819 	return (rval);
2820 }
2821 
2822 /*
2823  * Return the minor name associated with a given disk slice
2824  */
2825 static char *
meta_getminor_name(char * devname,md_error_t * ep)2826 meta_getminor_name(
2827 	char *devname,
2828 	md_error_t *ep
2829 )
2830 {
2831 	int 	fd = -1;
2832 	char 	*minor_name = NULL;
2833 	char	*ret_minor_name = NULL;
2834 
2835 	if (devname == NULL)
2836 		return (NULL);
2837 
2838 	if ((fd = open(devname, O_RDONLY|O_NDELAY, 0)) < 0) {
2839 		(void) mdsyserror(ep, errno, devname);
2840 		return (NULL);
2841 	}
2842 
2843 	if (devid_get_minor_name(fd, &minor_name) == 0) {
2844 		ret_minor_name = Strdup(minor_name);
2845 		devid_str_free(minor_name);
2846 	}
2847 
2848 	(void) close(fd);
2849 	return (ret_minor_name);
2850 }
2851 
2852 /*
2853  * meta_update_mb_did
2854  *
2855  * Update or create the master block with the new set number.
2856  * If a non-null devid pointer is given, the devid in the
2857  * master block will also be changed.
2858  *
2859  * This routine is called during the import of a diskset
2860  * (meta_imp_update_mb) and during the take of a diskset that has
2861  * some unresolved replicated drives (meta_unrslv_replicated_mb).
2862  *
2863  * Returns : nothing (void)
2864  */
2865 static void
meta_update_mb_did(mdsetname_t * sp,mddrivename_t * dnp,void * new_devid,int new_devid_len,void * old_devid,int replica_present,int offset,md_error_t * ep)2866 meta_update_mb_did(
2867 	mdsetname_t	*sp,
2868 	mddrivename_t	*dnp,			/* raw name of drive with mb */
2869 	void		*new_devid,		/* devid to be stored in mb */
2870 	int		new_devid_len,
2871 	void		*old_devid,		/* old devid stored in mb */
2872 	int		replica_present,	/* does replica follow mb? */
2873 	int		offset,
2874 	md_error_t	*ep
2875 )
2876 {
2877 	int			fd;
2878 	struct mddb_mb		*mbp;
2879 	uint_t			sliceno;
2880 	mdname_t		*rsp;
2881 
2882 	/* determine the replica slice */
2883 	if (meta_replicaslice(dnp, &sliceno, ep) != 0) {
2884 		return;
2885 	}
2886 
2887 	/*
2888 	 * if the replica slice size is zero,
2889 	 * don't bother opening
2890 	 */
2891 	if (dnp->vtoc.parts[sliceno].size == 0) {
2892 		return;
2893 	}
2894 
2895 	if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) {
2896 		return;
2897 	}
2898 
2899 	if ((fd = open(rsp->rname, O_RDWR | O_NDELAY)) < 0) {
2900 		return;
2901 	}
2902 
2903 	if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
2904 		return;
2905 
2906 	mbp = Zalloc(DEV_BSIZE);
2907 	if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE) {
2908 		Free(mbp);
2909 		return;
2910 	}
2911 
2912 	/* If no replica on disk, check for dummy mb */
2913 	if (replica_present == NULL) {
2914 		/*
2915 		 * Check to see if there is a dummy there. If not
2916 		 * create one. This would happen if the set was
2917 		 * created before the master block dummy code was
2918 		 * implemented.
2919 		 */
2920 		if ((mbp->mb_magic != MDDB_MAGIC_DU) ||
2921 		    (mbp->mb_revision != MDDB_REV_MB)) {
2922 			meta_mkdummymaster(sp, fd, offset);
2923 			Free(mbp);
2924 			return;
2925 		}
2926 	}
2927 
2928 	mbp->mb_setno = sp->setno;
2929 	if (meta_gettimeofday(&mbp->mb_timestamp) == -1) {
2930 		Free(mbp);
2931 		return;
2932 	}
2933 
2934 	/*
2935 	 * If a old_devid is non-NULL then we're are dealing with a
2936 	 * replicated diskset and the devid needs to be updated.
2937 	 */
2938 	if (old_devid) {
2939 		if (mbp->mb_devid_magic == MDDB_MAGIC_DE) {
2940 			if (mbp->mb_devid_len)
2941 				(void) memset(mbp->mb_devid, 0,
2942 				    mbp->mb_devid_len);
2943 			(void) memcpy(mbp->mb_devid,
2944 			    (char *)new_devid, new_devid_len);
2945 			mbp->mb_devid_len = new_devid_len;
2946 		}
2947 	}
2948 
2949 	crcgen((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
2950 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
2951 
2952 	/*
2953 	 * Now write out the changes to disk.
2954 	 * If an error occurs, just continue on.
2955 	 * Next take of set will register this drive as
2956 	 * an unresolved replicated drive and will attempt
2957 	 * to fix the master block again.
2958 	 */
2959 	if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) {
2960 		Free(mbp);
2961 		return;
2962 	}
2963 	if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE) {
2964 		Free(mbp);
2965 		return;
2966 	}
2967 
2968 	Free(mbp);
2969 	(void) close(fd);
2970 }
2971 
2972 
2973 /*
2974  * meta_imp_update_mb
2975  *
2976  * Update the master block information during an import.
2977  * Takes an import set descriptor.
2978  *
2979  * Returns : nothing (void)
2980  */
2981 void
meta_imp_update_mb(mdsetname_t * sp,md_im_set_desc_t * misp,md_error_t * ep)2982 meta_imp_update_mb(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep)
2983 {
2984 	md_im_drive_info_t	*midp;
2985 	mddrivename_t		*dnp;
2986 	int			offset = 16; /* default mb offset is 16 */
2987 
2988 	for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
2989 		/*
2990 		 * If disk isn't available we can't update, so go to next
2991 		 */
2992 		if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
2993 			continue;
2994 		}
2995 
2996 		dnp = midp->mid_dnp;
2997 
2998 		if (midp->mid_replicas) {
2999 			md_im_replica_info_t	*mirp;
3000 
3001 			/*
3002 			 * If we have replicas on this disk we need to make
3003 			 * sure that we update the master block on every
3004 			 * replica on the disk.
3005 			 */
3006 			for (mirp = midp->mid_replicas; mirp != NULL;
3007 			    mirp = mirp->mir_next) {
3008 				offset = mirp->mir_offset;
3009 				meta_update_mb_did(sp, dnp, midp->mid_devid,
3010 				    midp->mid_devid_sz, midp->mid_o_devid,
3011 				    1, offset, ep);
3012 			}
3013 		} else {
3014 			/* No replicas, just update the one dummy mb */
3015 			meta_update_mb_did(sp, dnp, midp->mid_devid,
3016 			    midp->mid_devid_sz, midp->mid_o_devid,
3017 			    0, offset, ep);
3018 		}
3019 		if (!mdisok(ep))
3020 			return;
3021 	}
3022 }
3023 
3024 /*
3025  * meta_unrslv_replicated_common
3026  *
3027  * Given a drive_desc and a drivenamelist pointer,
3028  * return the devidp associated with the drive_desc,
3029  * the replicated (new) devidp associated with the drive_desc
3030  * and the specific mddrivename in the drivenamelist that
3031  * matches the replicated (new) devidp.
3032  *
3033  * Typically the drivenamelist pointer would be setup by
3034  * the meta_prune_cnames function.
3035  *
3036  * Calling function must free devidp using devid_free.
3037  *
3038  * Returns 0 - success, found new_devidp and dnp_new.
3039  * Returns 1 - failure, didn't find new devid info
3040  */
3041 static int
meta_unrslv_replicated_common(int myside,md_drive_desc * dd,mddrivenamelist_t * dnlp,ddi_devid_t * devidp,ddi_devid_t * new_devidp,mddrivename_t ** dnp_new,md_error_t * ep)3042 meta_unrslv_replicated_common(
3043 	int			myside,
3044 	md_drive_desc		*dd,	/* drive list for diskset */
3045 	mddrivenamelist_t	*dnlp,	/* list of drives on current system */
3046 	ddi_devid_t		*devidp,	/* old devid */
3047 	ddi_devid_t		*new_devidp,	/* replicated (new) devid */
3048 	mddrivename_t		**dnp_new,	/* replicated drive name */
3049 	md_error_t		*ep
3050 )
3051 {
3052 	mddrivename_t		*dnp;	/* drive name of old drive */
3053 	mdsidenames_t		*sn = NULL;
3054 	uint_t			rep_slice;
3055 	mdname_t		*np;
3056 	char			*minor_name = NULL;
3057 	char			*devid_str = NULL;
3058 	size_t			len;
3059 	int			devid_sz;
3060 	mddrivenamelist_t	*dp;
3061 	ddi_devid_t		old_devid; /* devid of old drive */
3062 	ddi_devid_t		new_devid; /* devid of new replicated drive */
3063 	ddi_devid_t		dnp_new_devid; /* devid derived from drive */
3064 						/* name of replicated drive */
3065 
3066 	dnp = dd->dd_dnp;
3067 
3068 	/* Get old devid from drive record */
3069 	(void) devid_str_decode(dd->dd_dnp->devid,
3070 	    &old_devid, NULL);
3071 
3072 	/* Look up replicated (new) devid */
3073 	new_devid = replicated_list_lookup(
3074 	    devid_sizeof(old_devid), old_devid);
3075 
3076 	devid_free(old_devid);
3077 
3078 	if (new_devid == NULL)
3079 		return (1);
3080 
3081 	/*
3082 	 * Using new_devid, find a drivename entry with a matching devid.
3083 	 * Use the passed in dnlp since it has the new (replicated) disknames
3084 	 * in it.
3085 	 */
3086 	for (dp = dnlp; dp != NULL; dp = dp->next) {
3087 		(void) devid_str_decode(dp->drivenamep->devid,
3088 		    &dnp_new_devid, NULL);
3089 
3090 		if (dnp_new_devid == NULL)
3091 			continue;
3092 
3093 		if (devid_compare(new_devid, dnp_new_devid) == 0) {
3094 			devid_free(dnp_new_devid);
3095 			break;
3096 		}
3097 		devid_free(dnp_new_devid);
3098 	}
3099 
3100 	/* If can't find new name for drive - nothing to update */
3101 	if (dp == NULL)
3102 		return (1);
3103 
3104 	/*
3105 	 * Setup returned value to be the drivename structure associated
3106 	 * with new (replicated) drive.
3107 	 */
3108 	*dnp_new = dp->drivenamep;
3109 
3110 	/*
3111 	 * Need to return the new devid including the minor name.
3112 	 * Find the minor_name here using the sidename or by
3113 	 * looking in the namespace.
3114 	 */
3115 	for (sn = dnp->side_names; sn != NULL; sn = sn->next) {
3116 		if (sn->sideno == myside)
3117 			break;
3118 	}
3119 
3120 	/*
3121 	 * The disk has no side name information
3122 	 */
3123 	if (sn == NULL) {
3124 		if ((meta_replicaslice(*dnp_new, &rep_slice, ep) != 0) ||
3125 		    ((np = metaslicename(*dnp_new, rep_slice, ep))
3126 			== NULL)) {
3127 			mdclrerror(ep);
3128 			return (1);
3129 		}
3130 
3131 		if (np->dev == NODEV64)
3132 			return (1);
3133 
3134 		/*
3135 		 * minor_name will be NULL if dnp->devid == NULL
3136 		 * - see metagetvtoc()
3137 		 */
3138 		if (np->minor_name == NULL)
3139 			return (1);
3140 		else
3141 			minor_name = Strdup(np->minor_name);
3142 
3143 	} else {
3144 		minor_name = meta_getdidminorbykey(
3145 			    MD_LOCAL_SET, sn->sideno + SKEW,
3146 			    dnp->side_names_key, ep);
3147 		if (!mdisok(ep))
3148 			return (1);
3149 	}
3150 	/*
3151 	 * Now, use the old devid with minor name to lookup
3152 	 * the replicated (new) devid that will also contain
3153 	 * a minor name.
3154 	 */
3155 	len = strlen(dnp->devid) + strlen(minor_name) + 2;
3156 	devid_str = (char *)Malloc(len);
3157 	(void) snprintf(devid_str, len, "%s/%s", dnp->devid,
3158 	    minor_name);
3159 	(void) devid_str_decode(devid_str, devidp, NULL);
3160 	Free(devid_str);
3161 	devid_sz = devid_sizeof((ddi_devid_t)*devidp);
3162 	*new_devidp = replicated_list_lookup(devid_sz, *devidp);
3163 	return (0);
3164 }
3165 
3166 /*
3167  * meta_unrslv_replicated_mb
3168  *
3169  * Update the master block information during a take.
3170  * Takes an md_drive_desc descriptor.
3171  *
3172  * Returns : nothing (void)
3173  */
3174 void
meta_unrslv_replicated_mb(mdsetname_t * sp,md_drive_desc * dd,mddrivenamelist_t * dnlp,md_error_t * ep)3175 meta_unrslv_replicated_mb(
3176 	mdsetname_t		*sp,
3177 	md_drive_desc		*dd,	/* drive list for diskset */
3178 	mddrivenamelist_t	*dnlp,	/* list of drives on current system */
3179 	md_error_t		*ep
3180 )
3181 {
3182 	md_drive_desc		*d = NULL, *d_save;
3183 	mddrivename_t		*dnp;	   /* dnp of old drive */
3184 	mddrivename_t		*dnp_new;  /* dnp of new (replicated) drive */
3185 	mddrivename_t		*dnp_save; /* saved copy needed to restore */
3186 	ddi_devid_t		devidp, new_devidp;
3187 	int			myside;
3188 
3189 	if ((myside = getmyside(sp, ep)) == MD_SIDEWILD)
3190 		return;
3191 
3192 	for (d = dd; d != NULL; d = d->dd_next) {
3193 		dnp = d->dd_dnp;
3194 		if (dnp == NULL)
3195 			continue;
3196 
3197 		/* If don't need to update master block - skip it. */
3198 		if (!(d->dd_flags & MD_DR_FIX_MB_DID))
3199 			continue;
3200 
3201 		/*
3202 		 * Get old and replicated (new) devids associated with this
3203 		 * drive.  Also, get the new (replicated) drivename structure.
3204 		 */
3205 		if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp,
3206 		    &new_devidp, &dnp_new, ep) != 0) {
3207 			mdclrerror(ep);
3208 			continue;
3209 		}
3210 
3211 		if (new_devidp) {
3212 			int	offset = 16; /* default mb offset is 16 */
3213 			int	dbcnt;
3214 
3215 			if (d->dd_dbcnt) {
3216 				/*
3217 				 * Update each master block on the disk
3218 				 */
3219 				for (dbcnt = d->dd_dbcnt; dbcnt != 0; dbcnt--) {
3220 					meta_update_mb_did(sp, dnp_new,
3221 					    new_devidp,
3222 					    devid_sizeof(new_devidp), devidp,
3223 					    1, offset, ep);
3224 					offset += d->dd_dbsize;
3225 				}
3226 			} else {
3227 				/* update the one dummy mb */
3228 				meta_update_mb_did(sp, dnp_new, new_devidp,
3229 				    devid_sizeof(new_devidp), devidp,
3230 				    0, offset, ep);
3231 			}
3232 			if (!mdisok(ep)) {
3233 				devid_free(devidp);
3234 				return;
3235 			}
3236 
3237 			/* Set drive record flags to ok */
3238 			/* Just update this one drive record. */
3239 			d_save = d->dd_next;
3240 			dnp_save = d->dd_dnp;
3241 			d->dd_next = NULL;
3242 			d->dd_dnp = dnp_new;
3243 			/* Ignore failure since no bad effect. */
3244 			(void) clnt_upd_dr_flags(mynode(), sp, d,
3245 			    MD_DR_OK, ep);
3246 			d->dd_next = d_save;
3247 			d->dd_dnp = dnp_save;
3248 		}
3249 		devid_free(devidp);
3250 	}
3251 }
3252 
3253 /*
3254  * meta_update_nm_rr_did
3255  *
3256  * Change a devid stored in the diskset namespace and in the local set
3257  * namespace with the new devid.
3258  *
3259  * This routine is called during the import of a diskset
3260  * (meta_imp_update_nn) and during the take of a diskset that has
3261  * some unresolved replicated drives (meta_unrslv_replicated_nm).
3262  *
3263  * Returns : nothing (void)
3264  */
3265 static void
meta_update_nm_rr_did(mdsetname_t * sp,void * old_devid,int old_devid_sz,void * new_devid,int new_devid_sz,int import_flag,md_error_t * ep)3266 meta_update_nm_rr_did(
3267 	mdsetname_t	*sp,
3268 	void		*old_devid,		/* old devid being replaced */
3269 	int		old_devid_sz,
3270 	void		*new_devid,		/* devid to be stored in nm */
3271 	int		new_devid_sz,
3272 	int		import_flag,		/* called during import? */
3273 	md_error_t	*ep
3274 )
3275 {
3276 	struct mddb_config	c;
3277 
3278 	(void) memset(&c, 0, sizeof (c));
3279 	c.c_setno = sp->setno;
3280 
3281 	/* During import to NOT update the local namespace. */
3282 	if (import_flag)
3283 		c.c_flags = MDDB_C_IMPORT;
3284 
3285 	c.c_locator.l_devid = (uintptr_t)Malloc(new_devid_sz);
3286 	(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
3287 	    new_devid, new_devid_sz);
3288 	c.c_locator.l_devid_sz = new_devid_sz;
3289 	c.c_locator.l_devid_flags =
3290 	    MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
3291 	c.c_locator.l_old_devid = (uint64_t)(uintptr_t)Malloc(old_devid_sz);
3292 	(void) memcpy((void *)(uintptr_t)c.c_locator.l_old_devid,
3293 	    old_devid, old_devid_sz);
3294 	c.c_locator.l_old_devid_sz = old_devid_sz;
3295 	if (metaioctl(MD_IOCUPDATE_NM_RR_DID, &c, &c.c_mde, NULL) != 0) {
3296 		(void) mdstealerror(ep, &c.c_mde);
3297 	}
3298 	Free((void *)(uintptr_t)c.c_locator.l_devid);
3299 	Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3300 }
3301 
3302 /*
3303  * meta_imp_update_nm
3304  *
3305  * Change a devid stored in the diskset namespace with the new devid.
3306  * This routine is called during the import of a remotely replicated diskset.
3307  *
3308  * Returns : nothing (void)
3309  */
3310 void
meta_imp_update_nm(mdsetname_t * sp,md_im_set_desc_t * misp,md_error_t * ep)3311 meta_imp_update_nm(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep)
3312 {
3313 	md_im_drive_info_t	*midp;
3314 
3315 	for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
3316 		/*
3317 		 * If disk isn't available we can't update, so go to next
3318 		 */
3319 		if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
3320 			continue;
3321 		}
3322 
3323 		meta_update_nm_rr_did(sp, midp->mid_o_devid,
3324 		    midp->mid_o_devid_sz, midp->mid_devid,
3325 		    midp->mid_devid_sz, 1, ep);
3326 		if (!mdisok(ep))
3327 			return;
3328 	}
3329 }
3330 
3331 /*
3332  * meta_unrslv_replicated_nm
3333  *
3334  * Change a devid stored in the diskset namespace and in the local set
3335  * namespace with the new devid.
3336  *
3337  * This routine is called during the take of a diskset that has
3338  * some unresolved replicated drives.
3339  *
3340  * Returns : nothing (void)
3341  */
3342 void
meta_unrslv_replicated_nm(mdsetname_t * sp,md_drive_desc * dd,mddrivenamelist_t * dnlp,md_error_t * ep)3343 meta_unrslv_replicated_nm(
3344 	mdsetname_t		*sp,
3345 	md_drive_desc		*dd,	/* drive list for diskset */
3346 	mddrivenamelist_t	*dnlp,	/* list of drives on current system */
3347 	md_error_t		*ep
3348 )
3349 {
3350 	md_drive_desc		*d = NULL;
3351 	mddrivename_t		*dnp;	/* drive name of old drive */
3352 	mddrivename_t		*dnp_new; /* drive name of new (repl) drive */
3353 	ddi_devid_t		devidp, new_devidp;
3354 	ddi_devid_t		old_devid;
3355 	char			*devid_old_save;
3356 	mdsetname_t		*local_sp = NULL;
3357 	int			myside;
3358 
3359 	if ((myside = getmyside(sp, ep)) == MD_SIDEWILD)
3360 		return;
3361 
3362 	for (d = dd; d != NULL; d = d->dd_next) {
3363 		dnp = d->dd_dnp;
3364 		if (dnp == NULL)
3365 			continue;
3366 
3367 		/* If don't need to update namespace - skip it. */
3368 		if (!(d->dd_flags & MD_DR_FIX_LB_NM_DID))
3369 			continue;
3370 
3371 		/* Get old devid from drive record */
3372 		(void) devid_str_decode(d->dd_dnp->devid,
3373 		    &old_devid, NULL);
3374 
3375 		/*
3376 		 * Get old and replicated (new) devids associated with this
3377 		 * drive.  Also, get the new (replicated) drivename structure.
3378 		 */
3379 		if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp,
3380 		    &new_devidp, &dnp_new, ep) != 0) {
3381 			mdclrerror(ep);
3382 			continue;
3383 		}
3384 
3385 		if (new_devidp) {
3386 			meta_update_nm_rr_did(sp, devidp,
3387 			    devid_sizeof(devidp), new_devidp,
3388 			    devid_sizeof(new_devidp), 0, ep);
3389 			if (!mdisok(ep)) {
3390 				devid_free(devidp);
3391 				return;
3392 			}
3393 		}
3394 		devid_free(devidp);
3395 
3396 		/*
3397 		 * Using the new devid, fix up the name.
3398 		 * If meta_upd_ctdnames fails, the next take will re-resolve
3399 		 * the name from the new devid.
3400 		 */
3401 		local_sp = metasetname(MD_LOCAL_NAME, ep);
3402 		devid_old_save = dnp->devid;
3403 		dnp->devid = dnp_new->devid;
3404 		(void) meta_upd_ctdnames(&local_sp, 0, (myside + SKEW),
3405 			dnp, NULL, ep);
3406 		mdclrerror(ep);
3407 		dnp->devid = devid_old_save;
3408 	}
3409 }
3410 
3411 static set_t
meta_imp_setno(md_error_t * ep)3412 meta_imp_setno(
3413 	md_error_t *ep
3414 )
3415 {
3416 	set_t	max_sets, setno;
3417 	int	bool;
3418 
3419 	if ((max_sets = get_max_sets(ep)) == 0) {
3420 		return (MD_SET_BAD);
3421 	}
3422 
3423 	/*
3424 	 * This code needs to be expanded when we run in SunCluster
3425 	 * environment SunCluster obtains setno internally
3426 	 */
3427 	for (setno = 1; setno < max_sets; setno++) {
3428 		if (clnt_setnumbusy(mynode(), setno,
3429 			&bool, ep) == -1) {
3430 			setno = MD_SET_BAD;
3431 			break;
3432 		}
3433 		/*
3434 		 * found one available
3435 		 */
3436 		if (bool == FALSE)
3437 			break;
3438 	}
3439 
3440 	if (setno == max_sets) {
3441 		setno = MD_SET_BAD;
3442 	}
3443 
3444 	return (setno);
3445 }
3446 
3447 int
meta_imp_set(md_im_set_desc_t * misp,char * setname,int force,bool_t dry_run,md_error_t * ep)3448 meta_imp_set(
3449 	md_im_set_desc_t *misp,
3450 	char		*setname,
3451 	int		force,
3452 	bool_t		dry_run,
3453 	md_error_t	*ep
3454 )
3455 {
3456 	md_timeval32_t		tp;
3457 	md_im_drive_info_t	*midp;
3458 	uint_t			rep_slice;
3459 	mddrivename_t		*dnp;
3460 	struct mddb_config	c;
3461 	mdname_t		*np;
3462 	md_im_replica_info_t	*mirp;
3463 	set_t			setno;
3464 	mdcinfo_t		*cinfo;
3465 	mdsetname_t		*sp;
3466 	mddrivenamelist_t	*dnlp = NULL;
3467 	mddrivenamelist_t	**dnlpp = &dnlp;
3468 	char			*minor_name = NULL;
3469 	int			stale_flag = 0;
3470 	md_set_desc		*sd;
3471 	int			partial_replicated_flag = 0;
3472 	md_error_t		xep = mdnullerror;
3473 	md_setkey_t		*cl_sk;
3474 
3475 	(void) memset(&c, 0, sizeof (c));
3476 	(void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
3477 	c.c_sideno = 0;
3478 	c.c_flags = MDDB_C_IMPORT;
3479 
3480 	/*
3481 	 * Check to see if the setname that the set is being imported into,
3482 	 * already exists.
3483 	 */
3484 	if (getsetbyname(c.c_setname, ep) != NULL) {
3485 		return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD,
3486 		    mynode(), NULL, c.c_setname));
3487 	}
3488 
3489 	/*
3490 	 * Find the next available set number
3491 	 */
3492 	if ((setno = meta_imp_setno(ep)) == MD_SET_BAD) {
3493 		return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
3494 		    mynode(), NULL, c.c_setname));
3495 	}
3496 
3497 	c.c_setno = setno;
3498 	if (meta_gettimeofday(&tp) == -1) {
3499 		return (mdsyserror(ep, errno, NULL));
3500 	}
3501 	c.c_timestamp = tp;
3502 
3503 	/* Check to see if replica quorum requirement is fulfilled */
3504 	if (meta_replica_quorum(misp) == -1) {
3505 		if (!force) {
3506 			return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
3507 			    mynode(), NULL, c.c_setname));
3508 		} else {
3509 			stale_flag = MD_IMP_STALE_SET;
3510 			/*
3511 			 * If we have a stale diskset, the kernel will
3512 			 * delete the replicas on the unavailable disks.
3513 			 * To be consistent, we'll zero out the mirp on those
3514 			 * disks here.
3515 			 */
3516 			for (midp = misp->mis_drives; midp != NULL;
3517 			    midp = midp->mid_next) {
3518 				if (midp->mid_available ==
3519 				    MD_IM_DISK_NOT_AVAILABLE) {
3520 					midp->mid_replicas = NULL;
3521 				}
3522 			}
3523 		}
3524 	}
3525 
3526 	for (midp = misp->mis_drives; midp != NULL;
3527 		midp = midp->mid_next) {
3528 
3529 		if ((misp->mis_flags & MD_IM_SET_REPLICATED) &&
3530 		    (partial_replicated_flag == 0) &&
3531 		    (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE))
3532 			partial_replicated_flag = MD_SR_UNRSLV_REPLICATED;
3533 
3534 		/*
3535 		 * We pass the list of the drives in the
3536 		 * set with replicas on them down to the kernel.
3537 		 */
3538 		dnp = midp->mid_dnp;
3539 		mirp = midp->mid_replicas;
3540 		if (!mirp) {
3541 			/*
3542 			 * No replicas on this disk, go to next disk.
3543 			 */
3544 			continue;
3545 		}
3546 
3547 		if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) {
3548 			/*
3549 			 * The disk isn't there. We'll need to get the
3550 			 * disk information from the midp list instead
3551 			 * of going and looking for it. This means it
3552 			 * will be information relative to the old
3553 			 * system.
3554 			 */
3555 			minor_name = Strdup(midp->mid_minor_name);
3556 			(void) strncpy(c.c_locator.l_driver,
3557 			    midp->mid_driver_name,
3558 			    sizeof (c.c_locator.l_driver));
3559 			(void) strcpy(c.c_locator.l_devname, midp->mid_devname);
3560 			c.c_locator.l_mnum = midp->mid_mnum;
3561 
3562 		} else {
3563 			if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
3564 			    ((np = metaslicename(dnp, rep_slice, ep))
3565 			    == NULL)) {
3566 				mdclrerror(ep);
3567 				continue;
3568 			}
3569 			(void) strcpy(c.c_locator.l_devname, np->bname);
3570 			c.c_locator.l_dev = meta_cmpldev(np->dev);
3571 			c.c_locator.l_mnum = meta_getminor(np->dev);
3572 			minor_name = meta_getminor_name(np->bname, ep);
3573 			if ((cinfo = metagetcinfo(np, ep)) == NULL) {
3574 				mdclrerror(ep);
3575 				continue;
3576 			}
3577 
3578 			if (cinfo->dname) {
3579 				(void) strncpy(c.c_locator.l_driver,
3580 				    cinfo->dname,
3581 				    sizeof (c.c_locator.l_driver));
3582 			}
3583 		}
3584 
3585 		c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
3586 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
3587 		    midp->mid_devid, midp->mid_devid_sz);
3588 		c.c_locator.l_devid_sz = midp->mid_devid_sz;
3589 		c.c_locator.l_devid_flags =
3590 		    MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
3591 		if (midp->mid_o_devid) {
3592 			c.c_locator.l_old_devid =
3593 			    (uint64_t)(uintptr_t)Malloc(midp->mid_o_devid_sz);
3594 			(void) memcpy((void *)(uintptr_t)
3595 			    c.c_locator.l_old_devid,
3596 			    midp->mid_o_devid, midp->mid_o_devid_sz);
3597 			c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
3598 		}
3599 		if (minor_name) {
3600 			(void) strncpy(c.c_locator.l_minor_name, minor_name,
3601 			    sizeof (c.c_locator.l_minor_name));
3602 		}
3603 
3604 		do {
3605 			c.c_locator.l_flags = 0;
3606 			c.c_locator.l_blkno = mirp->mir_offset;
3607 			if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
3608 				Free((void *)(uintptr_t)c.c_locator.l_devid);
3609 				if (c.c_locator.l_old_devid)
3610 					Free((void *)(uintptr_t)
3611 					    c.c_locator.l_old_devid);
3612 				return (mdstealerror(ep, &c.c_mde));
3613 			}
3614 			mirp = mirp->mir_next;
3615 		} while (mirp != NULL);
3616 	}
3617 
3618 	/*
3619 	 * If the dry run option was specified, flag success
3620 	 * and exit out
3621 	 */
3622 	if (dry_run == 1) {
3623 		md_eprintf("%s\n", dgettext(TEXT_DOMAIN,
3624 		    "import should be successful"));
3625 		Free((void *)(uintptr_t)c.c_locator.l_devid);
3626 		if (c.c_locator.l_old_devid)
3627 			Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3628 		return (0);
3629 	}
3630 
3631 	/*
3632 	 * Now the kernel should have all the information
3633 	 * regarding the import diskset replica.
3634 	 * Tell the kernel to load them up and import the set
3635 	 */
3636 	(void) memset(&c, 0, sizeof (c));
3637 	c.c_flags = stale_flag;
3638 	c.c_setno = setno;
3639 	if (metaioctl(MD_IOCIMP_LOAD, &c, &c.c_mde, NULL) != 0) {
3640 		Free((void *)(uintptr_t)c.c_locator.l_devid);
3641 		if (c.c_locator.l_old_devid)
3642 			Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3643 		return (mdstealerror(ep, &c.c_mde));
3644 	}
3645 
3646 	(void) meta_smf_enable(META_SMF_DISKSET, NULL);
3647 
3648 	/*
3649 	 * Create a set name for the set.
3650 	 */
3651 	sp = Zalloc(sizeof (*sp));
3652 	sp->setname = Strdup(setname);
3653 	sp->lockfd = MD_NO_LOCK;
3654 	sp->setno = setno;
3655 	sd = Zalloc(sizeof (*sd));
3656 	(void) strcpy(sd->sd_nodes[0], mynode());
3657 	sd->sd_ctime = tp;
3658 	sd->sd_genid = 0;
3659 
3660 	if (misp->mis_flags & MD_IM_SET_REPLICATED) {
3661 		/* Update the diskset namespace */
3662 		meta_imp_update_nm(sp, misp, ep);
3663 
3664 		/* Release the diskset - even if update_nm failed */
3665 		(void) memset(&c, 0, sizeof (c));
3666 		c.c_setno = setno;
3667 		/* Don't need device id information from this ioctl */
3668 		c.c_locator.l_devid = (uint64_t)0;
3669 		c.c_locator.l_devid_flags = 0;
3670 		if (metaioctl(MD_RELEASE_SET, &c, &c.c_mde, NULL) != 0) {
3671 			if (mdisok(ep))
3672 				(void) mdstealerror(ep, &c.c_mde);
3673 			Free(sd);
3674 			Free(sp);
3675 			return (-1);
3676 		}
3677 
3678 		/* If update_nm failed, then fail the import. */
3679 		if (!mdisok(ep)) {
3680 			Free(sd);
3681 			Free(sp);
3682 			return (-1);
3683 		}
3684 	}
3685 
3686 	/*
3687 	 * We'll need to update information in the master block due
3688 	 * to the set number changing and if the case of a replicated
3689 	 * diskset, the device id changing. May also need to create a
3690 	 * dummy master block if it's not there.
3691 	 */
3692 	meta_imp_update_mb(sp, misp, ep);
3693 	if (!mdisok(ep)) {
3694 		Free(sd);
3695 		Free(sp);
3696 		return (-1);
3697 	}
3698 
3699 	/*
3700 	 * Create set record for diskset, but record is left in
3701 	 * MD_SR_ADD state until after drives are added to set.
3702 	 */
3703 	if (clnt_lock_set(mynode(), sp, ep)) {
3704 		Free(sd);
3705 		Free(sp);
3706 		return (-1);
3707 	}
3708 
3709 	if (clnt_createset(mynode(), sp, sd->sd_nodes,
3710 	    sd->sd_ctime, sd->sd_genid, ep)) {
3711 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
3712 		(void) clnt_unlock_set(mynode(), cl_sk, &xep);
3713 		Free(sd);
3714 		Free(sp);
3715 		return (-1);
3716 	}
3717 
3718 	Free(sd);
3719 
3720 	/*
3721 	 * Create drive records for the disks in the set.
3722 	 */
3723 	for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
3724 		dnp = midp->mid_dnp;
3725 		if (midp->mid_available & MD_IM_DISK_NOT_AVAILABLE) {
3726 			/*
3727 			 * If the disk isn't available, the dnp->devid is
3728 			 * no good. It is either blank for the case where
3729 			 * there is no disk with that devname, or it
3730 			 * contains the devid for the real disk in the system
3731 			 * with that name. The problem is, if the disk is
3732 			 * unavailable, then the devid should be the devid
3733 			 * of the missing disk. So we're faking a dnp for
3734 			 * the import. This is needed for creating drive
3735 			 * records.
3736 			 */
3737 			dnp = Zalloc(sizeof (mddrivename_t));
3738 			dnp->side_names_key = midp->mid_dnp->side_names_key;
3739 			dnp->type = midp->mid_dnp->type;
3740 			dnp->cname = Strdup(midp->mid_dnp->cname);
3741 			dnp->rname = Strdup(midp->mid_dnp->rname);
3742 			dnp->devid = devid_str_encode(midp->mid_devid,
3743 			    NULL);
3744 			midp->mid_dnp = dnp;
3745 		}
3746 		dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
3747 	}
3748 
3749 	if (meta_imp_set_adddrives(sp, dnlp, misp, ep)) {
3750 		Free(sp);
3751 		return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
3752 		    mynode(), NULL, c.c_setname));
3753 	}
3754 
3755 	/* If drives were added without error, set set_record to OK */
3756 	if (clnt_upd_sr_flags(mynode(), sp,
3757 	    (partial_replicated_flag | MD_SR_OK | MD_SR_MB_DEVID), ep)) {
3758 		Free(sp);
3759 		return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
3760 		    mynode(), NULL, c.c_setname));
3761 	}
3762 
3763 	Free(sp);
3764 
3765 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
3766 	if (clnt_unlock_set(mynode(), cl_sk, ep)) {
3767 		return (-1);
3768 	}
3769 	cl_set_setkey(NULL);
3770 
3771 	Free((void *)(uintptr_t)c.c_locator.l_devid);
3772 	if (c.c_locator.l_old_devid)
3773 		Free((void *)(uintptr_t)c.c_locator.l_old_devid);
3774 	return (0);
3775 }
3776