xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c (revision a8e9a76e00d1b97ce5e2931d804c43c146eb1eed)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <dlfcn.h>
27 #include <meta.h>
28 #include <metadyn.h>
29 #include <ctype.h>
30 #include <dirent.h>
31 #include <devid.h>
32 #include <sys/param.h>
33 #include <sys/scsi/impl/uscsi.h>
34 #include <sys/scsi/generic/commands.h>
35 #include <sys/scsi/generic/inquiry.h>
36 #include <sys/efi_partition.h>
37 
/*
 * One entry of the controller cache: a singly linked list that maps a
 * controller name to the type value recorded for it.
 */
typedef struct ctlr_cache {
	char			*ctlr_nm;	/* controller name (private copy) */
	int			ctlr_ty;	/* type value stored for this name */
	struct	ctlr_cache	*ctlr_nx;	/* next entry; NULL ends the list */
} ctlr_cache_t;

/* head of the controller cache list; NULL while the cache is empty */
static	ctlr_cache_t	*ctlr_cache = NULL;
45 
46 
47 /*
48  * return set for a device
49  */
50 mdsetname_t *
51 metagetset(
52 	mdname_t	*np,
53 	int		bypass_daemon,
54 	md_error_t	*ep
55 )
56 {
57 	mdsetname_t	*sp;
58 
59 	/* metadevice */
60 	if (metaismeta(np))
61 		return (metasetnosetname(MD_MIN2SET(meta_getminor(np->dev)),
62 		    ep));
63 
64 	/* regular device */
65 	if (meta_is_drive_in_anyset(np->drivenamep, &sp, bypass_daemon,
66 	    ep) != 0)
67 		return (NULL);
68 
69 	if (sp != NULL)
70 		return (sp);
71 
72 	return (metasetnosetname(MD_LOCAL_SET, ep));
73 }
74 
75 /*
76  * convert system to md types
77  */
78 static void
79 meta_geom_to_md(
80 	struct dk_geom	*gp,
81 	mdgeom_t	*mdgp
82 )
83 {
84 	(void) memset(mdgp, '\0', sizeof (*mdgp));
85 	mdgp->ncyl = gp->dkg_ncyl;
86 	mdgp->nhead = gp->dkg_nhead;
87 	mdgp->nsect = gp->dkg_nsect;
88 	mdgp->rpm = gp->dkg_rpm;
89 	mdgp->write_reinstruct = gp->dkg_write_reinstruct;
90 	mdgp->read_reinstruct = gp->dkg_read_reinstruct;
91 	mdgp->blk_sz = DEV_BSIZE;
92 }
93 
94 /*
95  * convert efi to md types
96  */
97 static void
98 meta_efi_to_mdgeom(md_unit_t *mdev, struct dk_gpt *gpt, mdgeom_t *mdgp)
99 {
100 	(void) memset(mdgp, '\0', sizeof (*mdgp));
101 
102 	/*
103 	 * Should always get geom from metadevice unit if metadevice.
104 	 * If metadevice is built on top of efi disks then it will
105 	 * have MD_EFI_FG_ values, otherwise it will have geom from
106 	 * the first component.
107 	 */
108 	if (mdev) {
109 		mdgp->ncyl = (mdev->c.un_total_blocks) /
110 		    (mdev->c.un_nhead * mdev->c.un_nsect);
111 		mdgp->nhead = mdev->c.un_nhead;
112 		mdgp->nsect = mdev->c.un_nsect;
113 	} else {
114 		mdgp->ncyl = (gpt->efi_last_u_lba - gpt->efi_first_u_lba)
115 		    / (MD_EFI_FG_HEADS * MD_EFI_FG_SECTORS);
116 		mdgp->nhead = MD_EFI_FG_HEADS;
117 		mdgp->nsect = MD_EFI_FG_SECTORS;
118 	}
119 
120 	mdgp->rpm = MD_EFI_FG_RPM;
121 	mdgp->write_reinstruct = MD_EFI_FG_WRI;
122 	mdgp->read_reinstruct = MD_EFI_FG_RRI;
123 	mdgp->blk_sz = DEV_BSIZE;
124 }
125 
126 static void
127 meta_efi_to_mdvtoc(struct dk_gpt *gpt, mdvtoc_t *mdvp)
128 {
129 	char		typename[EFI_PART_NAME_LEN];
130 	uint_t		i;
131 
132 	(void) memset(mdvp, '\0', sizeof (*mdvp));
133 	mdvp->nparts = gpt->efi_nparts;
134 	if (mdvp->nparts > MD_MAX_PARTS)
135 		return;
136 
137 	mdvp->first_lba = gpt->efi_first_u_lba;
138 	mdvp->last_lba = gpt->efi_last_u_lba;
139 	mdvp->lbasize = gpt->efi_lbasize;
140 
141 	for (i = 0; (i < gpt->efi_nparts); ++i) {
142 		mdvp->parts[i].start = gpt->efi_parts[i].p_start;
143 		mdvp->parts[i].size = gpt->efi_parts[i].p_size;
144 		mdvp->parts[i].tag = gpt->efi_parts[i].p_tag;
145 		mdvp->parts[i].flag = gpt->efi_parts[i].p_flag;
146 		/*
147 		 * It is possible to present an efi label but be using vtoc
148 		 * disks to create a > 1 TB metadevice.  In case the first
149 		 * disk in the underlying metadevice is a vtoc disk and starts
150 		 * at the beginning of the disk it is necessary to convey this
151 		 * information to the user.
152 		 */
153 		if (mdvp->parts[i].size > 0 &&
154 		    mdvp->parts[i].start != 0 && mdvp->nparts == 1) {
155 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
156 			mdvp->parts[i].start = 0;
157 		}
158 
159 		/*
160 		 * Due to the lack of a label for the entire partition table,
161 		 * we use p_name of the reserved partition
162 		 */
163 		if ((gpt->efi_parts[i].p_tag == V_RESERVED) &&
164 		    (gpt->efi_parts[i].p_name != NULL)) {
165 			(void) strlcpy(typename, gpt->efi_parts[i].p_name,
166 			    EFI_PART_NAME_LEN);
167 			/* Stop at first (if any) space or tab */
168 			(void) strtok(typename, " \t");
169 			mdvp->typename = Strdup(typename);
170 		}
171 	}
172 }
173 
174 static void
175 meta_mdvtoc_to_efi(mdvtoc_t *mdvp, struct dk_gpt **gpt)
176 {
177 	uint_t		i;
178 	uint_t		lastpart;
179 	size_t		size;
180 
181 	/* first we count how many partitions we have to send */
182 	for (i = 0; i < MD_MAX_PARTS; i++) {
183 		if ((mdvp->parts[i].start == 0) &&
184 		    (mdvp->parts[i].size == 0) &&
185 		    (mdvp->parts[i].tag != V_RESERVED)) {
186 			continue;
187 		}
188 		/* if we are here, we know the partition is really used */
189 		lastpart = i;
190 	}
191 	size = sizeof (struct dk_gpt) + (sizeof (struct dk_part) * lastpart);
192 	*gpt = calloc(size, sizeof (char));
193 
194 	(*gpt)->efi_nparts = lastpart + 1;
195 	(*gpt)->efi_first_u_lba = mdvp->first_lba;
196 	(*gpt)->efi_last_u_lba = mdvp->last_lba;
197 	(*gpt)->efi_lbasize = mdvp->lbasize;
198 	for (i = 0; (i < (*gpt)->efi_nparts); ++i) {
199 		(*gpt)->efi_parts[i].p_start = mdvp->parts[i].start;
200 		(*gpt)->efi_parts[i].p_size = mdvp->parts[i].size;
201 		(*gpt)->efi_parts[i].p_tag = mdvp->parts[i].tag;
202 		(*gpt)->efi_parts[i].p_flag = mdvp->parts[i].flag;
203 		/*
204 		 * Due to the lack of a label for the entire partition table,
205 		 * we use p_name of the reserved partition
206 		 */
207 		if (((*gpt)->efi_parts[i].p_tag == V_RESERVED) &&
208 		    (mdvp->typename != NULL)) {
209 			(void) strlcpy((*gpt)->efi_parts[i].p_name,
210 			    mdvp->typename, EFI_PART_NAME_LEN);
211 		}
212 	}
213 }
214 
215 
216 void
217 ctlr_cache_add(char *nm, int ty)
218 {
219 	ctlr_cache_t	**ccpp;
220 
221 	for (ccpp = &ctlr_cache; *ccpp != NULL; ccpp = &(*ccpp)->ctlr_nx)
222 		if (strcmp((*ccpp)->ctlr_nm, nm) == 0)
223 			return;
224 
225 	*ccpp = Zalloc(sizeof (ctlr_cache_t));
226 	(*ccpp)->ctlr_nm = Strdup(nm);
227 	(*ccpp)->ctlr_ty = ty;
228 }
229 
230 int
231 ctlr_cache_look(char *nm)
232 {
233 	ctlr_cache_t	*tcp;
234 
235 	for (tcp = ctlr_cache; tcp != NULL; tcp = tcp->ctlr_nx)
236 		if (strcmp(tcp->ctlr_nm, nm) == 0)
237 			return (tcp->ctlr_ty);
238 
239 	return (-1);
240 }
241 
242 
243 void
244 metaflushctlrcache(void)
245 {
246 	ctlr_cache_t	*cp, *np;
247 
248 	for (cp = ctlr_cache, np = NULL; cp != NULL; cp = np) {
249 		np = cp->ctlr_nx;
250 		Free(cp->ctlr_nm);
251 		Free(cp);
252 	}
253 	ctlr_cache = NULL;
254 }
255 
256 /*
257  * getdrvnode -- return the driver name based on mdname_t->bname
258  *	Need to free pointer when finished.
259  */
260 char *
261 getdrvnode(mdname_t *np, md_error_t *ep)
262 {
263 	char	*devicespath;
264 	char	*drvnode;
265 	char	*cp;
266 
267 	if ((devicespath = metagetdevicesname(np, ep)) == NULL)
268 		return (NULL);
269 
270 	/*
271 	 * At this point devicespath should be like the following
272 	 * "/devices/<unknow_and_dont_care>/xxxx@vvvv"
273 	 *
274 	 * There's a couple of 'if' statements below which could
275 	 * return an error condition, but I've decide to allow
276 	 * a more open approach regarding the mapping so as to
277 	 * not restrict possible future projects.
278 	 */
279 	if (drvnode = strrchr(devicespath, '/'))
280 		/*
281 		 * drvnode now just "xxxx@vvvv"
282 		 */
283 		drvnode++;
284 
285 	if (cp = strrchr(drvnode, '@'))
286 		/*
287 		 * Now drvnode is just the driver name "xxxx"
288 		 */
289 		*cp = '\0';
290 
291 	cp = Strdup(drvnode);
292 	Free(devicespath);
293 	np->devicesname = NULL;
294 
295 	return (cp);
296 }
297 
298 /*
299  * meta_load_dl -- open dynamic library using LDLIBRARYPATH, a debug
300  *    environment variable METALDPATH, or the default location.
301  */
302 static void *
303 meta_load_dl(mdname_t *np, md_error_t *ep)
304 {
305 	char	*drvnode;
306 	char	newpath[MAXPATHLEN];
307 	char	*p;
308 	void	*cookie;
309 
310 	if ((drvnode = getdrvnode(np, ep)) != NULL) {
311 
312 		/*
313 		 * Library seach algorithm:
314 		 * 1) Use LDLIBRARYPATH which is implied when a non-absolute
315 		 *    path name is passed to dlopen()
316 		 * 2) Use the value of METALDPATH as the directory. Mainly
317 		 *    used for debugging
318 		 * 3) Last search the default location of "/usr/lib"
319 		 */
320 		(void) snprintf(newpath, sizeof (newpath), "lib%s.so.1",
321 		    drvnode);
322 		if ((cookie = dlopen(newpath, RTLD_LAZY)) == NULL) {
323 			if ((p = getenv("METALDPATH")) == NULL)
324 				p = METALDPATH_DEFAULT;
325 			(void) snprintf(newpath, sizeof (newpath),
326 			    "%s/lib%s.so.1", p, drvnode);
327 			Free(drvnode);
328 			if ((cookie = dlopen(newpath, RTLD_LAZY)) != NULL) {
329 				/*
330 				 * Common failure here would be failing to
331 				 * find a libXX.so.1 such as libsd.so.1
332 				 * Some controllers will not have a library
333 				 * because there's no enclosure or name
334 				 * translation required.
335 				 */
336 				return (cookie);
337 			}
338 		} else {
339 			Free(drvnode);
340 			return (cookie);
341 		}
342 	}
343 	return (NULL);
344 }
345 
346 /*
347  * meta_match_names -- possibly convert the driver names returned by CINFO
348  */
349 static void
350 meta_match_names(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
351     md_error_t *ep)
352 {
353 	void		*cookie;
354 	meta_convert_e	((*fptr)(mdname_t *, struct dk_cinfo *, mdcinfo_t *,
355 	    md_error_t *));
356 
357 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
358 		fptr = (meta_convert_e (*)(mdname_t *, struct dk_cinfo *,
359 		    mdcinfo_t *, md_error_t *))dlsym(cookie, "convert_path");
360 		if (fptr != NULL)
361 			(void) (*fptr)(np, cp, mdcp, ep);
362 		(void) dlclose(cookie);
363 	}
364 }
365 
366 /*
367  * meta_match_enclosure -- return any enclosure info if found
368  */
369 int
370 meta_match_enclosure(mdname_t *np, mdcinfo_t *mdcp, md_error_t *ep)
371 {
372 	meta_enclosure_e	e;
373 	meta_enclosure_e	((*fptr)(mdname_t *, mdcinfo_t *,
374 	    md_error_t *));
375 	void			*cookie;
376 
377 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
378 		fptr = (meta_enclosure_e (*)(mdname_t *, mdcinfo_t *,
379 		    md_error_t *))dlsym(cookie, "get_enclosure");
380 		if (fptr != NULL) {
381 			e = (*fptr)(np, mdcp, ep);
382 			switch (e) {
383 			case Enclosure_Error:
384 				/*
385 				 * Looks like this library wanted to handle
386 				 * our device and had an internal error.
387 				 */
388 				return (1);
389 
390 			case Enclosure_Okay:
391 				/*
392 				 * Found a library to handle the request so
393 				 * just return with data provided.
394 				 */
395 				return (0);
396 
397 			case Enclosure_Noop:
398 				/*
399 				 * Need to continue the search
400 				 */
401 				break;
402 			}
403 		}
404 		(void) dlclose(cookie);
405 	}
406 	return (0);
407 }
408 
409 static int
410 meta_cinfo_to_md(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
411     md_error_t *ep)
412 {
413 	/* default */
414 	(void) memset(mdcp, '\0', sizeof (*mdcp));
415 	(void) strncpy(mdcp->cname, cp->dki_cname,
416 	    min((sizeof (mdcp->cname) - 1), sizeof (cp->dki_cname)));
417 	mdcp->ctype = MHD_CTLR_GENERIC;
418 	mdcp->cnum = cp->dki_cnum;
419 	(void) strncpy(mdcp->dname, cp->dki_dname,
420 	    min((sizeof (mdcp->dname) - 1), sizeof (cp->dki_dname)));
421 	mdcp->unit = cp->dki_unit;
422 	mdcp->maxtransfer = cp->dki_maxtransfer;
423 
424 	/*
425 	 * See if the driver name returned from DKIOCINFO
426 	 * is valid or not. In somecases, such as the ap_dmd
427 	 * driver, we need to modify the name that's return
428 	 * for everything to work.
429 	 */
430 	meta_match_names(np, cp, mdcp, ep);
431 
432 	if (meta_match_enclosure(np, mdcp, ep))
433 		return (-1);
434 
435 	/* return success */
436 	return (0);
437 }
438 
439 static void
440 meta_vtoc_to_md(
441 	struct extvtoc	*vp,
442 	mdvtoc_t	*mdvp
443 )
444 {
445 	char		typename[sizeof (vp->v_asciilabel) + 1];
446 	uint_t		i;
447 
448 	(void) memset(mdvp, '\0', sizeof (*mdvp));
449 	(void) strncpy(typename, vp->v_asciilabel,
450 	    sizeof (vp->v_asciilabel));
451 	typename[sizeof (typename) - 1] = '\0';
452 	for (i = 0; ((i < sizeof (typename)) && (typename[i] != '\0')); ++i) {
453 		if ((typename[i] == ' ') || (typename[i] == '\t')) {
454 			typename[i] = '\0';
455 			break;
456 		}
457 	}
458 	mdvp->typename = Strdup(typename);
459 	mdvp->nparts = vp->v_nparts;
460 	for (i = 0; (i < vp->v_nparts); ++i) {
461 		mdvp->parts[i].start = vp->v_part[i].p_start;
462 		mdvp->parts[i].size = vp->v_part[i].p_size;
463 		mdvp->parts[i].tag = vp->v_part[i].p_tag;
464 		mdvp->parts[i].flag = vp->v_part[i].p_flag;
465 		if (vp->v_part[i].p_start == 0 && vp->v_part[i].p_size > 0)
466 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
467 	}
468 }
469 
470 /*
471  * free allocations in vtoc
472  */
473 void
474 metafreevtoc(
475 	mdvtoc_t	*vtocp
476 )
477 {
478 	if (vtocp->typename != NULL)
479 		Free(vtocp->typename);
480 	(void) memset(vtocp, 0, sizeof (*vtocp));
481 }
482 
/*
 * Return the vtoc of the drive np belongs to, in md format.
 *
 * Unless nocache is set, a vtoc already cached in the drive name
 * (nparts != 0) is returned directly, and a drive typed MDT_ACCES or
 * MDT_UNKNOWN fails immediately with the error number saved in the
 * drive name.
 *
 * Otherwise the raw device is opened and:
 *   - the device id and minor name are recorded in dnp->devid and
 *     np->minor_name (both NULL if unavailable),
 *   - DKIOCGGEOM decides the label type: success means a vtoc label,
 *     ENOTSUP means an EFI label is tried, ENOTTY means "not a disk",
 *     anything else is reported as a system error,
 *   - dnp->geom and dnp->vtoc are replaced with the freshly read data.
 *
 * If partnop is not NULL it receives the partition number of np.
 *
 * Returns a pointer to the vtoc stored inside the drive name (not to be
 * freed by the caller), or NULL with ep set on failure.
 */
mdvtoc_t *
metagetvtoc(
	mdname_t	*np,	/* only rname, drivenamep, are setup */
	int		nocache,
	uint_t		*partnop,
	md_error_t	*ep
)
{
	mddrivename_t	*dnp = np->drivenamep;
	struct dk_geom	geom;
	char		*minor_name = NULL;
	char		*rname = np->rname;
	int		fd;
	int		partno;
	int		err = 0;	    /* saves errno from ioctl */
	ddi_devid_t	devid;
	char		*p;

	/* short circuit */
	if ((! nocache) && (dnp->vtoc.nparts != 0)) {
		if (partnop != NULL) {
			/*
			 * the following assignment works because the
			 * mdname_t structs are always created as part
			 * of the drivenamep struct.  When a user
			 * creates an mdname_t struct it either
			 * uses an existing drivenamep struct or creates
			 * a new one and then adds the mdname_t struct
			 * as part of its parts_val array.  So what is
			 * being computed below is the slice offset in
			 * the parts_val array.
			 */
			*partnop = np - np->drivenamep->parts.parts_val;
			assert(*partnop < dnp->parts.parts_len);
		}
		return (&dnp->vtoc);
	}

	/* can't get vtoc */
	if (! nocache) {
		switch (dnp->type) {
		case MDT_ACCES:
		case MDT_UNKNOWN:
			(void) mdsyserror(ep, dnp->errnum, rname);
			return (NULL);
		}
	}

	/* get all the info */
	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
		(void) mdsyserror(ep, errno, rname);
		return (NULL);
	}

	/*
	 * The disk is open so this is a good point to get the devid
	 * otherwise it will need to be done at another time which
	 * means reopening it.
	 */
	if (devid_get(fd, &devid) != 0) {
		/* there is no devid for the disk */
		if (((p = getenv("MD_DEBUG")) != NULL) &&
		    (strstr(p, "DEVID") != NULL)) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "%s has no device id\n"), np->rname);
		}
		np->minor_name = (char *)NULL;
		dnp->devid = NULL;
	} else {
		(void) devid_get_minor_name(fd, &minor_name);
		/*
		 * The minor name could be NULL if the underlying
		 * device driver does not support 'minor names'.
		 * This means we do not use devid's for this device.
		 * The SunCluster did driver does not support minor names.
		 */
		if (minor_name != NULL) {
			/* keep our own copy; the devid library one is freed */
			np->minor_name = Strdup(minor_name);
			devid_str_free(minor_name);
			dnp->devid = devid_str_encode(devid, NULL);
		} else {
			np->minor_name = (char *)NULL;
			dnp->devid = NULL;

			if (((p = getenv("MD_DEBUG")) != NULL) &&
			    (strstr(p, "DEVID") != NULL)) {
				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
				    "%s no minor name (no devid)\n"),
				    np->rname);
			}
		}
		devid_free(devid);
	}

	/*
	 * if our drivenamep points to a device not supporting DKIOCGGEOM,
	 * it's likely to have an EFI label.
	 */
	(void) memset(&geom, 0, sizeof (geom));
	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
		/* capture errno before any other call can change it */
		err = errno;
		if (err == ENOTTY) {
			(void) mddeverror(ep, MDE_NOT_DISK, NODEV, rname);
			(void) close(fd);
			return (NULL);
		} else if (err != ENOTSUP) {
			(void) mdsyserror(ep, err, rname);
			(void) close(fd);
			return (NULL);
		}

	}
	/*
	 * If we are here, there was either no failure on DKIOCGGEOM or
	 * the failure was ENOTSUP
	 */
	if (err == ENOTSUP) {
		/* DKIOCGGEOM yielded ENOTSUP => try efi_alloc_and_read */
		struct dk_gpt	*gpt;
		int		save_errno;
		md_unit_t	*mdev = NULL;
		mdsetname_t	*sp = NULL;

		/* this also sets errno */
		partno = efi_alloc_and_read(fd, &gpt);
		save_errno = errno;
		/* fd is not needed past this point on the EFI path */
		(void) close(fd);
		if (partno < 0) {
			/*
			 * NOTE(review): relies on efi_alloc_and_read()
			 * leaving gpt in a state efi_free() accepts when
			 * it fails -- confirm.
			 */
			efi_free(gpt);
			(void) mdsyserror(ep, save_errno, rname);
			return (NULL);
		}
		if (partno >= gpt->efi_nparts) {
			efi_free(gpt);
			(void) mddeverror(ep, MDE_INVALID_PART, NODEV64,
			    rname);
			return (NULL);
		}

		/* convert to our format */
		metafreevtoc(&dnp->vtoc);
		meta_efi_to_mdvtoc(gpt, &dnp->vtoc);
		/* the conversion stops early and leaves nparts too large */
		if (dnp->vtoc.nparts > MD_MAX_PARTS) {
			efi_free(gpt);
			(void) mddeverror(ep, MDE_TOO_MANY_PARTS, NODEV64,
			    rname);
			return (NULL);
		}
		/*
		 * libmeta needs at least V_NUMPAR partitions.
		 * If we have an EFI partition with less than V_NUMPAR slices,
		 * we nevertheless reserve space for V_NUMPAR
		 */

		if (dnp->vtoc.nparts < V_NUMPAR) {
			dnp->vtoc.nparts = V_NUMPAR;
		}

		/*
		 * Is np a metadevice?
		 */
		if (metaismeta(np)) {
			sp = metasetnosetname(MD_MIN2SET(meta_getminor
			    (np->dev)), ep);
			if (!sp || (mdev = meta_get_mdunit(sp, np, ep)) ==
			    NULL) {
				efi_free(gpt);
				(void) mddeverror(ep, MDE_NOT_META,
				    NODEV64, rname);
				return (NULL);
			}
		}

		/* mdev stays NULL for a plain (non-metadevice) EFI disk */
		meta_efi_to_mdgeom(mdev, gpt, &dnp->geom);
		Free(mdev);
		efi_free(gpt);
	} else {
		/* no error on DKIOCGGEOM, try meta_getvtoc */
		struct extvtoc	vtoc;

		if (meta_getvtoc(fd, np->cname, &vtoc, &partno, ep) < 0) {
			(void) close(fd);
			return (NULL);
		}
		(void) close(fd);

		/* convert to our format */
		meta_geom_to_md(&geom, &dnp->geom);
		metafreevtoc(&dnp->vtoc);
		meta_vtoc_to_md(&vtoc, &dnp->vtoc);
	}

	/* fix up any drives which are now accessible */
	if ((nocache) && (dnp->type == MDT_ACCES) &&
	    (dnp->vtoc.nparts == dnp->parts.parts_len)) {
		dnp->type = MDT_COMP;
		dnp->errnum = 0;
	}

	/* save partno */
	assert(partno < dnp->vtoc.nparts);
	if (partnop != NULL)
		*partnop = partno;

	/* return info */
	return (&dnp->vtoc);
}
693 
694 static void
695 meta_mdvtoc_to_vtoc(
696 	mdvtoc_t	*mdvp,
697 	struct extvtoc	*vp
698 )
699 {
700 	uint_t		i;
701 
702 	(void) memset(&vp->v_part, '\0', sizeof (vp->v_part));
703 	vp->v_nparts = (ushort_t)mdvp->nparts;
704 	for (i = 0; (i < mdvp->nparts); ++i) {
705 		vp->v_part[i].p_start = mdvp->parts[i].start;
706 		vp->v_part[i].p_size  = mdvp->parts[i].size;
707 		vp->v_part[i].p_tag   = mdvp->parts[i].tag;
708 		vp->v_part[i].p_flag  = mdvp->parts[i].flag;
709 	}
710 }
711 
712 /*
713  * Set the vtoc, but use the cached copy to get the info from.
714  * We write np->drivenamep->vtoc to disk.
715  * Before we can do this we read the vtoc in.
716  * if we're dealing with a metadevice and this metadevice is a 64 bit device
717  *	we can use meta_getmdvtoc/meta_setmdvtoc
718  * else
719  * 	we use meta_getvtoc/meta_setvtoc but than we first have to convert
720  *	dnp->vtoc (actually being a mdvtoc_t) into a vtoc_t
721  */
722 int
723 metasetvtoc(
724 	mdname_t	*np,
725 	md_error_t	*ep
726 )
727 {
728 	char		*rname = np->rname;
729 	mddrivename_t	*dnp = np->drivenamep;
730 	int		fd;
731 	int		err;
732 	int 		save_errno;
733 	struct dk_geom	geom;
734 
735 	if ((fd = open(rname, (O_RDONLY | O_NDELAY), 0)) < 0)
736 		return (mdsyserror(ep, errno, rname));
737 
738 	err = ioctl(fd, DKIOCGGEOM, &geom);
739 	save_errno = errno;
740 	if (err == 0) {
741 		struct extvtoc	vtoc;
742 
743 		if (meta_getvtoc(fd, np->cname, &vtoc, NULL, ep) < 0) {
744 			(void) close(fd);
745 			return (-1);
746 		}
747 
748 		meta_mdvtoc_to_vtoc(&dnp->vtoc, &vtoc);
749 
750 		if (meta_setvtoc(fd, np->cname, &vtoc, ep) < 0) {
751 			(void) close(fd);
752 			return (-1);
753 		}
754 	} else if (save_errno == ENOTSUP) {
755 		struct dk_gpt	*gpt;
756 		int		ret;
757 
758 		/* allocation of gpt is done in meta_mdvtoc_to_efi */
759 		meta_mdvtoc_to_efi(&dnp->vtoc, &gpt);
760 
761 		ret = efi_write(fd, gpt);
762 		save_errno = errno;
763 		free(gpt);
764 		if (ret != 0) {
765 			(void) close(fd);
766 			return (mdsyserror(ep, save_errno, rname));
767 		} else {
768 			(void) close(fd);
769 			return (0);
770 		}
771 
772 	} else {
773 		(void) close(fd);
774 		return (mdsyserror(ep, save_errno, rname));
775 	}
776 
777 	(void) close(fd);
778 
779 	return (0);
780 }
781 
782 mdgeom_t *
783 metagetgeom(
784 	mdname_t	*np,	/* only rname, drivenamep, are setup */
785 	md_error_t	*ep
786 )
787 {
788 	if (metagetvtoc(np, FALSE, NULL, ep) == NULL)
789 		return (NULL);
790 	return (&np->drivenamep->geom);
791 }
792 
793 mdcinfo_t *
794 metagetcinfo(
795 	mdname_t	*np,	/* only rname, drivenamep, are setup */
796 	md_error_t	*ep
797 )
798 {
799 	char			*rname = np->rname;
800 	mddrivename_t		*dnp = np->drivenamep;
801 	int			fd;
802 	struct dk_cinfo		cinfo;
803 
804 	/* short circuit */
805 	if (dnp->cinfo.cname[0] != '\0')
806 		return (&dnp->cinfo);
807 
808 	/* get controller info */
809 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
810 		(void) mdsyserror(ep, errno, rname);
811 		return (NULL);
812 	}
813 	if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
814 		int	save = errno;
815 
816 		(void) close(fd);
817 		if (save == ENOTTY) {
818 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
819 		} else {
820 			(void) mdsyserror(ep, save, rname);
821 		}
822 		return (NULL);
823 	}
824 	(void) close(fd);	/* sd/ssd bug */
825 
826 	/* convert to our format */
827 	if (meta_cinfo_to_md(np, &cinfo, &dnp->cinfo, ep) != 0)
828 		return (NULL);
829 
830 	/* return info */
831 	return (&dnp->cinfo);
832 }
833 
834 /*
835  * get partition number
836  */
837 int
838 metagetpartno(
839 	mdname_t	*np,
840 	md_error_t	*ep
841 )
842 {
843 	mdvtoc_t	*vtocp;
844 	uint_t		partno;
845 
846 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
847 		return (-1);
848 	assert(partno < vtocp->nparts);
849 	return (partno);
850 }
851 
852 /*
853  * get size of device
854  */
855 diskaddr_t
856 metagetsize(
857 	mdname_t	*np,
858 	md_error_t	*ep
859 )
860 {
861 	mdvtoc_t	*vtocp;
862 	uint_t		partno;
863 
864 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
865 		return (MD_DISKADDR_ERROR);
866 	assert(partno < vtocp->nparts);
867 	return (vtocp->parts[partno].size);
868 }
869 
870 /*
871  * get label of device
872  */
873 diskaddr_t
874 metagetlabel(
875 	mdname_t	*np,
876 	md_error_t	*ep
877 )
878 {
879 	mdvtoc_t	*vtocp;
880 	uint_t		partno;
881 
882 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
883 		return (MD_DISKADDR_ERROR);
884 	assert(partno < vtocp->nparts);
885 	return (vtocp->parts[partno].label);
886 }
887 
888 /*
889  * find out where database replicas end
890  */
891 static int
892 mddb_getendblk(
893 	mdsetname_t		*sp,
894 	mdname_t		*np,
895 	diskaddr_t		*endblkp,
896 	md_error_t		*ep
897 )
898 {
899 	md_replicalist_t	*rlp = NULL;
900 	md_replicalist_t	*rl;
901 
902 	/* make sure we have a component */
903 	*endblkp = 0;
904 	if (metaismeta(np))
905 		return (0);
906 
907 	/* get replicas, quit if none */
908 	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &rlp, ep) < 0) {
909 		if (! mdismddberror(ep, MDE_DB_NODB))
910 			return (-1);
911 		mdclrerror(ep);
912 		return (0);
913 	} else if (rlp == NULL)
914 		return (0);
915 
916 	/* go through all the replicas */
917 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
918 		md_replica_t	*rp = rl->rl_repp;
919 		mdname_t	*repnamep = rp->r_namep;
920 		diskaddr_t	dbend;
921 
922 		if (np->dev != repnamep->dev)
923 			continue;
924 		dbend = rp->r_blkno + rp->r_nblk - 1;
925 		if (dbend > *endblkp)
926 			*endblkp = dbend;
927 	}
928 
929 	/* cleanup, return success */
930 	metafreereplicalist(rlp);
931 	return (0);
932 }
933 
934 /*
935  * return cached start block
936  */
937 static diskaddr_t
938 metagetend(
939 	mdsetname_t	*sp,
940 	mdname_t	*np,
941 	md_error_t	*ep
942 )
943 {
944 	diskaddr_t	end_blk = MD_DISKADDR_ERROR;
945 
946 	/* short circuit */
947 	if (np->end_blk != MD_DISKADDR_ERROR)
948 		return (np->end_blk);
949 
950 	/* look for database locations */
951 	if (mddb_getendblk(sp, np, &end_blk, ep) != 0)
952 		return (MD_DISKADDR_ERROR);
953 
954 	/* success */
955 	np->end_blk = end_blk;
956 	return (end_blk);
957 }
958 
959 /*
960  * does device have a metadb
961  */
962 int
963 metahasmddb(
964 	mdsetname_t	*sp,
965 	mdname_t	*np,
966 	md_error_t	*ep
967 )
968 {
969 	if (metagetend(sp, np, ep) == MD_DISKADDR_ERROR)
970 		return (-1);
971 	else if (np->end_blk > 0)
972 		return (1);
973 	else
974 		return (0);
975 }
976 
977 /*
978  * return cached start block
979  */
980 diskaddr_t
981 metagetstart(
982 	mdsetname_t	*sp,
983 	mdname_t	*np,
984 	md_error_t	*ep
985 )
986 {
987 	diskaddr_t	start_blk = MD_DISKADDR_ERROR;
988 
989 	/* short circuit */
990 	if (np->start_blk != MD_DISKADDR_ERROR)
991 		return (np->start_blk);
992 
993 	/* look for database locations */
994 	if ((start_blk = metagetend(sp, np, ep)) == MD_DISKADDR_ERROR)
995 		return (MD_DISKADDR_ERROR);
996 
997 	/* check for label */
998 	if (start_blk == 0) {
999 		start_blk = metagetlabel(np, ep);
1000 		if (start_blk == MD_DISKADDR_ERROR) {
1001 			return (MD_DISKADDR_ERROR);
1002 		}
1003 	}
1004 
1005 	/* roundup to next cylinder */
1006 	if (start_blk != 0) {
1007 		mdgeom_t	*geomp;
1008 
1009 		if ((geomp = metagetgeom(np, ep)) == NULL)
1010 			return (MD_DISKADDR_ERROR);
1011 		start_blk = roundup(start_blk, (geomp->nhead * geomp->nsect));
1012 	}
1013 
1014 	/* success */
1015 	np->start_blk = start_blk;
1016 	return (start_blk);
1017 }
1018 
1019 /*
1020  * return cached devices name
1021  */
1022 char *
1023 metagetdevicesname(
1024 	mdname_t	*np,
1025 	md_error_t	*ep
1026 )
1027 {
1028 	char		path[MAXPATHLEN + 1];
1029 	int		len;
1030 
1031 	/* short circuit */
1032 	if (np->devicesname != NULL)
1033 		return (np->devicesname);
1034 
1035 	/* follow symlink */
1036 	if ((len = readlink(np->bname, path, (sizeof (path) - 1))) < 0) {
1037 		(void) mdsyserror(ep, errno, np->bname);
1038 		return (NULL);
1039 	} else if (len >= sizeof (path)) {
1040 		(void) mdsyserror(ep, ENAMETOOLONG, np->bname);
1041 		return (NULL);
1042 	}
1043 	path[len] = '\0';
1044 	if ((len = strfind(path, "/devices/")) < 0) {
1045 		(void) mddeverror(ep, MDE_DEVICES_NAME, np->dev, np->bname);
1046 		return (NULL);
1047 	}
1048 
1049 	/* return name */
1050 	np->devicesname = Strdup(path + len + strlen("/devices"));
1051 	return (np->devicesname);
1052 }
1053 
1054 /*
1055  * get metadevice misc name
1056  */
1057 char *
1058 metagetmiscname(
1059 	mdname_t		*np,
1060 	md_error_t		*ep
1061 )
1062 {
1063 	mddrivename_t		*dnp = np->drivenamep;
1064 	md_i_driverinfo_t	mid;
1065 
1066 	/* short circuit */
1067 	if (dnp->miscname != NULL)
1068 		return (dnp->miscname);
1069 	if (metachkmeta(np, ep) != 0)
1070 		return (NULL);
1071 
1072 	/* get misc module from driver */
1073 	(void) memset(&mid, 0, sizeof (mid));
1074 	mid.mnum = meta_getminor(np->dev);
1075 	if (metaioctl(MD_IOCGET_DRVNM, &mid, &mid.mde, np->cname) != 0) {
1076 		(void) mdstealerror(ep, &mid.mde);
1077 		return (NULL);
1078 	}
1079 
1080 	/* return miscname */
1081 	dnp->miscname = Strdup(MD_PNTDRIVERNAME(&mid));
1082 	return (dnp->miscname);
1083 }
1084 
1085 /*
1086  * get unit structure from driver
1087  */
1088 md_unit_t *
1089 meta_get_mdunit(
1090 	mdsetname_t	*sp,
1091 	mdname_t	*np,
1092 	md_error_t	*ep
1093 )
1094 {
1095 	md_i_get_t	mig;
1096 	char		*miscname = NULL;
1097 
1098 	/* should have a set */
1099 	assert(sp != NULL);
1100 	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
1101 
1102 	/* get size of unit structure */
1103 	if (metachkmeta(np, ep) != 0)
1104 		return (NULL);
1105 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1106 		return (NULL);
1107 	(void) memset(&mig, '\0', sizeof (mig));
1108 	MD_SETDRIVERNAME(&mig, miscname, sp->setno);
1109 	mig.id = meta_getminor(np->dev);
1110 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1111 		(void) mdstealerror(ep, &mig.mde);
1112 		return (NULL);
1113 	}
1114 
1115 	/* get actual unit structure */
1116 	assert(mig.size > 0);
1117 	mig.mdp = (uintptr_t)Zalloc(mig.size);
1118 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1119 		(void) mdstealerror(ep, &mig.mde);
1120 		Free((void *)(uintptr_t)mig.mdp);
1121 		return (NULL);
1122 	}
1123 
1124 	return ((md_unit_t *)(uintptr_t)mig.mdp);
1125 }
1126 
1127 /*
1128  * free metadevice unit
1129  */
1130 void
1131 meta_free_unit(
1132 	mddrivename_t	*dnp
1133 )
1134 {
1135 	if (dnp->unitp != NULL) {
1136 		switch (dnp->unitp->type) {
1137 		case MD_DEVICE:
1138 			meta_free_stripe((md_stripe_t *)dnp->unitp);
1139 			break;
1140 		case MD_METAMIRROR:
1141 			meta_free_mirror((md_mirror_t *)dnp->unitp);
1142 			break;
1143 		case MD_METATRANS:
1144 			meta_free_trans((md_trans_t *)dnp->unitp);
1145 			break;
1146 		case MD_METARAID:
1147 			meta_free_raid((md_raid_t *)dnp->unitp);
1148 			break;
1149 		case MD_METASP:
1150 			meta_free_sp((md_sp_t *)dnp->unitp);
1151 			break;
1152 		default:
1153 			assert(0);
1154 			break;
1155 		}
1156 		dnp->unitp = NULL;
1157 	}
1158 }
1159 
1160 /*
1161  * free metadevice name info
1162  */
1163 void
1164 meta_invalidate_name(
1165 	mdname_t	*namep
1166 )
1167 {
1168 	mddrivename_t	*dnp = namep->drivenamep;
1169 
1170 	/* get rid of cached name info */
1171 	if (namep->devicesname != NULL) {
1172 		Free(namep->devicesname);
1173 		namep->devicesname = NULL;
1174 	}
1175 	namep->key = MD_KEYBAD;
1176 	namep->start_blk = -1;
1177 	namep->end_blk = -1;
1178 
1179 	/* get rid of cached drivename info */
1180 	(void) memset(&dnp->geom, 0, sizeof (dnp->geom));
1181 	(void) memset(&dnp->cinfo, 0, sizeof (dnp->cinfo));
1182 	metafreevtoc(&dnp->vtoc);
1183 	metaflushsidenames(dnp);
1184 	dnp->side_names_key = MD_KEYBAD;
1185 	if (dnp->miscname != NULL) {
1186 		Free(dnp->miscname);
1187 		dnp->miscname = NULL;
1188 	}
1189 	meta_free_unit(dnp);
1190 }
1191 
1192 /*
1193  * get metadevice unit
1194  */
1195 md_common_t *
1196 meta_get_unit(
1197 	mdsetname_t	*sp,
1198 	mdname_t	*np,
1199 	md_error_t	*ep
1200 )
1201 {
1202 	char		*miscname;
1203 
1204 	/* short circuit */
1205 	if (np->drivenamep->unitp != NULL)
1206 		return (np->drivenamep->unitp);
1207 	if (metachkmeta(np, ep) != 0)
1208 		return (NULL);
1209 
1210 	/* dispatch */
1211 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1212 		return (NULL);
1213 	else if (strcmp(miscname, MD_STRIPE) == 0)
1214 		return ((md_common_t *)meta_get_stripe(sp, np, ep));
1215 	else if (strcmp(miscname, MD_MIRROR) == 0)
1216 		return ((md_common_t *)meta_get_mirror(sp, np, ep));
1217 	else if (strcmp(miscname, MD_TRANS) == 0)
1218 		return ((md_common_t *)meta_get_trans(sp, np, ep));
1219 	else if (strcmp(miscname, MD_RAID) == 0)
1220 		return ((md_common_t *)meta_get_raid(sp, np, ep));
1221 	else if (strcmp(miscname, MD_SP) == 0)
1222 		return ((md_common_t *)meta_get_sp(sp, np, ep));
1223 	else {
1224 		(void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
1225 		    np->cname);
1226 		return (NULL);
1227 	}
1228 }
1229 
1230 
1231 int
1232 meta_isopen(
1233 	mdsetname_t	*sp,
1234 	mdname_t	*np,
1235 	md_error_t	*ep,
1236 	mdcmdopts_t	options
1237 )
1238 {
1239 	md_isopen_t	d;
1240 
1241 	if (metachkmeta(np, ep) != 0)
1242 		return (-1);
1243 
1244 	(void) memset(&d, '\0', sizeof (d));
1245 	d.dev = np->dev;
1246 	if (metaioctl(MD_IOCISOPEN, &d, &d.mde, np->cname) != 0)
1247 		return (mdstealerror(ep, &d.mde));
1248 
1249 	/*
1250 	 * shortcut: if the device is open, no need to check on other nodes,
1251 	 * even in case of a mn metadevice
1252 	 * Also return in case we're told not to check on other nodes.
1253 	 */
1254 	if ((d.isopen != 0) || ((options & MDCMD_MN_OPEN_CHECK) == 0)) {
1255 		return (d.isopen);
1256 	}
1257 
1258 	/*
1259 	 * If the device is closed locally, but it's a mn device,
1260 	 * check on all other nodes, too
1261 	 */
1262 	if (sp->setno != MD_LOCAL_SET) {
1263 		(void) metaget_setdesc(sp, ep); /* not supposed to fail */
1264 		if (sp->setdesc->sd_flags & MD_SR_MN) {
1265 			int		err = 0;
1266 			md_mn_result_t *resp;
1267 			/*
1268 			 * This message is never directly issued.
1269 			 * So we launch it with a suspend override flag.
1270 			 * If the commd is suspended, and this message comes
1271 			 * along it must be sent due to replaying a metainit or
1272 			 * similar. In that case we don't want this message to
1273 			 * be blocked.
1274 			 * If the commd is not suspended, the flag does no harm.
1275 			 * Additionally we don't want the result of the message
1276 			 * cached in the MCT, because we want uptodate results,
1277 			 * and the message doesn't need being logged either.
1278 			 * Hence NO_LOG and NO_MCT
1279 			 */
1280 			err = mdmn_send_message(sp->setno,
1281 			    MD_MN_MSG_CLU_CHECK, MD_MSGF_NO_MCT |
1282 			    MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG |
1283 			    MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&d,
1284 			    sizeof (md_isopen_t), &resp, ep);
1285 			if (err == 0) {
1286 				d.isopen = resp->mmr_exitval;
1287 			} else {
1288 				/*
1289 				 * in case some error occurred,
1290 				 * we better say the device is open
1291 				 */
1292 				d.isopen = 1;
1293 			}
1294 			if (resp != (md_mn_result_t *)NULL) {
1295 				free_result(resp);
1296 			}
1297 
1298 		}
1299 	}
1300 
1301 	return (d.isopen);
1302 }
1303