xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c (revision 7257d1b4d25bfac0c802847390e98a464fd787ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <dlfcn.h>
29 #include <meta.h>
30 #include <metadyn.h>
31 #include <ctype.h>
32 #include <dirent.h>
33 #include <devid.h>
34 #include <sys/param.h>
35 #include <sys/scsi/impl/uscsi.h>
36 #include <sys/scsi/generic/commands.h>
37 #include <sys/scsi/generic/inquiry.h>
38 #include <sys/efi_partition.h>
39 
/*
 * Fixed values that meta_efi_to_mdgeom() stores in an mdgeom_t when a
 * device carries an EFI label: head and sector counts (also used there
 * to derive the cylinder count), rpm and the write/read reinstruct
 * values.
 */
#define	MD_EFI_FG_HEADS		128
#define	MD_EFI_FG_SECTORS	256
#define	MD_EFI_FG_RPM		7200
#define	MD_EFI_FG_WRI		1
#define	MD_EFI_FG_RRI		1
45 
46 
/*
 * One entry of the controller cache, a singly linked list that maps a
 * controller name to an integer type.  Entries are appended by
 * ctlr_cache_add(), searched by ctlr_cache_look() and released by
 * metaflushctlrcache().
 */
typedef struct ctlr_cache {
	char			*ctlr_nm;	/* Strdup()ed name (lookup key) */
	int			ctlr_ty;	/* type stored for that name */
	struct	ctlr_cache	*ctlr_nx;	/* next entry, NULL at the tail */
} ctlr_cache_t;

/* head of the controller cache list; NULL while the cache is empty */
static	ctlr_cache_t	*ctlr_cache = NULL;
54 
55 
56 /*
57  * return set for a device
58  */
59 mdsetname_t *
60 metagetset(
61 	mdname_t	*np,
62 	int		bypass_daemon,
63 	md_error_t	*ep
64 )
65 {
66 	mdsetname_t	*sp;
67 
68 	/* metadevice */
69 	if (metaismeta(np))
70 		return (metasetnosetname(MD_MIN2SET(meta_getminor(np->dev)),
71 						ep));
72 
73 	/* regular device */
74 	if (meta_is_drive_in_anyset(np->drivenamep, &sp, bypass_daemon,
75 	    ep) != 0)
76 		return (NULL);
77 
78 	if (sp != NULL)
79 		return (sp);
80 
81 	return (metasetnosetname(MD_LOCAL_SET, ep));
82 }
83 
84 /*
85  * convert system to md types
86  */
87 static void
88 meta_geom_to_md(
89 	struct dk_geom	*gp,
90 	mdgeom_t	*mdgp
91 )
92 {
93 	(void) memset(mdgp, '\0', sizeof (*mdgp));
94 	mdgp->ncyl = gp->dkg_ncyl;
95 	mdgp->nhead = gp->dkg_nhead;
96 	mdgp->nsect = gp->dkg_nsect;
97 	mdgp->rpm = gp->dkg_rpm;
98 	mdgp->write_reinstruct = gp->dkg_write_reinstruct;
99 	mdgp->read_reinstruct = gp->dkg_read_reinstruct;
100 	mdgp->blk_sz = DEV_BSIZE;
101 }
102 
103 /*
104  * convert efi to md types
105  */
106 static void
107 meta_efi_to_mdgeom(struct dk_gpt *gpt, mdgeom_t	*mdgp)
108 {
109 	(void) memset(mdgp, '\0', sizeof (*mdgp));
110 	mdgp->ncyl = (gpt->efi_last_u_lba - gpt->efi_first_u_lba) /
111 					(MD_EFI_FG_HEADS * MD_EFI_FG_SECTORS);
112 	mdgp->nhead = MD_EFI_FG_HEADS;
113 	mdgp->nsect = MD_EFI_FG_SECTORS;
114 	mdgp->rpm = MD_EFI_FG_RPM;
115 	mdgp->write_reinstruct = MD_EFI_FG_WRI;
116 	mdgp->read_reinstruct = MD_EFI_FG_RRI;
117 	mdgp->blk_sz = DEV_BSIZE;
118 }
119 
120 static void
121 meta_efi_to_mdvtoc(struct dk_gpt *gpt, mdvtoc_t *mdvp)
122 {
123 	char		typename[EFI_PART_NAME_LEN];
124 	uint_t		i;
125 
126 	(void) memset(mdvp, '\0', sizeof (*mdvp));
127 	mdvp->nparts = gpt->efi_nparts;
128 	if (mdvp->nparts > MD_MAX_PARTS)
129 		return;
130 
131 	mdvp->first_lba = gpt->efi_first_u_lba;
132 	mdvp->last_lba = gpt->efi_last_u_lba;
133 	mdvp->lbasize = gpt->efi_lbasize;
134 
135 	for (i = 0; (i < gpt->efi_nparts); ++i) {
136 		mdvp->parts[i].start = gpt->efi_parts[i].p_start;
137 		mdvp->parts[i].size = gpt->efi_parts[i].p_size;
138 		mdvp->parts[i].tag = gpt->efi_parts[i].p_tag;
139 		mdvp->parts[i].flag = gpt->efi_parts[i].p_flag;
140 		/*
141 		 * It is possible to present an efi label but be using vtoc
142 		 * disks to create a > 1 TB metadevice.  In case the first
143 		 * disk in the underlying metadevice is a vtoc disk and starts
144 		 * at the beginning of the disk it is necessary to convey this
145 		 * information to the user.
146 		 */
147 		if (mdvp->parts[i].size > 0 &&
148 		    mdvp->parts[i].start != 0 && mdvp->nparts == 1) {
149 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
150 			mdvp->parts[i].start = 0;
151 		}
152 
153 		/*
154 		 * Due to the lack of a label for the entire partition table,
155 		 * we use p_name of the reserved partition
156 		 */
157 		if ((gpt->efi_parts[i].p_tag == V_RESERVED) &&
158 		    (gpt->efi_parts[i].p_name != NULL)) {
159 			(void) strlcpy(typename, gpt->efi_parts[i].p_name,
160 					EFI_PART_NAME_LEN);
161 			/* Stop at first (if any) space or tab */
162 			(void) strtok(typename, " \t");
163 			mdvp->typename = Strdup(typename);
164 		}
165 	}
166 }
167 
168 static void
169 meta_mdvtoc_to_efi(mdvtoc_t *mdvp, struct dk_gpt **gpt)
170 {
171 	char		typename[EFI_PART_NAME_LEN];
172 	uint_t		i;
173 	uint_t		lastpart;
174 	size_t		size;
175 
176 	/* first we count how many partitions we have to send */
177 	for (i = 0; i < MD_MAX_PARTS; i++) {
178 		if ((mdvp->parts[i].start == 0) &&
179 		    (mdvp->parts[i].size == 0) &&
180 		    (mdvp->parts[i].tag != V_RESERVED)) {
181 			continue;
182 		}
183 		/* if we are here, we know the partition is really used */
184 		lastpart = i;
185 	}
186 	size = sizeof (struct dk_gpt) + (sizeof (struct dk_part) * lastpart);
187 	*gpt = calloc(size, sizeof (char));
188 
189 	(*gpt)->efi_nparts = lastpart + 1;
190 	(*gpt)->efi_first_u_lba = mdvp->first_lba;
191 	(*gpt)->efi_last_u_lba = mdvp->last_lba;
192 	(*gpt)->efi_lbasize = mdvp->lbasize;
193 	for (i = 0; (i < (*gpt)->efi_nparts); ++i) {
194 		(*gpt)->efi_parts[i].p_start = mdvp->parts[i].start;
195 		(*gpt)->efi_parts[i].p_size = mdvp->parts[i].size;
196 		(*gpt)->efi_parts[i].p_tag = mdvp->parts[i].tag;
197 		(*gpt)->efi_parts[i].p_flag = mdvp->parts[i].flag;
198 		/*
199 		 * Due to the lack of a label for the entire partition table,
200 		 * we use p_name of the reserved partition
201 		 */
202 		if (((*gpt)->efi_parts[i].p_tag == V_RESERVED) &&
203 			(mdvp->typename != NULL)) {
204 			(void) strlcpy((*gpt)->efi_parts[i].p_name, typename,
205 				EFI_PART_NAME_LEN);
206 		}
207 	}
208 }
209 
210 
211 void
212 ctlr_cache_add(char *nm, int ty)
213 {
214 	ctlr_cache_t	**ccpp;
215 
216 	for (ccpp = &ctlr_cache; *ccpp != NULL; ccpp = &(*ccpp)->ctlr_nx)
217 		if (strcmp((*ccpp)->ctlr_nm, nm) == 0)
218 			return;
219 
220 	*ccpp = Zalloc(sizeof (ctlr_cache_t));
221 	(*ccpp)->ctlr_nm = Strdup(nm);
222 	(*ccpp)->ctlr_ty = ty;
223 }
224 
225 int
226 ctlr_cache_look(char *nm)
227 {
228 	ctlr_cache_t	*tcp;
229 
230 	for (tcp = ctlr_cache; tcp != NULL; tcp = tcp->ctlr_nx)
231 		if (strcmp(tcp->ctlr_nm, nm) == 0)
232 			return (tcp->ctlr_ty);
233 
234 	return (-1);
235 }
236 
237 
238 void
239 metaflushctlrcache(void)
240 {
241 	ctlr_cache_t	*cp, *np;
242 
243 	for (cp = ctlr_cache, np = NULL; cp != NULL; cp = np) {
244 		np = cp->ctlr_nx;
245 		Free(cp->ctlr_nm);
246 		Free(cp);
247 	}
248 	ctlr_cache = NULL;
249 }
250 
251 /*
252  * getdrvnode -- return the driver name based on mdname_t->bname
253  *	Need to free pointer when finished.
254  */
255 char *
256 getdrvnode(mdname_t *np, md_error_t *ep)
257 {
258 	char	*devicespath,
259 		*drvnode,
260 		*cp;
261 
262 	if ((devicespath = metagetdevicesname(np, ep)) == NULL)
263 		return (NULL);
264 
265 	/*
266 	 * At this point devicespath should be like the following
267 	 * "/devices/<unknow_and_dont_care>/xxxx@vvvv"
268 	 *
269 	 * There's a couple of 'if' statements below which could
270 	 * return an error condition, but I've decide to allow
271 	 * a more open approach regarding the mapping so as to
272 	 * not restrict possible future projects.
273 	 */
274 	if (drvnode = strrchr(devicespath, '/'))
275 		/*
276 		 * drvnode now just "xxxx@vvvv"
277 		 */
278 		drvnode++;
279 
280 	if (cp = strrchr(drvnode, '@'))
281 		/*
282 		 * Now drvnode is just the driver name "xxxx"
283 		 */
284 		*cp = '\0';
285 
286 	cp = Strdup(drvnode);
287 	Free(devicespath);
288 	np->devicesname = NULL;
289 
290 	return (cp);
291 }
292 
293 /*
294  * meta_load_dl -- open dynamic library using LDLIBRARYPATH, a debug
295  *    environment variable METALDPATH, or the default location.
296  */
297 static void *
298 meta_load_dl(mdname_t *np, md_error_t *ep)
299 {
300 	char	*drvnode,
301 		newpath[MAXPATHLEN],
302 		*p;
303 	void	*cookie;
304 
305 	if ((drvnode = getdrvnode(np, ep)) != NULL) {
306 
307 		/*
308 		 * Library seach algorithm:
309 		 * 1) Use LDLIBRARYPATH which is implied when a non-absolute
310 		 *    path name is passed to dlopen()
311 		 * 2) Use the value of METALDPATH as the directory. Mainly
312 		 *    used for debugging
313 		 * 3) Last search the default location of "/usr/lib"
314 		 */
315 		(void) snprintf(newpath, sizeof (newpath), "lib%s.so.1",
316 		    drvnode);
317 		if ((cookie = dlopen(newpath, RTLD_LAZY)) == NULL) {
318 			if ((p = getenv("METALDPATH")) == NULL)
319 				p = METALDPATH_DEFAULT;
320 			(void) snprintf(newpath, sizeof (newpath),
321 			    "%s/lib%s.so.1", p, drvnode);
322 			Free(drvnode);
323 			if ((cookie = dlopen(newpath, RTLD_LAZY)) != NULL) {
324 				/*
325 				 * Common failure here would be failing to
326 				 * find a libXX.so.1 such as libsd.so.1
327 				 * Some controllers will not have a library
328 				 * because there's no enclosure or name
329 				 * translation required.
330 				 */
331 				return (cookie);
332 			}
333 		} else {
334 			Free(drvnode);
335 			return (cookie);
336 		}
337 	}
338 	return (NULL);
339 }
340 
341 /*
342  * meta_match_names -- possibly convert the driver names returned by CINFO
343  */
344 static void
345 meta_match_names(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
346     md_error_t *ep)
347 {
348 	void		*cookie;
349 	meta_convert_e	((*fptr)(mdname_t *, struct dk_cinfo *, mdcinfo_t *,
350 			    md_error_t *));
351 
352 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
353 		fptr = (meta_convert_e (*)(mdname_t *, struct dk_cinfo *,
354 		    mdcinfo_t *, md_error_t *))dlsym(cookie, "convert_path");
355 		if (fptr != NULL)
356 			(void) (*fptr)(np, cp, mdcp, ep);
357 		(void) dlclose(cookie);
358 	}
359 }
360 
361 /*
362  * meta_match_enclosure -- return any enclosure info if found
363  */
364 int
365 meta_match_enclosure(mdname_t *np, mdcinfo_t *mdcp, md_error_t *ep)
366 {
367 	meta_enclosure_e	e,
368 				((*fptr)(mdname_t *, mdcinfo_t *,
369 				    md_error_t *));
370 	void			*cookie;
371 
372 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
373 		fptr = (meta_enclosure_e (*)(mdname_t *, mdcinfo_t *,
374 		    md_error_t *))dlsym(cookie, "get_enclosure");
375 		if (fptr != NULL) {
376 			e = (*fptr)(np, mdcp, ep);
377 			switch (e) {
378 			case Enclosure_Error:
379 				/*
380 				 * Looks like this library wanted to handle
381 				 * our device and had an internal error.
382 				 */
383 				return (1);
384 
385 			case Enclosure_Okay:
386 				/*
387 				 * Found a library to handle the request so
388 				 * just return with data provided.
389 				 */
390 				return (0);
391 
392 			case Enclosure_Noop:
393 				/*
394 				 * Need to continue the search
395 				 */
396 				break;
397 			}
398 		}
399 		(void) dlclose(cookie);
400 	}
401 	return (0);
402 }
403 
404 static int
405 meta_cinfo_to_md(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
406     md_error_t *ep)
407 {
408 	/* default */
409 	(void) memset(mdcp, '\0', sizeof (*mdcp));
410 	(void) strncpy(mdcp->cname, cp->dki_cname,
411 	    min((sizeof (mdcp->cname) - 1), sizeof (cp->dki_cname)));
412 	mdcp->ctype = MHD_CTLR_GENERIC;
413 	mdcp->cnum = cp->dki_cnum;
414 	(void) strncpy(mdcp->dname, cp->dki_dname,
415 	    min((sizeof (mdcp->dname) - 1), sizeof (cp->dki_dname)));
416 	mdcp->unit = cp->dki_unit;
417 	mdcp->maxtransfer = cp->dki_maxtransfer;
418 
419 	/*
420 	 * See if the driver name returned from DKIOCINFO
421 	 * is valid or not. In somecases, such as the ap_dmd
422 	 * driver, we need to modify the name that's return
423 	 * for everything to work.
424 	 */
425 	meta_match_names(np, cp, mdcp, ep);
426 
427 	if (meta_match_enclosure(np, mdcp, ep))
428 		return (-1);
429 
430 	/* return success */
431 	return (0);
432 }
433 
434 static void
435 meta_vtoc_to_md(
436 	struct vtoc	*vp,
437 	mdvtoc_t	*mdvp
438 )
439 {
440 	char		typename[sizeof (vp->v_asciilabel) + 1];
441 	uint_t		i;
442 
443 	(void) memset(mdvp, '\0', sizeof (*mdvp));
444 	(void) strncpy(typename, vp->v_asciilabel,
445 	    sizeof (vp->v_asciilabel));
446 	typename[sizeof (typename) - 1] = '\0';
447 	for (i = 0; ((i < sizeof (typename)) && (typename[i] != '\0')); ++i) {
448 		if ((typename[i] == ' ') || (typename[i] == '\t')) {
449 			typename[i] = '\0';
450 			break;
451 		}
452 	}
453 	mdvp->typename = Strdup(typename);
454 	mdvp->nparts = vp->v_nparts;
455 	for (i = 0; (i < vp->v_nparts); ++i) {
456 		mdvp->parts[i].start = vp->v_part[i].p_start;
457 		mdvp->parts[i].size = vp->v_part[i].p_size;
458 		mdvp->parts[i].tag = vp->v_part[i].p_tag;
459 		mdvp->parts[i].flag = vp->v_part[i].p_flag;
460 		if (vp->v_part[i].p_start == 0 && vp->v_part[i].p_size > 0)
461 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
462 	}
463 }
464 
465 /*
466  * free allocations in vtoc
467  */
468 void
469 metafreevtoc(
470 	mdvtoc_t	*vtocp
471 )
472 {
473 	if (vtocp->typename != NULL)
474 		Free(vtocp->typename);
475 	(void) memset(vtocp, 0, sizeof (*vtocp));
476 }
477 
478 /*
479  * return md types
480  */
481 mdvtoc_t *
482 metagetvtoc(
483 	mdname_t	*np,	/* only rname, drivenamep, are setup */
484 	int		nocache,
485 	uint_t		*partnop,
486 	md_error_t	*ep
487 )
488 {
489 	mddrivename_t	*dnp = np->drivenamep;
490 	struct dk_geom	geom;
491 	char		*minor_name = NULL;
492 	char		*rname = np->rname;
493 	int		fd;
494 	int		partno;
495 	int		err = 0;	    /* saves errno from ioctl */
496 	ddi_devid_t	devid;
497 	char		*p;
498 
499 	/* short circuit */
500 	if ((! nocache) && (dnp->vtoc.nparts != 0)) {
501 		if (partnop != NULL) {
502 			/*
503 			 * the following assigment works because the
504 			 * mdname_t structs are always created as part
505 			 * of the drivenamep struct.  When a user
506 			 * creates an mdname_t struct it either
507 			 * uses an existing drivenamep struct or creates
508 			 * a new one and then adds the mdname_t struct
509 			 * as part of its parts_val array.  So what is
510 			 * being computed below is the slice offset in
511 			 * the parts_val array.
512 			 */
513 			*partnop = np - np->drivenamep->parts.parts_val;
514 			assert(*partnop < dnp->parts.parts_len);
515 		}
516 		return (&dnp->vtoc);
517 	}
518 
519 	/* can't get vtoc */
520 	if (! nocache) {
521 		switch (dnp->type) {
522 		case MDT_ACCES:
523 		case MDT_UNKNOWN:
524 			(void) mdsyserror(ep, dnp->errnum, rname);
525 			return (NULL);
526 		}
527 	}
528 
529 	/* get all the info */
530 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
531 		(void) mdsyserror(ep, errno, rname);
532 		return (NULL);
533 	}
534 
535 	/*
536 	 * The disk is open so this is a good point to get the devid
537 	 * otherwise it will need to be done at another time which
538 	 * means reopening it.
539 	 */
540 	if (devid_get(fd, &devid) != 0) {
541 		/* there is no devid for the disk */
542 		if (((p = getenv("MD_DEBUG")) != NULL) &&
543 		    (strstr(p, "DEVID") != NULL)) {
544 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
545 			    "%s has no device id\n"), np->rname);
546 		}
547 		np->minor_name = (char *)NULL;
548 		dnp->devid = NULL;
549 	} else {
550 		(void) devid_get_minor_name(fd, &minor_name);
551 		/*
552 		 * The minor name could be NULL if the underlying
553 		 * device driver does not support 'minor names'.
554 		 * This means we do not use devid's for this device.
555 		 * SunCluster did driver does not support minor names.
556 		 */
557 		if (minor_name != NULL) {
558 			np->minor_name = Strdup(minor_name);
559 			devid_str_free(minor_name);
560 			dnp->devid = devid_str_encode(devid, NULL);
561 		} else {
562 			np->minor_name = (char *)NULL;
563 			dnp->devid = NULL;
564 
565 			if (((p = getenv("MD_DEBUG")) != NULL) &&
566 			    (strstr(p, "DEVID") != NULL)) {
567 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
568 				    "%s no minor name (no devid)\n"),
569 				    np->rname);
570 			}
571 		}
572 		devid_free(devid);
573 	}
574 
575 	/*
576 	 * if our drivenamep points to a device not supporting DKIOCGGEOM,
577 	 * it's likely to have an EFI label.
578 	 */
579 	(void) memset(&geom, 0, sizeof (geom));
580 	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
581 		err = errno;
582 		if (err == ENOTTY) {
583 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV, rname);
584 			(void) close(fd);
585 			return (NULL);
586 		} else if (err != ENOTSUP) {
587 			(void) mdsyserror(ep, err, rname);
588 			(void) close(fd);
589 			return (NULL);
590 		}
591 
592 	}
593 	/*
594 	 * If we are here, there was either no failure on DKIOCGGEOM or
595 	 * the failure was ENOTSUP
596 	 */
597 	if (err == ENOTSUP) {
598 		/* DKIOCGGEOM yielded ENOTSUP => try efi_alloc_and_read */
599 		struct dk_gpt	*gpt;
600 		int		save_errno;
601 
602 		/* this also sets errno */
603 		partno = efi_alloc_and_read(fd, &gpt);
604 		save_errno = errno;
605 		(void) close(fd);
606 		if (partno < 0) {
607 			efi_free(gpt);
608 			(void) mdsyserror(ep, save_errno, rname);
609 			return (NULL);
610 		}
611 		if (partno >= gpt->efi_nparts) {
612 			efi_free(gpt);
613 			(void) mddeverror(ep, MDE_INVALID_PART, NODEV64,
614 						rname);
615 			return (NULL);
616 		}
617 
618 		/* convert to our format */
619 		metafreevtoc(&dnp->vtoc);
620 		meta_efi_to_mdvtoc(gpt, &dnp->vtoc);
621 		if (dnp->vtoc.nparts > MD_MAX_PARTS) {
622 			(void) mddeverror(ep, MDE_TOO_MANY_PARTS, NODEV64,
623 			    rname);
624 			return (NULL);
625 		}
626 		/*
627 		 * libmeta needs at least V_NUMPAR partitions.
628 		 * If we have an EFI partition with less than V_NUMPAR slices,
629 		 * we nevertheless reserve space for V_NUMPAR
630 		 */
631 
632 		if (dnp->vtoc.nparts < V_NUMPAR) {
633 			dnp->vtoc.nparts = V_NUMPAR;
634 		}
635 		meta_efi_to_mdgeom(gpt, &dnp->geom);
636 		efi_free(gpt);
637 	} else {
638 		/* no error on DKIOCGGEOM, try meta_getvtoc */
639 		struct vtoc	vtoc;
640 
641 		if (meta_getvtoc(fd, np->cname, &vtoc, &partno, ep) < 0) {
642 			(void) close(fd);
643 			return (NULL);
644 		}
645 		(void) close(fd);
646 
647 		/* convert to our format */
648 		meta_geom_to_md(&geom, &dnp->geom);
649 		metafreevtoc(&dnp->vtoc);
650 		meta_vtoc_to_md(&vtoc, &dnp->vtoc);
651 	}
652 
653 	/* fix up any drives which are now accessible */
654 	if ((nocache) && (dnp->type == MDT_ACCES) &&
655 	    (dnp->vtoc.nparts == dnp->parts.parts_len)) {
656 		dnp->type = MDT_COMP;
657 		dnp->errnum = 0;
658 	}
659 
660 	/* save partno */
661 	assert(partno < dnp->vtoc.nparts);
662 	if (partnop != NULL)
663 		*partnop = partno;
664 
665 	/* return info */
666 	return (&dnp->vtoc);
667 }
668 
669 static void
670 meta_mdvtoc_to_vtoc(
671 	mdvtoc_t	*mdvp,
672 	struct vtoc	*vp
673 )
674 {
675 	uint_t		i;
676 
677 	(void) memset(&vp->v_part, '\0', sizeof (vp->v_part));
678 	vp->v_nparts = (ushort_t)mdvp->nparts;
679 	for (i = 0; (i < mdvp->nparts); ++i) {
680 		vp->v_part[i].p_start = (daddr32_t)mdvp->parts[i].start;
681 		vp->v_part[i].p_size  = (daddr32_t)mdvp->parts[i].size;
682 		vp->v_part[i].p_tag   = mdvp->parts[i].tag;
683 		vp->v_part[i].p_flag  = mdvp->parts[i].flag;
684 	}
685 }
686 
687 /*
688  * Set the vtoc, but use the cached copy to get the info from.
689  * We write np->drivenamep->vtoc to disk.
690  * Before we can do this we read the vtoc in.
691  * if we're dealing with a metadevice and this metadevice is a 64 bit device
692  *	we can use meta_getmdvtoc/meta_setmdvtoc
693  * else
694  * 	we use meta_getvtoc/meta_setvtoc but than we first have to convert
695  *	dnp->vtoc (actually being a mdvtoc_t) into a vtoc_t
696  */
697 int
698 metasetvtoc(
699 	mdname_t	*np,
700 	md_error_t	*ep
701 )
702 {
703 	char		*rname = np->rname;
704 	mddrivename_t	*dnp = np->drivenamep;
705 	int		fd;
706 	int		err;
707 	int 		save_errno;
708 	struct dk_geom	geom;
709 
710 	if ((fd = open(rname, (O_RDONLY | O_NDELAY), 0)) < 0)
711 		return (mdsyserror(ep, errno, rname));
712 
713 	err = ioctl(fd, DKIOCGGEOM, &geom);
714 	save_errno = errno;
715 	if (err == 0) {
716 		struct vtoc	vtoc;
717 
718 		if (meta_getvtoc(fd, np->cname, &vtoc, NULL, ep) < 0) {
719 			(void) close(fd);
720 			return (-1);
721 		}
722 
723 		meta_mdvtoc_to_vtoc(&dnp->vtoc, &vtoc);
724 
725 		if (meta_setvtoc(fd, np->cname, &vtoc, ep) < 0) {
726 			(void) close(fd);
727 			return (-1);
728 		}
729 	} else if (save_errno == ENOTSUP) {
730 		struct dk_gpt	*gpt;
731 		int		ret;
732 
733 		/* allocation of gpt is done in meta_mdvtoc_to_efi */
734 		meta_mdvtoc_to_efi(&dnp->vtoc, &gpt);
735 
736 		ret = efi_write(fd, gpt);
737 		save_errno = errno;
738 		free(gpt);
739 		if (ret != 0) {
740 			(void) close(fd);
741 			return (mdsyserror(ep, save_errno, rname));
742 		} else {
743 			(void) close(fd);
744 			return (0);
745 		}
746 
747 	} else {
748 		(void) close(fd);
749 		return (mdsyserror(ep, save_errno, rname));
750 	}
751 
752 	(void) close(fd);
753 
754 	return (0);
755 }
756 
757 mdgeom_t *
758 metagetgeom(
759 	mdname_t	*np,	/* only rname, drivenamep, are setup */
760 	md_error_t	*ep
761 )
762 {
763 	if (metagetvtoc(np, FALSE, NULL, ep) == NULL)
764 		return (NULL);
765 	return (&np->drivenamep->geom);
766 }
767 
768 mdcinfo_t *
769 metagetcinfo(
770 	mdname_t	*np,	/* only rname, drivenamep, are setup */
771 	md_error_t	*ep
772 )
773 {
774 	char			*rname = np->rname;
775 	mddrivename_t		*dnp = np->drivenamep;
776 	int			fd;
777 	struct dk_cinfo		cinfo;
778 
779 	/* short circuit */
780 	if (dnp->cinfo.cname[0] != '\0')
781 		return (&dnp->cinfo);
782 
783 	/* get controller info */
784 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
785 		(void) mdsyserror(ep, errno, rname);
786 		return (NULL);
787 	}
788 	if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
789 		int	save = errno;
790 
791 		(void) close(fd);
792 		if (save == ENOTTY) {
793 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
794 		} else {
795 			(void) mdsyserror(ep, save, rname);
796 		}
797 		return (NULL);
798 	}
799 	(void) close(fd);	/* sd/ssd bug */
800 
801 	/* convert to our format */
802 	if (meta_cinfo_to_md(np, &cinfo, &dnp->cinfo, ep) != 0)
803 		return (NULL);
804 
805 	/* return info */
806 	return (&dnp->cinfo);
807 }
808 
809 /*
810  * get partition number
811  */
812 int
813 metagetpartno(
814 	mdname_t	*np,
815 	md_error_t	*ep
816 )
817 {
818 	mdvtoc_t	*vtocp;
819 	uint_t		partno;
820 
821 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
822 		return (-1);
823 	assert(partno < vtocp->nparts);
824 	return (partno);
825 }
826 
827 /*
828  * get size of device
829  */
830 diskaddr_t
831 metagetsize(
832 	mdname_t	*np,
833 	md_error_t	*ep
834 )
835 {
836 	mdvtoc_t	*vtocp;
837 	uint_t		partno;
838 
839 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
840 		return (MD_DISKADDR_ERROR);
841 	assert(partno < vtocp->nparts);
842 	return (vtocp->parts[partno].size);
843 }
844 
845 /*
846  * get label of device
847  */
848 diskaddr_t
849 metagetlabel(
850 	mdname_t	*np,
851 	md_error_t	*ep
852 )
853 {
854 	mdvtoc_t	*vtocp;
855 	uint_t		partno;
856 
857 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
858 		return (MD_DISKADDR_ERROR);
859 	assert(partno < vtocp->nparts);
860 	return (vtocp->parts[partno].label);
861 }
862 
863 /*
864  * find out where database replicas end
865  */
866 static int
867 mddb_getendblk(
868 	mdsetname_t		*sp,
869 	mdname_t		*np,
870 	diskaddr_t		*endblkp,
871 	md_error_t		*ep
872 )
873 {
874 	md_replicalist_t	*rlp = NULL;
875 	md_replicalist_t	*rl;
876 
877 	/* make sure we have a component */
878 	*endblkp = 0;
879 	if (metaismeta(np))
880 		return (0);
881 
882 	/* get replicas, quit if none */
883 	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &rlp, ep) < 0) {
884 		if (! mdismddberror(ep, MDE_DB_NODB))
885 			return (-1);
886 		mdclrerror(ep);
887 		return (0);
888 	} else if (rlp == NULL)
889 		return (0);
890 
891 	/* go through all the replicas */
892 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
893 		md_replica_t	*rp = rl->rl_repp;
894 		mdname_t	*repnamep = rp->r_namep;
895 		diskaddr_t	dbend;
896 
897 		if (np->dev != repnamep->dev)
898 			continue;
899 		dbend = rp->r_blkno + rp->r_nblk - 1;
900 		if (dbend > *endblkp)
901 			*endblkp = dbend;
902 	}
903 
904 	/* cleanup, return success */
905 	metafreereplicalist(rlp);
906 	return (0);
907 }
908 
909 /*
910  * return cached start block
911  */
912 static diskaddr_t
913 metagetend(
914 	mdsetname_t	*sp,
915 	mdname_t	*np,
916 	md_error_t	*ep
917 )
918 {
919 	diskaddr_t	end_blk = MD_DISKADDR_ERROR;
920 
921 	/* short circuit */
922 	if (np->end_blk != MD_DISKADDR_ERROR)
923 		return (np->end_blk);
924 
925 	/* look for database locations */
926 	if (mddb_getendblk(sp, np, &end_blk, ep) != 0)
927 		return (MD_DISKADDR_ERROR);
928 
929 	/* success */
930 	np->end_blk = end_blk;
931 	return (end_blk);
932 }
933 
934 /*
935  * does device have a metadb
936  */
937 int
938 metahasmddb(
939 	mdsetname_t	*sp,
940 	mdname_t	*np,
941 	md_error_t	*ep
942 )
943 {
944 	if (metagetend(sp, np, ep) == MD_DISKADDR_ERROR)
945 		return (-1);
946 	else if (np->end_blk > 0)
947 		return (1);
948 	else
949 		return (0);
950 }
951 
952 /*
953  * return cached start block
954  */
955 diskaddr_t
956 metagetstart(
957 	mdsetname_t	*sp,
958 	mdname_t	*np,
959 	md_error_t	*ep
960 )
961 {
962 	diskaddr_t	start_blk = MD_DISKADDR_ERROR;
963 
964 	/* short circuit */
965 	if (np->start_blk != MD_DISKADDR_ERROR)
966 		return (np->start_blk);
967 
968 	/* look for database locations */
969 	if ((start_blk = metagetend(sp, np, ep)) == MD_DISKADDR_ERROR)
970 		return (MD_DISKADDR_ERROR);
971 
972 	/* check for label */
973 	if (start_blk == 0) {
974 		start_blk = metagetlabel(np, ep);
975 		if (start_blk == MD_DISKADDR_ERROR) {
976 			return (MD_DISKADDR_ERROR);
977 		}
978 	}
979 
980 	/* roundup to next cylinder */
981 	if (start_blk != 0) {
982 		mdgeom_t	*geomp;
983 
984 		if ((geomp = metagetgeom(np, ep)) == NULL)
985 			return (MD_DISKADDR_ERROR);
986 		start_blk = roundup(start_blk, (geomp->nhead * geomp->nsect));
987 	}
988 
989 	/* success */
990 	np->start_blk = start_blk;
991 	return (start_blk);
992 }
993 
994 /*
995  * return cached devices name
996  */
997 char *
998 metagetdevicesname(
999 	mdname_t	*np,
1000 	md_error_t	*ep
1001 )
1002 {
1003 	char		path[MAXPATHLEN + 1];
1004 	int		len;
1005 
1006 	/* short circuit */
1007 	if (np->devicesname != NULL)
1008 		return (np->devicesname);
1009 
1010 	/* follow symlink */
1011 	if ((len = readlink(np->bname, path, (sizeof (path) - 1))) < 0) {
1012 		(void) mdsyserror(ep, errno, np->bname);
1013 		return (NULL);
1014 	} else if (len >= sizeof (path)) {
1015 		(void) mdsyserror(ep, ENAMETOOLONG, np->bname);
1016 		return (NULL);
1017 	}
1018 	path[len] = '\0';
1019 	if ((len = strfind(path, "/devices/")) < 0) {
1020 		(void) mddeverror(ep, MDE_DEVICES_NAME, np->dev, np->bname);
1021 		return (NULL);
1022 	}
1023 
1024 	/* return name */
1025 	np->devicesname = Strdup(path + len + strlen("/devices"));
1026 	return (np->devicesname);
1027 }
1028 
1029 /*
1030  * get metadevice misc name
1031  */
1032 char *
1033 metagetmiscname(
1034 	mdname_t		*np,
1035 	md_error_t		*ep
1036 )
1037 {
1038 	mddrivename_t		*dnp = np->drivenamep;
1039 	md_i_driverinfo_t	mid;
1040 
1041 	/* short circuit */
1042 	if (dnp->miscname != NULL)
1043 		return (dnp->miscname);
1044 	if (metachkmeta(np, ep) != 0)
1045 		return (NULL);
1046 
1047 	/* get misc module from driver */
1048 	(void) memset(&mid, 0, sizeof (mid));
1049 	mid.mnum = meta_getminor(np->dev);
1050 	if (metaioctl(MD_IOCGET_DRVNM, &mid, &mid.mde, np->cname) != 0) {
1051 		(void) mdstealerror(ep, &mid.mde);
1052 		return (NULL);
1053 	}
1054 
1055 	/* return miscname */
1056 	dnp->miscname = Strdup(MD_PNTDRIVERNAME(&mid));
1057 	return (dnp->miscname);
1058 }
1059 
1060 /*
1061  * get unit structure from driver
1062  */
1063 md_unit_t *
1064 meta_get_mdunit(
1065 	mdsetname_t	*sp,
1066 	mdname_t	*np,
1067 	md_error_t	*ep
1068 )
1069 {
1070 	md_i_get_t	mig;
1071 	char		*miscname = NULL;
1072 
1073 	/* should have a set */
1074 	assert(sp != NULL);
1075 	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
1076 
1077 	/* get size of unit structure */
1078 	if (metachkmeta(np, ep) != 0)
1079 		return (NULL);
1080 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1081 		return (NULL);
1082 	(void) memset(&mig, '\0', sizeof (mig));
1083 	MD_SETDRIVERNAME(&mig, miscname, sp->setno);
1084 	mig.id = meta_getminor(np->dev);
1085 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1086 		(void) mdstealerror(ep, &mig.mde);
1087 		return (NULL);
1088 	}
1089 
1090 	/* get actual unit structure */
1091 	assert(mig.size > 0);
1092 	mig.mdp = (uintptr_t)Zalloc(mig.size);
1093 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1094 		(void) mdstealerror(ep, &mig.mde);
1095 		Free((void *)(uintptr_t)mig.mdp);
1096 		return (NULL);
1097 	}
1098 
1099 	return ((md_unit_t *)(uintptr_t)mig.mdp);
1100 }
1101 
1102 /*
1103  * free metadevice unit
1104  */
1105 void
1106 meta_free_unit(
1107 	mddrivename_t	*dnp
1108 )
1109 {
1110 	if (dnp->unitp != NULL) {
1111 		switch (dnp->unitp->type) {
1112 		case MD_DEVICE:
1113 			meta_free_stripe((md_stripe_t *)dnp->unitp);
1114 			break;
1115 		case MD_METAMIRROR:
1116 			meta_free_mirror((md_mirror_t *)dnp->unitp);
1117 			break;
1118 		case MD_METATRANS:
1119 			meta_free_trans((md_trans_t *)dnp->unitp);
1120 			break;
1121 		case MD_METARAID:
1122 			meta_free_raid((md_raid_t *)dnp->unitp);
1123 			break;
1124 		case MD_METASP:
1125 			meta_free_sp((md_sp_t *)dnp->unitp);
1126 			break;
1127 		default:
1128 			assert(0);
1129 			break;
1130 		}
1131 		dnp->unitp = NULL;
1132 	}
1133 }
1134 
1135 /*
1136  * free metadevice name info
1137  */
1138 void
1139 meta_invalidate_name(
1140 	mdname_t	*namep
1141 )
1142 {
1143 	mddrivename_t	*dnp = namep->drivenamep;
1144 
1145 	/* get rid of cached name info */
1146 	if (namep->devicesname != NULL) {
1147 		Free(namep->devicesname);
1148 		namep->devicesname = NULL;
1149 	}
1150 	namep->key = MD_KEYBAD;
1151 	namep->start_blk = -1;
1152 	namep->end_blk = -1;
1153 
1154 	/* get rid of cached drivename info */
1155 	(void) memset(&dnp->geom, 0, sizeof (dnp->geom));
1156 	(void) memset(&dnp->cinfo, 0, sizeof (dnp->cinfo));
1157 	metafreevtoc(&dnp->vtoc);
1158 	metaflushsidenames(dnp);
1159 	dnp->side_names_key = MD_KEYBAD;
1160 	if (dnp->miscname != NULL) {
1161 		Free(dnp->miscname);
1162 		dnp->miscname = NULL;
1163 	}
1164 	meta_free_unit(dnp);
1165 }
1166 
1167 /*
1168  * get metadevice unit
1169  */
1170 md_common_t *
1171 meta_get_unit(
1172 	mdsetname_t	*sp,
1173 	mdname_t	*np,
1174 	md_error_t	*ep
1175 )
1176 {
1177 	char		*miscname;
1178 
1179 	/* short circuit */
1180 	if (np->drivenamep->unitp != NULL)
1181 		return (np->drivenamep->unitp);
1182 	if (metachkmeta(np, ep) != 0)
1183 		return (NULL);
1184 
1185 	/* dispatch */
1186 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1187 		return (NULL);
1188 	else if (strcmp(miscname, MD_STRIPE) == 0)
1189 		return ((md_common_t *)meta_get_stripe(sp, np, ep));
1190 	else if (strcmp(miscname, MD_MIRROR) == 0)
1191 		return ((md_common_t *)meta_get_mirror(sp, np, ep));
1192 	else if (strcmp(miscname, MD_TRANS) == 0)
1193 		return ((md_common_t *)meta_get_trans(sp, np, ep));
1194 	else if (strcmp(miscname, MD_RAID) == 0)
1195 		return ((md_common_t *)meta_get_raid(sp, np, ep));
1196 	else if (strcmp(miscname, MD_SP) == 0)
1197 		return ((md_common_t *)meta_get_sp(sp, np, ep));
1198 	else {
1199 		(void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
1200 		    np->cname);
1201 		return (NULL);
1202 	}
1203 }
1204 
1205 
1206 int
1207 meta_isopen(
1208 	mdsetname_t	*sp,
1209 	mdname_t	*np,
1210 	md_error_t	*ep,
1211 	mdcmdopts_t	options
1212 )
1213 {
1214 	md_isopen_t	d;
1215 
1216 	if (metachkmeta(np, ep) != 0)
1217 		return (-1);
1218 
1219 	(void) memset(&d, '\0', sizeof (d));
1220 	d.dev = np->dev;
1221 	if (metaioctl(MD_IOCISOPEN, &d, &d.mde, np->cname) != 0)
1222 		return (mdstealerror(ep, &d.mde));
1223 
1224 	/*
1225 	 * shortcut: if the device is open, no need to check on other nodes,
1226 	 * even in case of a mn metadevice
1227 	 * Also return in case we're told not to check on other nodes.
1228 	 */
1229 	if ((d.isopen != 0) || ((options & MDCMD_MN_OPEN_CHECK) == 0)) {
1230 		return (d.isopen);
1231 	}
1232 
1233 	/*
1234 	 * If the device is closed locally, but it's a mn device,
1235 	 * check on all other nodes, too
1236 	 */
1237 	if (sp->setno != MD_LOCAL_SET) {
1238 		(void) metaget_setdesc(sp, ep); /* not supposed to fail */
1239 		if (sp->setdesc->sd_flags & MD_SR_MN) {
1240 			int		err = 0;
1241 			md_mn_result_t *resp;
1242 			/*
1243 			 * This message is never directly issued.
1244 			 * So we launch it with a suspend override flag.
1245 			 * If the commd is suspended, and this message comes
1246 			 * along it must be sent due to replaying a metainit or
1247 			 * similar. In that case we don't want this message to
1248 			 * be blocked.
1249 			 * If the commd is not suspended, the flag does no harm.
1250 			 * Additionally we don't want the result of the message
1251 			 * cached in the MCT, because we want uptodate results,
1252 			 * and the message doesn't need being logged either.
1253 			 * Hence NO_LOG and NO_MCT
1254 			 */
1255 			err = mdmn_send_message(
1256 				sp->setno,
1257 				MD_MN_MSG_CLU_CHECK,
1258 				MD_MSGF_NO_MCT | MD_MSGF_STOP_ON_ERROR |
1259 				MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND,
1260 				(char *)&d, sizeof (md_isopen_t),
1261 				&resp, ep);
1262 			if (err == 0) {
1263 				d.isopen = resp->mmr_exitval;
1264 			} else {
1265 				/*
1266 				 * in case some error occurred,
1267 				 * we better say the device is open
1268 				 */
1269 				d.isopen = 1;
1270 			}
1271 			if (resp != (md_mn_result_t *)NULL) {
1272 				free_result(resp);
1273 			}
1274 
1275 		}
1276 	}
1277 
1278 	return (d.isopen);
1279 }
1280