xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c (revision 35366b936dd27e7a780ce1c1fccdf6e3c3defe69)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <dlfcn.h>
28 #include <meta.h>
29 #include <metadyn.h>
30 #include <ctype.h>
31 #include <dirent.h>
32 #include <devid.h>
33 #include <sys/param.h>
34 #include <sys/scsi/impl/uscsi.h>
35 #include <sys/scsi/generic/commands.h>
36 #include <sys/scsi/generic/inquiry.h>
37 #include <sys/efi_partition.h>
38 
/*
 * One node of the singly linked controller cache.  Each node pairs a
 * controller name with the controller type recorded for it.
 */
typedef struct ctlr_cache	ctlr_cache_t;

struct ctlr_cache {
	char		*ctlr_nm;	/* controller name */
	int		ctlr_ty;	/* controller type */
	ctlr_cache_t	*ctlr_nx;	/* next node, NULL at the tail */
};

/* head of the controller cache; NULL while the cache is empty */
static ctlr_cache_t	*ctlr_cache = NULL;
46 
47 
48 /*
49  * return set for a device
50  */
51 mdsetname_t *
52 metagetset(
53 	mdname_t	*np,
54 	int		bypass_daemon,
55 	md_error_t	*ep
56 )
57 {
58 	mdsetname_t	*sp;
59 
60 	/* metadevice */
61 	if (metaismeta(np))
62 		return (metasetnosetname(MD_MIN2SET(meta_getminor(np->dev)),
63 		    ep));
64 
65 	/* regular device */
66 	if (meta_is_drive_in_anyset(np->drivenamep, &sp, bypass_daemon,
67 	    ep) != 0)
68 		return (NULL);
69 
70 	if (sp != NULL)
71 		return (sp);
72 
73 	return (metasetnosetname(MD_LOCAL_SET, ep));
74 }
75 
76 /*
77  * convert system to md types
78  */
79 static void
80 meta_geom_to_md(
81 	struct dk_geom	*gp,
82 	mdgeom_t	*mdgp
83 )
84 {
85 	(void) memset(mdgp, '\0', sizeof (*mdgp));
86 	mdgp->ncyl = gp->dkg_ncyl;
87 	mdgp->nhead = gp->dkg_nhead;
88 	mdgp->nsect = gp->dkg_nsect;
89 	mdgp->rpm = gp->dkg_rpm;
90 	mdgp->write_reinstruct = gp->dkg_write_reinstruct;
91 	mdgp->read_reinstruct = gp->dkg_read_reinstruct;
92 	mdgp->blk_sz = DEV_BSIZE;
93 }
94 
95 /*
96  * convert efi to md types
97  */
98 static void
99 meta_efi_to_mdgeom(struct dk_gpt *gpt, mdgeom_t	*mdgp)
100 {
101 	(void) memset(mdgp, '\0', sizeof (*mdgp));
102 	mdgp->ncyl = (gpt->efi_last_u_lba - gpt->efi_first_u_lba) /
103 	    (MD_EFI_FG_HEADS * MD_EFI_FG_SECTORS);
104 	mdgp->nhead = MD_EFI_FG_HEADS;
105 	mdgp->nsect = MD_EFI_FG_SECTORS;
106 	mdgp->rpm = MD_EFI_FG_RPM;
107 	mdgp->write_reinstruct = MD_EFI_FG_WRI;
108 	mdgp->read_reinstruct = MD_EFI_FG_RRI;
109 	mdgp->blk_sz = DEV_BSIZE;
110 }
111 
112 static void
113 meta_efi_to_mdvtoc(struct dk_gpt *gpt, mdvtoc_t *mdvp)
114 {
115 	char		typename[EFI_PART_NAME_LEN];
116 	uint_t		i;
117 
118 	(void) memset(mdvp, '\0', sizeof (*mdvp));
119 	mdvp->nparts = gpt->efi_nparts;
120 	if (mdvp->nparts > MD_MAX_PARTS)
121 		return;
122 
123 	mdvp->first_lba = gpt->efi_first_u_lba;
124 	mdvp->last_lba = gpt->efi_last_u_lba;
125 	mdvp->lbasize = gpt->efi_lbasize;
126 
127 	for (i = 0; (i < gpt->efi_nparts); ++i) {
128 		mdvp->parts[i].start = gpt->efi_parts[i].p_start;
129 		mdvp->parts[i].size = gpt->efi_parts[i].p_size;
130 		mdvp->parts[i].tag = gpt->efi_parts[i].p_tag;
131 		mdvp->parts[i].flag = gpt->efi_parts[i].p_flag;
132 		/*
133 		 * It is possible to present an efi label but be using vtoc
134 		 * disks to create a > 1 TB metadevice.  In case the first
135 		 * disk in the underlying metadevice is a vtoc disk and starts
136 		 * at the beginning of the disk it is necessary to convey this
137 		 * information to the user.
138 		 */
139 		if (mdvp->parts[i].size > 0 &&
140 		    mdvp->parts[i].start != 0 && mdvp->nparts == 1) {
141 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
142 			mdvp->parts[i].start = 0;
143 		}
144 
145 		/*
146 		 * Due to the lack of a label for the entire partition table,
147 		 * we use p_name of the reserved partition
148 		 */
149 		if ((gpt->efi_parts[i].p_tag == V_RESERVED) &&
150 		    (gpt->efi_parts[i].p_name != NULL)) {
151 			(void) strlcpy(typename, gpt->efi_parts[i].p_name,
152 			    EFI_PART_NAME_LEN);
153 			/* Stop at first (if any) space or tab */
154 			(void) strtok(typename, " \t");
155 			mdvp->typename = Strdup(typename);
156 		}
157 	}
158 }
159 
160 static void
161 meta_mdvtoc_to_efi(mdvtoc_t *mdvp, struct dk_gpt **gpt)
162 {
163 	uint_t		i;
164 	uint_t		lastpart;
165 	size_t		size;
166 
167 	/* first we count how many partitions we have to send */
168 	for (i = 0; i < MD_MAX_PARTS; i++) {
169 		if ((mdvp->parts[i].start == 0) &&
170 		    (mdvp->parts[i].size == 0) &&
171 		    (mdvp->parts[i].tag != V_RESERVED)) {
172 			continue;
173 		}
174 		/* if we are here, we know the partition is really used */
175 		lastpart = i;
176 	}
177 	size = sizeof (struct dk_gpt) + (sizeof (struct dk_part) * lastpart);
178 	*gpt = calloc(size, sizeof (char));
179 
180 	(*gpt)->efi_nparts = lastpart + 1;
181 	(*gpt)->efi_first_u_lba = mdvp->first_lba;
182 	(*gpt)->efi_last_u_lba = mdvp->last_lba;
183 	(*gpt)->efi_lbasize = mdvp->lbasize;
184 	for (i = 0; (i < (*gpt)->efi_nparts); ++i) {
185 		(*gpt)->efi_parts[i].p_start = mdvp->parts[i].start;
186 		(*gpt)->efi_parts[i].p_size = mdvp->parts[i].size;
187 		(*gpt)->efi_parts[i].p_tag = mdvp->parts[i].tag;
188 		(*gpt)->efi_parts[i].p_flag = mdvp->parts[i].flag;
189 		/*
190 		 * Due to the lack of a label for the entire partition table,
191 		 * we use p_name of the reserved partition
192 		 */
193 		if (((*gpt)->efi_parts[i].p_tag == V_RESERVED) &&
194 		    (mdvp->typename != NULL)) {
195 			(void) strlcpy((*gpt)->efi_parts[i].p_name,
196 			    mdvp->typename, EFI_PART_NAME_LEN);
197 		}
198 	}
199 }
200 
201 
202 void
203 ctlr_cache_add(char *nm, int ty)
204 {
205 	ctlr_cache_t	**ccpp;
206 
207 	for (ccpp = &ctlr_cache; *ccpp != NULL; ccpp = &(*ccpp)->ctlr_nx)
208 		if (strcmp((*ccpp)->ctlr_nm, nm) == 0)
209 			return;
210 
211 	*ccpp = Zalloc(sizeof (ctlr_cache_t));
212 	(*ccpp)->ctlr_nm = Strdup(nm);
213 	(*ccpp)->ctlr_ty = ty;
214 }
215 
216 int
217 ctlr_cache_look(char *nm)
218 {
219 	ctlr_cache_t	*tcp;
220 
221 	for (tcp = ctlr_cache; tcp != NULL; tcp = tcp->ctlr_nx)
222 		if (strcmp(tcp->ctlr_nm, nm) == 0)
223 			return (tcp->ctlr_ty);
224 
225 	return (-1);
226 }
227 
228 
229 void
230 metaflushctlrcache(void)
231 {
232 	ctlr_cache_t	*cp, *np;
233 
234 	for (cp = ctlr_cache, np = NULL; cp != NULL; cp = np) {
235 		np = cp->ctlr_nx;
236 		Free(cp->ctlr_nm);
237 		Free(cp);
238 	}
239 	ctlr_cache = NULL;
240 }
241 
242 /*
243  * getdrvnode -- return the driver name based on mdname_t->bname
244  *	Need to free pointer when finished.
245  */
246 char *
247 getdrvnode(mdname_t *np, md_error_t *ep)
248 {
249 	char	*devicespath;
250 	char	*drvnode;
251 	char	*cp;
252 
253 	if ((devicespath = metagetdevicesname(np, ep)) == NULL)
254 		return (NULL);
255 
256 	/*
257 	 * At this point devicespath should be like the following
258 	 * "/devices/<unknow_and_dont_care>/xxxx@vvvv"
259 	 *
260 	 * There's a couple of 'if' statements below which could
261 	 * return an error condition, but I've decide to allow
262 	 * a more open approach regarding the mapping so as to
263 	 * not restrict possible future projects.
264 	 */
265 	if (drvnode = strrchr(devicespath, '/'))
266 		/*
267 		 * drvnode now just "xxxx@vvvv"
268 		 */
269 		drvnode++;
270 
271 	if (cp = strrchr(drvnode, '@'))
272 		/*
273 		 * Now drvnode is just the driver name "xxxx"
274 		 */
275 		*cp = '\0';
276 
277 	cp = Strdup(drvnode);
278 	Free(devicespath);
279 	np->devicesname = NULL;
280 
281 	return (cp);
282 }
283 
284 /*
285  * meta_load_dl -- open dynamic library using LDLIBRARYPATH, a debug
286  *    environment variable METALDPATH, or the default location.
287  */
288 static void *
289 meta_load_dl(mdname_t *np, md_error_t *ep)
290 {
291 	char	*drvnode;
292 	char	newpath[MAXPATHLEN];
293 	char	*p;
294 	void	*cookie;
295 
296 	if ((drvnode = getdrvnode(np, ep)) != NULL) {
297 
298 		/*
299 		 * Library seach algorithm:
300 		 * 1) Use LDLIBRARYPATH which is implied when a non-absolute
301 		 *    path name is passed to dlopen()
302 		 * 2) Use the value of METALDPATH as the directory. Mainly
303 		 *    used for debugging
304 		 * 3) Last search the default location of "/usr/lib"
305 		 */
306 		(void) snprintf(newpath, sizeof (newpath), "lib%s.so.1",
307 		    drvnode);
308 		if ((cookie = dlopen(newpath, RTLD_LAZY)) == NULL) {
309 			if ((p = getenv("METALDPATH")) == NULL)
310 				p = METALDPATH_DEFAULT;
311 			(void) snprintf(newpath, sizeof (newpath),
312 			    "%s/lib%s.so.1", p, drvnode);
313 			Free(drvnode);
314 			if ((cookie = dlopen(newpath, RTLD_LAZY)) != NULL) {
315 				/*
316 				 * Common failure here would be failing to
317 				 * find a libXX.so.1 such as libsd.so.1
318 				 * Some controllers will not have a library
319 				 * because there's no enclosure or name
320 				 * translation required.
321 				 */
322 				return (cookie);
323 			}
324 		} else {
325 			Free(drvnode);
326 			return (cookie);
327 		}
328 	}
329 	return (NULL);
330 }
331 
332 /*
333  * meta_match_names -- possibly convert the driver names returned by CINFO
334  */
335 static void
336 meta_match_names(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
337     md_error_t *ep)
338 {
339 	void		*cookie;
340 	meta_convert_e	((*fptr)(mdname_t *, struct dk_cinfo *, mdcinfo_t *,
341 	    md_error_t *));
342 
343 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
344 		fptr = (meta_convert_e (*)(mdname_t *, struct dk_cinfo *,
345 		    mdcinfo_t *, md_error_t *))dlsym(cookie, "convert_path");
346 		if (fptr != NULL)
347 			(void) (*fptr)(np, cp, mdcp, ep);
348 		(void) dlclose(cookie);
349 	}
350 }
351 
352 /*
353  * meta_match_enclosure -- return any enclosure info if found
354  */
355 int
356 meta_match_enclosure(mdname_t *np, mdcinfo_t *mdcp, md_error_t *ep)
357 {
358 	meta_enclosure_e	e;
359 	meta_enclosure_e	((*fptr)(mdname_t *, mdcinfo_t *,
360 	    md_error_t *));
361 	void			*cookie;
362 
363 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
364 		fptr = (meta_enclosure_e (*)(mdname_t *, mdcinfo_t *,
365 		    md_error_t *))dlsym(cookie, "get_enclosure");
366 		if (fptr != NULL) {
367 			e = (*fptr)(np, mdcp, ep);
368 			switch (e) {
369 			case Enclosure_Error:
370 				/*
371 				 * Looks like this library wanted to handle
372 				 * our device and had an internal error.
373 				 */
374 				return (1);
375 
376 			case Enclosure_Okay:
377 				/*
378 				 * Found a library to handle the request so
379 				 * just return with data provided.
380 				 */
381 				return (0);
382 
383 			case Enclosure_Noop:
384 				/*
385 				 * Need to continue the search
386 				 */
387 				break;
388 			}
389 		}
390 		(void) dlclose(cookie);
391 	}
392 	return (0);
393 }
394 
395 static int
396 meta_cinfo_to_md(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
397     md_error_t *ep)
398 {
399 	/* default */
400 	(void) memset(mdcp, '\0', sizeof (*mdcp));
401 	(void) strncpy(mdcp->cname, cp->dki_cname,
402 	    min((sizeof (mdcp->cname) - 1), sizeof (cp->dki_cname)));
403 	mdcp->ctype = MHD_CTLR_GENERIC;
404 	mdcp->cnum = cp->dki_cnum;
405 	(void) strncpy(mdcp->dname, cp->dki_dname,
406 	    min((sizeof (mdcp->dname) - 1), sizeof (cp->dki_dname)));
407 	mdcp->unit = cp->dki_unit;
408 	mdcp->maxtransfer = cp->dki_maxtransfer;
409 
410 	/*
411 	 * See if the driver name returned from DKIOCINFO
412 	 * is valid or not. In somecases, such as the ap_dmd
413 	 * driver, we need to modify the name that's return
414 	 * for everything to work.
415 	 */
416 	meta_match_names(np, cp, mdcp, ep);
417 
418 	if (meta_match_enclosure(np, mdcp, ep))
419 		return (-1);
420 
421 	/* return success */
422 	return (0);
423 }
424 
425 static void
426 meta_vtoc_to_md(
427 	struct extvtoc	*vp,
428 	mdvtoc_t	*mdvp
429 )
430 {
431 	char		typename[sizeof (vp->v_asciilabel) + 1];
432 	uint_t		i;
433 
434 	(void) memset(mdvp, '\0', sizeof (*mdvp));
435 	(void) strncpy(typename, vp->v_asciilabel,
436 	    sizeof (vp->v_asciilabel));
437 	typename[sizeof (typename) - 1] = '\0';
438 	for (i = 0; ((i < sizeof (typename)) && (typename[i] != '\0')); ++i) {
439 		if ((typename[i] == ' ') || (typename[i] == '\t')) {
440 			typename[i] = '\0';
441 			break;
442 		}
443 	}
444 	mdvp->typename = Strdup(typename);
445 	mdvp->nparts = vp->v_nparts;
446 	for (i = 0; (i < vp->v_nparts); ++i) {
447 		mdvp->parts[i].start = vp->v_part[i].p_start;
448 		mdvp->parts[i].size = vp->v_part[i].p_size;
449 		mdvp->parts[i].tag = vp->v_part[i].p_tag;
450 		mdvp->parts[i].flag = vp->v_part[i].p_flag;
451 		if (vp->v_part[i].p_start == 0 && vp->v_part[i].p_size > 0)
452 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
453 	}
454 }
455 
456 /*
457  * free allocations in vtoc
458  */
459 void
460 metafreevtoc(
461 	mdvtoc_t	*vtocp
462 )
463 {
464 	if (vtocp->typename != NULL)
465 		Free(vtocp->typename);
466 	(void) memset(vtocp, 0, sizeof (*vtocp));
467 }
468 
469 /*
470  * return md types
471  */
472 mdvtoc_t *
473 metagetvtoc(
474 	mdname_t	*np,	/* only rname, drivenamep, are setup */
475 	int		nocache,
476 	uint_t		*partnop,
477 	md_error_t	*ep
478 )
479 {
480 	mddrivename_t	*dnp = np->drivenamep;
481 	struct dk_geom	geom;
482 	char		*minor_name = NULL;
483 	char		*rname = np->rname;
484 	int		fd;
485 	int		partno;
486 	int		err = 0;	    /* saves errno from ioctl */
487 	ddi_devid_t	devid;
488 	char		*p;
489 
490 	/* short circuit */
491 	if ((! nocache) && (dnp->vtoc.nparts != 0)) {
492 		if (partnop != NULL) {
493 			/*
494 			 * the following assigment works because the
495 			 * mdname_t structs are always created as part
496 			 * of the drivenamep struct.  When a user
497 			 * creates an mdname_t struct it either
498 			 * uses an existing drivenamep struct or creates
499 			 * a new one and then adds the mdname_t struct
500 			 * as part of its parts_val array.  So what is
501 			 * being computed below is the slice offset in
502 			 * the parts_val array.
503 			 */
504 			*partnop = np - np->drivenamep->parts.parts_val;
505 			assert(*partnop < dnp->parts.parts_len);
506 		}
507 		return (&dnp->vtoc);
508 	}
509 
510 	/* can't get vtoc */
511 	if (! nocache) {
512 		switch (dnp->type) {
513 		case MDT_ACCES:
514 		case MDT_UNKNOWN:
515 			(void) mdsyserror(ep, dnp->errnum, rname);
516 			return (NULL);
517 		}
518 	}
519 
520 	/* get all the info */
521 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
522 		(void) mdsyserror(ep, errno, rname);
523 		return (NULL);
524 	}
525 
526 	/*
527 	 * The disk is open so this is a good point to get the devid
528 	 * otherwise it will need to be done at another time which
529 	 * means reopening it.
530 	 */
531 	if (devid_get(fd, &devid) != 0) {
532 		/* there is no devid for the disk */
533 		if (((p = getenv("MD_DEBUG")) != NULL) &&
534 		    (strstr(p, "DEVID") != NULL)) {
535 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
536 			    "%s has no device id\n"), np->rname);
537 		}
538 		np->minor_name = (char *)NULL;
539 		dnp->devid = NULL;
540 	} else {
541 		(void) devid_get_minor_name(fd, &minor_name);
542 		/*
543 		 * The minor name could be NULL if the underlying
544 		 * device driver does not support 'minor names'.
545 		 * This means we do not use devid's for this device.
546 		 * SunCluster did driver does not support minor names.
547 		 */
548 		if (minor_name != NULL) {
549 			np->minor_name = Strdup(minor_name);
550 			devid_str_free(minor_name);
551 			dnp->devid = devid_str_encode(devid, NULL);
552 		} else {
553 			np->minor_name = (char *)NULL;
554 			dnp->devid = NULL;
555 
556 			if (((p = getenv("MD_DEBUG")) != NULL) &&
557 			    (strstr(p, "DEVID") != NULL)) {
558 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
559 				    "%s no minor name (no devid)\n"),
560 				    np->rname);
561 			}
562 		}
563 		devid_free(devid);
564 	}
565 
566 	/*
567 	 * if our drivenamep points to a device not supporting DKIOCGGEOM,
568 	 * it's likely to have an EFI label.
569 	 */
570 	(void) memset(&geom, 0, sizeof (geom));
571 	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
572 		err = errno;
573 		if (err == ENOTTY) {
574 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV, rname);
575 			(void) close(fd);
576 			return (NULL);
577 		} else if (err != ENOTSUP) {
578 			(void) mdsyserror(ep, err, rname);
579 			(void) close(fd);
580 			return (NULL);
581 		}
582 
583 	}
584 	/*
585 	 * If we are here, there was either no failure on DKIOCGGEOM or
586 	 * the failure was ENOTSUP
587 	 */
588 	if (err == ENOTSUP) {
589 		/* DKIOCGGEOM yielded ENOTSUP => try efi_alloc_and_read */
590 		struct dk_gpt	*gpt;
591 		int		save_errno;
592 
593 		/* this also sets errno */
594 		partno = efi_alloc_and_read(fd, &gpt);
595 		save_errno = errno;
596 		(void) close(fd);
597 		if (partno < 0) {
598 			efi_free(gpt);
599 			(void) mdsyserror(ep, save_errno, rname);
600 			return (NULL);
601 		}
602 		if (partno >= gpt->efi_nparts) {
603 			efi_free(gpt);
604 			(void) mddeverror(ep, MDE_INVALID_PART, NODEV64,
605 			    rname);
606 			return (NULL);
607 		}
608 
609 		/* convert to our format */
610 		metafreevtoc(&dnp->vtoc);
611 		meta_efi_to_mdvtoc(gpt, &dnp->vtoc);
612 		if (dnp->vtoc.nparts > MD_MAX_PARTS) {
613 			(void) mddeverror(ep, MDE_TOO_MANY_PARTS, NODEV64,
614 			    rname);
615 			return (NULL);
616 		}
617 		/*
618 		 * libmeta needs at least V_NUMPAR partitions.
619 		 * If we have an EFI partition with less than V_NUMPAR slices,
620 		 * we nevertheless reserve space for V_NUMPAR
621 		 */
622 
623 		if (dnp->vtoc.nparts < V_NUMPAR) {
624 			dnp->vtoc.nparts = V_NUMPAR;
625 		}
626 		meta_efi_to_mdgeom(gpt, &dnp->geom);
627 		efi_free(gpt);
628 	} else {
629 		/* no error on DKIOCGGEOM, try meta_getvtoc */
630 		struct extvtoc	vtoc;
631 
632 		if (meta_getvtoc(fd, np->cname, &vtoc, &partno, ep) < 0) {
633 			(void) close(fd);
634 			return (NULL);
635 		}
636 		(void) close(fd);
637 
638 		/* convert to our format */
639 		meta_geom_to_md(&geom, &dnp->geom);
640 		metafreevtoc(&dnp->vtoc);
641 		meta_vtoc_to_md(&vtoc, &dnp->vtoc);
642 	}
643 
644 	/* fix up any drives which are now accessible */
645 	if ((nocache) && (dnp->type == MDT_ACCES) &&
646 	    (dnp->vtoc.nparts == dnp->parts.parts_len)) {
647 		dnp->type = MDT_COMP;
648 		dnp->errnum = 0;
649 	}
650 
651 	/* save partno */
652 	assert(partno < dnp->vtoc.nparts);
653 	if (partnop != NULL)
654 		*partnop = partno;
655 
656 	/* return info */
657 	return (&dnp->vtoc);
658 }
659 
660 static void
661 meta_mdvtoc_to_vtoc(
662 	mdvtoc_t	*mdvp,
663 	struct extvtoc	*vp
664 )
665 {
666 	uint_t		i;
667 
668 	(void) memset(&vp->v_part, '\0', sizeof (vp->v_part));
669 	vp->v_nparts = (ushort_t)mdvp->nparts;
670 	for (i = 0; (i < mdvp->nparts); ++i) {
671 		vp->v_part[i].p_start = mdvp->parts[i].start;
672 		vp->v_part[i].p_size  = mdvp->parts[i].size;
673 		vp->v_part[i].p_tag   = mdvp->parts[i].tag;
674 		vp->v_part[i].p_flag  = mdvp->parts[i].flag;
675 	}
676 }
677 
678 /*
679  * Set the vtoc, but use the cached copy to get the info from.
680  * We write np->drivenamep->vtoc to disk.
681  * Before we can do this we read the vtoc in.
682  * if we're dealing with a metadevice and this metadevice is a 64 bit device
683  *	we can use meta_getmdvtoc/meta_setmdvtoc
684  * else
685  * 	we use meta_getvtoc/meta_setvtoc but than we first have to convert
686  *	dnp->vtoc (actually being a mdvtoc_t) into a vtoc_t
687  */
688 int
689 metasetvtoc(
690 	mdname_t	*np,
691 	md_error_t	*ep
692 )
693 {
694 	char		*rname = np->rname;
695 	mddrivename_t	*dnp = np->drivenamep;
696 	int		fd;
697 	int		err;
698 	int 		save_errno;
699 	struct dk_geom	geom;
700 
701 	if ((fd = open(rname, (O_RDONLY | O_NDELAY), 0)) < 0)
702 		return (mdsyserror(ep, errno, rname));
703 
704 	err = ioctl(fd, DKIOCGGEOM, &geom);
705 	save_errno = errno;
706 	if (err == 0) {
707 		struct extvtoc	vtoc;
708 
709 		if (meta_getvtoc(fd, np->cname, &vtoc, NULL, ep) < 0) {
710 			(void) close(fd);
711 			return (-1);
712 		}
713 
714 		meta_mdvtoc_to_vtoc(&dnp->vtoc, &vtoc);
715 
716 		if (meta_setvtoc(fd, np->cname, &vtoc, ep) < 0) {
717 			(void) close(fd);
718 			return (-1);
719 		}
720 	} else if (save_errno == ENOTSUP) {
721 		struct dk_gpt	*gpt;
722 		int		ret;
723 
724 		/* allocation of gpt is done in meta_mdvtoc_to_efi */
725 		meta_mdvtoc_to_efi(&dnp->vtoc, &gpt);
726 
727 		ret = efi_write(fd, gpt);
728 		save_errno = errno;
729 		free(gpt);
730 		if (ret != 0) {
731 			(void) close(fd);
732 			return (mdsyserror(ep, save_errno, rname));
733 		} else {
734 			(void) close(fd);
735 			return (0);
736 		}
737 
738 	} else {
739 		(void) close(fd);
740 		return (mdsyserror(ep, save_errno, rname));
741 	}
742 
743 	(void) close(fd);
744 
745 	return (0);
746 }
747 
748 mdgeom_t *
749 metagetgeom(
750 	mdname_t	*np,	/* only rname, drivenamep, are setup */
751 	md_error_t	*ep
752 )
753 {
754 	if (metagetvtoc(np, FALSE, NULL, ep) == NULL)
755 		return (NULL);
756 	return (&np->drivenamep->geom);
757 }
758 
759 mdcinfo_t *
760 metagetcinfo(
761 	mdname_t	*np,	/* only rname, drivenamep, are setup */
762 	md_error_t	*ep
763 )
764 {
765 	char			*rname = np->rname;
766 	mddrivename_t		*dnp = np->drivenamep;
767 	int			fd;
768 	struct dk_cinfo		cinfo;
769 
770 	/* short circuit */
771 	if (dnp->cinfo.cname[0] != '\0')
772 		return (&dnp->cinfo);
773 
774 	/* get controller info */
775 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
776 		(void) mdsyserror(ep, errno, rname);
777 		return (NULL);
778 	}
779 	if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
780 		int	save = errno;
781 
782 		(void) close(fd);
783 		if (save == ENOTTY) {
784 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
785 		} else {
786 			(void) mdsyserror(ep, save, rname);
787 		}
788 		return (NULL);
789 	}
790 	(void) close(fd);	/* sd/ssd bug */
791 
792 	/* convert to our format */
793 	if (meta_cinfo_to_md(np, &cinfo, &dnp->cinfo, ep) != 0)
794 		return (NULL);
795 
796 	/* return info */
797 	return (&dnp->cinfo);
798 }
799 
800 /*
801  * get partition number
802  */
803 int
804 metagetpartno(
805 	mdname_t	*np,
806 	md_error_t	*ep
807 )
808 {
809 	mdvtoc_t	*vtocp;
810 	uint_t		partno;
811 
812 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
813 		return (-1);
814 	assert(partno < vtocp->nparts);
815 	return (partno);
816 }
817 
818 /*
819  * get size of device
820  */
821 diskaddr_t
822 metagetsize(
823 	mdname_t	*np,
824 	md_error_t	*ep
825 )
826 {
827 	mdvtoc_t	*vtocp;
828 	uint_t		partno;
829 
830 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
831 		return (MD_DISKADDR_ERROR);
832 	assert(partno < vtocp->nparts);
833 	return (vtocp->parts[partno].size);
834 }
835 
836 /*
837  * get label of device
838  */
839 diskaddr_t
840 metagetlabel(
841 	mdname_t	*np,
842 	md_error_t	*ep
843 )
844 {
845 	mdvtoc_t	*vtocp;
846 	uint_t		partno;
847 
848 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
849 		return (MD_DISKADDR_ERROR);
850 	assert(partno < vtocp->nparts);
851 	return (vtocp->parts[partno].label);
852 }
853 
854 /*
855  * find out where database replicas end
856  */
857 static int
858 mddb_getendblk(
859 	mdsetname_t		*sp,
860 	mdname_t		*np,
861 	diskaddr_t		*endblkp,
862 	md_error_t		*ep
863 )
864 {
865 	md_replicalist_t	*rlp = NULL;
866 	md_replicalist_t	*rl;
867 
868 	/* make sure we have a component */
869 	*endblkp = 0;
870 	if (metaismeta(np))
871 		return (0);
872 
873 	/* get replicas, quit if none */
874 	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &rlp, ep) < 0) {
875 		if (! mdismddberror(ep, MDE_DB_NODB))
876 			return (-1);
877 		mdclrerror(ep);
878 		return (0);
879 	} else if (rlp == NULL)
880 		return (0);
881 
882 	/* go through all the replicas */
883 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
884 		md_replica_t	*rp = rl->rl_repp;
885 		mdname_t	*repnamep = rp->r_namep;
886 		diskaddr_t	dbend;
887 
888 		if (np->dev != repnamep->dev)
889 			continue;
890 		dbend = rp->r_blkno + rp->r_nblk - 1;
891 		if (dbend > *endblkp)
892 			*endblkp = dbend;
893 	}
894 
895 	/* cleanup, return success */
896 	metafreereplicalist(rlp);
897 	return (0);
898 }
899 
900 /*
901  * return cached start block
902  */
903 static diskaddr_t
904 metagetend(
905 	mdsetname_t	*sp,
906 	mdname_t	*np,
907 	md_error_t	*ep
908 )
909 {
910 	diskaddr_t	end_blk = MD_DISKADDR_ERROR;
911 
912 	/* short circuit */
913 	if (np->end_blk != MD_DISKADDR_ERROR)
914 		return (np->end_blk);
915 
916 	/* look for database locations */
917 	if (mddb_getendblk(sp, np, &end_blk, ep) != 0)
918 		return (MD_DISKADDR_ERROR);
919 
920 	/* success */
921 	np->end_blk = end_blk;
922 	return (end_blk);
923 }
924 
925 /*
926  * does device have a metadb
927  */
928 int
929 metahasmddb(
930 	mdsetname_t	*sp,
931 	mdname_t	*np,
932 	md_error_t	*ep
933 )
934 {
935 	if (metagetend(sp, np, ep) == MD_DISKADDR_ERROR)
936 		return (-1);
937 	else if (np->end_blk > 0)
938 		return (1);
939 	else
940 		return (0);
941 }
942 
943 /*
944  * return cached start block
945  */
946 diskaddr_t
947 metagetstart(
948 	mdsetname_t	*sp,
949 	mdname_t	*np,
950 	md_error_t	*ep
951 )
952 {
953 	diskaddr_t	start_blk = MD_DISKADDR_ERROR;
954 
955 	/* short circuit */
956 	if (np->start_blk != MD_DISKADDR_ERROR)
957 		return (np->start_blk);
958 
959 	/* look for database locations */
960 	if ((start_blk = metagetend(sp, np, ep)) == MD_DISKADDR_ERROR)
961 		return (MD_DISKADDR_ERROR);
962 
963 	/* check for label */
964 	if (start_blk == 0) {
965 		start_blk = metagetlabel(np, ep);
966 		if (start_blk == MD_DISKADDR_ERROR) {
967 			return (MD_DISKADDR_ERROR);
968 		}
969 	}
970 
971 	/* roundup to next cylinder */
972 	if (start_blk != 0) {
973 		mdgeom_t	*geomp;
974 
975 		if ((geomp = metagetgeom(np, ep)) == NULL)
976 			return (MD_DISKADDR_ERROR);
977 		start_blk = roundup(start_blk, (geomp->nhead * geomp->nsect));
978 	}
979 
980 	/* success */
981 	np->start_blk = start_blk;
982 	return (start_blk);
983 }
984 
985 /*
986  * return cached devices name
987  */
988 char *
989 metagetdevicesname(
990 	mdname_t	*np,
991 	md_error_t	*ep
992 )
993 {
994 	char		path[MAXPATHLEN + 1];
995 	int		len;
996 
997 	/* short circuit */
998 	if (np->devicesname != NULL)
999 		return (np->devicesname);
1000 
1001 	/* follow symlink */
1002 	if ((len = readlink(np->bname, path, (sizeof (path) - 1))) < 0) {
1003 		(void) mdsyserror(ep, errno, np->bname);
1004 		return (NULL);
1005 	} else if (len >= sizeof (path)) {
1006 		(void) mdsyserror(ep, ENAMETOOLONG, np->bname);
1007 		return (NULL);
1008 	}
1009 	path[len] = '\0';
1010 	if ((len = strfind(path, "/devices/")) < 0) {
1011 		(void) mddeverror(ep, MDE_DEVICES_NAME, np->dev, np->bname);
1012 		return (NULL);
1013 	}
1014 
1015 	/* return name */
1016 	np->devicesname = Strdup(path + len + strlen("/devices"));
1017 	return (np->devicesname);
1018 }
1019 
1020 /*
1021  * get metadevice misc name
1022  */
1023 char *
1024 metagetmiscname(
1025 	mdname_t		*np,
1026 	md_error_t		*ep
1027 )
1028 {
1029 	mddrivename_t		*dnp = np->drivenamep;
1030 	md_i_driverinfo_t	mid;
1031 
1032 	/* short circuit */
1033 	if (dnp->miscname != NULL)
1034 		return (dnp->miscname);
1035 	if (metachkmeta(np, ep) != 0)
1036 		return (NULL);
1037 
1038 	/* get misc module from driver */
1039 	(void) memset(&mid, 0, sizeof (mid));
1040 	mid.mnum = meta_getminor(np->dev);
1041 	if (metaioctl(MD_IOCGET_DRVNM, &mid, &mid.mde, np->cname) != 0) {
1042 		(void) mdstealerror(ep, &mid.mde);
1043 		return (NULL);
1044 	}
1045 
1046 	/* return miscname */
1047 	dnp->miscname = Strdup(MD_PNTDRIVERNAME(&mid));
1048 	return (dnp->miscname);
1049 }
1050 
1051 /*
1052  * get unit structure from driver
1053  */
1054 md_unit_t *
1055 meta_get_mdunit(
1056 	mdsetname_t	*sp,
1057 	mdname_t	*np,
1058 	md_error_t	*ep
1059 )
1060 {
1061 	md_i_get_t	mig;
1062 	char		*miscname = NULL;
1063 
1064 	/* should have a set */
1065 	assert(sp != NULL);
1066 	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
1067 
1068 	/* get size of unit structure */
1069 	if (metachkmeta(np, ep) != 0)
1070 		return (NULL);
1071 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1072 		return (NULL);
1073 	(void) memset(&mig, '\0', sizeof (mig));
1074 	MD_SETDRIVERNAME(&mig, miscname, sp->setno);
1075 	mig.id = meta_getminor(np->dev);
1076 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1077 		(void) mdstealerror(ep, &mig.mde);
1078 		return (NULL);
1079 	}
1080 
1081 	/* get actual unit structure */
1082 	assert(mig.size > 0);
1083 	mig.mdp = (uintptr_t)Zalloc(mig.size);
1084 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1085 		(void) mdstealerror(ep, &mig.mde);
1086 		Free((void *)(uintptr_t)mig.mdp);
1087 		return (NULL);
1088 	}
1089 
1090 	return ((md_unit_t *)(uintptr_t)mig.mdp);
1091 }
1092 
1093 /*
1094  * free metadevice unit
1095  */
1096 void
1097 meta_free_unit(
1098 	mddrivename_t	*dnp
1099 )
1100 {
1101 	if (dnp->unitp != NULL) {
1102 		switch (dnp->unitp->type) {
1103 		case MD_DEVICE:
1104 			meta_free_stripe((md_stripe_t *)dnp->unitp);
1105 			break;
1106 		case MD_METAMIRROR:
1107 			meta_free_mirror((md_mirror_t *)dnp->unitp);
1108 			break;
1109 		case MD_METATRANS:
1110 			meta_free_trans((md_trans_t *)dnp->unitp);
1111 			break;
1112 		case MD_METARAID:
1113 			meta_free_raid((md_raid_t *)dnp->unitp);
1114 			break;
1115 		case MD_METASP:
1116 			meta_free_sp((md_sp_t *)dnp->unitp);
1117 			break;
1118 		default:
1119 			assert(0);
1120 			break;
1121 		}
1122 		dnp->unitp = NULL;
1123 	}
1124 }
1125 
1126 /*
1127  * free metadevice name info
1128  */
1129 void
1130 meta_invalidate_name(
1131 	mdname_t	*namep
1132 )
1133 {
1134 	mddrivename_t	*dnp = namep->drivenamep;
1135 
1136 	/* get rid of cached name info */
1137 	if (namep->devicesname != NULL) {
1138 		Free(namep->devicesname);
1139 		namep->devicesname = NULL;
1140 	}
1141 	namep->key = MD_KEYBAD;
1142 	namep->start_blk = -1;
1143 	namep->end_blk = -1;
1144 
1145 	/* get rid of cached drivename info */
1146 	(void) memset(&dnp->geom, 0, sizeof (dnp->geom));
1147 	(void) memset(&dnp->cinfo, 0, sizeof (dnp->cinfo));
1148 	metafreevtoc(&dnp->vtoc);
1149 	metaflushsidenames(dnp);
1150 	dnp->side_names_key = MD_KEYBAD;
1151 	if (dnp->miscname != NULL) {
1152 		Free(dnp->miscname);
1153 		dnp->miscname = NULL;
1154 	}
1155 	meta_free_unit(dnp);
1156 }
1157 
1158 /*
1159  * get metadevice unit
1160  */
1161 md_common_t *
1162 meta_get_unit(
1163 	mdsetname_t	*sp,
1164 	mdname_t	*np,
1165 	md_error_t	*ep
1166 )
1167 {
1168 	char		*miscname;
1169 
1170 	/* short circuit */
1171 	if (np->drivenamep->unitp != NULL)
1172 		return (np->drivenamep->unitp);
1173 	if (metachkmeta(np, ep) != 0)
1174 		return (NULL);
1175 
1176 	/* dispatch */
1177 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1178 		return (NULL);
1179 	else if (strcmp(miscname, MD_STRIPE) == 0)
1180 		return ((md_common_t *)meta_get_stripe(sp, np, ep));
1181 	else if (strcmp(miscname, MD_MIRROR) == 0)
1182 		return ((md_common_t *)meta_get_mirror(sp, np, ep));
1183 	else if (strcmp(miscname, MD_TRANS) == 0)
1184 		return ((md_common_t *)meta_get_trans(sp, np, ep));
1185 	else if (strcmp(miscname, MD_RAID) == 0)
1186 		return ((md_common_t *)meta_get_raid(sp, np, ep));
1187 	else if (strcmp(miscname, MD_SP) == 0)
1188 		return ((md_common_t *)meta_get_sp(sp, np, ep));
1189 	else {
1190 		(void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
1191 		    np->cname);
1192 		return (NULL);
1193 	}
1194 }
1195 
1196 
1197 int
1198 meta_isopen(
1199 	mdsetname_t	*sp,
1200 	mdname_t	*np,
1201 	md_error_t	*ep,
1202 	mdcmdopts_t	options
1203 )
1204 {
1205 	md_isopen_t	d;
1206 
1207 	if (metachkmeta(np, ep) != 0)
1208 		return (-1);
1209 
1210 	(void) memset(&d, '\0', sizeof (d));
1211 	d.dev = np->dev;
1212 	if (metaioctl(MD_IOCISOPEN, &d, &d.mde, np->cname) != 0)
1213 		return (mdstealerror(ep, &d.mde));
1214 
1215 	/*
1216 	 * shortcut: if the device is open, no need to check on other nodes,
1217 	 * even in case of a mn metadevice
1218 	 * Also return in case we're told not to check on other nodes.
1219 	 */
1220 	if ((d.isopen != 0) || ((options & MDCMD_MN_OPEN_CHECK) == 0)) {
1221 		return (d.isopen);
1222 	}
1223 
1224 	/*
1225 	 * If the device is closed locally, but it's a mn device,
1226 	 * check on all other nodes, too
1227 	 */
1228 	if (sp->setno != MD_LOCAL_SET) {
1229 		(void) metaget_setdesc(sp, ep); /* not supposed to fail */
1230 		if (sp->setdesc->sd_flags & MD_SR_MN) {
1231 			int		err = 0;
1232 			md_mn_result_t *resp;
1233 			/*
1234 			 * This message is never directly issued.
1235 			 * So we launch it with a suspend override flag.
1236 			 * If the commd is suspended, and this message comes
1237 			 * along it must be sent due to replaying a metainit or
1238 			 * similar. In that case we don't want this message to
1239 			 * be blocked.
1240 			 * If the commd is not suspended, the flag does no harm.
1241 			 * Additionally we don't want the result of the message
1242 			 * cached in the MCT, because we want uptodate results,
1243 			 * and the message doesn't need being logged either.
1244 			 * Hence NO_LOG and NO_MCT
1245 			 */
1246 			err = mdmn_send_message(sp->setno,
1247 			    MD_MN_MSG_CLU_CHECK, MD_MSGF_NO_MCT |
1248 			    MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG |
1249 			    MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&d,
1250 			    sizeof (md_isopen_t), &resp, ep);
1251 			if (err == 0) {
1252 				d.isopen = resp->mmr_exitval;
1253 			} else {
1254 				/*
1255 				 * in case some error occurred,
1256 				 * we better say the device is open
1257 				 */
1258 				d.isopen = 1;
1259 			}
1260 			if (resp != (md_mn_result_t *)NULL) {
1261 				free_result(resp);
1262 			}
1263 
1264 		}
1265 	}
1266 
1267 	return (d.isopen);
1268 }
1269