xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_nameinfo.c (revision 03494a9880d80f834bec10a1e8f0a2f8f7c97bf4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <dlfcn.h>
28 #include <meta.h>
29 #include <metadyn.h>
30 #include <ctype.h>
31 #include <dirent.h>
32 #include <devid.h>
33 #include <sys/param.h>
34 #include <sys/scsi/impl/uscsi.h>
35 #include <sys/scsi/generic/commands.h>
36 #include <sys/scsi/generic/inquiry.h>
37 #include <sys/efi_partition.h>
38 
/*
 * One entry of the controller cache: a singly linked list that maps a
 * controller name to its controller type.
 */
struct ctlr_cache {
	char			*ctlr_nm;	/* controller name */
	int			ctlr_ty;	/* controller type */
	struct ctlr_cache	*ctlr_nx;	/* next entry, NULL at tail */
};
typedef struct ctlr_cache	ctlr_cache_t;

/* head of the controller cache; NULL while the cache is empty */
static ctlr_cache_t	*ctlr_cache = NULL;
46 
47 
48 /*
49  * return set for a device
50  */
51 mdsetname_t *
52 metagetset(
53 	mdname_t	*np,
54 	int		bypass_daemon,
55 	md_error_t	*ep
56 )
57 {
58 	mdsetname_t	*sp;
59 
60 	/* metadevice */
61 	if (metaismeta(np))
62 		return (metasetnosetname(MD_MIN2SET(meta_getminor(np->dev)),
63 		    ep));
64 
65 	/* regular device */
66 	if (meta_is_drive_in_anyset(np->drivenamep, &sp, bypass_daemon,
67 	    ep) != 0)
68 		return (NULL);
69 
70 	if (sp != NULL)
71 		return (sp);
72 
73 	return (metasetnosetname(MD_LOCAL_SET, ep));
74 }
75 
76 /*
77  * convert system to md types
78  */
79 static void
80 meta_geom_to_md(
81 	struct dk_geom	*gp,
82 	mdgeom_t	*mdgp
83 )
84 {
85 	(void) memset(mdgp, '\0', sizeof (*mdgp));
86 	mdgp->ncyl = gp->dkg_ncyl;
87 	mdgp->nhead = gp->dkg_nhead;
88 	mdgp->nsect = gp->dkg_nsect;
89 	mdgp->rpm = gp->dkg_rpm;
90 	mdgp->write_reinstruct = gp->dkg_write_reinstruct;
91 	mdgp->read_reinstruct = gp->dkg_read_reinstruct;
92 	mdgp->blk_sz = DEV_BSIZE;
93 }
94 
95 /*
96  * convert efi to md types
97  */
98 static void
99 meta_efi_to_mdgeom(struct dk_gpt *gpt, mdgeom_t	*mdgp)
100 {
101 	(void) memset(mdgp, '\0', sizeof (*mdgp));
102 	mdgp->ncyl = (gpt->efi_last_u_lba - gpt->efi_first_u_lba) /
103 	    (MD_EFI_FG_HEADS * MD_EFI_FG_SECTORS);
104 	mdgp->nhead = MD_EFI_FG_HEADS;
105 	mdgp->nsect = MD_EFI_FG_SECTORS;
106 	mdgp->rpm = MD_EFI_FG_RPM;
107 	mdgp->write_reinstruct = MD_EFI_FG_WRI;
108 	mdgp->read_reinstruct = MD_EFI_FG_RRI;
109 	mdgp->blk_sz = DEV_BSIZE;
110 }
111 
112 static void
113 meta_efi_to_mdvtoc(struct dk_gpt *gpt, mdvtoc_t *mdvp)
114 {
115 	char		typename[EFI_PART_NAME_LEN];
116 	uint_t		i;
117 
118 	(void) memset(mdvp, '\0', sizeof (*mdvp));
119 	mdvp->nparts = gpt->efi_nparts;
120 	if (mdvp->nparts > MD_MAX_PARTS)
121 		return;
122 
123 	mdvp->first_lba = gpt->efi_first_u_lba;
124 	mdvp->last_lba = gpt->efi_last_u_lba;
125 	mdvp->lbasize = gpt->efi_lbasize;
126 
127 	for (i = 0; (i < gpt->efi_nparts); ++i) {
128 		mdvp->parts[i].start = gpt->efi_parts[i].p_start;
129 		mdvp->parts[i].size = gpt->efi_parts[i].p_size;
130 		mdvp->parts[i].tag = gpt->efi_parts[i].p_tag;
131 		mdvp->parts[i].flag = gpt->efi_parts[i].p_flag;
132 		/*
133 		 * It is possible to present an efi label but be using vtoc
134 		 * disks to create a > 1 TB metadevice.  In case the first
135 		 * disk in the underlying metadevice is a vtoc disk and starts
136 		 * at the beginning of the disk it is necessary to convey this
137 		 * information to the user.
138 		 */
139 		if (mdvp->parts[i].size > 0 &&
140 		    mdvp->parts[i].start != 0 && mdvp->nparts == 1) {
141 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
142 			mdvp->parts[i].start = 0;
143 		}
144 
145 		/*
146 		 * Due to the lack of a label for the entire partition table,
147 		 * we use p_name of the reserved partition
148 		 */
149 		if ((gpt->efi_parts[i].p_tag == V_RESERVED) &&
150 		    (gpt->efi_parts[i].p_name != NULL)) {
151 			(void) strlcpy(typename, gpt->efi_parts[i].p_name,
152 			    EFI_PART_NAME_LEN);
153 			/* Stop at first (if any) space or tab */
154 			(void) strtok(typename, " \t");
155 			mdvp->typename = Strdup(typename);
156 		}
157 	}
158 }
159 
160 static void
161 meta_mdvtoc_to_efi(mdvtoc_t *mdvp, struct dk_gpt **gpt)
162 {
163 	char		typename[EFI_PART_NAME_LEN];
164 	uint_t		i;
165 	uint_t		lastpart;
166 	size_t		size;
167 
168 	/* first we count how many partitions we have to send */
169 	for (i = 0; i < MD_MAX_PARTS; i++) {
170 		if ((mdvp->parts[i].start == 0) &&
171 		    (mdvp->parts[i].size == 0) &&
172 		    (mdvp->parts[i].tag != V_RESERVED)) {
173 			continue;
174 		}
175 		/* if we are here, we know the partition is really used */
176 		lastpart = i;
177 	}
178 	size = sizeof (struct dk_gpt) + (sizeof (struct dk_part) * lastpart);
179 	*gpt = calloc(size, sizeof (char));
180 
181 	(*gpt)->efi_nparts = lastpart + 1;
182 	(*gpt)->efi_first_u_lba = mdvp->first_lba;
183 	(*gpt)->efi_last_u_lba = mdvp->last_lba;
184 	(*gpt)->efi_lbasize = mdvp->lbasize;
185 	for (i = 0; (i < (*gpt)->efi_nparts); ++i) {
186 		(*gpt)->efi_parts[i].p_start = mdvp->parts[i].start;
187 		(*gpt)->efi_parts[i].p_size = mdvp->parts[i].size;
188 		(*gpt)->efi_parts[i].p_tag = mdvp->parts[i].tag;
189 		(*gpt)->efi_parts[i].p_flag = mdvp->parts[i].flag;
190 		/*
191 		 * Due to the lack of a label for the entire partition table,
192 		 * we use p_name of the reserved partition
193 		 */
194 		if (((*gpt)->efi_parts[i].p_tag == V_RESERVED) &&
195 		    (mdvp->typename != NULL)) {
196 			(void) strlcpy((*gpt)->efi_parts[i].p_name, typename,
197 			    EFI_PART_NAME_LEN);
198 		}
199 	}
200 }
201 
202 
203 void
204 ctlr_cache_add(char *nm, int ty)
205 {
206 	ctlr_cache_t	**ccpp;
207 
208 	for (ccpp = &ctlr_cache; *ccpp != NULL; ccpp = &(*ccpp)->ctlr_nx)
209 		if (strcmp((*ccpp)->ctlr_nm, nm) == 0)
210 			return;
211 
212 	*ccpp = Zalloc(sizeof (ctlr_cache_t));
213 	(*ccpp)->ctlr_nm = Strdup(nm);
214 	(*ccpp)->ctlr_ty = ty;
215 }
216 
217 int
218 ctlr_cache_look(char *nm)
219 {
220 	ctlr_cache_t	*tcp;
221 
222 	for (tcp = ctlr_cache; tcp != NULL; tcp = tcp->ctlr_nx)
223 		if (strcmp(tcp->ctlr_nm, nm) == 0)
224 			return (tcp->ctlr_ty);
225 
226 	return (-1);
227 }
228 
229 
230 void
231 metaflushctlrcache(void)
232 {
233 	ctlr_cache_t	*cp, *np;
234 
235 	for (cp = ctlr_cache, np = NULL; cp != NULL; cp = np) {
236 		np = cp->ctlr_nx;
237 		Free(cp->ctlr_nm);
238 		Free(cp);
239 	}
240 	ctlr_cache = NULL;
241 }
242 
243 /*
244  * getdrvnode -- return the driver name based on mdname_t->bname
245  *	Need to free pointer when finished.
246  */
247 char *
248 getdrvnode(mdname_t *np, md_error_t *ep)
249 {
250 	char	*devicespath;
251 	char	*drvnode;
252 	char	*cp;
253 
254 	if ((devicespath = metagetdevicesname(np, ep)) == NULL)
255 		return (NULL);
256 
257 	/*
258 	 * At this point devicespath should be like the following
259 	 * "/devices/<unknow_and_dont_care>/xxxx@vvvv"
260 	 *
261 	 * There's a couple of 'if' statements below which could
262 	 * return an error condition, but I've decide to allow
263 	 * a more open approach regarding the mapping so as to
264 	 * not restrict possible future projects.
265 	 */
266 	if (drvnode = strrchr(devicespath, '/'))
267 		/*
268 		 * drvnode now just "xxxx@vvvv"
269 		 */
270 		drvnode++;
271 
272 	if (cp = strrchr(drvnode, '@'))
273 		/*
274 		 * Now drvnode is just the driver name "xxxx"
275 		 */
276 		*cp = '\0';
277 
278 	cp = Strdup(drvnode);
279 	Free(devicespath);
280 	np->devicesname = NULL;
281 
282 	return (cp);
283 }
284 
285 /*
286  * meta_load_dl -- open dynamic library using LDLIBRARYPATH, a debug
287  *    environment variable METALDPATH, or the default location.
288  */
289 static void *
290 meta_load_dl(mdname_t *np, md_error_t *ep)
291 {
292 	char	*drvnode;
293 	char	newpath[MAXPATHLEN];
294 	char	*p;
295 	void	*cookie;
296 
297 	if ((drvnode = getdrvnode(np, ep)) != NULL) {
298 
299 		/*
300 		 * Library seach algorithm:
301 		 * 1) Use LDLIBRARYPATH which is implied when a non-absolute
302 		 *    path name is passed to dlopen()
303 		 * 2) Use the value of METALDPATH as the directory. Mainly
304 		 *    used for debugging
305 		 * 3) Last search the default location of "/usr/lib"
306 		 */
307 		(void) snprintf(newpath, sizeof (newpath), "lib%s.so.1",
308 		    drvnode);
309 		if ((cookie = dlopen(newpath, RTLD_LAZY)) == NULL) {
310 			if ((p = getenv("METALDPATH")) == NULL)
311 				p = METALDPATH_DEFAULT;
312 			(void) snprintf(newpath, sizeof (newpath),
313 			    "%s/lib%s.so.1", p, drvnode);
314 			Free(drvnode);
315 			if ((cookie = dlopen(newpath, RTLD_LAZY)) != NULL) {
316 				/*
317 				 * Common failure here would be failing to
318 				 * find a libXX.so.1 such as libsd.so.1
319 				 * Some controllers will not have a library
320 				 * because there's no enclosure or name
321 				 * translation required.
322 				 */
323 				return (cookie);
324 			}
325 		} else {
326 			Free(drvnode);
327 			return (cookie);
328 		}
329 	}
330 	return (NULL);
331 }
332 
333 /*
334  * meta_match_names -- possibly convert the driver names returned by CINFO
335  */
336 static void
337 meta_match_names(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
338     md_error_t *ep)
339 {
340 	void		*cookie;
341 	meta_convert_e	((*fptr)(mdname_t *, struct dk_cinfo *, mdcinfo_t *,
342 	    md_error_t *));
343 
344 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
345 		fptr = (meta_convert_e (*)(mdname_t *, struct dk_cinfo *,
346 		    mdcinfo_t *, md_error_t *))dlsym(cookie, "convert_path");
347 		if (fptr != NULL)
348 			(void) (*fptr)(np, cp, mdcp, ep);
349 		(void) dlclose(cookie);
350 	}
351 }
352 
353 /*
354  * meta_match_enclosure -- return any enclosure info if found
355  */
356 int
357 meta_match_enclosure(mdname_t *np, mdcinfo_t *mdcp, md_error_t *ep)
358 {
359 	meta_enclosure_e	e;
360 	meta_enclosure_e	((*fptr)(mdname_t *, mdcinfo_t *,
361 	    md_error_t *));
362 	void			*cookie;
363 
364 	if ((cookie = meta_load_dl(np, ep)) != NULL) {
365 		fptr = (meta_enclosure_e (*)(mdname_t *, mdcinfo_t *,
366 		    md_error_t *))dlsym(cookie, "get_enclosure");
367 		if (fptr != NULL) {
368 			e = (*fptr)(np, mdcp, ep);
369 			switch (e) {
370 			case Enclosure_Error:
371 				/*
372 				 * Looks like this library wanted to handle
373 				 * our device and had an internal error.
374 				 */
375 				return (1);
376 
377 			case Enclosure_Okay:
378 				/*
379 				 * Found a library to handle the request so
380 				 * just return with data provided.
381 				 */
382 				return (0);
383 
384 			case Enclosure_Noop:
385 				/*
386 				 * Need to continue the search
387 				 */
388 				break;
389 			}
390 		}
391 		(void) dlclose(cookie);
392 	}
393 	return (0);
394 }
395 
396 static int
397 meta_cinfo_to_md(mdname_t *np, struct dk_cinfo *cp, mdcinfo_t *mdcp,
398     md_error_t *ep)
399 {
400 	/* default */
401 	(void) memset(mdcp, '\0', sizeof (*mdcp));
402 	(void) strncpy(mdcp->cname, cp->dki_cname,
403 	    min((sizeof (mdcp->cname) - 1), sizeof (cp->dki_cname)));
404 	mdcp->ctype = MHD_CTLR_GENERIC;
405 	mdcp->cnum = cp->dki_cnum;
406 	(void) strncpy(mdcp->dname, cp->dki_dname,
407 	    min((sizeof (mdcp->dname) - 1), sizeof (cp->dki_dname)));
408 	mdcp->unit = cp->dki_unit;
409 	mdcp->maxtransfer = cp->dki_maxtransfer;
410 
411 	/*
412 	 * See if the driver name returned from DKIOCINFO
413 	 * is valid or not. In somecases, such as the ap_dmd
414 	 * driver, we need to modify the name that's return
415 	 * for everything to work.
416 	 */
417 	meta_match_names(np, cp, mdcp, ep);
418 
419 	if (meta_match_enclosure(np, mdcp, ep))
420 		return (-1);
421 
422 	/* return success */
423 	return (0);
424 }
425 
426 static void
427 meta_vtoc_to_md(
428 	struct extvtoc	*vp,
429 	mdvtoc_t	*mdvp
430 )
431 {
432 	char		typename[sizeof (vp->v_asciilabel) + 1];
433 	uint_t		i;
434 
435 	(void) memset(mdvp, '\0', sizeof (*mdvp));
436 	(void) strncpy(typename, vp->v_asciilabel,
437 	    sizeof (vp->v_asciilabel));
438 	typename[sizeof (typename) - 1] = '\0';
439 	for (i = 0; ((i < sizeof (typename)) && (typename[i] != '\0')); ++i) {
440 		if ((typename[i] == ' ') || (typename[i] == '\t')) {
441 			typename[i] = '\0';
442 			break;
443 		}
444 	}
445 	mdvp->typename = Strdup(typename);
446 	mdvp->nparts = vp->v_nparts;
447 	for (i = 0; (i < vp->v_nparts); ++i) {
448 		mdvp->parts[i].start = vp->v_part[i].p_start;
449 		mdvp->parts[i].size = vp->v_part[i].p_size;
450 		mdvp->parts[i].tag = vp->v_part[i].p_tag;
451 		mdvp->parts[i].flag = vp->v_part[i].p_flag;
452 		if (vp->v_part[i].p_start == 0 && vp->v_part[i].p_size > 0)
453 			mdvp->parts[i].label = btodb(DK_LABEL_SIZE);
454 	}
455 }
456 
457 /*
458  * free allocations in vtoc
459  */
460 void
461 metafreevtoc(
462 	mdvtoc_t	*vtocp
463 )
464 {
465 	if (vtocp->typename != NULL)
466 		Free(vtocp->typename);
467 	(void) memset(vtocp, 0, sizeof (*vtocp));
468 }
469 
470 /*
471  * return md types
472  */
473 mdvtoc_t *
474 metagetvtoc(
475 	mdname_t	*np,	/* only rname, drivenamep, are setup */
476 	int		nocache,
477 	uint_t		*partnop,
478 	md_error_t	*ep
479 )
480 {
481 	mddrivename_t	*dnp = np->drivenamep;
482 	struct dk_geom	geom;
483 	char		*minor_name = NULL;
484 	char		*rname = np->rname;
485 	int		fd;
486 	int		partno;
487 	int		err = 0;	    /* saves errno from ioctl */
488 	ddi_devid_t	devid;
489 	char		*p;
490 
491 	/* short circuit */
492 	if ((! nocache) && (dnp->vtoc.nparts != 0)) {
493 		if (partnop != NULL) {
494 			/*
495 			 * the following assigment works because the
496 			 * mdname_t structs are always created as part
497 			 * of the drivenamep struct.  When a user
498 			 * creates an mdname_t struct it either
499 			 * uses an existing drivenamep struct or creates
500 			 * a new one and then adds the mdname_t struct
501 			 * as part of its parts_val array.  So what is
502 			 * being computed below is the slice offset in
503 			 * the parts_val array.
504 			 */
505 			*partnop = np - np->drivenamep->parts.parts_val;
506 			assert(*partnop < dnp->parts.parts_len);
507 		}
508 		return (&dnp->vtoc);
509 	}
510 
511 	/* can't get vtoc */
512 	if (! nocache) {
513 		switch (dnp->type) {
514 		case MDT_ACCES:
515 		case MDT_UNKNOWN:
516 			(void) mdsyserror(ep, dnp->errnum, rname);
517 			return (NULL);
518 		}
519 	}
520 
521 	/* get all the info */
522 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
523 		(void) mdsyserror(ep, errno, rname);
524 		return (NULL);
525 	}
526 
527 	/*
528 	 * The disk is open so this is a good point to get the devid
529 	 * otherwise it will need to be done at another time which
530 	 * means reopening it.
531 	 */
532 	if (devid_get(fd, &devid) != 0) {
533 		/* there is no devid for the disk */
534 		if (((p = getenv("MD_DEBUG")) != NULL) &&
535 		    (strstr(p, "DEVID") != NULL)) {
536 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
537 			    "%s has no device id\n"), np->rname);
538 		}
539 		np->minor_name = (char *)NULL;
540 		dnp->devid = NULL;
541 	} else {
542 		(void) devid_get_minor_name(fd, &minor_name);
543 		/*
544 		 * The minor name could be NULL if the underlying
545 		 * device driver does not support 'minor names'.
546 		 * This means we do not use devid's for this device.
547 		 * SunCluster did driver does not support minor names.
548 		 */
549 		if (minor_name != NULL) {
550 			np->minor_name = Strdup(minor_name);
551 			devid_str_free(minor_name);
552 			dnp->devid = devid_str_encode(devid, NULL);
553 		} else {
554 			np->minor_name = (char *)NULL;
555 			dnp->devid = NULL;
556 
557 			if (((p = getenv("MD_DEBUG")) != NULL) &&
558 			    (strstr(p, "DEVID") != NULL)) {
559 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
560 				    "%s no minor name (no devid)\n"),
561 				    np->rname);
562 			}
563 		}
564 		devid_free(devid);
565 	}
566 
567 	/*
568 	 * if our drivenamep points to a device not supporting DKIOCGGEOM,
569 	 * it's likely to have an EFI label.
570 	 */
571 	(void) memset(&geom, 0, sizeof (geom));
572 	if (ioctl(fd, DKIOCGGEOM, &geom) != 0) {
573 		err = errno;
574 		if (err == ENOTTY) {
575 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV, rname);
576 			(void) close(fd);
577 			return (NULL);
578 		} else if (err != ENOTSUP) {
579 			(void) mdsyserror(ep, err, rname);
580 			(void) close(fd);
581 			return (NULL);
582 		}
583 
584 	}
585 	/*
586 	 * If we are here, there was either no failure on DKIOCGGEOM or
587 	 * the failure was ENOTSUP
588 	 */
589 	if (err == ENOTSUP) {
590 		/* DKIOCGGEOM yielded ENOTSUP => try efi_alloc_and_read */
591 		struct dk_gpt	*gpt;
592 		int		save_errno;
593 
594 		/* this also sets errno */
595 		partno = efi_alloc_and_read(fd, &gpt);
596 		save_errno = errno;
597 		(void) close(fd);
598 		if (partno < 0) {
599 			efi_free(gpt);
600 			(void) mdsyserror(ep, save_errno, rname);
601 			return (NULL);
602 		}
603 		if (partno >= gpt->efi_nparts) {
604 			efi_free(gpt);
605 			(void) mddeverror(ep, MDE_INVALID_PART, NODEV64,
606 			    rname);
607 			return (NULL);
608 		}
609 
610 		/* convert to our format */
611 		metafreevtoc(&dnp->vtoc);
612 		meta_efi_to_mdvtoc(gpt, &dnp->vtoc);
613 		if (dnp->vtoc.nparts > MD_MAX_PARTS) {
614 			(void) mddeverror(ep, MDE_TOO_MANY_PARTS, NODEV64,
615 			    rname);
616 			return (NULL);
617 		}
618 		/*
619 		 * libmeta needs at least V_NUMPAR partitions.
620 		 * If we have an EFI partition with less than V_NUMPAR slices,
621 		 * we nevertheless reserve space for V_NUMPAR
622 		 */
623 
624 		if (dnp->vtoc.nparts < V_NUMPAR) {
625 			dnp->vtoc.nparts = V_NUMPAR;
626 		}
627 		meta_efi_to_mdgeom(gpt, &dnp->geom);
628 		efi_free(gpt);
629 	} else {
630 		/* no error on DKIOCGGEOM, try meta_getvtoc */
631 		struct extvtoc	vtoc;
632 
633 		if (meta_getvtoc(fd, np->cname, &vtoc, &partno, ep) < 0) {
634 			(void) close(fd);
635 			return (NULL);
636 		}
637 		(void) close(fd);
638 
639 		/* convert to our format */
640 		meta_geom_to_md(&geom, &dnp->geom);
641 		metafreevtoc(&dnp->vtoc);
642 		meta_vtoc_to_md(&vtoc, &dnp->vtoc);
643 	}
644 
645 	/* fix up any drives which are now accessible */
646 	if ((nocache) && (dnp->type == MDT_ACCES) &&
647 	    (dnp->vtoc.nparts == dnp->parts.parts_len)) {
648 		dnp->type = MDT_COMP;
649 		dnp->errnum = 0;
650 	}
651 
652 	/* save partno */
653 	assert(partno < dnp->vtoc.nparts);
654 	if (partnop != NULL)
655 		*partnop = partno;
656 
657 	/* return info */
658 	return (&dnp->vtoc);
659 }
660 
661 static void
662 meta_mdvtoc_to_vtoc(
663 	mdvtoc_t	*mdvp,
664 	struct extvtoc	*vp
665 )
666 {
667 	uint_t		i;
668 
669 	(void) memset(&vp->v_part, '\0', sizeof (vp->v_part));
670 	vp->v_nparts = (ushort_t)mdvp->nparts;
671 	for (i = 0; (i < mdvp->nparts); ++i) {
672 		vp->v_part[i].p_start = mdvp->parts[i].start;
673 		vp->v_part[i].p_size  = mdvp->parts[i].size;
674 		vp->v_part[i].p_tag   = mdvp->parts[i].tag;
675 		vp->v_part[i].p_flag  = mdvp->parts[i].flag;
676 	}
677 }
678 
679 /*
680  * Set the vtoc, but use the cached copy to get the info from.
681  * We write np->drivenamep->vtoc to disk.
682  * Before we can do this we read the vtoc in.
683  * if we're dealing with a metadevice and this metadevice is a 64 bit device
684  *	we can use meta_getmdvtoc/meta_setmdvtoc
685  * else
686  * 	we use meta_getvtoc/meta_setvtoc but than we first have to convert
687  *	dnp->vtoc (actually being a mdvtoc_t) into a vtoc_t
688  */
689 int
690 metasetvtoc(
691 	mdname_t	*np,
692 	md_error_t	*ep
693 )
694 {
695 	char		*rname = np->rname;
696 	mddrivename_t	*dnp = np->drivenamep;
697 	int		fd;
698 	int		err;
699 	int 		save_errno;
700 	struct dk_geom	geom;
701 
702 	if ((fd = open(rname, (O_RDONLY | O_NDELAY), 0)) < 0)
703 		return (mdsyserror(ep, errno, rname));
704 
705 	err = ioctl(fd, DKIOCGGEOM, &geom);
706 	save_errno = errno;
707 	if (err == 0) {
708 		struct extvtoc	vtoc;
709 
710 		if (meta_getvtoc(fd, np->cname, &vtoc, NULL, ep) < 0) {
711 			(void) close(fd);
712 			return (-1);
713 		}
714 
715 		meta_mdvtoc_to_vtoc(&dnp->vtoc, &vtoc);
716 
717 		if (meta_setvtoc(fd, np->cname, &vtoc, ep) < 0) {
718 			(void) close(fd);
719 			return (-1);
720 		}
721 	} else if (save_errno == ENOTSUP) {
722 		struct dk_gpt	*gpt;
723 		int		ret;
724 
725 		/* allocation of gpt is done in meta_mdvtoc_to_efi */
726 		meta_mdvtoc_to_efi(&dnp->vtoc, &gpt);
727 
728 		ret = efi_write(fd, gpt);
729 		save_errno = errno;
730 		free(gpt);
731 		if (ret != 0) {
732 			(void) close(fd);
733 			return (mdsyserror(ep, save_errno, rname));
734 		} else {
735 			(void) close(fd);
736 			return (0);
737 		}
738 
739 	} else {
740 		(void) close(fd);
741 		return (mdsyserror(ep, save_errno, rname));
742 	}
743 
744 	(void) close(fd);
745 
746 	return (0);
747 }
748 
749 mdgeom_t *
750 metagetgeom(
751 	mdname_t	*np,	/* only rname, drivenamep, are setup */
752 	md_error_t	*ep
753 )
754 {
755 	if (metagetvtoc(np, FALSE, NULL, ep) == NULL)
756 		return (NULL);
757 	return (&np->drivenamep->geom);
758 }
759 
760 mdcinfo_t *
761 metagetcinfo(
762 	mdname_t	*np,	/* only rname, drivenamep, are setup */
763 	md_error_t	*ep
764 )
765 {
766 	char			*rname = np->rname;
767 	mddrivename_t		*dnp = np->drivenamep;
768 	int			fd;
769 	struct dk_cinfo		cinfo;
770 
771 	/* short circuit */
772 	if (dnp->cinfo.cname[0] != '\0')
773 		return (&dnp->cinfo);
774 
775 	/* get controller info */
776 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
777 		(void) mdsyserror(ep, errno, rname);
778 		return (NULL);
779 	}
780 	if (ioctl(fd, DKIOCINFO, &cinfo) != 0) {
781 		int	save = errno;
782 
783 		(void) close(fd);
784 		if (save == ENOTTY) {
785 			(void) mddeverror(ep, MDE_NOT_DISK, NODEV64, rname);
786 		} else {
787 			(void) mdsyserror(ep, save, rname);
788 		}
789 		return (NULL);
790 	}
791 	(void) close(fd);	/* sd/ssd bug */
792 
793 	/* convert to our format */
794 	if (meta_cinfo_to_md(np, &cinfo, &dnp->cinfo, ep) != 0)
795 		return (NULL);
796 
797 	/* return info */
798 	return (&dnp->cinfo);
799 }
800 
801 /*
802  * get partition number
803  */
804 int
805 metagetpartno(
806 	mdname_t	*np,
807 	md_error_t	*ep
808 )
809 {
810 	mdvtoc_t	*vtocp;
811 	uint_t		partno;
812 
813 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
814 		return (-1);
815 	assert(partno < vtocp->nparts);
816 	return (partno);
817 }
818 
819 /*
820  * get size of device
821  */
822 diskaddr_t
823 metagetsize(
824 	mdname_t	*np,
825 	md_error_t	*ep
826 )
827 {
828 	mdvtoc_t	*vtocp;
829 	uint_t		partno;
830 
831 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
832 		return (MD_DISKADDR_ERROR);
833 	assert(partno < vtocp->nparts);
834 	return (vtocp->parts[partno].size);
835 }
836 
837 /*
838  * get label of device
839  */
840 diskaddr_t
841 metagetlabel(
842 	mdname_t	*np,
843 	md_error_t	*ep
844 )
845 {
846 	mdvtoc_t	*vtocp;
847 	uint_t		partno;
848 
849 	if ((vtocp = metagetvtoc(np, FALSE, &partno, ep)) == NULL)
850 		return (MD_DISKADDR_ERROR);
851 	assert(partno < vtocp->nparts);
852 	return (vtocp->parts[partno].label);
853 }
854 
855 /*
856  * find out where database replicas end
857  */
858 static int
859 mddb_getendblk(
860 	mdsetname_t		*sp,
861 	mdname_t		*np,
862 	diskaddr_t		*endblkp,
863 	md_error_t		*ep
864 )
865 {
866 	md_replicalist_t	*rlp = NULL;
867 	md_replicalist_t	*rl;
868 
869 	/* make sure we have a component */
870 	*endblkp = 0;
871 	if (metaismeta(np))
872 		return (0);
873 
874 	/* get replicas, quit if none */
875 	if (metareplicalist(sp, MD_BASICNAME_OK | PRINT_FAST, &rlp, ep) < 0) {
876 		if (! mdismddberror(ep, MDE_DB_NODB))
877 			return (-1);
878 		mdclrerror(ep);
879 		return (0);
880 	} else if (rlp == NULL)
881 		return (0);
882 
883 	/* go through all the replicas */
884 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
885 		md_replica_t	*rp = rl->rl_repp;
886 		mdname_t	*repnamep = rp->r_namep;
887 		diskaddr_t	dbend;
888 
889 		if (np->dev != repnamep->dev)
890 			continue;
891 		dbend = rp->r_blkno + rp->r_nblk - 1;
892 		if (dbend > *endblkp)
893 			*endblkp = dbend;
894 	}
895 
896 	/* cleanup, return success */
897 	metafreereplicalist(rlp);
898 	return (0);
899 }
900 
901 /*
902  * return cached start block
903  */
904 static diskaddr_t
905 metagetend(
906 	mdsetname_t	*sp,
907 	mdname_t	*np,
908 	md_error_t	*ep
909 )
910 {
911 	diskaddr_t	end_blk = MD_DISKADDR_ERROR;
912 
913 	/* short circuit */
914 	if (np->end_blk != MD_DISKADDR_ERROR)
915 		return (np->end_blk);
916 
917 	/* look for database locations */
918 	if (mddb_getendblk(sp, np, &end_blk, ep) != 0)
919 		return (MD_DISKADDR_ERROR);
920 
921 	/* success */
922 	np->end_blk = end_blk;
923 	return (end_blk);
924 }
925 
926 /*
927  * does device have a metadb
928  */
929 int
930 metahasmddb(
931 	mdsetname_t	*sp,
932 	mdname_t	*np,
933 	md_error_t	*ep
934 )
935 {
936 	if (metagetend(sp, np, ep) == MD_DISKADDR_ERROR)
937 		return (-1);
938 	else if (np->end_blk > 0)
939 		return (1);
940 	else
941 		return (0);
942 }
943 
944 /*
945  * return cached start block
946  */
947 diskaddr_t
948 metagetstart(
949 	mdsetname_t	*sp,
950 	mdname_t	*np,
951 	md_error_t	*ep
952 )
953 {
954 	diskaddr_t	start_blk = MD_DISKADDR_ERROR;
955 
956 	/* short circuit */
957 	if (np->start_blk != MD_DISKADDR_ERROR)
958 		return (np->start_blk);
959 
960 	/* look for database locations */
961 	if ((start_blk = metagetend(sp, np, ep)) == MD_DISKADDR_ERROR)
962 		return (MD_DISKADDR_ERROR);
963 
964 	/* check for label */
965 	if (start_blk == 0) {
966 		start_blk = metagetlabel(np, ep);
967 		if (start_blk == MD_DISKADDR_ERROR) {
968 			return (MD_DISKADDR_ERROR);
969 		}
970 	}
971 
972 	/* roundup to next cylinder */
973 	if (start_blk != 0) {
974 		mdgeom_t	*geomp;
975 
976 		if ((geomp = metagetgeom(np, ep)) == NULL)
977 			return (MD_DISKADDR_ERROR);
978 		start_blk = roundup(start_blk, (geomp->nhead * geomp->nsect));
979 	}
980 
981 	/* success */
982 	np->start_blk = start_blk;
983 	return (start_blk);
984 }
985 
986 /*
987  * return cached devices name
988  */
989 char *
990 metagetdevicesname(
991 	mdname_t	*np,
992 	md_error_t	*ep
993 )
994 {
995 	char		path[MAXPATHLEN + 1];
996 	int		len;
997 
998 	/* short circuit */
999 	if (np->devicesname != NULL)
1000 		return (np->devicesname);
1001 
1002 	/* follow symlink */
1003 	if ((len = readlink(np->bname, path, (sizeof (path) - 1))) < 0) {
1004 		(void) mdsyserror(ep, errno, np->bname);
1005 		return (NULL);
1006 	} else if (len >= sizeof (path)) {
1007 		(void) mdsyserror(ep, ENAMETOOLONG, np->bname);
1008 		return (NULL);
1009 	}
1010 	path[len] = '\0';
1011 	if ((len = strfind(path, "/devices/")) < 0) {
1012 		(void) mddeverror(ep, MDE_DEVICES_NAME, np->dev, np->bname);
1013 		return (NULL);
1014 	}
1015 
1016 	/* return name */
1017 	np->devicesname = Strdup(path + len + strlen("/devices"));
1018 	return (np->devicesname);
1019 }
1020 
1021 /*
1022  * get metadevice misc name
1023  */
1024 char *
1025 metagetmiscname(
1026 	mdname_t		*np,
1027 	md_error_t		*ep
1028 )
1029 {
1030 	mddrivename_t		*dnp = np->drivenamep;
1031 	md_i_driverinfo_t	mid;
1032 
1033 	/* short circuit */
1034 	if (dnp->miscname != NULL)
1035 		return (dnp->miscname);
1036 	if (metachkmeta(np, ep) != 0)
1037 		return (NULL);
1038 
1039 	/* get misc module from driver */
1040 	(void) memset(&mid, 0, sizeof (mid));
1041 	mid.mnum = meta_getminor(np->dev);
1042 	if (metaioctl(MD_IOCGET_DRVNM, &mid, &mid.mde, np->cname) != 0) {
1043 		(void) mdstealerror(ep, &mid.mde);
1044 		return (NULL);
1045 	}
1046 
1047 	/* return miscname */
1048 	dnp->miscname = Strdup(MD_PNTDRIVERNAME(&mid));
1049 	return (dnp->miscname);
1050 }
1051 
1052 /*
1053  * get unit structure from driver
1054  */
1055 md_unit_t *
1056 meta_get_mdunit(
1057 	mdsetname_t	*sp,
1058 	mdname_t	*np,
1059 	md_error_t	*ep
1060 )
1061 {
1062 	md_i_get_t	mig;
1063 	char		*miscname = NULL;
1064 
1065 	/* should have a set */
1066 	assert(sp != NULL);
1067 	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
1068 
1069 	/* get size of unit structure */
1070 	if (metachkmeta(np, ep) != 0)
1071 		return (NULL);
1072 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1073 		return (NULL);
1074 	(void) memset(&mig, '\0', sizeof (mig));
1075 	MD_SETDRIVERNAME(&mig, miscname, sp->setno);
1076 	mig.id = meta_getminor(np->dev);
1077 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1078 		(void) mdstealerror(ep, &mig.mde);
1079 		return (NULL);
1080 	}
1081 
1082 	/* get actual unit structure */
1083 	assert(mig.size > 0);
1084 	mig.mdp = (uintptr_t)Zalloc(mig.size);
1085 	if (metaioctl(MD_IOCGET, &mig, &mig.mde, np->cname) != 0) {
1086 		(void) mdstealerror(ep, &mig.mde);
1087 		Free((void *)(uintptr_t)mig.mdp);
1088 		return (NULL);
1089 	}
1090 
1091 	return ((md_unit_t *)(uintptr_t)mig.mdp);
1092 }
1093 
1094 /*
1095  * free metadevice unit
1096  */
1097 void
1098 meta_free_unit(
1099 	mddrivename_t	*dnp
1100 )
1101 {
1102 	if (dnp->unitp != NULL) {
1103 		switch (dnp->unitp->type) {
1104 		case MD_DEVICE:
1105 			meta_free_stripe((md_stripe_t *)dnp->unitp);
1106 			break;
1107 		case MD_METAMIRROR:
1108 			meta_free_mirror((md_mirror_t *)dnp->unitp);
1109 			break;
1110 		case MD_METATRANS:
1111 			meta_free_trans((md_trans_t *)dnp->unitp);
1112 			break;
1113 		case MD_METARAID:
1114 			meta_free_raid((md_raid_t *)dnp->unitp);
1115 			break;
1116 		case MD_METASP:
1117 			meta_free_sp((md_sp_t *)dnp->unitp);
1118 			break;
1119 		default:
1120 			assert(0);
1121 			break;
1122 		}
1123 		dnp->unitp = NULL;
1124 	}
1125 }
1126 
1127 /*
1128  * free metadevice name info
1129  */
1130 void
1131 meta_invalidate_name(
1132 	mdname_t	*namep
1133 )
1134 {
1135 	mddrivename_t	*dnp = namep->drivenamep;
1136 
1137 	/* get rid of cached name info */
1138 	if (namep->devicesname != NULL) {
1139 		Free(namep->devicesname);
1140 		namep->devicesname = NULL;
1141 	}
1142 	namep->key = MD_KEYBAD;
1143 	namep->start_blk = -1;
1144 	namep->end_blk = -1;
1145 
1146 	/* get rid of cached drivename info */
1147 	(void) memset(&dnp->geom, 0, sizeof (dnp->geom));
1148 	(void) memset(&dnp->cinfo, 0, sizeof (dnp->cinfo));
1149 	metafreevtoc(&dnp->vtoc);
1150 	metaflushsidenames(dnp);
1151 	dnp->side_names_key = MD_KEYBAD;
1152 	if (dnp->miscname != NULL) {
1153 		Free(dnp->miscname);
1154 		dnp->miscname = NULL;
1155 	}
1156 	meta_free_unit(dnp);
1157 }
1158 
1159 /*
1160  * get metadevice unit
1161  */
1162 md_common_t *
1163 meta_get_unit(
1164 	mdsetname_t	*sp,
1165 	mdname_t	*np,
1166 	md_error_t	*ep
1167 )
1168 {
1169 	char		*miscname;
1170 
1171 	/* short circuit */
1172 	if (np->drivenamep->unitp != NULL)
1173 		return (np->drivenamep->unitp);
1174 	if (metachkmeta(np, ep) != 0)
1175 		return (NULL);
1176 
1177 	/* dispatch */
1178 	if ((miscname = metagetmiscname(np, ep)) == NULL)
1179 		return (NULL);
1180 	else if (strcmp(miscname, MD_STRIPE) == 0)
1181 		return ((md_common_t *)meta_get_stripe(sp, np, ep));
1182 	else if (strcmp(miscname, MD_MIRROR) == 0)
1183 		return ((md_common_t *)meta_get_mirror(sp, np, ep));
1184 	else if (strcmp(miscname, MD_TRANS) == 0)
1185 		return ((md_common_t *)meta_get_trans(sp, np, ep));
1186 	else if (strcmp(miscname, MD_RAID) == 0)
1187 		return ((md_common_t *)meta_get_raid(sp, np, ep));
1188 	else if (strcmp(miscname, MD_SP) == 0)
1189 		return ((md_common_t *)meta_get_sp(sp, np, ep));
1190 	else {
1191 		(void) mdmderror(ep, MDE_UNKNOWN_TYPE, meta_getminor(np->dev),
1192 		    np->cname);
1193 		return (NULL);
1194 	}
1195 }
1196 
1197 
1198 int
1199 meta_isopen(
1200 	mdsetname_t	*sp,
1201 	mdname_t	*np,
1202 	md_error_t	*ep,
1203 	mdcmdopts_t	options
1204 )
1205 {
1206 	md_isopen_t	d;
1207 
1208 	if (metachkmeta(np, ep) != 0)
1209 		return (-1);
1210 
1211 	(void) memset(&d, '\0', sizeof (d));
1212 	d.dev = np->dev;
1213 	if (metaioctl(MD_IOCISOPEN, &d, &d.mde, np->cname) != 0)
1214 		return (mdstealerror(ep, &d.mde));
1215 
1216 	/*
1217 	 * shortcut: if the device is open, no need to check on other nodes,
1218 	 * even in case of a mn metadevice
1219 	 * Also return in case we're told not to check on other nodes.
1220 	 */
1221 	if ((d.isopen != 0) || ((options & MDCMD_MN_OPEN_CHECK) == 0)) {
1222 		return (d.isopen);
1223 	}
1224 
1225 	/*
1226 	 * If the device is closed locally, but it's a mn device,
1227 	 * check on all other nodes, too
1228 	 */
1229 	if (sp->setno != MD_LOCAL_SET) {
1230 		(void) metaget_setdesc(sp, ep); /* not supposed to fail */
1231 		if (sp->setdesc->sd_flags & MD_SR_MN) {
1232 			int		err = 0;
1233 			md_mn_result_t *resp;
1234 			/*
1235 			 * This message is never directly issued.
1236 			 * So we launch it with a suspend override flag.
1237 			 * If the commd is suspended, and this message comes
1238 			 * along it must be sent due to replaying a metainit or
1239 			 * similar. In that case we don't want this message to
1240 			 * be blocked.
1241 			 * If the commd is not suspended, the flag does no harm.
1242 			 * Additionally we don't want the result of the message
1243 			 * cached in the MCT, because we want uptodate results,
1244 			 * and the message doesn't need being logged either.
1245 			 * Hence NO_LOG and NO_MCT
1246 			 */
1247 			err = mdmn_send_message(sp->setno,
1248 			    MD_MN_MSG_CLU_CHECK, MD_MSGF_NO_MCT |
1249 			    MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG |
1250 			    MD_MSGF_OVERRIDE_SUSPEND, 0, (char *)&d,
1251 			    sizeof (md_isopen_t), &resp, ep);
1252 			if (err == 0) {
1253 				d.isopen = resp->mmr_exitval;
1254 			} else {
1255 				/*
1256 				 * in case some error occurred,
1257 				 * we better say the device is open
1258 				 */
1259 				d.isopen = 1;
1260 			}
1261 			if (resp != (md_mn_result_t *)NULL) {
1262 				free_result(resp);
1263 			}
1264 
1265 		}
1266 	}
1267 
1268 	return (d.isopen);
1269 }
1270