xref: /illumos-gate/usr/src/boot/common/disk.c (revision fec047081731fd77caf46ec0471c501b2cb33894)
1 /*
2  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include <sys/disk.h>
30 #include <sys/queue.h>
31 #include <stand.h>
32 #include <stdarg.h>
33 #include <inttypes.h>
34 #include <bootstrap.h>
35 #include <part.h>
36 
37 #include "disk.h"
38 
/*
 * Debug tracing.  With DISK_DEBUG defined, DPRINTF() prints the formatted
 * message prefixed by the name of the calling function and followed by a
 * newline; without it the macro expands to an empty expression.
 */
#ifndef DISK_DEBUG
#define	DPRINTF(fmt, ...)	((void)0)
#else
#define	DPRINTF(fmt, ...)	\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__)
#endif
44 
/*
 * Per-open bookkeeping for a disk.  disk_open() allocates one of these and
 * stores it in dev->dd.d_opendata; disk_close() releases it again.
 */
struct open_disk {
	/* Partition table of the whole disk, as returned by ptable_open() */
	struct ptable		*table;
	/* Size of the media, as reported by DIOCGMEDIASIZE for offset 0 */
	uint64_t		mediasize;
	/* Size of the selected slice/partition (0 until one is selected) */
	uint64_t		entrysize;
	/* Sector size in bytes; never 0 for a successfully opened disk */
	uint_t			sectorsize;
};
51 
/*
 * Context handed to ptable_print() through the opaque argument of
 * ptable_iterate().
 */
struct print_args {
	/* Opened disk whose partition table is being listed */
	struct disk_devdesc	*dev;
	/* Text printed in front of every partition name */
	const char		*prefix;
	/* Non-zero to also print the size of every partition */
	int			verbose;
};
57 
/*
 * Convert a size to a human-readable number.
 *
 * size		size in sectors
 * sectorsize	bytes per sector
 *
 * The size is expressed in KB and then scaled to MB, GB or TB once the
 * value would need more than four digits in the smaller unit (10000 or
 * more).  All divisions truncate.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be consumed (or copied) before calling again.
 */
static char *
display_size(uint64_t size, uint_t sectorsize)
{
	static char buf[80];
	const uint64_t limit = ~(uint64_t)0;
	uint64_t whole, frac;
	char unit;

	/*
	 * Compute size * sectorsize / 1024 without forming the byte count:
	 * a huge (or corrupt) sector count would otherwise wrap around and
	 * be displayed as a tiny size.  Splitting the sector count into
	 * whole and fractional multiples of 1024 gives exactly the same
	 * truncated result.  If even the KB count does not fit, saturate.
	 */
	whole = size / 1024;
	frac = (size % 1024) * sectorsize / 1024;
	if (sectorsize != 0 && whole > (limit - frac) / sectorsize)
		size = limit;
	else
		size = whole * sectorsize + frac;

	unit = 'K';
	if (size >= 10485760000ULL) {
		size /= 1073741824;
		unit = 'T';
	} else if (size >= 10240000) {
		size /= 1048576;
		unit = 'G';
	} else if (size >= 10000) {
		size /= 1024;
		unit = 'M';
	}
	snprintf(buf, sizeof (buf), "%4" PRIu64 "%cB", size, unit);
	return (buf);
}
80 
81 static int
82 ptblread(void *d, void *buf, size_t blocks, uint64_t offset)
83 {
84 	struct disk_devdesc *dev;
85 	struct open_disk *od;
86 
87 	dev = (struct disk_devdesc *)d;
88 	od = (struct open_disk *)dev->dd.d_opendata;
89 
90 	/*
91 	 * The strategy function assumes the offset is in units of 512 byte
92 	 * sectors. For larger sector sizes, we need to adjust the offset to
93 	 * match the actual sector size.
94 	 */
95 	offset *= (od->sectorsize / 512);
96 	/*
97 	 * As the GPT backup partition is located at the end of the disk,
98 	 * to avoid reading past disk end, flag bcache not to use RA.
99 	 */
100 	return (dev->dd.d_dev->dv_strategy(dev, F_READ | F_NORA, offset,
101 	    blocks * od->sectorsize, (char *)buf, NULL));
102 }
103 
104 static int
105 ptable_print(void *arg, const char *pname, const struct ptable_entry *part)
106 {
107 	struct disk_devdesc dev;
108 	struct print_args *pa, bsd;
109 	struct open_disk *od;
110 	struct ptable *table;
111 	char line[80];
112 	int res;
113 	uint_t sectsize;
114 	uint64_t partsize;
115 
116 	pa = (struct print_args *)arg;
117 	od = (struct open_disk *)pa->dev->dd.d_opendata;
118 	sectsize = od->sectorsize;
119 	partsize = part->end - part->start + 1;
120 	snprintf(line, sizeof (line), "  %s%s: %s", pa->prefix, pname,
121 	    parttype2str(part->type));
122 	if (pager_output(line))
123 		return (1);
124 
125 	if (pa->verbose) {
126 		/* Emit extra tab when the line is shorter than 3 tab stops */
127 		if (strlen(line) < 24)
128 			(void) pager_output("\t");
129 
130 		snprintf(line, sizeof (line), "\t%s",
131 		    display_size(partsize, sectsize));
132 		if (pager_output(line))
133 			return (1);
134 	}
135 	if (pager_output("\n"))
136 		return (1);
137 	res = 0;
138 	if (part->type == PART_FREEBSD || part->type == PART_SOLARIS2) {
139 		/* Open slice with BSD or VTOC label */
140 		dev.dd.d_dev = pa->dev->dd.d_dev;
141 		dev.dd.d_unit = pa->dev->dd.d_unit;
142 		dev.d_slice = part->index;
143 		dev.d_partition = D_PARTNONE;
144 		if (disk_open(&dev, partsize, sectsize) == 0) {
145 			table = ptable_open(&dev, partsize, sectsize, ptblread);
146 			if (table != NULL) {
147 				snprintf(line, sizeof (line), "  %s%s",
148 				    pa->prefix, pname);
149 				bsd.dev = &dev;
150 				bsd.prefix = line;
151 				bsd.verbose = pa->verbose;
152 				res = ptable_iterate(table, &bsd, ptable_print);
153 				ptable_close(table);
154 			}
155 			disk_close(&dev);
156 		}
157 	}
158 
159 	return (res);
160 }
161 
162 int
163 disk_print(struct disk_devdesc *dev, char *prefix, int verbose)
164 {
165 	struct open_disk *od;
166 	struct print_args pa;
167 
168 	/* Disk should be opened */
169 	od = (struct open_disk *)dev->dd.d_opendata;
170 	pa.dev = dev;
171 	pa.prefix = prefix;
172 	pa.verbose = verbose;
173 	return (ptable_iterate(od->table, &pa, ptable_print));
174 }
175 
176 int
177 disk_read(struct disk_devdesc *dev, void *buf, uint64_t offset, uint_t blocks)
178 {
179 	struct open_disk *od;
180 	int ret;
181 
182 	od = (struct open_disk *)dev->dd.d_opendata;
183 	ret = dev->dd.d_dev->dv_strategy(dev, F_READ, dev->d_offset + offset,
184 	    blocks * od->sectorsize, buf, NULL);
185 
186 	return (ret);
187 }
188 
189 int
190 disk_write(struct disk_devdesc *dev, void *buf, uint64_t offset, uint_t blocks)
191 {
192 	struct open_disk *od;
193 	int ret;
194 
195 	od = (struct open_disk *)dev->dd.d_opendata;
196 	ret = dev->dd.d_dev->dv_strategy(dev, F_WRITE, dev->d_offset + offset,
197 	    blocks * od->sectorsize, buf, NULL);
198 
199 	return (ret);
200 }
201 
202 int
203 disk_ioctl(struct disk_devdesc *dev, unsigned long cmd, void *data)
204 {
205 	struct open_disk *od = dev->dd.d_opendata;
206 
207 	if (od == NULL)
208 		return (ENOTTY);
209 
210 	switch (cmd) {
211 	case DIOCGSECTORSIZE:
212 		*(uint_t *)data = od->sectorsize;
213 		break;
214 	case DIOCGMEDIASIZE:
215 		if (dev->d_offset == 0)
216 			*(uint64_t *)data = od->mediasize;
217 		else
218 			*(uint64_t *)data = od->entrysize * od->sectorsize;
219 		break;
220 	default:
221 		return (ENOTTY);
222 	}
223 
224 	return (0);
225 }
226 
227 int
228 disk_open(struct disk_devdesc *dev, uint64_t mediasize, uint_t sectorsize)
229 {
230 	struct disk_devdesc partdev;
231 	struct open_disk *od;
232 	struct ptable *table;
233 	struct ptable_entry part;
234 	int rc, slice, partition;
235 
236 	if (sectorsize == 0) {
237 		DPRINTF("unknown sector size");
238 		return (ENXIO);
239 	}
240 	rc = 0;
241 	od = (struct open_disk *)malloc(sizeof (struct open_disk));
242 	if (od == NULL) {
243 		DPRINTF("no memory");
244 		return (ENOMEM);
245 	}
246 	dev->dd.d_opendata = od;
247 	od->entrysize = 0;
248 	od->mediasize = mediasize;
249 	od->sectorsize = sectorsize;
250 	/*
251 	 * While we are reading disk metadata, make sure we do it relative
252 	 * to the start of the disk
253 	 */
254 	memcpy(&partdev, dev, sizeof(partdev));
255 	partdev.d_offset = 0;
256 	partdev.d_slice = D_SLICENONE;
257 	partdev.d_partition = D_PARTNONE;
258 
259 	dev->d_offset = 0;
260 	table = NULL;
261 	slice = dev->d_slice;
262 	partition = dev->d_partition;
263 
264 	DPRINTF("%s unit %d, slice %d, partition %d => %p", disk_fmtdev(dev),
265 	    dev->dd.d_unit, dev->d_slice, dev->d_partition, od);
266 
267 	/* Determine disk layout. */
268 	od->table = ptable_open(&partdev, mediasize / sectorsize, sectorsize,
269 	    ptblread);
270 	if (od->table == NULL) {
271 		DPRINTF("Can't read partition table");
272 		rc = ENXIO;
273 		goto out;
274 	}
275 
276 	if (ptable_getsize(od->table, &mediasize) != 0) {
277 		rc = ENXIO;
278 		goto out;
279 	}
280 	od->mediasize = mediasize;
281 
282 	if ((ptable_gettype(od->table) == PTABLE_BSD ||
283 	    ptable_gettype(od->table) == PTABLE_VTOC) &&
284 	    partition >= 0) {
285 		/* It doesn't matter what value has d_slice */
286 		rc = ptable_getpart(od->table, &part, partition);
287 		if (rc == 0) {
288 			dev->d_offset = part.start;
289 			od->entrysize = part.end - part.start + 1;
290 		}
291 	} else if (ptable_gettype(od->table) == PTABLE_ISO9660) {
292 		dev->d_offset = 0;
293 		od->entrysize = mediasize;
294 	} else if (slice >= 0) {
295 		/* Try to get information about partition */
296 		if (slice == 0)
297 			rc = ptable_getbestpart(od->table, &part);
298 		else
299 			rc = ptable_getpart(od->table, &part, slice);
300 		if (rc != 0) /* Partition doesn't exist */
301 			goto out;
302 		dev->d_offset = part.start;
303 		od->entrysize = part.end - part.start + 1;
304 		slice = part.index;
305 		if (ptable_gettype(od->table) == PTABLE_GPT) {
306 			partition = D_PARTISGPT;
307 			goto out; /* Nothing more to do */
308 		} else if (partition == D_PARTISGPT) {
309 			/*
310 			 * When we try to open GPT partition, but partition
311 			 * table isn't GPT, reset partition value to
312 			 * D_PARTWILD and try to autodetect appropriate value.
313 			 */
314 			partition = D_PARTWILD;
315 		}
316 
317 		/*
318 		 * If partition is D_PARTNONE, then disk_open() was called
319 		 * to open raw MBR slice.
320 		 */
321 		if (partition == D_PARTNONE)
322 			goto out;
323 
324 		/*
325 		 * If partition is D_PARTWILD and we are looking at a
326 		 * BSD/VTOC slice, then try to read label, otherwise return
327 		 * the whole MBR slice.
328 		 */
329 		if (partition == D_PARTWILD) {
330 			switch (part.type) {
331 			case PART_FREEBSD:
332 			case PART_SOLARIS2:
333 				break;
334 			default:
335 				goto out;
336 			}
337 		}
338 		/* Try to read label */
339 		table = ptable_open(dev, part.end - part.start + 1,
340 		    od->sectorsize, ptblread);
341 		if (table == NULL) {
342 			DPRINTF("Can't read BSD/VTOC label");
343 			rc = ENXIO;
344 			goto out;
345 		}
346 		/*
347 		 * If slice contains BSD/VTOC label and partition < 0, then
348 		 * assume the 'a' partition. Otherwise just return the
349 		 * whole MBR slice, because it can contain ZFS.
350 		 */
351 		if (partition < 0) {
352 			if (ptable_gettype(table) != PTABLE_BSD &&
353 			    ptable_gettype(table) != PTABLE_VTOC)
354 				goto out;
355 			partition = 0;
356 		}
357 		rc = ptable_getpart(table, &part, partition);
358 		if (rc != 0)
359 			goto out;
360 		dev->d_offset += part.start;
361 		od->entrysize = part.end - part.start + 1;
362 	}
363 out:
364 	if (table != NULL)
365 		ptable_close(table);
366 
367 	if (rc != 0) {
368 		if (od->table != NULL)
369 			ptable_close(od->table);
370 		free(od);
371 		DPRINTF("%s could not open", disk_fmtdev(dev));
372 	} else {
373 		/* Save the slice and partition number to the dev */
374 		dev->d_slice = slice;
375 		dev->d_partition = partition;
376 		DPRINTF("%s offset %" PRIu64 " => %p", disk_fmtdev(dev),
377 		    dev->d_offset, od);
378 	}
379 	return (rc);
380 }
381 
382 int
383 disk_close(struct disk_devdesc *dev)
384 {
385 	struct open_disk *od;
386 
387 	od = (struct open_disk *)dev->dd.d_opendata;
388 	DPRINTF("%s closed => %p", disk_fmtdev(dev), od);
389 	ptable_close(od->table);
390 	free(od);
391 	return (0);
392 }
393 
394 char *
395 disk_fmtdev(struct disk_devdesc *dev)
396 {
397 	static char buf[128];
398 	char *cp;
399 
400 	cp = buf + sprintf(buf, "%s%d", dev->dd.d_dev->dv_name, dev->dd.d_unit);
401 	if (dev->d_slice > D_SLICENONE) {
402 #ifdef LOADER_GPT_SUPPORT
403 		if (dev->d_partition == D_PARTISGPT) {
404 			sprintf(cp, "p%d:", dev->d_slice);
405 			return (buf);
406 		} else
407 #endif
408 #ifdef LOADER_MBR_SUPPORT
409 			cp += sprintf(cp, "s%d", dev->d_slice);
410 #endif
411 	}
412 	if (dev->d_partition > D_PARTNONE)
413 		cp += sprintf(cp, "%c", dev->d_partition + 'a');
414 	strcat(cp, ":");
415 	return (buf);
416 }
417 
418 int
419 disk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path)
420 {
421 	int unit, slice, partition;
422 	const char *np;
423 	char *cp;
424 
425 	np = devspec;
426 	unit = -1;
427 	/*
428 	 * If there is path/file info after the device info, then any missing
429 	 * slice or partition info should be considered a request to search for
430 	 * an appropriate partition.  Otherwise we want to open the raw device
431 	 * itself and not try to fill in missing info by searching.
432 	 */
433 	if ((cp = strchr(np, ':')) != NULL && cp[1] != '\0') {
434 		slice = D_SLICEWILD;
435 		partition = D_PARTWILD;
436 	} else {
437 		slice = D_SLICENONE;
438 		partition = D_PARTNONE;
439 	}
440 
441 	if (*np != '\0' && *np != ':') {
442 		unit = strtol(np, &cp, 10);
443 		if (cp == np)
444 			return (EUNIT);
445 #ifdef LOADER_GPT_SUPPORT
446 		if (*cp == 'p') {
447 			np = cp + 1;
448 			slice = strtol(np, &cp, 10);
449 			if (np == cp)
450 				return (ESLICE);
451 			/* we don't support nested partitions on GPT */
452 			if (*cp != '\0' && *cp != ':')
453 				return (EINVAL);
454 			partition = D_PARTISGPT;
455 		} else
456 #endif
457 #ifdef LOADER_MBR_SUPPORT
458 		if (*cp == 's') {
459 			np = cp + 1;
460 			slice = strtol(np, &cp, 10);
461 			if (np == cp)
462 				return (ESLICE);
463 		}
464 #endif
465 		if (*cp != '\0' && *cp != ':') {
466 			partition = *cp - 'a';
467 			if (partition < 0)
468 				return (EPART);
469 			cp++;
470 		}
471 	} else
472 		return (EINVAL);
473 
474 	if (*cp != '\0' && *cp != ':')
475 		return (EINVAL);
476 	dev->dd.d_unit = unit;
477 	dev->d_slice = slice;
478 	dev->d_partition = partition;
479 	if (path != NULL)
480 		*path = (*cp == '\0') ? cp: cp + 1;
481 	return (0);
482 }
483