xref: /titanic_52/usr/src/boot/sys/boot/common/part.c (revision 7f3006fcbb4aa276c70b4b6b9e3844b293f8a5b4)
1 /*
2  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 
29 #include <stand.h>
30 #include <sys/param.h>
31 #include <sys/diskmbr.h>
32 #include <sys/disklabel.h>
33 #include <sys/endian.h>
34 #include <sys/gpt.h>
35 #include <sys/stddef.h>
36 #include <sys/queue.h>
37 #include <sys/vtoc.h>
38 
39 #include <crc32.h>
40 #include <part.h>
41 #include <uuid.h>
42 
/*
 * Diagnostic tracing.  With PART_DEBUG defined, DEBUG() prints the formatted
 * message prefixed by the name of the calling function and followed by a
 * newline.  Without it the macro expands to nothing, so its arguments are
 * never evaluated.
 */
#if defined(PART_DEBUG)
#define	DEBUG(fmt, ...)	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__)
#else
#define	DEBUG(fmt, ...)
#endif
48 
#ifdef LOADER_GPT_SUPPORT
/* Upper bound, in sectors, on how much of a GPT entry array is read. */
#define	MAXTBLSZ	64

/* Type UUID that marks an unused slot in the GPT entry array. */
static const uuid_t gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;

/* Vendor-neutral and Microsoft partition types. */
static const uuid_t gpt_uuid_efi = GPT_ENT_TYPE_EFI;
static const uuid_t gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA;
static const uuid_t gpt_uuid_reserved = GPT_ENT_TYPE_RESERVED;

/* FreeBSD partition types. */
static const uuid_t gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
static const uuid_t gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT;
static const uuid_t gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS;
static const uuid_t gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
static const uuid_t gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
static const uuid_t gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
static const uuid_t gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;

/* illumos partition types. */
static const uuid_t gpt_uuid_illumos_boot = GPT_ENT_TYPE_ILLUMOS_BOOT;
static const uuid_t gpt_uuid_illumos_ufs = GPT_ENT_TYPE_ILLUMOS_UFS;
static const uuid_t gpt_uuid_illumos_zfs = GPT_ENT_TYPE_ILLUMOS_ZFS;
#endif /* LOADER_GPT_SUPPORT */
66 
67 struct pentry {
68 	struct ptable_entry	part;
69 	uint64_t		flags;
70 	union {
71 		uint8_t bsd;
72 		uint8_t	mbr;
73 		uuid_t	gpt;
74 		uint16_t vtoc8;
75 		uint16_t vtoc;
76 	} type;
77 	STAILQ_ENTRY(pentry)	entry;
78 };
79 
80 struct ptable {
81 	enum ptable_type	type;
82 	uint16_t		sectorsize;
83 	uint64_t		sectors;
84 
85 	STAILQ_HEAD(, pentry)	entries;
86 };
87 
88 static struct parttypes {
89 	enum partition_type	type;
90 	const char		*desc;
91 } ptypes[] = {
92 	{ PART_UNKNOWN,		"Unknown" },
93 	{ PART_EFI,		"EFI" },
94 	{ PART_FREEBSD,		"FreeBSD" },
95 	{ PART_FREEBSD_BOOT,	"FreeBSD boot" },
96 	{ PART_FREEBSD_NANDFS,	"FreeBSD nandfs" },
97 	{ PART_FREEBSD_UFS,	"FreeBSD UFS" },
98 	{ PART_FREEBSD_ZFS,	"FreeBSD ZFS" },
99 	{ PART_FREEBSD_SWAP,	"FreeBSD swap" },
100 	{ PART_FREEBSD_VINUM,	"FreeBSD vinum" },
101 	{ PART_LINUX,		"Linux" },
102 	{ PART_LINUX_SWAP,	"Linux swap" },
103 	{ PART_DOS,		"DOS/Windows" },
104 	{ PART_SOLARIS2,	"Solaris 2" },
105 	{ PART_ILLUMOS_UFS,	"illumos UFS" },
106 	{ PART_ILLUMOS_ZFS,	"illumos ZFS" },
107 	{ PART_RESERVED,	"Reserved" },
108 	{ PART_VTOC_BOOT,	"boot" },
109 	{ PART_VTOC_ROOT,	"root" },
110 	{ PART_VTOC_SWAP,	"swap" },
111 	{ PART_VTOC_USR,	"usr" },
112 	{ PART_VTOC_STAND,	"stand" },
113 	{ PART_VTOC_VAR,	"var" },
114 	{ PART_VTOC_HOME,	"home" }
115 };
116 
117 const char *
118 parttype2str(enum partition_type type)
119 {
120 	size_t i;
121 
122 	for (i = 0; i < nitems(ptypes); i++)
123 		if (ptypes[i].type == type)
124 			return (ptypes[i].desc);
125 	return (ptypes[0].desc);
126 }
127 
128 #ifdef LOADER_GPT_SUPPORT
129 static void
130 uuid_letoh(uuid_t *uuid)
131 {
132 
133 	uuid->time_low = le32toh(uuid->time_low);
134 	uuid->time_mid = le16toh(uuid->time_mid);
135 	uuid->time_hi_and_version = le16toh(uuid->time_hi_and_version);
136 }
137 
138 static enum partition_type
139 gpt_parttype(uuid_t type)
140 {
141 
142 	if (uuid_equal(&type, &gpt_uuid_efi, NULL))
143 		return (PART_EFI);
144 	else if (uuid_equal(&type, &gpt_uuid_ms_basic_data, NULL))
145 		return (PART_DOS);
146 	else if (uuid_equal(&type, &gpt_uuid_freebsd_boot, NULL))
147 		return (PART_FREEBSD_BOOT);
148 	else if (uuid_equal(&type, &gpt_uuid_freebsd_ufs, NULL))
149 		return (PART_FREEBSD_UFS);
150 	else if (uuid_equal(&type, &gpt_uuid_freebsd_zfs, NULL))
151 		return (PART_FREEBSD_ZFS);
152 	else if (uuid_equal(&type, &gpt_uuid_freebsd_swap, NULL))
153 		return (PART_FREEBSD_SWAP);
154 	else if (uuid_equal(&type, &gpt_uuid_freebsd_vinum, NULL))
155 		return (PART_FREEBSD_VINUM);
156 	else if (uuid_equal(&type, &gpt_uuid_freebsd_nandfs, NULL))
157 		return (PART_FREEBSD_NANDFS);
158 	else if (uuid_equal(&type, &gpt_uuid_freebsd, NULL))
159 		return (PART_FREEBSD);
160 	else if (uuid_equal(&type, &gpt_uuid_illumos_boot, NULL))
161 		return (PART_VTOC_BOOT);
162 	else if (uuid_equal(&type, &gpt_uuid_illumos_ufs, NULL))
163 		return (PART_ILLUMOS_UFS);
164 	else if (uuid_equal(&type, &gpt_uuid_illumos_zfs, NULL))
165 		return (PART_ILLUMOS_ZFS);
166 	else if (uuid_equal(&type, &gpt_uuid_reserved, NULL))
167 		return (PART_RESERVED);
168 	return (PART_UNKNOWN);
169 }
170 
171 static struct gpt_hdr *
172 gpt_checkhdr(struct gpt_hdr *hdr, uint64_t lba_self,
173     uint64_t lba_last __attribute((unused)), uint16_t sectorsize)
174 {
175 	uint32_t sz, crc;
176 
177 	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof (hdr->hdr_sig)) != 0) {
178 		DEBUG("no GPT signature");
179 		return (NULL);
180 	}
181 	sz = le32toh(hdr->hdr_size);
182 	if (sz < 92 || sz > sectorsize) {
183 		DEBUG("invalid GPT header size: %d", sz);
184 		return (NULL);
185 	}
186 	crc = le32toh(hdr->hdr_crc_self);
187 	hdr->hdr_crc_self = 0;
188 	if (crc32(hdr, sz) != crc) {
189 		DEBUG("GPT header's CRC doesn't match");
190 		return (NULL);
191 	}
192 	hdr->hdr_crc_self = crc;
193 	hdr->hdr_revision = le32toh(hdr->hdr_revision);
194 	if (hdr->hdr_revision < GPT_HDR_REVISION) {
195 		DEBUG("unsupported GPT revision %d", hdr->hdr_revision);
196 		return (NULL);
197 	}
198 	hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
199 	if (hdr->hdr_lba_self != lba_self) {
200 		DEBUG("self LBA doesn't match");
201 		return (NULL);
202 	}
203 	hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
204 	if (hdr->hdr_lba_alt == hdr->hdr_lba_self) {
205 		DEBUG("invalid alternate LBA");
206 		return (NULL);
207 	}
208 	hdr->hdr_entries = le32toh(hdr->hdr_entries);
209 	hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
210 	if (hdr->hdr_entries == 0 ||
211 	    hdr->hdr_entsz < sizeof (struct gpt_ent) ||
212 	    sectorsize % hdr->hdr_entsz != 0) {
213 		DEBUG("invalid entry size or number of entries");
214 		return (NULL);
215 	}
216 	hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
217 	hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
218 	hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
219 	hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
220 	uuid_letoh(&hdr->hdr_uuid);
221 	return (hdr);
222 }
223 
224 static int
225 gpt_checktbl(const struct gpt_hdr *hdr, uint8_t *tbl, size_t size,
226     uint64_t lba_last __attribute((unused)))
227 {
228 	struct gpt_ent *ent;
229 	uint32_t i, cnt;
230 
231 	cnt = size / hdr->hdr_entsz;
232 	if (hdr->hdr_entries <= cnt) {
233 		cnt = hdr->hdr_entries;
234 		/* Check CRC only when buffer size is enough for table. */
235 		if (hdr->hdr_crc_table !=
236 		    crc32(tbl, hdr->hdr_entries * hdr->hdr_entsz)) {
237 			DEBUG("GPT table's CRC doesn't match");
238 			return (-1);
239 		}
240 	}
241 	for (i = 0; i < cnt; i++) {
242 		ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
243 		uuid_letoh(&ent->ent_type);
244 		if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
245 			continue;
246 		ent->ent_lba_start = le64toh(ent->ent_lba_start);
247 		ent->ent_lba_end = le64toh(ent->ent_lba_end);
248 	}
249 	return (0);
250 }
251 
252 static struct ptable *
253 ptable_gptread(struct ptable *table, void *dev, diskread_t dread)
254 {
255 	struct pentry *entry;
256 	struct gpt_hdr *phdr, hdr;
257 	struct gpt_ent *ent;
258 	uint8_t *buf, *tbl;
259 	uint64_t offset;
260 	int pri, sec;
261 	size_t size, i;
262 
263 	buf = malloc(table->sectorsize);
264 	if (buf == NULL)
265 		return (NULL);
266 	tbl = malloc(table->sectorsize * MAXTBLSZ);
267 	if (tbl == NULL) {
268 		free(buf);
269 		return (NULL);
270 	}
271 	/* Read the primary GPT header. */
272 	if (dread(dev, buf, 1, 1) != 0) {
273 		ptable_close(table);
274 		table = NULL;
275 		goto out;
276 	}
277 	pri = sec = 0;
278 	/* Check the primary GPT header. */
279 	phdr = gpt_checkhdr((struct gpt_hdr *)buf, 1, table->sectors - 1,
280 	    table->sectorsize);
281 	if (phdr != NULL) {
282 		/* Read the primary GPT table. */
283 		size = MIN(MAXTBLSZ, (phdr->hdr_entries * phdr->hdr_entsz +
284 		    table->sectorsize - 1) / table->sectorsize);
285 		if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
286 		    gpt_checktbl(phdr, tbl, size * table->sectorsize,
287 		    table->sectors - 1) == 0) {
288 			memcpy(&hdr, phdr, sizeof (hdr));
289 			pri = 1;
290 		}
291 	}
292 	offset = pri ? hdr.hdr_lba_alt: table->sectors - 1;
293 	/* Read the backup GPT header. */
294 	if (dread(dev, buf, 1, offset) != 0)
295 		phdr = NULL;
296 	else
297 		phdr = gpt_checkhdr((struct gpt_hdr *)buf, offset,
298 		    table->sectors - 1, table->sectorsize);
299 	if (phdr != NULL) {
300 		/*
301 		 * Compare primary and backup headers.
302 		 * If they are equal, then we do not need to read backup
303 		 * table. If they are different, then prefer backup header
304 		 * and try to read backup table.
305 		 */
306 		if (pri == 0 ||
307 		    uuid_equal(&hdr.hdr_uuid, &phdr->hdr_uuid, NULL) == 0 ||
308 		    hdr.hdr_revision != phdr->hdr_revision ||
309 		    hdr.hdr_size != phdr->hdr_size ||
310 		    hdr.hdr_lba_start != phdr->hdr_lba_start ||
311 		    hdr.hdr_lba_end != phdr->hdr_lba_end ||
312 		    hdr.hdr_entries != phdr->hdr_entries ||
313 		    hdr.hdr_entsz != phdr->hdr_entsz ||
314 		    hdr.hdr_crc_table != phdr->hdr_crc_table) {
315 			/* Read the backup GPT table. */
316 			size = MIN(MAXTBLSZ, (phdr->hdr_entries *
317 			    phdr->hdr_entsz + table->sectorsize - 1) /
318 			    table->sectorsize);
319 			if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
320 			    gpt_checktbl(phdr, tbl, size * table->sectorsize,
321 			    table->sectors - 1) == 0) {
322 				memcpy(&hdr, phdr, sizeof (hdr));
323 				sec = 1;
324 			}
325 		}
326 	}
327 	if (pri == 0 && sec == 0) {
328 		/* Both primary and backup tables are invalid. */
329 		table->type = PTABLE_NONE;
330 		goto out;
331 	}
332 	DEBUG("GPT detected");
333 	size = MIN(hdr.hdr_entries * hdr.hdr_entsz,
334 	    MAXTBLSZ * table->sectorsize);
335 
336 	/*
337 	 * If the disk's sector count is smaller than the sector count recorded
338 	 * in the disk's GPT table header, set the table->sectors to the value
339 	 * recorded in GPT tables. This is done to work around buggy firmware
340 	 * that returns truncated disk sizes.
341 	 *
342 	 * Note, this is still not a foolproof way to get disk's size. For
343 	 * example, an image file can be truncated when copied to smaller media.
344 	 */
345 	if (hdr.hdr_lba_alt + 1 > table->sectors)
346 		table->sectors = hdr.hdr_lba_alt + 1;
347 
348 	for (i = 0; i < size / hdr.hdr_entsz; i++) {
349 		ent = (struct gpt_ent *)(tbl + i * hdr.hdr_entsz);
350 		if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
351 			continue;
352 
353 		/* Simple sanity checks. */
354 		if (ent->ent_lba_start < hdr.hdr_lba_start ||
355 		    ent->ent_lba_end > hdr.hdr_lba_end ||
356 		    ent->ent_lba_start > ent->ent_lba_end)
357 			continue;
358 
359 		entry = malloc(sizeof (*entry));
360 		if (entry == NULL)
361 			break;
362 		entry->part.start = ent->ent_lba_start;
363 		entry->part.end = ent->ent_lba_end;
364 		entry->part.index = i + 1;
365 		entry->part.type = gpt_parttype(ent->ent_type);
366 		entry->flags = le64toh(ent->ent_attr);
367 		memcpy(&entry->type.gpt, &ent->ent_type, sizeof (uuid_t));
368 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
369 		DEBUG("new GPT partition added");
370 	}
371 out:
372 	free(buf);
373 	free(tbl);
374 	return (table);
375 }
376 #endif /* LOADER_GPT_SUPPORT */
377 
378 #ifdef LOADER_MBR_SUPPORT
379 /* We do not need to support too many EBR partitions in the loader */
380 #define	MAXEBRENTRIES		8
381 static enum partition_type
382 mbr_parttype(uint8_t type)
383 {
384 
385 	switch (type) {
386 	case DOSPTYP_386BSD:
387 		return (PART_FREEBSD);
388 	case DOSPTYP_LINSWP:
389 		return (PART_LINUX_SWAP);
390 	case DOSPTYP_LINUX:
391 		return (PART_LINUX);
392 	case DOSPTYP_SUNIXOS2:
393 		return (PART_SOLARIS2);
394 	case 0x01:
395 	case 0x04:
396 	case 0x06:
397 	case 0x07:
398 	case 0x0b:
399 	case 0x0c:
400 	case 0x0e:
401 		return (PART_DOS);
402 	}
403 	return (PART_UNKNOWN);
404 }
405 
406 static struct ptable *
407 ptable_ebrread(struct ptable *table, void *dev, diskread_t dread)
408 {
409 	struct dos_partition *dp;
410 	struct pentry *e1, *entry;
411 	uint32_t start, end, offset;
412 	uint8_t *buf;
413 	int i, idx;
414 
415 	STAILQ_FOREACH(e1, &table->entries, entry) {
416 		if (e1->type.mbr == DOSPTYP_EXT ||
417 		    e1->type.mbr == DOSPTYP_EXTLBA)
418 			break;
419 	}
420 	if (e1 == NULL)
421 		return (table);
422 	idx = 5;
423 	offset = e1->part.start;
424 	buf = malloc(table->sectorsize);
425 	if (buf == NULL)
426 		return (table);
427 	DEBUG("EBR detected");
428 	for (i = 0; i < MAXEBRENTRIES; i++) {
429 #if 0	/* Some BIOSes return an incorrect number of sectors */
430 		if (offset >= table->sectors)
431 			break;
432 #endif
433 		if (dread(dev, buf, 1, offset) != 0)
434 			break;
435 		dp = (struct dos_partition *)(buf + DOSPARTOFF);
436 		if (dp[0].dp_typ == 0)
437 			break;
438 		start = le32toh(dp[0].dp_start);
439 		if (dp[0].dp_typ == DOSPTYP_EXT &&
440 		    dp[1].dp_typ == 0) {
441 			offset = e1->part.start + start;
442 			continue;
443 		}
444 		end = le32toh(dp[0].dp_size);
445 		entry = malloc(sizeof (*entry));
446 		if (entry == NULL)
447 			break;
448 		entry->part.start = offset + start;
449 		entry->part.end = entry->part.start + end - 1;
450 		entry->part.index = idx++;
451 		entry->part.type = mbr_parttype(dp[0].dp_typ);
452 		entry->flags = dp[0].dp_flag;
453 		entry->type.mbr = dp[0].dp_typ;
454 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
455 		DEBUG("new EBR partition added");
456 		if (dp[1].dp_typ == 0)
457 			break;
458 		offset = e1->part.start + le32toh(dp[1].dp_start);
459 	}
460 	free(buf);
461 	return (table);
462 }
463 #endif /* LOADER_MBR_SUPPORT */
464 
465 static enum partition_type
466 bsd_parttype(uint8_t type)
467 {
468 
469 	switch (type) {
470 	case FS_NANDFS:
471 		return (PART_FREEBSD_NANDFS);
472 	case FS_SWAP:
473 		return (PART_FREEBSD_SWAP);
474 	case FS_BSDFFS:
475 		return (PART_FREEBSD_UFS);
476 	case FS_VINUM:
477 		return (PART_FREEBSD_VINUM);
478 	case FS_ZFS:
479 		return (PART_FREEBSD_ZFS);
480 	}
481 	return (PART_UNKNOWN);
482 }
483 
484 static struct ptable *
485 ptable_bsdread(struct ptable *table, void *dev, diskread_t dread)
486 {
487 	struct disklabel *dl;
488 	struct partition *part;
489 	struct pentry *entry;
490 	uint8_t *buf;
491 	uint32_t raw_offset;
492 	int i;
493 
494 	if (table->sectorsize < sizeof (struct disklabel)) {
495 		DEBUG("Too small sectorsize");
496 		return (table);
497 	}
498 	buf = malloc(table->sectorsize);
499 	if (buf == NULL)
500 		return (table);
501 	if (dread(dev, buf, 1, 1) != 0) {
502 		DEBUG("read failed");
503 		ptable_close(table);
504 		table = NULL;
505 		goto out;
506 	}
507 	dl = (struct disklabel *)buf;
508 	if (le32toh(dl->d_magic) != DISKMAGIC &&
509 	    le32toh(dl->d_magic2) != DISKMAGIC)
510 		goto out;
511 	if (le32toh(dl->d_secsize) != table->sectorsize) {
512 		DEBUG("unsupported sector size");
513 		goto out;
514 	}
515 	dl->d_npartitions = le16toh(dl->d_npartitions);
516 	if (dl->d_npartitions > 20 || dl->d_npartitions < 8) {
517 		DEBUG("invalid number of partitions");
518 		goto out;
519 	}
520 	DEBUG("BSD detected");
521 	part = &dl->d_partitions[0];
522 	raw_offset = le32toh(part[RAW_PART].p_offset);
523 	for (i = 0; i < dl->d_npartitions; i++, part++) {
524 		if (i == RAW_PART)
525 			continue;
526 		if (part->p_size == 0)
527 			continue;
528 		entry = malloc(sizeof (*entry));
529 		if (entry == NULL)
530 			break;
531 		entry->part.start = le32toh(part->p_offset) - raw_offset;
532 		entry->part.end = entry->part.start +
533 		    le32toh(part->p_size) - 1;
534 		entry->part.type = bsd_parttype(part->p_fstype);
535 		entry->part.index = i; /* starts from zero */
536 		entry->type.bsd = part->p_fstype;
537 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
538 		DEBUG("new BSD partition added");
539 	}
540 	table->type = PTABLE_BSD;
541 out:
542 	free(buf);
543 	return (table);
544 }
545 
546 #ifdef LOADER_VTOC8_SUPPORT
547 static enum partition_type
548 vtoc8_parttype(uint16_t type)
549 {
550 
551 	switch (type) {
552 	case VTOC_TAG_FREEBSD_NANDFS:
553 		return (PART_FREEBSD_NANDFS);
554 	case VTOC_TAG_FREEBSD_SWAP:
555 		return (PART_FREEBSD_SWAP);
556 	case VTOC_TAG_FREEBSD_UFS:
557 		return (PART_FREEBSD_UFS);
558 	case VTOC_TAG_FREEBSD_VINUM:
559 		return (PART_FREEBSD_VINUM);
560 	case VTOC_TAG_FREEBSD_ZFS:
561 		return (PART_FREEBSD_ZFS);
562 	};
563 	return (PART_UNKNOWN);
564 }
565 
566 static struct ptable *
567 ptable_vtoc8read(struct ptable *table, void *dev, diskread_t dread)
568 {
569 	struct pentry *entry;
570 	struct vtoc8 *dl;
571 	uint8_t *buf;
572 	uint16_t sum, heads, sectors;
573 	int i;
574 
575 	if (table->sectorsize != sizeof (struct vtoc8))
576 		return (table);
577 	buf = malloc(table->sectorsize);
578 	if (buf == NULL)
579 		return (table);
580 	if (dread(dev, buf, 1, 0) != 0) {
581 		DEBUG("read failed");
582 		ptable_close(table);
583 		table = NULL;
584 		goto out;
585 	}
586 	dl = (struct vtoc8 *)buf;
587 	/* Check the sum */
588 	for (i = sum = 0; i < sizeof (struct vtoc8); i += sizeof (sum))
589 		sum ^= be16dec(buf + i);
590 	if (sum != 0) {
591 		DEBUG("incorrect checksum");
592 		goto out;
593 	}
594 	if (be16toh(dl->nparts) != VTOC8_NPARTS) {
595 		DEBUG("invalid number of entries");
596 		goto out;
597 	}
598 	sectors = be16toh(dl->nsecs);
599 	heads = be16toh(dl->nheads);
600 	if (sectors * heads == 0) {
601 		DEBUG("invalid geometry");
602 		goto out;
603 	}
604 	DEBUG("VTOC8 detected");
605 	for (i = 0; i < VTOC8_NPARTS; i++) {
606 		dl->part[i].tag = be16toh(dl->part[i].tag);
607 		if (i == VTOC_RAW_PART ||
608 		    dl->part[i].tag == VTOC_TAG_UNASSIGNED)
609 			continue;
610 		entry = malloc(sizeof (*entry));
611 		if (entry == NULL)
612 			break;
613 		entry->part.start = be32toh(dl->map[i].cyl) * heads * sectors;
614 		entry->part.end = be32toh(dl->map[i].nblks) +
615 		    entry->part.start - 1;
616 		entry->part.type = vtoc8_parttype(dl->part[i].tag);
617 		entry->part.index = i; /* starts from zero */
618 		entry->type.vtoc8 = dl->part[i].tag;
619 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
620 		DEBUG("new VTOC8 partition added");
621 	}
622 	table->type = PTABLE_VTOC8;
623 out:
624 	free(buf);
625 	return (table);
626 
627 }
628 #endif /* LOADER_VTOC8_SUPPORT */
629 
630 static enum partition_type
631 vtoc_parttype(uint16_t type)
632 {
633 	switch (type) {
634 	case VTOC_TAG_BOOT:
635 		return (PART_VTOC_BOOT);
636 	case VTOC_TAG_ROOT:
637 		return (PART_VTOC_ROOT);
638 	case VTOC_TAG_SWAP:
639 		return (PART_VTOC_SWAP);
640 	case VTOC_TAG_USR:
641 		return (PART_VTOC_USR);
642 	case VTOC_TAG_BACKUP:
643 		return (PART_VTOC_BACKUP);
644 	case VTOC_TAG_STAND:
645 		return (PART_VTOC_STAND);
646 	case VTOC_TAG_VAR:
647 		return (PART_VTOC_VAR);
648 	case VTOC_TAG_HOME:
649 		return (PART_VTOC_HOME);
650 	};
651 	return (PART_UNKNOWN);
652 }
653 
654 static struct ptable *
655 ptable_dklabelread(struct ptable *table, void *dev, diskread_t dread)
656 {
657 	struct pentry *entry;
658 	struct dk_label *dl;
659 	struct dk_vtoc *dv;
660 	uint8_t *buf;
661 	int i;
662 
663 	if (table->sectorsize < sizeof (struct dk_label)) {
664 		DEBUG("Too small sectorsize");
665 		return (table);
666 	}
667 	buf = malloc(table->sectorsize);
668 	if (buf == NULL)
669 		return (table);
670 	if (dread(dev, buf, 1, DK_LABEL_LOC) != 0) {
671 		DEBUG("read failed");
672 		ptable_close(table);
673 		table = NULL;
674 		goto out;
675 	}
676 	dl = (struct dk_label *)buf;
677 	dv = (struct dk_vtoc *)&dl->dkl_vtoc;
678 
679 	if (dl->dkl_magic != VTOC_MAGIC) {
680 		DEBUG("dk_label magic error");
681 		goto out;
682 	}
683 	if (dv->v_sanity != VTOC_SANITY) {
684 		DEBUG("this vtoc is not sane");
685 		goto out;
686 	}
687 	if (dv->v_nparts != NDKMAP) {
688 		DEBUG("invalid number of entries");
689 		goto out;
690 	}
691 	DEBUG("VTOC detected");
692 	for (i = 0; i < NDKMAP; i++) {
693 		if (i == VTOC_RAW_PART ||	/* skip slice 2 and empty */
694 		    dv->v_part[i].p_size == 0)
695 			continue;
696 		entry = malloc(sizeof (*entry));
697 		if (entry == NULL)
698 			break;
699 		entry->part.start = dv->v_part[i].p_start;
700 		entry->part.end = dv->v_part[i].p_size +
701 		    entry->part.start - 1;
702 		entry->part.type = vtoc_parttype(dv->v_part[i].p_tag);
703 		entry->part.index = i; /* starts from zero */
704 		entry->type.vtoc = dv->v_part[i].p_tag;
705 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
706 		DEBUG("new VTOC partition added");
707 	}
708 	table->type = PTABLE_VTOC;
709 out:
710 	free(buf);
711 	return (table);
712 }
713 
714 struct ptable *
715 ptable_open(void *dev, uint64_t sectors, uint16_t sectorsize, diskread_t *dread)
716 {
717 	struct dos_partition *dp;
718 	struct ptable *table;
719 	uint8_t *buf;
720 	int i, count;
721 #ifdef LOADER_MBR_SUPPORT
722 	struct pentry *entry;
723 	uint32_t start, end;
724 	int has_ext;
725 #endif
726 	table = NULL;
727 	buf = malloc(sectorsize);
728 	if (buf == NULL)
729 		return (NULL);
730 	/* First, read the MBR. */
731 	if (dread(dev, buf, 1, DOSBBSECTOR) != 0) {
732 		DEBUG("read failed");
733 		goto out;
734 	}
735 
736 	table = malloc(sizeof (*table));
737 	if (table == NULL)
738 		goto out;
739 	table->sectors = sectors;
740 	table->sectorsize = sectorsize;
741 	table->type = PTABLE_NONE;
742 	STAILQ_INIT(&table->entries);
743 
744 	if (ptable_dklabelread(table, dev, dread) == NULL) { /* Read error. */
745 		table = NULL;
746 		goto out;
747 	} else if (table->type == PTABLE_VTOC)
748 		goto out;
749 
750 #ifdef LOADER_VTOC8_SUPPORT
751 	if (be16dec(buf + offsetof(struct vtoc8, magic)) == VTOC_MAGIC) {
752 		if (ptable_vtoc8read(table, dev, dread) == NULL) {
753 			/* Read error. */
754 			table = NULL;
755 			goto out;
756 		} else if (table->type == PTABLE_VTOC8)
757 			goto out;
758 	}
759 #endif
760 	/* Check the BSD label. */
761 	if (ptable_bsdread(table, dev, dread) == NULL) { /* Read error. */
762 		table = NULL;
763 		goto out;
764 	} else if (table->type == PTABLE_BSD)
765 		goto out;
766 
767 #if defined(LOADER_GPT_SUPPORT) || defined(LOADER_MBR_SUPPORT)
768 	/* Check the MBR magic. */
769 	if (buf[DOSMAGICOFFSET] != 0x55 ||
770 	    buf[DOSMAGICOFFSET + 1] != 0xaa) {
771 		DEBUG("magic sequence not found");
772 #if defined(LOADER_GPT_SUPPORT)
773 		/* There is no PMBR, check that we have backup GPT */
774 		table->type = PTABLE_GPT;
775 		table = ptable_gptread(table, dev, dread);
776 #endif
777 		goto out;
778 	}
779 	/* Check that we have PMBR. Also do some validation. */
780 	dp = (struct dos_partition *)(buf + DOSPARTOFF);
781 	for (i = 0, count = 0; i < NDOSPART; i++) {
782 		if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80) {
783 			DEBUG("invalid partition flag %x", dp[i].dp_flag);
784 			goto out;
785 		}
786 #ifdef LOADER_GPT_SUPPORT
787 		if (dp[i].dp_typ == DOSPTYP_PMBR) {
788 			table->type = PTABLE_GPT;
789 			DEBUG("PMBR detected");
790 		}
791 #endif
792 		if (dp[i].dp_typ != 0)
793 			count++;
794 	}
795 	/* Do we have some invalid values? */
796 	if (table->type == PTABLE_GPT && count > 1) {
797 		if (dp[1].dp_typ != DOSPTYP_HFS) {
798 			table->type = PTABLE_NONE;
799 			DEBUG("Incorrect PMBR, ignore it");
800 		} else {
801 			DEBUG("Bootcamp detected");
802 		}
803 	}
804 #ifdef LOADER_GPT_SUPPORT
805 	if (table->type == PTABLE_GPT) {
806 		table = ptable_gptread(table, dev, dread);
807 		goto out;
808 	}
809 #endif
810 #ifdef LOADER_MBR_SUPPORT
811 	/* Read MBR. */
812 	DEBUG("MBR detected");
813 	table->type = PTABLE_MBR;
814 	for (i = has_ext = 0; i < NDOSPART; i++) {
815 		if (dp[i].dp_typ == 0)
816 			continue;
817 		start = le32dec(&(dp[i].dp_start));
818 		end = le32dec(&(dp[i].dp_size));
819 		if (start == 0 || end == 0)
820 			continue;
821 #if 0	/* Some BIOSes return an incorrect number of sectors */
822 		if (start + end - 1 >= sectors)
823 			continue;	/* XXX: ignore */
824 #endif
825 		if (dp[i].dp_typ == DOSPTYP_EXT ||
826 		    dp[i].dp_typ == DOSPTYP_EXTLBA)
827 			has_ext = 1;
828 		entry = malloc(sizeof (*entry));
829 		if (entry == NULL)
830 			break;
831 		entry->part.start = start;
832 		entry->part.end = start + end - 1;
833 		entry->part.index = i + 1;
834 		entry->part.type = mbr_parttype(dp[i].dp_typ);
835 		entry->flags = dp[i].dp_flag;
836 		entry->type.mbr = dp[i].dp_typ;
837 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
838 		DEBUG("new MBR partition added");
839 	}
840 	if (has_ext) {
841 		table = ptable_ebrread(table, dev, dread);
842 		/* FALLTHROUGH */
843 	}
844 #endif /* LOADER_MBR_SUPPORT */
845 #endif /* LOADER_MBR_SUPPORT || LOADER_GPT_SUPPORT */
846 out:
847 	free(buf);
848 	return (table);
849 }
850 
851 void
852 ptable_close(struct ptable *table)
853 {
854 	struct pentry *entry;
855 
856 	while (!STAILQ_EMPTY(&table->entries)) {
857 		entry = STAILQ_FIRST(&table->entries);
858 		STAILQ_REMOVE_HEAD(&table->entries, entry);
859 		free(entry);
860 	}
861 	free(table);
862 }
863 
864 enum ptable_type
865 ptable_gettype(const struct ptable *table)
866 {
867 
868 	return (table->type);
869 }
870 
871 int
872 ptable_getsize(const struct ptable *table, uint64_t *sizep)
873 {
874 	uint64_t tmp = table->sectors * table->sectorsize;
875 
876 	if (tmp < table->sectors)
877 		return (EOVERFLOW);
878 
879 	if (sizep != NULL)
880 		*sizep = tmp;
881 	return (0);
882 }
883 
884 int
885 ptable_getpart(const struct ptable *table, struct ptable_entry *part, int idx)
886 {
887 	struct pentry *entry;
888 
889 	if (part == NULL || table == NULL)
890 		return (EINVAL);
891 
892 	STAILQ_FOREACH(entry, &table->entries, entry) {
893 		if (entry->part.index != idx)
894 			continue;
895 		memcpy(part, &entry->part, sizeof (*part));
896 		return (0);
897 	}
898 	return (ENOENT);
899 }
900 
901 /*
902  * Search for a slice with the following preferences:
903  *
904  * 1: Active illumos slice
905  * 2: Non-active illumos slice
906  * 3: Active Linux slice
907  * 4: non-active Linux slice
908  * 5: Active FAT/FAT32 slice
909  * 6: non-active FAT/FAT32 slice
910  */
911 #define	PREF_RAWDISK	0
912 #define	PREF_ILLUMOS_ACT	1
913 #define	PREF_ILLUMOS	2
914 #define	PREF_LINUX_ACT	3
915 #define	PREF_LINUX	4
916 #define	PREF_DOS_ACT	5
917 #define	PREF_DOS	6
918 #define	PREF_NONE	7
919 int
920 ptable_getbestpart(const struct ptable *table, struct ptable_entry *part)
921 {
922 	struct pentry *entry, *best;
923 	int pref, preflevel;
924 
925 	if (part == NULL || table == NULL)
926 		return (EINVAL);
927 
928 	best = NULL;
929 	preflevel = pref = PREF_NONE;
930 	STAILQ_FOREACH(entry, &table->entries, entry) {
931 #ifdef LOADER_MBR_SUPPORT
932 		if (table->type == PTABLE_MBR) {
933 			switch (entry->type.mbr) {
934 			case DOSPTYP_SUNIXOS2:
935 				pref = entry->flags & 0x80 ? PREF_ILLUMOS_ACT:
936 				    PREF_ILLUMOS;
937 				break;
938 			case DOSPTYP_LINUX:
939 				pref = entry->flags & 0x80 ? PREF_LINUX_ACT:
940 				    PREF_LINUX;
941 				break;
942 			case 0x01:		/* DOS/Windows */
943 			case 0x04:
944 			case 0x06:
945 			case 0x0c:
946 			case 0x0e:
947 			case DOSPTYP_FAT32:
948 				pref = entry->flags & 0x80 ? PREF_DOS_ACT:
949 				    PREF_DOS;
950 				break;
951 			default:
952 				pref = PREF_NONE;
953 			}
954 		}
955 #endif /* LOADER_MBR_SUPPORT */
956 #ifdef LOADER_GPT_SUPPORT
957 		if (table->type == PTABLE_GPT) {
958 			if (entry->part.type == PART_DOS)
959 				pref = PREF_DOS;
960 			else if (entry->part.type == PART_ILLUMOS_ZFS)
961 				pref = PREF_ILLUMOS;
962 			else
963 				pref = PREF_NONE;
964 		}
965 #endif /* LOADER_GPT_SUPPORT */
966 		if (pref < preflevel) {
967 			preflevel = pref;
968 			best = entry;
969 		}
970 	}
971 	if (best != NULL) {
972 		memcpy(part, &best->part, sizeof (*part));
973 		return (0);
974 	}
975 	return (ENOENT);
976 }
977 
978 /*
979  * iterate will stop if iterator will return non 0.
980  */
981 int
982 ptable_iterate(const struct ptable *table, void *arg, ptable_iterate_t *iter)
983 {
984 	struct pentry *entry;
985 	char name[32];
986 	int ret = 0;
987 
988 	name[0] = '\0';
989 	STAILQ_FOREACH(entry, &table->entries, entry) {
990 #ifdef LOADER_MBR_SUPPORT
991 		if (table->type == PTABLE_MBR)
992 			sprintf(name, "s%d", entry->part.index);
993 		else
994 #endif
995 #ifdef LOADER_GPT_SUPPORT
996 		if (table->type == PTABLE_GPT)
997 			sprintf(name, "p%d", entry->part.index);
998 		else
999 #endif
1000 #ifdef LOADER_VTOC8_SUPPORT
1001 		if (table->type == PTABLE_VTOC8)
1002 			sprintf(name, "%c", (uint8_t)'a' +
1003 			    entry->part.index);
1004 		else
1005 #endif
1006 		if (table->type == PTABLE_VTOC)
1007 			sprintf(name, "%c", (uint8_t)'a' +
1008 			    entry->part.index);
1009 		else
1010 		if (table->type == PTABLE_BSD)
1011 			sprintf(name, "%c", (uint8_t)'a' +
1012 			    entry->part.index);
1013 		ret = iter(arg, name, &entry->part);
1014 		if (ret != 0)
1015 			return (ret);
1016 	}
1017 	return (ret);
1018 }
1019