xref: /illumos-gate/usr/src/boot/common/part.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 
29 #include <stand.h>
30 #include <stddef.h>
31 #include <sys/param.h>
32 #include <sys/diskmbr.h>
33 #include <sys/disklabel.h>
34 #include <sys/endian.h>
35 #include <sys/gpt.h>
36 #include <sys/queue.h>
37 #include <sys/vtoc.h>
38 
39 #include <fs/cd9660/iso.h>
40 
41 #include <zlib.h>
42 #include <part.h>
43 #include <uuid.h>
44 
/*
 * Debug tracing helper.  When PART_DEBUG is defined, DPRINTF() prints the
 * name of the calling function, the formatted message and a newline;
 * otherwise it expands to an expression with no effect.
 */
#ifdef PART_DEBUG
#define	DPRINTF(fmt, args...) printf("%s: " fmt "\n", __func__, ## args)
#else
#define	DPRINTF(fmt, args...)	((void)0)
#endif
50 
#ifdef LOADER_GPT_SUPPORT
/*
 * Upper bound, in sectors, on how much of a GPT entry table is read into
 * memory; larger tables are only partially parsed.
 */
#define	MAXTBLSZ	64
/*
 * GPT partition type UUIDs that this file compares entry types against
 * (see gpt_parttype() and the "unused" checks in the GPT readers).
 */
static const uuid_t gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;
static const uuid_t gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA;
static const uuid_t gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
static const uuid_t gpt_uuid_efi = GPT_ENT_TYPE_EFI;
static const uuid_t gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
static const uuid_t gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT;
static const uuid_t gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
static const uuid_t gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;
static const uuid_t gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
static const uuid_t gpt_uuid_illumos_boot = GPT_ENT_TYPE_ILLUMOS_BOOT;
static const uuid_t gpt_uuid_illumos_ufs = GPT_ENT_TYPE_ILLUMOS_UFS;
static const uuid_t gpt_uuid_illumos_zfs = GPT_ENT_TYPE_ILLUMOS_ZFS;
static const uuid_t gpt_uuid_reserved = GPT_ENT_TYPE_RESERVED;
static const uuid_t gpt_uuid_apple_apfs = GPT_ENT_TYPE_APPLE_APFS;
#endif
68 
/*
 * One partition as kept on a table's entry list.
 */
struct pentry {
	/* Scheme-independent description; copied out to callers. */
	struct ptable_entry	part;
	/*
	 * Scheme-specific flags: the MBR/EBR readers store dp_flag here and
	 * the GPT reader stores the entry attributes.  The BSD, VTOC, VTOC8
	 * and ISO9660 readers in this file do not set it.
	 */
	uint64_t		flags;
	/* Raw partition type exactly as found in the on-disk scheme. */
	union {
		uint8_t bsd;
		uint8_t	mbr;
		uuid_t	gpt;
		uint16_t vtoc8;
		uint16_t vtoc;
	} type;
	/* Linkage on struct ptable's entries list. */
	STAILQ_ENTRY(pentry)	entry;
};
81 
/*
 * An opened partition table: the detected scheme, the disk geometry it was
 * opened with, and the list of partitions found.
 */
struct ptable {
	/* Detected scheme; PTABLE_NONE when nothing was recognized. */
	enum ptable_type	type;
	/* Sector size in bytes, as passed to ptable_open(). */
	uint16_t		sectorsize;
	/* Disk size in sectors; the GPT reader may adjust this value. */
	uint64_t		sectors;

	/* Partitions in the order they were discovered. */
	STAILQ_HEAD(, pentry)	entries;
};
89 
90 static struct parttypes {
91 	enum partition_type	type;
92 	const char		*desc;
93 } ptypes[] = {
94 	{ PART_UNKNOWN,		"Unknown" },
95 	{ PART_EFI,		"EFI" },
96 	{ PART_FREEBSD,		"FreeBSD" },
97 	{ PART_FREEBSD_BOOT,	"FreeBSD boot" },
98 	{ PART_FREEBSD_UFS,	"FreeBSD UFS" },
99 	{ PART_FREEBSD_ZFS,	"FreeBSD ZFS" },
100 	{ PART_FREEBSD_SWAP,	"FreeBSD swap" },
101 	{ PART_FREEBSD_VINUM,	"FreeBSD vinum" },
102 	{ PART_LINUX,		"Linux" },
103 	{ PART_LINUX_SWAP,	"Linux swap" },
104 	{ PART_DOS,		"DOS/Windows" },
105 	{ PART_ISO9660,		"ISO9660" },
106 	{ PART_SOLARIS2,	"Solaris 2" },
107 	{ PART_ILLUMOS_UFS,	"illumos UFS" },
108 	{ PART_ILLUMOS_ZFS,	"illumos ZFS" },
109 	{ PART_RESERVED,	"Reserved" },
110 	{ PART_VTOC_BOOT,	"boot" },
111 	{ PART_VTOC_ROOT,	"root" },
112 	{ PART_VTOC_SWAP,	"swap" },
113 	{ PART_VTOC_USR,	"usr" },
114 	{ PART_VTOC_STAND,	"stand" },
115 	{ PART_VTOC_VAR,	"var" },
116 	{ PART_VTOC_HOME,	"home" },
117 	{ PART_APFS,		"APFS" }
118 };
119 
120 const char *
121 parttype2str(enum partition_type type)
122 {
123 	size_t i;
124 
125 	for (i = 0; i < nitems(ptypes); i++)
126 		if (ptypes[i].type == type)
127 			return (ptypes[i].desc);
128 	return (ptypes[0].desc);
129 }
130 
131 #ifdef LOADER_GPT_SUPPORT
132 static void
133 uuid_letoh(uuid_t *uuid)
134 {
135 
136 	uuid->time_low = le32toh(uuid->time_low);
137 	uuid->time_mid = le16toh(uuid->time_mid);
138 	uuid->time_hi_and_version = le16toh(uuid->time_hi_and_version);
139 }
140 
141 static enum partition_type
142 gpt_parttype(uuid_t type)
143 {
144 
145 	if (uuid_equal(&type, &gpt_uuid_efi, NULL))
146 		return (PART_EFI);
147 	else if (uuid_equal(&type, &gpt_uuid_ms_basic_data, NULL))
148 		return (PART_DOS);
149 	else if (uuid_equal(&type, &gpt_uuid_freebsd_boot, NULL))
150 		return (PART_FREEBSD_BOOT);
151 	else if (uuid_equal(&type, &gpt_uuid_freebsd_ufs, NULL))
152 		return (PART_FREEBSD_UFS);
153 	else if (uuid_equal(&type, &gpt_uuid_freebsd_zfs, NULL))
154 		return (PART_FREEBSD_ZFS);
155 	else if (uuid_equal(&type, &gpt_uuid_freebsd_swap, NULL))
156 		return (PART_FREEBSD_SWAP);
157 	else if (uuid_equal(&type, &gpt_uuid_freebsd_vinum, NULL))
158 		return (PART_FREEBSD_VINUM);
159 	else if (uuid_equal(&type, &gpt_uuid_freebsd, NULL))
160 		return (PART_FREEBSD);
161 	else if (uuid_equal(&type, &gpt_uuid_illumos_boot, NULL))
162 		return (PART_VTOC_BOOT);
163 	else if (uuid_equal(&type, &gpt_uuid_illumos_ufs, NULL))
164 		return (PART_ILLUMOS_UFS);
165 	else if (uuid_equal(&type, &gpt_uuid_illumos_zfs, NULL))
166 		return (PART_ILLUMOS_ZFS);
167 	else if (uuid_equal(&type, &gpt_uuid_reserved, NULL))
168 		return (PART_RESERVED);
169 	else if (uuid_equal(&type, &gpt_uuid_apple_apfs, NULL))
170 		return (PART_APFS);
171 	return (PART_UNKNOWN);
172 }
173 
174 static struct gpt_hdr *
175 gpt_checkhdr(struct gpt_hdr *hdr, uint64_t lba_self,
176     uint64_t lba_last __attribute((unused)), uint16_t sectorsize)
177 {
178 	uint32_t sz, crc;
179 
180 	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof (hdr->hdr_sig)) != 0) {
181 		DPRINTF("no GPT signature");
182 		return (NULL);
183 	}
184 	sz = le32toh(hdr->hdr_size);
185 	if (sz < 92 || sz > sectorsize) {
186 		DPRINTF("invalid GPT header size: %u", sz);
187 		return (NULL);
188 	}
189 	crc = le32toh(hdr->hdr_crc_self);
190 	hdr->hdr_crc_self = crc32(0, Z_NULL, 0);
191 	if (crc32(hdr->hdr_crc_self, (const Bytef *)hdr, sz) != crc) {
192 		DPRINTF("GPT header's CRC doesn't match");
193 		return (NULL);
194 	}
195 	hdr->hdr_crc_self = crc;
196 	hdr->hdr_revision = le32toh(hdr->hdr_revision);
197 	if (hdr->hdr_revision < GPT_HDR_REVISION) {
198 		DPRINTF("unsupported GPT revision %u", hdr->hdr_revision);
199 		return (NULL);
200 	}
201 	hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
202 	if (hdr->hdr_lba_self != lba_self) {
203 		DPRINTF("self LBA doesn't match");
204 		return (NULL);
205 	}
206 	hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
207 	if (hdr->hdr_lba_alt == hdr->hdr_lba_self) {
208 		DPRINTF("invalid alternate LBA");
209 		return (NULL);
210 	}
211 	hdr->hdr_entries = le32toh(hdr->hdr_entries);
212 	hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
213 	if (hdr->hdr_entries == 0 ||
214 	    hdr->hdr_entsz < sizeof (struct gpt_ent) ||
215 	    sectorsize % hdr->hdr_entsz != 0) {
216 		DPRINTF("invalid entry size or number of entries");
217 		return (NULL);
218 	}
219 	hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
220 	hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
221 	hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
222 	hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
223 	uuid_letoh(&hdr->hdr_uuid);
224 	return (hdr);
225 }
226 
227 static int
228 gpt_checktbl(const struct gpt_hdr *hdr, uint8_t *tbl, size_t size,
229     uint64_t lba_last __attribute((unused)))
230 {
231 	struct gpt_ent *ent;
232 	uint32_t i, cnt;
233 
234 	cnt = size / hdr->hdr_entsz;
235 	if (hdr->hdr_entries <= cnt) {
236 		cnt = hdr->hdr_entries;
237 		/* Check CRC only when buffer size is enough for table. */
238 		if (hdr->hdr_crc_table !=
239 		    crc32(0, tbl, hdr->hdr_entries * hdr->hdr_entsz)) {
240 			DPRINTF("GPT table's CRC doesn't match");
241 			return (-1);
242 		}
243 	}
244 	for (i = 0; i < cnt; i++) {
245 		ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
246 		uuid_letoh(&ent->ent_type);
247 		if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
248 			continue;
249 		ent->ent_lba_start = le64toh(ent->ent_lba_start);
250 		ent->ent_lba_end = le64toh(ent->ent_lba_end);
251 	}
252 	return (0);
253 }
254 
255 static struct ptable *
256 ptable_gptread(struct ptable *table, void *dev, diskread_t dread)
257 {
258 	struct pentry *entry;
259 	struct gpt_hdr *phdr, hdr;
260 	struct gpt_ent *ent;
261 	uint8_t *buf, *tbl;
262 	uint64_t offset;
263 	int pri, sec;
264 	size_t size, i;
265 
266 	buf = malloc(table->sectorsize);
267 	if (buf == NULL)
268 		return (NULL);
269 	tbl = malloc(table->sectorsize * MAXTBLSZ);
270 	if (tbl == NULL) {
271 		free(buf);
272 		return (NULL);
273 	}
274 	/* Read the primary GPT header. */
275 	if (dread(dev, buf, 1, 1) != 0) {
276 		ptable_close(table);
277 		table = NULL;
278 		goto out;
279 	}
280 	pri = sec = 0;
281 	/* Check the primary GPT header. */
282 	phdr = gpt_checkhdr((struct gpt_hdr *)buf, 1, table->sectors - 1,
283 	    table->sectorsize);
284 	if (phdr != NULL) {
285 		/* Read the primary GPT table. */
286 		size = MIN(MAXTBLSZ, (phdr->hdr_entries * phdr->hdr_entsz +
287 		    table->sectorsize - 1) / table->sectorsize);
288 		if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
289 		    gpt_checktbl(phdr, tbl, size * table->sectorsize,
290 		    table->sectors - 1) == 0) {
291 			memcpy(&hdr, phdr, sizeof (hdr));
292 			pri = 1;
293 		}
294 	}
295 	offset = pri ? hdr.hdr_lba_alt: table->sectors - 1;
296 	/* Read the backup GPT header. */
297 	if (dread(dev, buf, 1, offset) != 0)
298 		phdr = NULL;
299 	else
300 		phdr = gpt_checkhdr((struct gpt_hdr *)buf, offset,
301 		    table->sectors - 1, table->sectorsize);
302 	if (phdr != NULL) {
303 		/*
304 		 * Compare primary and backup headers.
305 		 * If they are equal, then we do not need to read backup
306 		 * table. If they are different, then prefer backup header
307 		 * and try to read backup table.
308 		 */
309 		if (pri == 0 ||
310 		    uuid_equal(&hdr.hdr_uuid, &phdr->hdr_uuid, NULL) == 0 ||
311 		    hdr.hdr_revision != phdr->hdr_revision ||
312 		    hdr.hdr_size != phdr->hdr_size ||
313 		    hdr.hdr_lba_start != phdr->hdr_lba_start ||
314 		    hdr.hdr_lba_end != phdr->hdr_lba_end ||
315 		    hdr.hdr_entries != phdr->hdr_entries ||
316 		    hdr.hdr_entsz != phdr->hdr_entsz ||
317 		    hdr.hdr_crc_table != phdr->hdr_crc_table) {
318 			/* Read the backup GPT table. */
319 			size = MIN(MAXTBLSZ, (phdr->hdr_entries *
320 			    phdr->hdr_entsz + table->sectorsize - 1) /
321 			    table->sectorsize);
322 			if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
323 			    gpt_checktbl(phdr, tbl, size * table->sectorsize,
324 			    table->sectors - 1) == 0) {
325 				memcpy(&hdr, phdr, sizeof (hdr));
326 				sec = 1;
327 			}
328 		}
329 	}
330 	if (pri == 0 && sec == 0) {
331 		/* Both primary and backup tables are invalid. */
332 		table->type = PTABLE_NONE;
333 		goto out;
334 	}
335 	DPRINTF("GPT detected");
336 	size = MIN(hdr.hdr_entries * hdr.hdr_entsz,
337 	    MAXTBLSZ * table->sectorsize);
338 
339 	/*
340 	 * If the disk's sector count is smaller than the sector count recorded
341 	 * in the disk's GPT table header, set the table->sectors to the value
342 	 * recorded in GPT tables. This is done to work around buggy firmware
343 	 * that returns truncated disk sizes.
344 	 *
345 	 * Note, this is still not a foolproof way to get disk's size. For
346 	 * example, an image file can be truncated when copied to smaller media.
347 	 */
348 	table->sectors = hdr.hdr_lba_alt + 1;
349 
350 	for (i = 0; i < size / hdr.hdr_entsz; i++) {
351 		ent = (struct gpt_ent *)(tbl + i * hdr.hdr_entsz);
352 		if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
353 			continue;
354 
355 		/* Simple sanity checks. */
356 		if (ent->ent_lba_start < hdr.hdr_lba_start ||
357 		    ent->ent_lba_end > hdr.hdr_lba_end ||
358 		    ent->ent_lba_start > ent->ent_lba_end)
359 			continue;
360 
361 		entry = malloc(sizeof (*entry));
362 		if (entry == NULL)
363 			break;
364 		entry->part.start = ent->ent_lba_start;
365 		entry->part.end = ent->ent_lba_end;
366 		entry->part.index = i + 1;
367 		entry->part.type = gpt_parttype(ent->ent_type);
368 		entry->flags = le64toh(ent->ent_attr);
369 		memcpy(&entry->type.gpt, &ent->ent_type, sizeof (uuid_t));
370 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
371 		DPRINTF("new GPT partition added");
372 	}
373 out:
374 	free(buf);
375 	free(tbl);
376 	return (table);
377 }
378 #endif /* LOADER_GPT_SUPPORT */
379 
380 #ifdef LOADER_MBR_SUPPORT
/*
 * We do not need to support too many EBR partitions in the loader.
 * This caps the number of EBR sectors ptable_ebrread() will read.
 */
#define	MAXEBRENTRIES		8
383 static enum partition_type
384 mbr_parttype(uint8_t type)
385 {
386 
387 	switch (type) {
388 	case DOSPTYP_386BSD:
389 		return (PART_FREEBSD);
390 	case DOSPTYP_LINSWP:
391 		return (PART_LINUX_SWAP);
392 	case DOSPTYP_LINUX:
393 		return (PART_LINUX);
394 	case DOSPTYP_SUNIXOS2:
395 		return (PART_SOLARIS2);
396 	case 0x01:
397 	case 0x04:
398 	case 0x06:
399 	case 0x07:
400 	case 0x0b:
401 	case 0x0c:
402 	case 0x0e:
403 		return (PART_DOS);
404 	}
405 	return (PART_UNKNOWN);
406 }
407 
408 static struct ptable *
409 ptable_ebrread(struct ptable *table, void *dev, diskread_t dread)
410 {
411 	struct dos_partition *dp;
412 	struct pentry *e1, *entry;
413 	uint32_t start, end, offset;
414 	uint8_t *buf;
415 	int i, idx;
416 
417 	STAILQ_FOREACH(e1, &table->entries, entry) {
418 		if (e1->type.mbr == DOSPTYP_EXT ||
419 		    e1->type.mbr == DOSPTYP_EXTLBA)
420 			break;
421 	}
422 	if (e1 == NULL)
423 		return (table);
424 	idx = 5;
425 	offset = e1->part.start;
426 	buf = malloc(table->sectorsize);
427 	if (buf == NULL)
428 		return (table);
429 	DPRINTF("EBR detected");
430 	for (i = 0; i < MAXEBRENTRIES; i++) {
431 #if 0	/* Some BIOSes return an incorrect number of sectors */
432 		if (offset >= table->sectors)
433 			break;
434 #endif
435 		if (dread(dev, buf, 1, offset) != 0)
436 			break;
437 		dp = (struct dos_partition *)(buf + DOSPARTOFF);
438 		if (dp[0].dp_typ == 0)
439 			break;
440 		start = le32toh(dp[0].dp_start);
441 		if (dp[0].dp_typ == DOSPTYP_EXT &&
442 		    dp[1].dp_typ == 0) {
443 			offset = e1->part.start + start;
444 			continue;
445 		}
446 		end = le32toh(dp[0].dp_size);
447 		entry = malloc(sizeof (*entry));
448 		if (entry == NULL)
449 			break;
450 		entry->part.start = offset + start;
451 		entry->part.end = entry->part.start + end - 1;
452 		entry->part.index = idx++;
453 		entry->part.type = mbr_parttype(dp[0].dp_typ);
454 		entry->flags = dp[0].dp_flag;
455 		entry->type.mbr = dp[0].dp_typ;
456 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
457 		DPRINTF("new EBR partition added");
458 		if (dp[1].dp_typ == 0)
459 			break;
460 		offset = e1->part.start + le32toh(dp[1].dp_start);
461 	}
462 	free(buf);
463 	return (table);
464 }
465 #endif /* LOADER_MBR_SUPPORT */
466 
467 static enum partition_type
468 bsd_parttype(uint8_t type)
469 {
470 
471 	switch (type) {
472 	case FS_SWAP:
473 		return (PART_FREEBSD_SWAP);
474 	case FS_BSDFFS:
475 		return (PART_FREEBSD_UFS);
476 	case FS_VINUM:
477 		return (PART_FREEBSD_VINUM);
478 	case FS_ZFS:
479 		return (PART_FREEBSD_ZFS);
480 	}
481 	return (PART_UNKNOWN);
482 }
483 
484 static struct ptable *
485 ptable_bsdread(struct ptable *table, void *dev, diskread_t dread)
486 {
487 	struct disklabel *dl;
488 	struct partition *part;
489 	struct pentry *entry;
490 	uint8_t *buf;
491 	uint32_t raw_offset;
492 	int i;
493 
494 	if (table->sectorsize < sizeof (struct disklabel)) {
495 		DPRINTF("Too small sectorsize");
496 		return (table);
497 	}
498 	buf = malloc(table->sectorsize);
499 	if (buf == NULL)
500 		return (table);
501 	if (dread(dev, buf, 1, 1) != 0) {
502 		DPRINTF("read failed");
503 		ptable_close(table);
504 		table = NULL;
505 		goto out;
506 	}
507 	dl = (struct disklabel *)buf;
508 	if (le32toh(dl->d_magic) != DISKMAGIC &&
509 	    le32toh(dl->d_magic2) != DISKMAGIC)
510 		goto out;
511 	if (le32toh(dl->d_secsize) != table->sectorsize) {
512 		DPRINTF("unsupported sector size");
513 		goto out;
514 	}
515 	dl->d_npartitions = le16toh(dl->d_npartitions);
516 	if (dl->d_npartitions > 20 || dl->d_npartitions < 8) {
517 		DPRINTF("invalid number of partitions");
518 		goto out;
519 	}
520 	DPRINTF("BSD detected");
521 	part = &dl->d_partitions[0];
522 	raw_offset = le32toh(part[RAW_PART].p_offset);
523 	for (i = 0; i < dl->d_npartitions; i++, part++) {
524 		if (i == RAW_PART)
525 			continue;
526 		if (part->p_size == 0)
527 			continue;
528 		entry = malloc(sizeof (*entry));
529 		if (entry == NULL)
530 			break;
531 		entry->part.start = le32toh(part->p_offset) - raw_offset;
532 		entry->part.end = entry->part.start +
533 		    le32toh(part->p_size) - 1;
534 		entry->part.type = bsd_parttype(part->p_fstype);
535 		entry->part.index = i; /* starts from zero */
536 		entry->type.bsd = part->p_fstype;
537 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
538 		DPRINTF("new BSD partition added");
539 	}
540 	table->type = PTABLE_BSD;
541 out:
542 	free(buf);
543 	return (table);
544 }
545 
546 #ifdef LOADER_VTOC8_SUPPORT
547 static enum partition_type
548 vtoc8_parttype(uint16_t type)
549 {
550 
551 	switch (type) {
552 	case VTOC_TAG_FREEBSD_SWAP:
553 		return (PART_FREEBSD_SWAP);
554 	case VTOC_TAG_FREEBSD_UFS:
555 		return (PART_FREEBSD_UFS);
556 	case VTOC_TAG_FREEBSD_VINUM:
557 		return (PART_FREEBSD_VINUM);
558 	case VTOC_TAG_FREEBSD_ZFS:
559 		return (PART_FREEBSD_ZFS);
560 	};
561 	return (PART_UNKNOWN);
562 }
563 
564 static struct ptable *
565 ptable_vtoc8read(struct ptable *table, void *dev, diskread_t dread)
566 {
567 	struct pentry *entry;
568 	struct vtoc8 *dl;
569 	uint8_t *buf;
570 	uint16_t sum, heads, sectors;
571 	int i;
572 
573 	if (table->sectorsize != sizeof (struct vtoc8))
574 		return (table);
575 	buf = malloc(table->sectorsize);
576 	if (buf == NULL)
577 		return (table);
578 	if (dread(dev, buf, 1, 0) != 0) {
579 		DPRINTF("read failed");
580 		ptable_close(table);
581 		table = NULL;
582 		goto out;
583 	}
584 	dl = (struct vtoc8 *)buf;
585 	/* Check the sum */
586 	for (i = sum = 0; i < sizeof (struct vtoc8); i += sizeof (sum))
587 		sum ^= be16dec(buf + i);
588 	if (sum != 0) {
589 		DPRINTF("incorrect checksum");
590 		goto out;
591 	}
592 	if (be16toh(dl->nparts) != VTOC8_NPARTS) {
593 		DPRINTF("invalid number of entries");
594 		goto out;
595 	}
596 	sectors = be16toh(dl->nsecs);
597 	heads = be16toh(dl->nheads);
598 	if (sectors * heads == 0) {
599 		DPRINTF("invalid geometry");
600 		goto out;
601 	}
602 	DPRINTF("VTOC8 detected");
603 	for (i = 0; i < VTOC8_NPARTS; i++) {
604 		dl->part[i].tag = be16toh(dl->part[i].tag);
605 		if (i == VTOC_RAW_PART ||
606 		    dl->part[i].tag == VTOC_TAG_UNASSIGNED)
607 			continue;
608 		entry = malloc(sizeof (*entry));
609 		if (entry == NULL)
610 			break;
611 		entry->part.start = be32toh(dl->map[i].cyl) * heads * sectors;
612 		entry->part.end = be32toh(dl->map[i].nblks) +
613 		    entry->part.start - 1;
614 		entry->part.type = vtoc8_parttype(dl->part[i].tag);
615 		entry->part.index = i; /* starts from zero */
616 		entry->type.vtoc8 = dl->part[i].tag;
617 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
618 		DPRINTF("new VTOC8 partition added");
619 	}
620 	table->type = PTABLE_VTOC8;
621 out:
622 	free(buf);
623 	return (table);
624 
625 }
626 #endif /* LOADER_VTOC8_SUPPORT */
627 
628 static enum partition_type
629 vtoc_parttype(uint16_t type)
630 {
631 	switch (type) {
632 	case VTOC_TAG_BOOT:
633 		return (PART_VTOC_BOOT);
634 	case VTOC_TAG_ROOT:
635 		return (PART_VTOC_ROOT);
636 	case VTOC_TAG_SWAP:
637 		return (PART_VTOC_SWAP);
638 	case VTOC_TAG_USR:
639 		return (PART_VTOC_USR);
640 	case VTOC_TAG_BACKUP:
641 		return (PART_VTOC_BACKUP);
642 	case VTOC_TAG_STAND:
643 		return (PART_VTOC_STAND);
644 	case VTOC_TAG_VAR:
645 		return (PART_VTOC_VAR);
646 	case VTOC_TAG_HOME:
647 		return (PART_VTOC_HOME);
648 	};
649 	return (PART_UNKNOWN);
650 }
651 
652 static struct ptable *
653 ptable_dklabelread(struct ptable *table, void *dev, diskread_t dread)
654 {
655 	struct pentry *entry;
656 	struct dk_label *dl;
657 	struct dk_vtoc *dv;
658 	uint8_t *buf;
659 	int i;
660 
661 	if (table->sectorsize < sizeof (struct dk_label)) {
662 		DPRINTF("Too small sectorsize");
663 		return (table);
664 	}
665 	buf = malloc(table->sectorsize);
666 	if (buf == NULL)
667 		return (table);
668 	if (dread(dev, buf, 1, DK_LABEL_LOC) != 0) {
669 		DPRINTF("read failed");
670 		ptable_close(table);
671 		table = NULL;
672 		goto out;
673 	}
674 	dl = (struct dk_label *)buf;
675 	dv = (struct dk_vtoc *)&dl->dkl_vtoc;
676 
677 	if (dl->dkl_magic != VTOC_MAGIC) {
678 		DPRINTF("dk_label magic error");
679 		goto out;
680 	}
681 	if (dv->v_sanity != VTOC_SANITY) {
682 		DPRINTF("this vtoc is not sane");
683 		goto out;
684 	}
685 	if (dv->v_nparts != NDKMAP) {
686 		DPRINTF("invalid number of entries");
687 		goto out;
688 	}
689 	DPRINTF("VTOC detected");
690 	for (i = 0; i < NDKMAP; i++) {
691 		if (i == VTOC_RAW_PART ||	/* skip slice 2 and empty */
692 		    dv->v_part[i].p_size == 0)
693 			continue;
694 		entry = malloc(sizeof (*entry));
695 		if (entry == NULL)
696 			break;
697 		entry->part.start = dv->v_part[i].p_start;
698 		entry->part.end = dv->v_part[i].p_size +
699 		    entry->part.start - 1;
700 		entry->part.type = vtoc_parttype(dv->v_part[i].p_tag);
701 		entry->part.index = i; /* starts from zero */
702 		entry->type.vtoc = dv->v_part[i].p_tag;
703 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
704 		DPRINTF("new VTOC partition added");
705 	}
706 	table->type = PTABLE_VTOC;
707 out:
708 	free(buf);
709 	return (table);
710 }
711 
/*
 * Convert a block number counted in ISO_DEFAULT_BLOCK_SIZE units into a
 * device sector number.  Uses a variable named "table" from the scope in
 * which the macro is expanded.
 */
#define	cdb2devb(bno)	((bno) * ISO_DEFAULT_BLOCK_SIZE / table->sectorsize)
713 
714 static struct ptable *
715 ptable_iso9660read(struct ptable *table, void *dev, diskread_t dread)
716 {
717 	uint8_t *buf;
718 	struct iso_primary_descriptor *vd;
719 	struct pentry *entry;
720 
721 	buf = malloc(table->sectorsize);
722 	if (buf == NULL)
723 		return (table);
724 
725 	if (dread(dev, buf, 1, cdb2devb(16)) != 0) {
726 		DPRINTF("read failed");
727 		ptable_close(table);
728 		table = NULL;
729 		goto out;
730 	}
731 	vd = (struct iso_primary_descriptor *)buf;
732 	if (bcmp(vd->id, ISO_STANDARD_ID, sizeof (vd->id)) != 0)
733 		goto out;
734 
735 	entry = malloc(sizeof (*entry));
736 	if (entry == NULL)
737 		goto out;
738 	entry->part.start = 0;
739 	entry->part.end = table->sectors;
740 	entry->part.type = PART_ISO9660;
741 	entry->part.index = 0;
742 	STAILQ_INSERT_TAIL(&table->entries, entry, entry);
743 
744 	table->type = PTABLE_ISO9660;
745 
746 out:
747 	free(buf);
748 	return (table);
749 }
750 
751 struct ptable *
752 ptable_open(void *dev, uint64_t sectors, uint16_t sectorsize, diskread_t *dread)
753 {
754 	struct dos_partition *dp;
755 	struct ptable *table;
756 	uint8_t *buf;
757 	int i;
758 #ifdef LOADER_MBR_SUPPORT
759 	struct pentry *entry;
760 	uint32_t start, end;
761 	int has_ext;
762 #endif
763 	table = NULL;
764 	dp = NULL;
765 	buf = malloc(sectorsize);
766 	if (buf == NULL)
767 		return (NULL);
768 	/* First, read the MBR. */
769 	if (dread(dev, buf, 1, DOSBBSECTOR) != 0) {
770 		DPRINTF("read failed");
771 		goto out;
772 	}
773 
774 	table = malloc(sizeof (*table));
775 	if (table == NULL)
776 		goto out;
777 	table->sectors = sectors;
778 	table->sectorsize = sectorsize;
779 	table->type = PTABLE_NONE;
780 	STAILQ_INIT(&table->entries);
781 
782 	if (ptable_iso9660read(table, dev, dread) == NULL) {
783 		/* Read error. */
784 		table = NULL;
785 		goto out;
786 	} else if (table->type == PTABLE_ISO9660)
787 		goto out;
788 
789 	if (ptable_dklabelread(table, dev, dread) == NULL) { /* Read error. */
790 		table = NULL;
791 		goto out;
792 	} else if (table->type == PTABLE_VTOC)
793 		goto out;
794 
795 #ifdef LOADER_VTOC8_SUPPORT
796 	if (be16dec(buf + offsetof(struct vtoc8, magic)) == VTOC_MAGIC) {
797 		if (ptable_vtoc8read(table, dev, dread) == NULL) {
798 			/* Read error. */
799 			table = NULL;
800 			goto out;
801 		} else if (table->type == PTABLE_VTOC8)
802 			goto out;
803 	}
804 #endif
805 	/* Check the BSD label. */
806 	if (ptable_bsdread(table, dev, dread) == NULL) { /* Read error. */
807 		table = NULL;
808 		goto out;
809 	} else if (table->type == PTABLE_BSD)
810 		goto out;
811 
812 #if defined(LOADER_GPT_SUPPORT) || defined(LOADER_MBR_SUPPORT)
813 	/* Check the MBR magic. */
814 	if (buf[DOSMAGICOFFSET] != 0x55 ||
815 	    buf[DOSMAGICOFFSET + 1] != 0xaa) {
816 		DPRINTF("magic sequence not found");
817 #if defined(LOADER_GPT_SUPPORT)
818 		/* There is no PMBR, check that we have backup GPT */
819 		table->type = PTABLE_GPT;
820 		table = ptable_gptread(table, dev, dread);
821 #endif
822 		goto out;
823 	}
824 	/* Check that we have PMBR. Also do some validation. */
825 	dp = malloc(NDOSPART * sizeof (struct dos_partition));
826 	if (dp == NULL)
827 		goto out;
828 	bcopy(buf + DOSPARTOFF, dp, NDOSPART * sizeof (struct dos_partition));
829 
830 	/*
831 	 * macOS can create PMBR partition in a hybrid MBR; that is, an MBR
832 	 * partition which has a DOSTYP_PMBR entry defined to start at sector 1.
833 	 * After the DOSTYP_PMBR, there may be other paritions. A UEFI
834 	 * compliant PMBR has no other partitions.
835 	 */
836 	for (i = 0; i < NDOSPART; i++) {
837 		if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80) {
838 			DPRINTF("invalid partition flag %x", dp[i].dp_flag);
839 			goto out;
840 		}
841 #ifdef LOADER_GPT_SUPPORT
842 		if (dp[i].dp_typ == DOSPTYP_PMBR && dp[i].dp_start == 1) {
843 			table->type = PTABLE_GPT;
844 			DPRINTF("PMBR detected");
845 		}
846 #endif
847 	}
848 #ifdef LOADER_GPT_SUPPORT
849 	if (table->type == PTABLE_GPT) {
850 		table = ptable_gptread(table, dev, dread);
851 		goto out;
852 	}
853 #endif
854 #ifdef LOADER_MBR_SUPPORT
855 	/* Read MBR. */
856 	DPRINTF("MBR detected");
857 	table->type = PTABLE_MBR;
858 	for (i = has_ext = 0; i < NDOSPART; i++) {
859 		if (dp[i].dp_typ == 0)
860 			continue;
861 		start = le32dec(&(dp[i].dp_start));
862 		end = le32dec(&(dp[i].dp_size));
863 		if (start == 0 || end == 0)
864 			continue;
865 #if 0	/* Some BIOSes return an incorrect number of sectors */
866 		if (start + end - 1 >= sectors)
867 			continue;	/* XXX: ignore */
868 #endif
869 		if (dp[i].dp_typ == DOSPTYP_EXT ||
870 		    dp[i].dp_typ == DOSPTYP_EXTLBA)
871 			has_ext = 1;
872 		entry = malloc(sizeof (*entry));
873 		if (entry == NULL)
874 			break;
875 		entry->part.start = start;
876 		entry->part.end = start + end - 1;
877 		entry->part.index = i + 1;
878 		entry->part.type = mbr_parttype(dp[i].dp_typ);
879 		entry->flags = dp[i].dp_flag;
880 		entry->type.mbr = dp[i].dp_typ;
881 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
882 		DPRINTF("new MBR partition added");
883 	}
884 	if (has_ext) {
885 		table = ptable_ebrread(table, dev, dread);
886 		/* FALLTHROUGH */
887 	}
888 #endif /* LOADER_MBR_SUPPORT */
889 #endif /* LOADER_MBR_SUPPORT || LOADER_GPT_SUPPORT */
890 out:
891 	free(dp);
892 	free(buf);
893 	return (table);
894 }
895 
896 void
897 ptable_close(struct ptable *table)
898 {
899 	struct pentry *entry;
900 
901 	if (table == NULL)
902 		return;
903 
904 	while (!STAILQ_EMPTY(&table->entries)) {
905 		entry = STAILQ_FIRST(&table->entries);
906 		STAILQ_REMOVE_HEAD(&table->entries, entry);
907 		free(entry);
908 	}
909 	free(table);
910 }
911 
912 enum ptable_type
913 ptable_gettype(const struct ptable *table)
914 {
915 
916 	return (table->type);
917 }
918 
919 int
920 ptable_getsize(const struct ptable *table, uint64_t *sizep)
921 {
922 	uint64_t tmp = table->sectors * table->sectorsize;
923 
924 	if (tmp < table->sectors)
925 		return (EOVERFLOW);
926 
927 	if (sizep != NULL)
928 		*sizep = tmp;
929 	return (0);
930 }
931 
932 int
933 ptable_getpart(const struct ptable *table, struct ptable_entry *part, int idx)
934 {
935 	struct pentry *entry;
936 
937 	if (part == NULL || table == NULL)
938 		return (EINVAL);
939 
940 	STAILQ_FOREACH(entry, &table->entries, entry) {
941 		if (entry->part.index != idx)
942 			continue;
943 		memcpy(part, &entry->part, sizeof (*part));
944 		return (0);
945 	}
946 	return (ENOENT);
947 }
948 
/*
 * Search for a slice with the following preferences:
 *
 * 1: Active illumos slice
 * 2: Non-active illumos slice
 * 3: Active Linux slice
 * 4: non-active Linux slice
 * 5: Active FAT/FAT32 slice
 * 6: non-active FAT/FAT32 slice
 *
 * A numerically lower value is a stronger preference.  PREF_NONE marks a
 * slice that is never selected.
 */
#define	PREF_RAWDISK	0
#define	PREF_ILLUMOS_ACT	1
#define	PREF_ILLUMOS	2
#define	PREF_LINUX_ACT	3
#define	PREF_LINUX	4
#define	PREF_DOS_ACT	5
#define	PREF_DOS	6
#define	PREF_NONE	7
967 int
968 ptable_getbestpart(const struct ptable *table, struct ptable_entry *part)
969 {
970 	struct pentry *entry, *best;
971 	int pref, preflevel;
972 
973 	if (part == NULL || table == NULL)
974 		return (EINVAL);
975 
976 	best = NULL;
977 	preflevel = pref = PREF_NONE;
978 	STAILQ_FOREACH(entry, &table->entries, entry) {
979 #ifdef LOADER_MBR_SUPPORT
980 		if (table->type == PTABLE_MBR) {
981 			switch (entry->type.mbr) {
982 			case DOSPTYP_SUNIXOS2:
983 				pref = entry->flags & 0x80 ? PREF_ILLUMOS_ACT:
984 				    PREF_ILLUMOS;
985 				break;
986 			case DOSPTYP_LINUX:
987 				pref = entry->flags & 0x80 ? PREF_LINUX_ACT:
988 				    PREF_LINUX;
989 				break;
990 			case 0x01:		/* DOS/Windows */
991 			case 0x04:
992 			case 0x06:
993 			case 0x0c:
994 			case 0x0e:
995 			case DOSPTYP_FAT32:
996 				pref = entry->flags & 0x80 ? PREF_DOS_ACT:
997 				    PREF_DOS;
998 				break;
999 			default:
1000 				pref = PREF_NONE;
1001 			}
1002 		}
1003 #endif /* LOADER_MBR_SUPPORT */
1004 #ifdef LOADER_GPT_SUPPORT
1005 		if (table->type == PTABLE_GPT) {
1006 			if (entry->part.type == PART_DOS)
1007 				pref = PREF_DOS;
1008 			else if (entry->part.type == PART_ILLUMOS_ZFS)
1009 				pref = PREF_ILLUMOS;
1010 			else
1011 				pref = PREF_NONE;
1012 		}
1013 #endif /* LOADER_GPT_SUPPORT */
1014 		if (pref < preflevel) {
1015 			preflevel = pref;
1016 			best = entry;
1017 		}
1018 	}
1019 	if (best != NULL) {
1020 		memcpy(part, &best->part, sizeof (*part));
1021 		return (0);
1022 	}
1023 	return (ENOENT);
1024 }
1025 
1026 /*
1027  * iterate will stop if iterator will return non 0.
1028  */
1029 int
1030 ptable_iterate(const struct ptable *table, void *arg, ptable_iterate_t *iter)
1031 {
1032 	struct pentry *entry;
1033 	char name[32];
1034 	int ret = 0;
1035 
1036 	name[0] = '\0';
1037 	STAILQ_FOREACH(entry, &table->entries, entry) {
1038 #ifdef LOADER_MBR_SUPPORT
1039 		if (table->type == PTABLE_MBR)
1040 			sprintf(name, "s%d", entry->part.index);
1041 		else
1042 #endif
1043 #ifdef LOADER_GPT_SUPPORT
1044 		if (table->type == PTABLE_GPT)
1045 			sprintf(name, "p%d", entry->part.index);
1046 		else
1047 #endif
1048 #ifdef LOADER_VTOC8_SUPPORT
1049 		if (table->type == PTABLE_VTOC8)
1050 			sprintf(name, "%c", (uint8_t)'a' +
1051 			    entry->part.index);
1052 		else
1053 #endif
1054 		if (table->type == PTABLE_VTOC)
1055 			sprintf(name, "%c", (uint8_t)'a' +
1056 			    entry->part.index);
1057 		else
1058 		if (table->type == PTABLE_BSD)
1059 			sprintf(name, "%c", (uint8_t)'a' +
1060 			    entry->part.index);
1061 		ret = iter(arg, name, &entry->part);
1062 		if (ret != 0)
1063 			return (ret);
1064 	}
1065 	return (ret);
1066 }
1067