xref: /freebsd/sys/geom/part/g_part_gpt.c (revision 10aa369afd9946da18ae51b07aeadc3314fba56d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002, 2005-2007, 2011 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/bio.h>
34 #include <sys/diskmbr.h>
35 #include <sys/gsb_crc32.h>
36 #include <sys/endian.h>
37 #include <sys/gpt.h>
38 #include <sys/kernel.h>
39 #include <sys/kobj.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/queue.h>
45 #include <sys/sbuf.h>
46 #include <sys/systm.h>
47 #include <sys/sysctl.h>
48 #include <sys/uuid.h>
49 #include <geom/geom.h>
50 #include <geom/geom_int.h>
51 #include <geom/part/g_part.h>
52 
53 #include "g_part_if.h"
54 
/* Advertise this class through the kernel feature mechanism. */
55 FEATURE(geom_part_gpt, "GEOM partitioning class for GPT partitions support");
56 
/* Sysctl node "gpt" under _kern_geom_part, holding the GPT-specific knobs. */
57 SYSCTL_DECL(_kern_geom_part);
58 static SYSCTL_NODE(_kern_geom_part, OID_AUTO, gpt,
59     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
60     "GEOM_PART_GPT GUID Partition Table");
61 
/*
 * When zero (the default), g_part_gpt_create() and g_part_gpt_probe() return
 * ENXIO for any table whose gpt_depth is not 0.
 */
62 static u_int allow_nesting = 0;
63 SYSCTL_UINT(_kern_geom_part_gpt, OID_AUTO, allow_nesting,
64     CTLFLAG_RWTUN, &allow_nesting, 0, "Allow GPT to be nested inside other schemes");
65 
/*
 * Compile-time layout checks: gpt_read_hdr() rejects headers smaller than
 * 92 bytes and gpt_read_tbl() decodes entries at fixed byte offsets.
 */
66 CTASSERT(offsetof(struct gpt_hdr, padding) == 92);
67 CTASSERT(sizeof(struct gpt_ent) == 128);
68 
/* Defined outside this file; gates the AlternateLBA check in gpt_read_hdr(). */
69 extern u_int geom_part_check_integrity;
70 
/* True when the two struct uuid objects pointed to by a and b are bytewise equal. */
71 #define	EQUUID(a,b)	(memcmp(a, b, sizeof(struct uuid)) == 0)
72 
/* Size in bytes of the MBR image kept in struct g_part_gpt_table. */
73 #define	MBRSIZE		512
74 
/*
 * The on-disk GPT elements tracked for every table.  The values index the
 * lba[] and state[] arrays of struct g_part_gpt_table, so their order must
 * not change; GPT_ELT_COUNT is the array size, not an element.
 */
75 enum gpt_elt {
76 	GPT_ELT_PRIHDR,
77 	GPT_ELT_PRITBL,
78 	GPT_ELT_SECHDR,
79 	GPT_ELT_SECTBL,
80 	GPT_ELT_COUNT
81 };
82 
/*
 * Result of examining one GPT element.  gpt_read_hdr() and gpt_read_tbl()
 * move an element's state forward as each successive check passes, so the
 * value left behind on failure tells which check rejected the element.
 */
83 enum gpt_state {
84 	GPT_STATE_UNKNOWN,	/* Not determined. */
85 	GPT_STATE_MISSING,	/* No signature found. */
86 	GPT_STATE_CORRUPT,	/* Checksum mismatch. */
87 	GPT_STATE_INVALID,	/* Nonconformant/invalid. */
88 	GPT_STATE_UNSUPPORTED,  /* Not supported. */
89 	GPT_STATE_OK		/* Perfectly fine. */
90 };
91 
/*
 * GPT-specific table state.  The generic g_part_table must stay the first
 * member: the code casts freely between the two pointer types.
 *
 *  mbr      - image of the MBR sector: boot code in the first DOSPARTOFF
 *             bytes, followed by the partition slots and the magic.
 *  hdr      - in-core GPT header; allocated by g_part_gpt_create() and
 *             released by g_part_gpt_destroy().
 *  lba      - LBA of each element, indexed by enum gpt_elt.
 *  state    - state of each element, indexed by enum gpt_elt.
 *  bootcamp - non-zero while the Boot Camp style MBR is being maintained;
 *             gpt_update_bootcamp() clears it when the layout no longer
 *             qualifies.
 */
92 struct g_part_gpt_table {
93 	struct g_part_table	base;
94 	u_char			mbr[MBRSIZE];
95 	struct gpt_hdr		*hdr;
96 	quad_t			lba[GPT_ELT_COUNT];
97 	enum gpt_state		state[GPT_ELT_COUNT];
98 	int			bootcamp;
99 };
100 
/*
 * GPT-specific partition entry: the generic g_part_entry (which must stay
 * first, since pointers are cast between the two types) plus the decoded
 * GPT entry it represents.
 */
101 struct g_part_gpt_entry {
102 	struct g_part_entry	base;
103 	struct gpt_ent		ent;
104 };
105 
/*
 * Label and default-value helpers.  They are used by the functions below
 * before their definitions appear, hence the forward declarations.
 */
106 static void g_gpt_printf_utf16(struct sbuf *, uint16_t *, size_t);
107 static void g_gpt_utf8_to_utf16(const uint8_t *, uint16_t *, size_t);
108 static void g_gpt_set_defaults(struct g_part_table *, struct g_provider *);
109 
/* Scheme methods; each one is wired into g_part_gpt_methods[] below. */
110 static int g_part_gpt_add(struct g_part_table *, struct g_part_entry *,
111     struct g_part_parms *);
112 static int g_part_gpt_bootcode(struct g_part_table *, struct g_part_parms *);
113 static int g_part_gpt_create(struct g_part_table *, struct g_part_parms *);
114 static int g_part_gpt_destroy(struct g_part_table *, struct g_part_parms *);
115 static void g_part_gpt_dumpconf(struct g_part_table *, struct g_part_entry *,
116     struct sbuf *, const char *);
117 static int g_part_gpt_dumpto(struct g_part_table *, struct g_part_entry *);
118 static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *,
119     struct g_part_parms *);
120 static const char *g_part_gpt_name(struct g_part_table *, struct g_part_entry *,
121     char *, size_t);
122 static int g_part_gpt_probe(struct g_part_table *, struct g_consumer *);
123 static int g_part_gpt_read(struct g_part_table *, struct g_consumer *);
124 static int g_part_gpt_setunset(struct g_part_table *table,
125     struct g_part_entry *baseentry, const char *attrib, unsigned int set);
126 static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *,
127     char *, size_t);
128 static int g_part_gpt_write(struct g_part_table *, struct g_consumer *);
129 static int g_part_gpt_resize(struct g_part_table *, struct g_part_entry *,
130     struct g_part_parms *);
131 static int g_part_gpt_recover(struct g_part_table *);
132 
/*
 * Method table binding each g_part interface method to its GPT
 * implementation.  The all-zero entry terminates the list.
 */
133 static kobj_method_t g_part_gpt_methods[] = {
134 	KOBJMETHOD(g_part_add,		g_part_gpt_add),
135 	KOBJMETHOD(g_part_bootcode,	g_part_gpt_bootcode),
136 	KOBJMETHOD(g_part_create,	g_part_gpt_create),
137 	KOBJMETHOD(g_part_destroy,	g_part_gpt_destroy),
138 	KOBJMETHOD(g_part_dumpconf,	g_part_gpt_dumpconf),
139 	KOBJMETHOD(g_part_dumpto,	g_part_gpt_dumpto),
140 	KOBJMETHOD(g_part_modify,	g_part_gpt_modify),
141 	KOBJMETHOD(g_part_resize,	g_part_gpt_resize),
142 	KOBJMETHOD(g_part_name,		g_part_gpt_name),
143 	KOBJMETHOD(g_part_probe,	g_part_gpt_probe),
144 	KOBJMETHOD(g_part_read,		g_part_gpt_read),
145 	KOBJMETHOD(g_part_recover,	g_part_gpt_recover),
146 	KOBJMETHOD(g_part_setunset,	g_part_gpt_setunset),
147 	KOBJMETHOD(g_part_type,		g_part_gpt_type),
148 	KOBJMETHOD(g_part_write,	g_part_gpt_write),
149 	{ 0, 0 }
150 };
151 
/*
 * Largest on-disk entry size (hdr_entsz) accepted by gpt_read_tbl(); tables
 * with bigger entries are marked GPT_STATE_UNSUPPORTED.
 */
152 #define MAXENTSIZE 1024
153 
/*
 * Scheme descriptor registered with g_part.  The first three initialisers
 * are positional: scheme name, method table and size of the per-table
 * structure.  gps_maxent is also the upper bound gpt_read_tbl() places on
 * hdr_entries, and gps_bootcodesz matches the MBR image size.
 */
154 static struct g_part_scheme g_part_gpt_scheme = {
155 	"GPT",
156 	g_part_gpt_methods,
157 	sizeof(struct g_part_gpt_table),
158 	.gps_entrysz = sizeof(struct g_part_gpt_entry),
159 	.gps_minent = 128,
160 	.gps_maxent = 4096,
161 	.gps_bootcodesz = MBRSIZE,
162 };
163 G_PART_SCHEME_DECLARE(g_part_gpt);
164 MODULE_VERSION(geom_part_gpt, 0);
165 
/*
 * One UUID object per partition type this scheme knows by name, each
 * initialised from the matching GPT_ENT_TYPE_* macro.  They are referenced
 * by address from gpt_uuid_alias_match[] and compared with EQUUID().
 */
166 static struct uuid gpt_uuid_apple_apfs = GPT_ENT_TYPE_APPLE_APFS;
167 static struct uuid gpt_uuid_apple_boot = GPT_ENT_TYPE_APPLE_BOOT;
168 static struct uuid gpt_uuid_apple_core_storage =
169     GPT_ENT_TYPE_APPLE_CORE_STORAGE;
170 static struct uuid gpt_uuid_apple_hfs = GPT_ENT_TYPE_APPLE_HFS;
171 static struct uuid gpt_uuid_apple_label = GPT_ENT_TYPE_APPLE_LABEL;
172 static struct uuid gpt_uuid_apple_raid = GPT_ENT_TYPE_APPLE_RAID;
173 static struct uuid gpt_uuid_apple_raid_offline = GPT_ENT_TYPE_APPLE_RAID_OFFLINE;
174 static struct uuid gpt_uuid_apple_tv_recovery = GPT_ENT_TYPE_APPLE_TV_RECOVERY;
175 static struct uuid gpt_uuid_apple_ufs = GPT_ENT_TYPE_APPLE_UFS;
176 static struct uuid gpt_uuid_apple_zfs = GPT_ENT_TYPE_APPLE_ZFS;
177 static struct uuid gpt_uuid_bios_boot = GPT_ENT_TYPE_BIOS_BOOT;
178 static struct uuid gpt_uuid_chromeos_firmware = GPT_ENT_TYPE_CHROMEOS_FIRMWARE;
179 static struct uuid gpt_uuid_chromeos_kernel = GPT_ENT_TYPE_CHROMEOS_KERNEL;
180 static struct uuid gpt_uuid_chromeos_reserved = GPT_ENT_TYPE_CHROMEOS_RESERVED;
181 static struct uuid gpt_uuid_chromeos_root = GPT_ENT_TYPE_CHROMEOS_ROOT;
182 static struct uuid gpt_uuid_dfbsd_ccd = GPT_ENT_TYPE_DRAGONFLY_CCD;
183 static struct uuid gpt_uuid_dfbsd_hammer = GPT_ENT_TYPE_DRAGONFLY_HAMMER;
184 static struct uuid gpt_uuid_dfbsd_hammer2 = GPT_ENT_TYPE_DRAGONFLY_HAMMER2;
185 static struct uuid gpt_uuid_dfbsd_label32 = GPT_ENT_TYPE_DRAGONFLY_LABEL32;
186 static struct uuid gpt_uuid_dfbsd_label64 = GPT_ENT_TYPE_DRAGONFLY_LABEL64;
187 static struct uuid gpt_uuid_dfbsd_legacy = GPT_ENT_TYPE_DRAGONFLY_LEGACY;
188 static struct uuid gpt_uuid_dfbsd_swap = GPT_ENT_TYPE_DRAGONFLY_SWAP;
189 static struct uuid gpt_uuid_dfbsd_ufs1 = GPT_ENT_TYPE_DRAGONFLY_UFS1;
190 static struct uuid gpt_uuid_dfbsd_vinum = GPT_ENT_TYPE_DRAGONFLY_VINUM;
191 static struct uuid gpt_uuid_efi = GPT_ENT_TYPE_EFI;
192 static struct uuid gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
193 static struct uuid gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT;
194 static struct uuid gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS;
195 static struct uuid gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
196 static struct uuid gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
197 static struct uuid gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
198 static struct uuid gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;
199 static struct uuid gpt_uuid_hifive_fsbl = GPT_ENT_TYPE_HIFIVE_FSBL;
200 static struct uuid gpt_uuid_hifive_bbl = GPT_ENT_TYPE_HIFIVE_BBL;
201 static struct uuid gpt_uuid_linux_data = GPT_ENT_TYPE_LINUX_DATA;
202 static struct uuid gpt_uuid_linux_lvm = GPT_ENT_TYPE_LINUX_LVM;
203 static struct uuid gpt_uuid_linux_raid = GPT_ENT_TYPE_LINUX_RAID;
204 static struct uuid gpt_uuid_linux_swap = GPT_ENT_TYPE_LINUX_SWAP;
205 static struct uuid gpt_uuid_mbr = GPT_ENT_TYPE_MBR;
206 static struct uuid gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA;
207 static struct uuid gpt_uuid_ms_ldm_data = GPT_ENT_TYPE_MS_LDM_DATA;
208 static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;
209 static struct uuid gpt_uuid_ms_recovery = GPT_ENT_TYPE_MS_RECOVERY;
210 static struct uuid gpt_uuid_ms_reserved = GPT_ENT_TYPE_MS_RESERVED;
211 static struct uuid gpt_uuid_ms_spaces = GPT_ENT_TYPE_MS_SPACES;
212 static struct uuid gpt_uuid_netbsd_ccd = GPT_ENT_TYPE_NETBSD_CCD;
213 static struct uuid gpt_uuid_netbsd_cgd = GPT_ENT_TYPE_NETBSD_CGD;
214 static struct uuid gpt_uuid_netbsd_ffs = GPT_ENT_TYPE_NETBSD_FFS;
215 static struct uuid gpt_uuid_netbsd_lfs = GPT_ENT_TYPE_NETBSD_LFS;
216 static struct uuid gpt_uuid_netbsd_raid = GPT_ENT_TYPE_NETBSD_RAID;
217 static struct uuid gpt_uuid_netbsd_swap = GPT_ENT_TYPE_NETBSD_SWAP;
218 static struct uuid gpt_uuid_openbsd_data = GPT_ENT_TYPE_OPENBSD_DATA;
219 static struct uuid gpt_uuid_prep_boot = GPT_ENT_TYPE_PREP_BOOT;
220 static struct uuid gpt_uuid_solaris_boot = GPT_ENT_TYPE_SOLARIS_BOOT;
221 static struct uuid gpt_uuid_solaris_root = GPT_ENT_TYPE_SOLARIS_ROOT;
222 static struct uuid gpt_uuid_solaris_swap = GPT_ENT_TYPE_SOLARIS_SWAP;
223 static struct uuid gpt_uuid_solaris_backup = GPT_ENT_TYPE_SOLARIS_BACKUP;
224 static struct uuid gpt_uuid_solaris_var = GPT_ENT_TYPE_SOLARIS_VAR;
225 static struct uuid gpt_uuid_solaris_home = GPT_ENT_TYPE_SOLARIS_HOME;
226 static struct uuid gpt_uuid_solaris_altsec = GPT_ENT_TYPE_SOLARIS_ALTSEC;
227 static struct uuid gpt_uuid_solaris_reserved = GPT_ENT_TYPE_SOLARIS_RESERVED;
228 static struct uuid gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;
229 static struct uuid gpt_uuid_vmfs = GPT_ENT_TYPE_VMFS;
230 static struct uuid gpt_uuid_vmkdiag = GPT_ENT_TYPE_VMKDIAG;
231 static struct uuid gpt_uuid_vmreserved = GPT_ENT_TYPE_VMRESERVED;
232 static struct uuid gpt_uuid_vmvsanhdr = GPT_ENT_TYPE_VMVSANHDR;
233 
/*
 * Translation table between GPT type UUIDs, g_part alias identifiers and
 * MBR partition type bytes.
 *
 *  uuid    - the GPT partition type; a NULL uuid terminates the table.
 *  alias   - G_PART_ALIAS_* value; gpt_parse_type() turns it into a name
 *            with g_part_alias_name() and compares it case-insensitively.
 *  mbrtype - MBR type byte returned by gpt_map_type().  A value of 0 means
 *            there is no MBR equivalent: gpt_write_mbr_entry() refuses to
 *            write an entry of type 0.
 *
 * Both lookups scan from the top and stop at the first hit.
 */
234 static struct g_part_uuid_alias {
235 	struct uuid *uuid;
236 	int alias;
237 	int mbrtype;
238 } gpt_uuid_alias_match[] = {
239 	{ &gpt_uuid_apple_apfs,		G_PART_ALIAS_APPLE_APFS,	 0 },
240 	{ &gpt_uuid_apple_boot,		G_PART_ALIAS_APPLE_BOOT,	 0xab },
241 	{ &gpt_uuid_apple_core_storage,	G_PART_ALIAS_APPLE_CORE_STORAGE, 0 },
242 	{ &gpt_uuid_apple_hfs,		G_PART_ALIAS_APPLE_HFS,		 0xaf },
243 	{ &gpt_uuid_apple_label,	G_PART_ALIAS_APPLE_LABEL,	 0 },
244 	{ &gpt_uuid_apple_raid,		G_PART_ALIAS_APPLE_RAID,	 0 },
245 	{ &gpt_uuid_apple_raid_offline,	G_PART_ALIAS_APPLE_RAID_OFFLINE, 0 },
246 	{ &gpt_uuid_apple_tv_recovery,	G_PART_ALIAS_APPLE_TV_RECOVERY,	 0 },
247 	{ &gpt_uuid_apple_ufs,		G_PART_ALIAS_APPLE_UFS,		 0 },
248 	{ &gpt_uuid_apple_zfs,		G_PART_ALIAS_APPLE_ZFS,		 0 },
249 	{ &gpt_uuid_bios_boot,		G_PART_ALIAS_BIOS_BOOT,		 0 },
250 	{ &gpt_uuid_chromeos_firmware,	G_PART_ALIAS_CHROMEOS_FIRMWARE,	 0 },
251 	{ &gpt_uuid_chromeos_kernel,	G_PART_ALIAS_CHROMEOS_KERNEL,	 0 },
252 	{ &gpt_uuid_chromeos_reserved,	G_PART_ALIAS_CHROMEOS_RESERVED,	 0 },
253 	{ &gpt_uuid_chromeos_root,	G_PART_ALIAS_CHROMEOS_ROOT,	 0 },
254 	{ &gpt_uuid_dfbsd_ccd,		G_PART_ALIAS_DFBSD_CCD,		 0 },
255 	{ &gpt_uuid_dfbsd_hammer,	G_PART_ALIAS_DFBSD_HAMMER,	 0 },
256 	{ &gpt_uuid_dfbsd_hammer2,	G_PART_ALIAS_DFBSD_HAMMER2,	 0 },
257 	{ &gpt_uuid_dfbsd_label32,	G_PART_ALIAS_DFBSD,		 0xa5 },
258 	{ &gpt_uuid_dfbsd_label64,	G_PART_ALIAS_DFBSD64,		 0xa5 },
259 	{ &gpt_uuid_dfbsd_legacy,	G_PART_ALIAS_DFBSD_LEGACY,	 0 },
260 	{ &gpt_uuid_dfbsd_swap,		G_PART_ALIAS_DFBSD_SWAP,	 0 },
261 	{ &gpt_uuid_dfbsd_ufs1,		G_PART_ALIAS_DFBSD_UFS,		 0 },
262 	{ &gpt_uuid_dfbsd_vinum,	G_PART_ALIAS_DFBSD_VINUM,	 0 },
263 	{ &gpt_uuid_efi, 		G_PART_ALIAS_EFI,		 0xee },
264 	{ &gpt_uuid_freebsd,		G_PART_ALIAS_FREEBSD,		 0xa5 },
265 	{ &gpt_uuid_freebsd_boot, 	G_PART_ALIAS_FREEBSD_BOOT,	 0 },
266 	{ &gpt_uuid_freebsd_nandfs, 	G_PART_ALIAS_FREEBSD_NANDFS,	 0 },
267 	{ &gpt_uuid_freebsd_swap,	G_PART_ALIAS_FREEBSD_SWAP,	 0 },
268 	{ &gpt_uuid_freebsd_ufs,	G_PART_ALIAS_FREEBSD_UFS,	 0 },
269 	{ &gpt_uuid_freebsd_vinum,	G_PART_ALIAS_FREEBSD_VINUM,	 0 },
270 	{ &gpt_uuid_freebsd_zfs,	G_PART_ALIAS_FREEBSD_ZFS,	 0 },
271 	{ &gpt_uuid_hifive_fsbl,	G_PART_ALIAS_HIFIVE_FSBL,	 0 },
272 	{ &gpt_uuid_hifive_bbl,		G_PART_ALIAS_HIFIVE_BBL,	 0 },
273 	{ &gpt_uuid_linux_data,		G_PART_ALIAS_LINUX_DATA,	 0x0b },
274 	{ &gpt_uuid_linux_lvm,		G_PART_ALIAS_LINUX_LVM,		 0 },
275 	{ &gpt_uuid_linux_raid,		G_PART_ALIAS_LINUX_RAID,	 0 },
276 	{ &gpt_uuid_linux_swap,		G_PART_ALIAS_LINUX_SWAP,	 0 },
277 	{ &gpt_uuid_mbr,		G_PART_ALIAS_MBR,		 0 },
278 	{ &gpt_uuid_ms_basic_data,	G_PART_ALIAS_MS_BASIC_DATA,	 0x0b },
279 	{ &gpt_uuid_ms_ldm_data,	G_PART_ALIAS_MS_LDM_DATA,	 0 },
280 	{ &gpt_uuid_ms_ldm_metadata,	G_PART_ALIAS_MS_LDM_METADATA,	 0 },
281 	{ &gpt_uuid_ms_recovery,	G_PART_ALIAS_MS_RECOVERY,	 0 },
282 	{ &gpt_uuid_ms_reserved,	G_PART_ALIAS_MS_RESERVED,	 0 },
283 	{ &gpt_uuid_ms_spaces,		G_PART_ALIAS_MS_SPACES,		 0 },
284 	{ &gpt_uuid_netbsd_ccd,		G_PART_ALIAS_NETBSD_CCD,	 0 },
285 	{ &gpt_uuid_netbsd_cgd,		G_PART_ALIAS_NETBSD_CGD,	 0 },
286 	{ &gpt_uuid_netbsd_ffs,		G_PART_ALIAS_NETBSD_FFS,	 0 },
287 	{ &gpt_uuid_netbsd_lfs,		G_PART_ALIAS_NETBSD_LFS,	 0 },
288 	{ &gpt_uuid_netbsd_raid,	G_PART_ALIAS_NETBSD_RAID,	 0 },
289 	{ &gpt_uuid_netbsd_swap,	G_PART_ALIAS_NETBSD_SWAP,	 0 },
290 	{ &gpt_uuid_openbsd_data,	G_PART_ALIAS_OPENBSD_DATA,	 0 },
291 	{ &gpt_uuid_prep_boot,		G_PART_ALIAS_PREP_BOOT,		 0x41 },
292 	{ &gpt_uuid_solaris_boot,	G_PART_ALIAS_SOLARIS_BOOT,	 0 },
293 	{ &gpt_uuid_solaris_root,	G_PART_ALIAS_SOLARIS_ROOT,	 0 },
294 	{ &gpt_uuid_solaris_swap,	G_PART_ALIAS_SOLARIS_SWAP,	 0 },
295 	{ &gpt_uuid_solaris_backup,	G_PART_ALIAS_SOLARIS_BACKUP,	 0 },
296 	{ &gpt_uuid_solaris_var,	G_PART_ALIAS_SOLARIS_VAR,	 0 },
297 	{ &gpt_uuid_solaris_home,	G_PART_ALIAS_SOLARIS_HOME,	 0 },
298 	{ &gpt_uuid_solaris_altsec,	G_PART_ALIAS_SOLARIS_ALTSEC,	 0 },
299 	{ &gpt_uuid_solaris_reserved,	G_PART_ALIAS_SOLARIS_RESERVED,	 0 },
300 	{ &gpt_uuid_vmfs,		G_PART_ALIAS_VMFS,		 0 },
301 	{ &gpt_uuid_vmkdiag,		G_PART_ALIAS_VMKDIAG,		 0 },
302 	{ &gpt_uuid_vmreserved,		G_PART_ALIAS_VMRESERVED,	 0 },
303 	{ &gpt_uuid_vmvsanhdr,		G_PART_ALIAS_VMVSANHDR,		 0 },
304 	{ NULL, 0, 0 }
305 };
306 
307 static int
308 gpt_write_mbr_entry(u_char *mbr, int idx, int typ, quad_t start,
309     quad_t end)
310 {
311 
312 	if (typ == 0 || start > UINT32_MAX || end > UINT32_MAX)
313 		return (EINVAL);
314 
315 	mbr += DOSPARTOFF + idx * DOSPARTSIZE;
316 	mbr[0] = 0;
317 	if (start == 1) {
318 		/*
319 		 * Treat the PMBR partition specially to maximize
320 		 * interoperability with BIOSes.
321 		 */
322 		mbr[1] = mbr[3] = 0;
323 		mbr[2] = 2;
324 	} else
325 		mbr[1] = mbr[2] = mbr[3] = 0xff;
326 	mbr[4] = typ;
327 	mbr[5] = mbr[6] = mbr[7] = 0xff;
328 	le32enc(mbr + 8, (uint32_t)start);
329 	le32enc(mbr + 12, (uint32_t)(end - start + 1));
330 	return (0);
331 }
332 
333 static int
334 gpt_map_type(struct uuid *t)
335 {
336 	struct g_part_uuid_alias *uap;
337 
338 	for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) {
339 		if (EQUUID(t, uap->uuid))
340 			return (uap->mbrtype);
341 	}
342 	return (0);
343 }
344 
345 static void
346 gpt_create_pmbr(struct g_part_gpt_table *table, struct g_provider *pp)
347 {
348 
349 	bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART);
350 	gpt_write_mbr_entry(table->mbr, 0, 0xee, 1,
351 	    MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX));
352 	le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC);
353 }
354 
355 /*
356  * Under Boot Camp the PMBR partition (type 0xEE) doesn't cover the
357  * whole disk anymore. Rather, it covers the GPT table and the EFI
358  * system partition only. This way the HFS+ partition and any FAT
359  * partitions can be added to the MBR without creating an overlap.
360  */
361 static int
362 gpt_is_bootcamp(struct g_part_gpt_table *table, const char *provname)
363 {
364 	uint8_t *p;
365 
366 	p = table->mbr + DOSPARTOFF;
367 	if (p[4] != 0xee || le32dec(p + 8) != 1)
368 		return (0);
369 
370 	p += DOSPARTSIZE;
371 	if (p[4] != 0xaf)
372 		return (0);
373 
374 	printf("GEOM: %s: enabling Boot Camp\n", provname);
375 	return (1);
376 }
377 
378 static void
379 gpt_update_bootcamp(struct g_part_table *basetable, struct g_provider *pp)
380 {
381 	struct g_part_entry *baseentry;
382 	struct g_part_gpt_entry *entry;
383 	struct g_part_gpt_table *table;
384 	int bootable, error, index, slices, typ;
385 
386 	table = (struct g_part_gpt_table *)basetable;
387 
388 	bootable = -1;
389 	for (index = 0; index < NDOSPART; index++) {
390 		if (table->mbr[DOSPARTOFF + DOSPARTSIZE * index])
391 			bootable = index;
392 	}
393 
394 	bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART);
395 	slices = 0;
396 	LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) {
397 		if (baseentry->gpe_deleted)
398 			continue;
399 		index = baseentry->gpe_index - 1;
400 		if (index >= NDOSPART)
401 			continue;
402 
403 		entry = (struct g_part_gpt_entry *)baseentry;
404 
405 		switch (index) {
406 		case 0:	/* This must be the EFI system partition. */
407 			if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_efi))
408 				goto disable;
409 			error = gpt_write_mbr_entry(table->mbr, index, 0xee,
410 			    1ull, entry->ent.ent_lba_end);
411 			break;
412 		case 1:	/* This must be the HFS+ partition. */
413 			if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_apple_hfs))
414 				goto disable;
415 			error = gpt_write_mbr_entry(table->mbr, index, 0xaf,
416 			    entry->ent.ent_lba_start, entry->ent.ent_lba_end);
417 			break;
418 		default:
419 			typ = gpt_map_type(&entry->ent.ent_type);
420 			error = gpt_write_mbr_entry(table->mbr, index, typ,
421 			    entry->ent.ent_lba_start, entry->ent.ent_lba_end);
422 			break;
423 		}
424 		if (error)
425 			continue;
426 
427 		if (index == bootable)
428 			table->mbr[DOSPARTOFF + DOSPARTSIZE * index] = 0x80;
429 		slices |= 1 << index;
430 	}
431 	if ((slices & 3) == 3)
432 		return;
433 
434  disable:
435 	table->bootcamp = 0;
436 	gpt_create_pmbr(table, pp);
437 }
438 
439 static struct gpt_hdr *
440 gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp,
441     enum gpt_elt elt)
442 {
443 	struct gpt_hdr *buf, *hdr;
444 	struct g_provider *pp;
445 	quad_t lba, last;
446 	int error;
447 	uint32_t crc, sz;
448 
449 	pp = cp->provider;
450 	last = (pp->mediasize / pp->sectorsize) - 1;
451 	table->state[elt] = GPT_STATE_MISSING;
452 	/*
453 	 * If the primary header is valid look for secondary
454 	 * header in AlternateLBA, otherwise in the last medium's LBA.
455 	 */
456 	if (elt == GPT_ELT_SECHDR) {
457 		if (table->state[GPT_ELT_PRIHDR] != GPT_STATE_OK)
458 			table->lba[elt] = last;
459 	} else
460 		table->lba[elt] = 1;
461 	buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, pp->sectorsize,
462 	    &error);
463 	if (buf == NULL)
464 		return (NULL);
465 	hdr = NULL;
466 	if (memcmp(buf->hdr_sig, GPT_HDR_SIG, sizeof(buf->hdr_sig)) != 0)
467 		goto fail;
468 
469 	table->state[elt] = GPT_STATE_CORRUPT;
470 	sz = le32toh(buf->hdr_size);
471 	if (sz < 92 || sz > pp->sectorsize)
472 		goto fail;
473 
474 	hdr = g_malloc(sz, M_WAITOK | M_ZERO);
475 	bcopy(buf, hdr, sz);
476 	hdr->hdr_size = sz;
477 
478 	crc = le32toh(buf->hdr_crc_self);
479 	buf->hdr_crc_self = 0;
480 	if (crc32(buf, sz) != crc)
481 		goto fail;
482 	hdr->hdr_crc_self = crc;
483 
484 	table->state[elt] = GPT_STATE_INVALID;
485 	hdr->hdr_revision = le32toh(buf->hdr_revision);
486 	if (hdr->hdr_revision < GPT_HDR_REVISION)
487 		goto fail;
488 	hdr->hdr_lba_self = le64toh(buf->hdr_lba_self);
489 	if (hdr->hdr_lba_self != table->lba[elt])
490 		goto fail;
491 	hdr->hdr_lba_alt = le64toh(buf->hdr_lba_alt);
492 	if (hdr->hdr_lba_alt == hdr->hdr_lba_self)
493 		goto fail;
494 	if (hdr->hdr_lba_alt > last && geom_part_check_integrity)
495 		goto fail;
496 
497 	/* Check the managed area. */
498 	hdr->hdr_lba_start = le64toh(buf->hdr_lba_start);
499 	if (hdr->hdr_lba_start < 2 || hdr->hdr_lba_start >= last)
500 		goto fail;
501 	hdr->hdr_lba_end = le64toh(buf->hdr_lba_end);
502 	if (hdr->hdr_lba_end < hdr->hdr_lba_start || hdr->hdr_lba_end >= last)
503 		goto fail;
504 
505 	/* Check the table location and size of the table. */
506 	hdr->hdr_entries = le32toh(buf->hdr_entries);
507 	hdr->hdr_entsz = le32toh(buf->hdr_entsz);
508 	if (hdr->hdr_entries == 0 || hdr->hdr_entsz < 128 ||
509 	    (hdr->hdr_entsz & 7) != 0)
510 		goto fail;
511 	hdr->hdr_lba_table = le64toh(buf->hdr_lba_table);
512 	if (hdr->hdr_lba_table < 2 || hdr->hdr_lba_table >= last)
513 		goto fail;
514 	if (hdr->hdr_lba_table >= hdr->hdr_lba_start &&
515 	    hdr->hdr_lba_table <= hdr->hdr_lba_end)
516 		goto fail;
517 	lba = hdr->hdr_lba_table +
518 	    howmany((uint64_t)hdr->hdr_entries * hdr->hdr_entsz,
519 	        pp->sectorsize) - 1;
520 	if (lba >= last)
521 		goto fail;
522 	if (lba >= hdr->hdr_lba_start && lba <= hdr->hdr_lba_end)
523 		goto fail;
524 
525 	table->state[elt] = GPT_STATE_OK;
526 	le_uuid_dec(&buf->hdr_uuid, &hdr->hdr_uuid);
527 	hdr->hdr_crc_table = le32toh(buf->hdr_crc_table);
528 
529 	/* save LBA for secondary header */
530 	if (elt == GPT_ELT_PRIHDR)
531 		table->lba[GPT_ELT_SECHDR] = hdr->hdr_lba_alt;
532 
533 	g_free(buf);
534 	return (hdr);
535 
536  fail:
537 	g_free(hdr);
538 	g_free(buf);
539 	return (NULL);
540 }
541 
542 static struct gpt_ent *
543 gpt_read_tbl(struct g_part_gpt_table *table, struct g_consumer *cp,
544     enum gpt_elt elt, struct gpt_hdr *hdr)
545 {
546 	struct g_provider *pp;
547 	struct gpt_ent *ent, *tbl;
548 	char *buf, *p;
549 	unsigned int idx, sectors, tblsz, size;
550 	int error;
551 
552 	if (hdr == NULL)
553 		return (NULL);
554 	if (hdr->hdr_entries > g_part_gpt_scheme.gps_maxent ||
555 	    hdr->hdr_entsz > MAXENTSIZE) {
556 		table->state[elt] = GPT_STATE_UNSUPPORTED;
557 		return (NULL);
558 	}
559 
560 	pp = cp->provider;
561 	table->lba[elt] = hdr->hdr_lba_table;
562 
563 	table->state[elt] = GPT_STATE_MISSING;
564 	tblsz = hdr->hdr_entries * hdr->hdr_entsz;
565 	sectors = howmany(tblsz, pp->sectorsize);
566 	buf = g_malloc(sectors * pp->sectorsize, M_WAITOK | M_ZERO);
567 	for (idx = 0; idx < sectors; idx += maxphys / pp->sectorsize) {
568 		size = (sectors - idx > maxphys / pp->sectorsize) ?  maxphys:
569 		    (sectors - idx) * pp->sectorsize;
570 		p = g_read_data(cp, (table->lba[elt] + idx) * pp->sectorsize,
571 		    size, &error);
572 		if (p == NULL) {
573 			g_free(buf);
574 			return (NULL);
575 		}
576 		bcopy(p, buf + idx * pp->sectorsize, size);
577 		g_free(p);
578 	}
579 	table->state[elt] = GPT_STATE_CORRUPT;
580 	if (crc32(buf, tblsz) != hdr->hdr_crc_table) {
581 		g_free(buf);
582 		return (NULL);
583 	}
584 
585 	table->state[elt] = GPT_STATE_OK;
586 	tbl = g_malloc(hdr->hdr_entries * sizeof(struct gpt_ent),
587 	    M_WAITOK | M_ZERO);
588 
589 	for (idx = 0, ent = tbl, p = buf;
590 	     idx < hdr->hdr_entries;
591 	     idx++, ent++, p += hdr->hdr_entsz) {
592 		le_uuid_dec(p, &ent->ent_type);
593 		le_uuid_dec(p + 16, &ent->ent_uuid);
594 		ent->ent_lba_start = le64dec(p + 32);
595 		ent->ent_lba_end = le64dec(p + 40);
596 		ent->ent_attr = le64dec(p + 48);
597 		/* Keep UTF-16 in little-endian. */
598 		bcopy(p + 56, ent->ent_name, sizeof(ent->ent_name));
599 	}
600 
601 	g_free(buf);
602 	return (tbl);
603 }
604 
605 static int
606 gpt_matched_hdrs(struct gpt_hdr *pri, struct gpt_hdr *sec)
607 {
608 
609 	if (pri == NULL || sec == NULL)
610 		return (0);
611 
612 	if (!EQUUID(&pri->hdr_uuid, &sec->hdr_uuid))
613 		return (0);
614 	return ((pri->hdr_revision == sec->hdr_revision &&
615 	    pri->hdr_size == sec->hdr_size &&
616 	    pri->hdr_lba_start == sec->hdr_lba_start &&
617 	    pri->hdr_lba_end == sec->hdr_lba_end &&
618 	    pri->hdr_entries == sec->hdr_entries &&
619 	    pri->hdr_entsz == sec->hdr_entsz &&
620 	    pri->hdr_crc_table == sec->hdr_crc_table) ? 1 : 0);
621 }
622 
623 static int
624 gpt_parse_type(const char *type, struct uuid *uuid)
625 {
626 	struct uuid tmp;
627 	const char *alias;
628 	int error;
629 	struct g_part_uuid_alias *uap;
630 
631 	if (type[0] == '!') {
632 		error = parse_uuid(type + 1, &tmp);
633 		if (error)
634 			return (error);
635 		if (EQUUID(&tmp, &gpt_uuid_unused))
636 			return (EINVAL);
637 		*uuid = tmp;
638 		return (0);
639 	}
640 	for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) {
641 		alias = g_part_alias_name(uap->alias);
642 		if (!strcasecmp(type, alias)) {
643 			*uuid = *uap->uuid;
644 			return (0);
645 		}
646 	}
647 	return (EINVAL);
648 }
649 
650 static int
651 g_part_gpt_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
652     struct g_part_parms *gpp)
653 {
654 	struct g_part_gpt_entry *entry;
655 	int error;
656 
657 	entry = (struct g_part_gpt_entry *)baseentry;
658 	error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type);
659 	if (error)
660 		return (error);
661 	kern_uuidgen(&entry->ent.ent_uuid, 1);
662 	entry->ent.ent_lba_start = baseentry->gpe_start;
663 	entry->ent.ent_lba_end = baseentry->gpe_end;
664 	if (baseentry->gpe_deleted) {
665 		entry->ent.ent_attr = 0;
666 		bzero(entry->ent.ent_name, sizeof(entry->ent.ent_name));
667 	}
668 	if (gpp->gpp_parms & G_PART_PARM_LABEL)
669 		g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name,
670 		    sizeof(entry->ent.ent_name) /
671 		    sizeof(entry->ent.ent_name[0]));
672 	return (0);
673 }
674 
675 static int
676 g_part_gpt_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
677 {
678 	struct g_part_gpt_table *table;
679 	size_t codesz;
680 
681 	codesz = DOSPARTOFF;
682 	table = (struct g_part_gpt_table *)basetable;
683 	bzero(table->mbr, codesz);
684 	codesz = MIN(codesz, gpp->gpp_codesize);
685 	if (codesz > 0)
686 		bcopy(gpp->gpp_codeptr, table->mbr, codesz);
687 	return (0);
688 }
689 
690 static int
691 g_part_gpt_create(struct g_part_table *basetable, struct g_part_parms *gpp)
692 {
693 	struct g_provider *pp;
694 	struct g_part_gpt_table *table;
695 	size_t tblsz;
696 
697 	/* Our depth should be 0 unless nesting was explicitly enabled. */
698 	if (!allow_nesting && basetable->gpt_depth != 0)
699 		return (ENXIO);
700 
701 	table = (struct g_part_gpt_table *)basetable;
702 	pp = gpp->gpp_provider;
703 	tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent),
704 	    pp->sectorsize);
705 	if (pp->sectorsize < MBRSIZE ||
706 	    pp->mediasize < (3 + 2 * tblsz + basetable->gpt_entries) *
707 	    pp->sectorsize)
708 		return (ENOSPC);
709 
710 	gpt_create_pmbr(table, pp);
711 
712 	/* Allocate space for the header */
713 	table->hdr = g_malloc(sizeof(struct gpt_hdr), M_WAITOK | M_ZERO);
714 
715 	bcopy(GPT_HDR_SIG, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig));
716 	table->hdr->hdr_revision = GPT_HDR_REVISION;
717 	table->hdr->hdr_size = offsetof(struct gpt_hdr, padding);
718 	kern_uuidgen(&table->hdr->hdr_uuid, 1);
719 	table->hdr->hdr_entries = basetable->gpt_entries;
720 	table->hdr->hdr_entsz = sizeof(struct gpt_ent);
721 
722 	g_gpt_set_defaults(basetable, pp);
723 	return (0);
724 }
725 
726 static int
727 g_part_gpt_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
728 {
729 	struct g_part_gpt_table *table;
730 	struct g_provider *pp;
731 
732 	table = (struct g_part_gpt_table *)basetable;
733 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
734 	g_free(table->hdr);
735 	table->hdr = NULL;
736 
737 	/*
738 	 * Wipe the first 2 sectors and last one to clear the partitioning.
739 	 * Wipe sectors only if they have valid metadata.
740 	 */
741 	if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK)
742 		basetable->gpt_smhead |= 3;
743 	if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK &&
744 	    table->lba[GPT_ELT_SECHDR] == pp->mediasize / pp->sectorsize - 1)
745 		basetable->gpt_smtail |= 1;
746 	return (0);
747 }
748 
749 static void
750 g_part_gpt_efimedia(struct g_part_gpt_entry *entry, struct sbuf *sb)
751 {
752 	sbuf_printf(sb, "HD(%d,GPT,", entry->base.gpe_index);
753 	sbuf_printf_uuid(sb, &entry->ent.ent_uuid);
754 	sbuf_printf(sb, ",%#jx,%#jx)", (intmax_t)entry->base.gpe_start,
755 	    (intmax_t)(entry->base.gpe_end - entry->base.gpe_start + 1));
756 }
757 
758 static void
759 g_part_gpt_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry,
760     struct sbuf *sb, const char *indent)
761 {
762 	struct g_part_gpt_entry *entry;
763 
764 	entry = (struct g_part_gpt_entry *)baseentry;
765 	if (indent == NULL) {
766 		/* conftxt: libdisk compatibility */
767 		sbuf_cat(sb, " xs GPT xt ");
768 		sbuf_printf_uuid(sb, &entry->ent.ent_type);
769 	} else if (entry != NULL) {
770 		/* confxml: partition entry information */
771 		sbuf_printf(sb, "%s<label>", indent);
772 		g_gpt_printf_utf16(sb, entry->ent.ent_name,
773 		    sizeof(entry->ent.ent_name) >> 1);
774 		sbuf_cat(sb, "</label>\n");
775 		if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTME)
776 			sbuf_printf(sb, "%s<attrib>bootme</attrib>\n", indent);
777 		if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTONCE) {
778 			sbuf_printf(sb, "%s<attrib>bootonce</attrib>\n",
779 			    indent);
780 		}
781 		if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTFAILED) {
782 			sbuf_printf(sb, "%s<attrib>bootfailed</attrib>\n",
783 			    indent);
784 		}
785 		sbuf_printf(sb, "%s<rawtype>", indent);
786 		sbuf_printf_uuid(sb, &entry->ent.ent_type);
787 		sbuf_cat(sb, "</rawtype>\n");
788 		sbuf_printf(sb, "%s<rawuuid>", indent);
789 		sbuf_printf_uuid(sb, &entry->ent.ent_uuid);
790 		sbuf_cat(sb, "</rawuuid>\n");
791 		sbuf_printf(sb, "%s<efimedia>", indent);
792 		g_part_gpt_efimedia(entry, sb);
793 		sbuf_cat(sb, "</efimedia>\n");
794 	} else {
795 		/* confxml: scheme information */
796 	}
797 }
798 
799 static int
800 g_part_gpt_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
801 {
802 	struct g_part_gpt_entry *entry;
803 
804 	entry = (struct g_part_gpt_entry *)baseentry;
805 	return ((EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd_swap) ||
806 	    EQUUID(&entry->ent.ent_type, &gpt_uuid_linux_swap) ||
807 	    EQUUID(&entry->ent.ent_type, &gpt_uuid_dfbsd_swap)) ? 1 : 0);
808 }
809 
810 static int
811 g_part_gpt_modify(struct g_part_table *basetable,
812     struct g_part_entry *baseentry, struct g_part_parms *gpp)
813 {
814 	struct g_part_gpt_entry *entry;
815 	int error;
816 
817 	entry = (struct g_part_gpt_entry *)baseentry;
818 	if (gpp->gpp_parms & G_PART_PARM_TYPE) {
819 		error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type);
820 		if (error)
821 			return (error);
822 	}
823 	if (gpp->gpp_parms & G_PART_PARM_LABEL)
824 		g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name,
825 		    sizeof(entry->ent.ent_name) /
826 		    sizeof(entry->ent.ent_name[0]));
827 	return (0);
828 }
829 
830 static int
831 g_part_gpt_resize(struct g_part_table *basetable,
832     struct g_part_entry *baseentry, struct g_part_parms *gpp)
833 {
834 	struct g_part_gpt_entry *entry;
835 
836 	if (baseentry == NULL)
837 		return (g_part_gpt_recover(basetable));
838 
839 	entry = (struct g_part_gpt_entry *)baseentry;
840 	baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1;
841 	entry->ent.ent_lba_end = baseentry->gpe_end;
842 
843 	return (0);
844 }
845 
846 static const char *
847 g_part_gpt_name(struct g_part_table *table, struct g_part_entry *baseentry,
848     char *buf, size_t bufsz)
849 {
850 	struct g_part_gpt_entry *entry;
851 	char c;
852 
853 	entry = (struct g_part_gpt_entry *)baseentry;
854 	c = (EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd)) ? 's' : 'p';
855 	snprintf(buf, bufsz, "%c%d", c, baseentry->gpe_index);
856 	return (buf);
857 }
858 
859 static int
860 g_part_gpt_probe(struct g_part_table *table, struct g_consumer *cp)
861 {
862 	struct g_provider *pp;
863 	u_char *buf;
864 	int error, index, pri, res;
865 
866 	/* Our depth should be 0 unless nesting was explicitly enabled. */
867 	if (!allow_nesting && table->gpt_depth != 0)
868 		return (ENXIO);
869 
870 	pp = cp->provider;
871 
872 	/*
873 	 * Sanity-check the provider. Since the first sector on the provider
874 	 * must be a PMBR and a PMBR is 512 bytes large, the sector size
875 	 * must be at least 512 bytes.  Also, since the theoretical minimum
876 	 * number of sectors needed by GPT is 6, any medium that has less
877 	 * than 6 sectors is never going to be able to hold a GPT. The
878 	 * number 6 comes from:
879 	 *	1 sector for the PMBR
880 	 *	2 sectors for the GPT headers (each 1 sector)
881 	 *	2 sectors for the GPT tables (each 1 sector)
882 	 *	1 sector for an actual partition
883 	 * It's better to catch this pathological case early than behaving
884 	 * pathologically later on...
885 	 */
886 	if (pp->sectorsize < MBRSIZE || pp->mediasize < 6 * pp->sectorsize)
887 		return (ENOSPC);
888 
889 	/*
890 	 * Check that there's a MBR or a PMBR. If it's a PMBR, we return
891 	 * as the highest priority on a match, otherwise we assume some
892 	 * GPT-unaware tool has destroyed the GPT by recreating a MBR and
893 	 * we really want the MBR scheme to take precedence.
894 	 */
895 	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
896 	if (buf == NULL)
897 		return (error);
898 	res = le16dec(buf + DOSMAGICOFFSET);
899 	pri = G_PART_PROBE_PRI_LOW;
900 	if (res == DOSMAGIC) {
901 		for (index = 0; index < NDOSPART; index++) {
902 			if (buf[DOSPARTOFF + DOSPARTSIZE * index + 4] == 0xee)
903 				pri = G_PART_PROBE_PRI_HIGH;
904 		}
905 		g_free(buf);
906 
907 		/* Check that there's a primary header. */
908 		buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error);
909 		if (buf == NULL)
910 			return (error);
911 		res = memcmp(buf, GPT_HDR_SIG, 8);
912 		g_free(buf);
913 		if (res == 0)
914 			return (pri);
915 	} else
916 		g_free(buf);
917 
918 	/* No primary? Check that there's a secondary. */
919 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
920 	    &error);
921 	if (buf == NULL)
922 		return (error);
923 	res = memcmp(buf, GPT_HDR_SIG, 8);
924 	g_free(buf);
925 	return ((res == 0) ? pri : ENXIO);
926 }
927 
/*
 * Read the GPT from the provider behind cp and populate basetable.
 *
 * The first sector is copied into table->mbr, then both the primary and
 * the secondary header/table pairs are read.  One pair is selected as the
 * authoritative copy (the primary whenever its table is in state OK), its
 * header is kept in table->hdr, and a partition entry is created for every
 * table slot whose type is not gpt_uuid_unused.  The header and tables that
 * are not selected are released with g_free() before returning.
 *
 * basetable->gpt_corrupt is set to 1 when only one of the two copies is
 * usable, or when the secondary header is not located at the last LBA.
 *
 * Returns 0 on success, the error from g_read_data() if the first sector
 * cannot be read, or EINVAL when neither table is in state OK.
 */
static int
g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp)
{
	struct gpt_hdr *prihdr, *sechdr;
	struct gpt_ent *tbl, *pritbl, *sectbl;
	struct g_provider *pp;
	struct g_part_gpt_table *table;
	struct g_part_gpt_entry *entry;
	u_char *buf;
	uint64_t last;
	int error, index;

	table = (struct g_part_gpt_table *)basetable;
	pp = cp->provider;
	/* LBA of the last sector on the medium. */
	last = (pp->mediasize / pp->sectorsize) - 1;

	/* Read the PMBR */
	buf = g_read_data(cp, 0, pp->sectorsize, &error);
	if (buf == NULL)
		return (error);
	/* Only the first MBRSIZE bytes of the sector are kept. */
	bcopy(buf, table->mbr, MBRSIZE);
	g_free(buf);

	/* Read the primary header and table. */
	prihdr = gpt_read_hdr(table, cp, GPT_ELT_PRIHDR);
	if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) {
		pritbl = gpt_read_tbl(table, cp, GPT_ELT_PRITBL, prihdr);
	} else {
		/* Without a good header the table is not even attempted. */
		table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING;
		pritbl = NULL;
	}

	/* Read the secondary header and table. */
	sechdr = gpt_read_hdr(table, cp, GPT_ELT_SECHDR);
	if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK) {
		sectbl = gpt_read_tbl(table, cp, GPT_ELT_SECTBL, sechdr);
	} else {
		table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING;
		sectbl = NULL;
	}

	/* Fail if we haven't got any good tables at all. */
	if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK &&
	    table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) {
		/*
		 * Distinguish a consistent but unsupported GPT (both tables
		 * flagged UNSUPPORTED and the headers agree) from a corrupt
		 * one; only the diagnostics differ, both cases are rejected.
		 */
		if (table->state[GPT_ELT_PRITBL] == GPT_STATE_UNSUPPORTED &&
		    table->state[GPT_ELT_SECTBL] == GPT_STATE_UNSUPPORTED &&
		    gpt_matched_hdrs(prihdr, sechdr)) {
			printf("GEOM: %s: unsupported GPT detected.\n",
			    pp->name);
			printf(
		    "GEOM: %s: number of GPT entries: %u, entry size: %uB.\n",
			    pp->name, prihdr->hdr_entries, prihdr->hdr_entsz);
			printf(
    "GEOM: %s: maximum supported number of GPT entries: %u, entry size: %uB.\n",
			    pp->name, g_part_gpt_scheme.gps_maxent, MAXENTSIZE);
			printf("GEOM: %s: GPT rejected.\n", pp->name);
		} else {
			printf("GEOM: %s: corrupt or invalid GPT detected.\n",
			    pp->name);
			printf(
		    "GEOM: %s: GPT rejected -- may not be recoverable.\n",
			    pp->name);
		}
		/* Nothing is kept on failure; release whatever was read. */
		g_free(prihdr);
		g_free(pritbl);
		g_free(sechdr);
		g_free(sectbl);
		return (EINVAL);
	}

	/*
	 * If both headers are good but they disagree with each other,
	 * then invalidate one. We prefer to keep the primary header,
	 * unless the primary table is corrupt.
	 */
	if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK &&
	    table->state[GPT_ELT_SECHDR] == GPT_STATE_OK &&
	    !gpt_matched_hdrs(prihdr, sechdr)) {
		if (table->state[GPT_ELT_PRITBL] == GPT_STATE_OK) {
			table->state[GPT_ELT_SECHDR] = GPT_STATE_INVALID;
			table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING;
			/*
			 * The pointer is cleared because the selection code
			 * below calls g_free(sechdr) again.
			 */
			g_free(sechdr);
			sechdr = NULL;
		} else {
			table->state[GPT_ELT_PRIHDR] = GPT_STATE_INVALID;
			table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING;
			/* Likewise, g_free(prihdr) is repeated below. */
			g_free(prihdr);
			prihdr = NULL;
		}
	}

	/*
	 * Select the authoritative copy.  The selected header is stored in
	 * table->hdr and is not freed here; the selected table is used to
	 * create the entries and freed afterwards.
	 */
	if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK) {
		printf("GEOM: %s: the primary GPT table is corrupt or "
		    "invalid.\n", pp->name);
		printf("GEOM: %s: using the secondary instead -- recovery "
		    "strongly advised.\n", pp->name);
		table->hdr = sechdr;
		basetable->gpt_corrupt = 1;
		g_free(prihdr);
		tbl = sectbl;
		g_free(pritbl);
	} else {
		if (table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) {
			printf("GEOM: %s: the secondary GPT table is corrupt "
			    "or invalid.\n", pp->name);
			printf("GEOM: %s: using the primary only -- recovery "
			    "suggested.\n", pp->name);
			basetable->gpt_corrupt = 1;
		} else if (table->lba[GPT_ELT_SECHDR] != last) {
			printf( "GEOM: %s: the secondary GPT header is not in "
			    "the last LBA.\n", pp->name);
			basetable->gpt_corrupt = 1;
		}
		table->hdr = prihdr;
		g_free(sechdr);
		tbl = pritbl;
		g_free(sectbl);
	}

	/* Publish the usable LBA range and slot count from the header. */
	basetable->gpt_first = table->hdr->hdr_lba_start;
	basetable->gpt_last = table->hdr->hdr_lba_end;
	basetable->gpt_entries = table->hdr->hdr_entries;

	/*
	 * Create an entry for every used slot, walking from the highest
	 * slot down.  Partition indices are 1-based, table slots 0-based.
	 */
	for (index = basetable->gpt_entries - 1; index >= 0; index--) {
		if (EQUUID(&tbl[index].ent_type, &gpt_uuid_unused))
			continue;
		entry = (struct g_part_gpt_entry *)g_part_new_entry(
		    basetable, index + 1, tbl[index].ent_lba_start,
		    tbl[index].ent_lba_end);
		entry->ent = tbl[index];
	}

	g_free(tbl);

	/*
	 * Under Mac OS X, the MBR mirrors the first 4 GPT partitions
	 * if (and only if) any FAT32 or FAT16 partitions have been
	 * created. This happens irrespective of whether Boot Camp is
	 * used/enabled, though it's generally understood to be done
	 * to support legacy Windows under Boot Camp. We refer to this
	 * mirroring simply as Boot Camp. We try to detect Boot Camp
	 * so that we can update the MBR if and when GPT changes have
	 * been made. Note that we do not enable Boot Camp if not
	 * previously enabled because we can't assume that we're on a
	 * Mac alongside Mac OS X.
	 */
	table->bootcamp = gpt_is_bootcamp(table, pp->name);

	return (0);
}
1078 
1079 static int
1080 g_part_gpt_recover(struct g_part_table *basetable)
1081 {
1082 	struct g_part_gpt_table *table;
1083 	struct g_provider *pp;
1084 
1085 	table = (struct g_part_gpt_table *)basetable;
1086 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1087 	gpt_create_pmbr(table, pp);
1088 	g_gpt_set_defaults(basetable, pp);
1089 	basetable->gpt_corrupt = 0;
1090 	return (0);
1091 }
1092 
1093 static int
1094 g_part_gpt_setunset(struct g_part_table *basetable,
1095     struct g_part_entry *baseentry, const char *attrib, unsigned int set)
1096 {
1097 	struct g_part_gpt_entry *entry;
1098 	struct g_part_gpt_table *table;
1099 	struct g_provider *pp;
1100 	uint8_t *p;
1101 	uint64_t attr;
1102 	int i;
1103 
1104 	table = (struct g_part_gpt_table *)basetable;
1105 	entry = (struct g_part_gpt_entry *)baseentry;
1106 
1107 	if (strcasecmp(attrib, "active") == 0) {
1108 		if (table->bootcamp) {
1109 			/* The active flag must be set on a valid entry. */
1110 			if (entry == NULL)
1111 				return (ENXIO);
1112 			if (baseentry->gpe_index > NDOSPART)
1113 				return (EINVAL);
1114 			for (i = 0; i < NDOSPART; i++) {
1115 				p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE];
1116 				p[0] = (i == baseentry->gpe_index - 1)
1117 				    ? ((set) ? 0x80 : 0) : 0;
1118 			}
1119 		} else {
1120 			/* The PMBR is marked as active without an entry. */
1121 			if (entry != NULL)
1122 				return (ENXIO);
1123 			for (i = 0; i < NDOSPART; i++) {
1124 				p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE];
1125 				p[0] = (p[4] == 0xee) ? ((set) ? 0x80 : 0) : 0;
1126 			}
1127 		}
1128 		return (0);
1129 	} else if (strcasecmp(attrib, "lenovofix") == 0) {
1130 		/*
1131 		 * Write the 0xee GPT entry to slot #1 (2nd slot) in the pMBR.
1132 		 * This workaround allows Lenovo X220, T420, T520, etc to boot
1133 		 * from GPT Partitions in BIOS mode.
1134 		 */
1135 
1136 		if (entry != NULL)
1137 			return (ENXIO);
1138 
1139 		pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1140 		bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART);
1141 		gpt_write_mbr_entry(table->mbr, ((set) ? 1 : 0), 0xee, 1,
1142 		    MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX));
1143 		return (0);
1144 	}
1145 
1146 	if (entry == NULL)
1147 		return (ENODEV);
1148 
1149 	attr = 0;
1150 	if (strcasecmp(attrib, "bootme") == 0) {
1151 		attr |= GPT_ENT_ATTR_BOOTME;
1152 	} else if (strcasecmp(attrib, "bootonce") == 0) {
1153 		attr |= GPT_ENT_ATTR_BOOTONCE;
1154 		if (set)
1155 			attr |= GPT_ENT_ATTR_BOOTME;
1156 	} else if (strcasecmp(attrib, "bootfailed") == 0) {
1157 		/*
1158 		 * It should only be possible to unset BOOTFAILED, but it might
1159 		 * be useful for test purposes to also be able to set it.
1160 		 */
1161 		attr |= GPT_ENT_ATTR_BOOTFAILED;
1162 	}
1163 	if (attr == 0)
1164 		return (EINVAL);
1165 
1166 	if (set)
1167 		attr = entry->ent.ent_attr | attr;
1168 	else
1169 		attr = entry->ent.ent_attr & ~attr;
1170 	if (attr != entry->ent.ent_attr) {
1171 		entry->ent.ent_attr = attr;
1172 		if (!baseentry->gpe_created)
1173 			baseentry->gpe_modified = 1;
1174 	}
1175 	return (0);
1176 }
1177 
1178 static const char *
1179 g_part_gpt_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1180     char *buf, size_t bufsz)
1181 {
1182 	struct g_part_gpt_entry *entry;
1183 	struct uuid *type;
1184 	struct g_part_uuid_alias *uap;
1185 
1186 	entry = (struct g_part_gpt_entry *)baseentry;
1187 	type = &entry->ent.ent_type;
1188 	for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++)
1189 		if (EQUUID(type, uap->uuid))
1190 			return (g_part_alias_name(uap->alias));
1191 	buf[0] = '!';
1192 	snprintf_uuid(buf + 1, bufsz - 1, type);
1193 
1194 	return (buf);
1195 }
1196 
/*
 * Write the in-memory GPT back to the provider behind cp.
 *
 * The sequence is: (P)MBR in sector 0, primary entry table, primary
 * header, secondary entry table, secondary header.  Header and table are
 * serialised field by field in little-endian form into a single buffer
 * whose first sector is the header and whose remaining sectors are the
 * table.  The same buffer is reused for the secondary copy after the
 * self/alternate/table LBAs and the header CRC have been rewritten.
 *
 * Returns 0 on success or the first error reported by g_write_data();
 * nothing further is written after an error.
 */
static int
g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp)
{
	unsigned char *buf, *bp;
	struct g_provider *pp;
	struct g_part_entry *baseentry;
	struct g_part_gpt_entry *entry;
	struct g_part_gpt_table *table;
	size_t tblsz;
	uint32_t crc;
	int error, index;

	pp = cp->provider;
	table = (struct g_part_gpt_table *)basetable;
	/* Size of the entry table, in sectors (rounded up). */
	tblsz = howmany(table->hdr->hdr_entries * table->hdr->hdr_entsz,
	    pp->sectorsize);

	/* Reconstruct the MBR from the GPT if under Boot Camp. */
	if (table->bootcamp)
		gpt_update_bootcamp(basetable, pp);

	/* Write the PMBR */
	/* Zero-filled so that bytes past MBRSIZE in a larger sector are 0. */
	buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO);
	bcopy(table->mbr, buf, MBRSIZE);
	error = g_write_data(cp, 0, buf, pp->sectorsize);
	g_free(buf);
	if (error)
		return (error);

	/* Allocate space for the header and entries. */
	buf = g_malloc((tblsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO);

	/*
	 * Header fields that are identical in both copies.  The fields at
	 * offsets 16, 24, 32, 72 and 88 are filled in further down.
	 */
	memcpy(buf, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig));
	le32enc(buf + 8, table->hdr->hdr_revision);
	le32enc(buf + 12, table->hdr->hdr_size);
	le64enc(buf + 40, table->hdr->hdr_lba_start);
	le64enc(buf + 48, table->hdr->hdr_lba_end);
	le_uuid_enc(buf + 56, &table->hdr->hdr_uuid);
	le32enc(buf + 80, table->hdr->hdr_entries);
	le32enc(buf + 84, table->hdr->hdr_entsz);

	/*
	 * Serialise every live entry into its slot.  Slots of deleted or
	 * absent entries keep the zeroes from the M_ZERO allocation.
	 */
	LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) {
		if (baseentry->gpe_deleted)
			continue;
		entry = (struct g_part_gpt_entry *)baseentry;
		/* Partition indices are 1-based, table slots 0-based. */
		index = baseentry->gpe_index - 1;
		bp = buf + pp->sectorsize + table->hdr->hdr_entsz * index;
		le_uuid_enc(bp, &entry->ent.ent_type);
		le_uuid_enc(bp + 16, &entry->ent.ent_uuid);
		le64enc(bp + 32, entry->ent.ent_lba_start);
		le64enc(bp + 40, entry->ent.ent_lba_end);
		le64enc(bp + 48, entry->ent.ent_attr);
		memcpy(bp + 56, entry->ent.ent_name,
		    sizeof(entry->ent.ent_name));
	}

	/* CRC over the entry table, stored at header offset 88. */
	crc = crc32(buf + pp->sectorsize,
	    table->hdr->hdr_entries * table->hdr->hdr_entsz);
	le32enc(buf + 88, crc);

	/* Write primary meta-data. */
	/* The header CRC is computed with its own field set to zero. */
	le32enc(buf + 16, 0);	/* hdr_crc_self. */
	le64enc(buf + 24, table->lba[GPT_ELT_PRIHDR]);	/* hdr_lba_self. */
	le64enc(buf + 32, table->lba[GPT_ELT_SECHDR]);	/* hdr_lba_alt. */
	le64enc(buf + 72, table->lba[GPT_ELT_PRITBL]);	/* hdr_lba_table. */
	crc = crc32(buf, table->hdr->hdr_size);
	le32enc(buf + 16, crc);

	/*
	 * The table is written in chunks of at most maxphys bytes; index
	 * counts sectors already written.  The header goes out only after
	 * the whole table has been written.
	 */
	for (index = 0; index < tblsz; index += maxphys / pp->sectorsize) {
		error = g_write_data(cp,
		    (table->lba[GPT_ELT_PRITBL] + index) * pp->sectorsize,
		    buf + (index + 1) * pp->sectorsize,
		    (tblsz - index > maxphys / pp->sectorsize) ? maxphys :
		    (tblsz - index) * pp->sectorsize);
		if (error)
			goto out;
	}
	error = g_write_data(cp, table->lba[GPT_ELT_PRIHDR] * pp->sectorsize,
	    buf, pp->sectorsize);
	if (error)
		goto out;

	/* Write secondary meta-data. */
	/* Same buffer; self and alternate LBAs are swapped, CRC redone. */
	le32enc(buf + 16, 0);	/* hdr_crc_self. */
	le64enc(buf + 24, table->lba[GPT_ELT_SECHDR]);	/* hdr_lba_self. */
	le64enc(buf + 32, table->lba[GPT_ELT_PRIHDR]);	/* hdr_lba_alt. */
	le64enc(buf + 72, table->lba[GPT_ELT_SECTBL]);	/* hdr_lba_table. */
	crc = crc32(buf, table->hdr->hdr_size);
	le32enc(buf + 16, crc);

	for (index = 0; index < tblsz; index += maxphys / pp->sectorsize) {
		error = g_write_data(cp,
		    (table->lba[GPT_ELT_SECTBL] + index) * pp->sectorsize,
		    buf + (index + 1) * pp->sectorsize,
		    (tblsz - index > maxphys / pp->sectorsize) ? maxphys :
		    (tblsz - index) * pp->sectorsize);
		if (error)
			goto out;
	}
	error = g_write_data(cp, table->lba[GPT_ELT_SECHDR] * pp->sectorsize,
	    buf, pp->sectorsize);

	/* Common exit: the combined header/table buffer is always freed. */
 out:
	g_free(buf);
	return (error);
}
1303 
1304 static void
1305 g_gpt_set_defaults(struct g_part_table *basetable, struct g_provider *pp)
1306 {
1307 	struct g_part_entry *baseentry;
1308 	struct g_part_gpt_entry *entry;
1309 	struct g_part_gpt_table *table;
1310 	quad_t start, end, min, max;
1311 	quad_t lba, last;
1312 	size_t spb, tblsz;
1313 
1314 	table = (struct g_part_gpt_table *)basetable;
1315 	last = pp->mediasize / pp->sectorsize - 1;
1316 	tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent),
1317 	    pp->sectorsize);
1318 
1319 	table->lba[GPT_ELT_PRIHDR] = 1;
1320 	table->lba[GPT_ELT_PRITBL] = 2;
1321 	table->lba[GPT_ELT_SECHDR] = last;
1322 	table->lba[GPT_ELT_SECTBL] = last - tblsz;
1323 	table->state[GPT_ELT_PRIHDR] = GPT_STATE_OK;
1324 	table->state[GPT_ELT_PRITBL] = GPT_STATE_OK;
1325 	table->state[GPT_ELT_SECHDR] = GPT_STATE_OK;
1326 	table->state[GPT_ELT_SECTBL] = GPT_STATE_OK;
1327 
1328 	max = start = 2 + tblsz;
1329 	min = end = last - tblsz - 1;
1330 	LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) {
1331 		if (baseentry->gpe_deleted)
1332 			continue;
1333 		entry = (struct g_part_gpt_entry *)baseentry;
1334 		if (entry->ent.ent_lba_start < min)
1335 			min = entry->ent.ent_lba_start;
1336 		if (entry->ent.ent_lba_end > max)
1337 			max = entry->ent.ent_lba_end;
1338 	}
1339 	spb = 4096 / pp->sectorsize;
1340 	if (spb > 1) {
1341 		lba = start + ((start % spb) ? spb - start % spb : 0);
1342 		if (lba <= min)
1343 			start = lba;
1344 		lba = end - (end + 1) % spb;
1345 		if (max <= lba)
1346 			end = lba;
1347 	}
1348 	table->hdr->hdr_lba_start = start;
1349 	table->hdr->hdr_lba_end = end;
1350 
1351 	basetable->gpt_first = start;
1352 	basetable->gpt_last = end;
1353 }
1354 
1355 static void
1356 g_gpt_printf_utf16(struct sbuf *sb, uint16_t *str, size_t len)
1357 {
1358 	u_int bo;
1359 	uint32_t ch;
1360 	uint16_t c;
1361 
1362 	bo = LITTLE_ENDIAN;	/* GPT is little-endian */
1363 	while (len > 0 && *str != 0) {
1364 		ch = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str);
1365 		str++, len--;
1366 		if ((ch & 0xf800) == 0xd800) {
1367 			if (len > 0) {
1368 				c = (bo == BIG_ENDIAN) ? be16toh(*str)
1369 				    : le16toh(*str);
1370 				str++, len--;
1371 			} else
1372 				c = 0xfffd;
1373 			if ((ch & 0x400) == 0 && (c & 0xfc00) == 0xdc00) {
1374 				ch = ((ch & 0x3ff) << 10) + (c & 0x3ff);
1375 				ch += 0x10000;
1376 			} else
1377 				ch = 0xfffd;
1378 		} else if (ch == 0xfffe) { /* BOM (U+FEFF) swapped. */
1379 			bo = (bo == BIG_ENDIAN) ? LITTLE_ENDIAN : BIG_ENDIAN;
1380 			continue;
1381 		} else if (ch == 0xfeff) /* BOM (U+FEFF) unswapped. */
1382 			continue;
1383 
1384 		/* Write the Unicode character in UTF-8 */
1385 		if (ch < 0x80)
1386 			g_conf_printf_escaped(sb, "%c", ch);
1387 		else if (ch < 0x800)
1388 			g_conf_printf_escaped(sb, "%c%c", 0xc0 | (ch >> 6),
1389 			    0x80 | (ch & 0x3f));
1390 		else if (ch < 0x10000)
1391 			g_conf_printf_escaped(sb, "%c%c%c", 0xe0 | (ch >> 12),
1392 			    0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f));
1393 		else if (ch < 0x200000)
1394 			g_conf_printf_escaped(sb, "%c%c%c%c", 0xf0 |
1395 			    (ch >> 18), 0x80 | ((ch >> 12) & 0x3f),
1396 			    0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f));
1397 	}
1398 }
1399 
/*
 * Convert the NUL-terminated UTF-8 string s8 into little-endian UTF-16.
 *
 * s16 must have room for s16len 16-bit units.  The whole buffer is zeroed
 * first, so the result is zero padded; it is not zero terminated when the
 * converted text fills the buffer completely.  Conversion stops at the end
 * of the input or when the buffer is full.
 *
 * An incomplete multi-byte sequence and a continuation byte that does not
 * belong to any sequence are each reported as U+FFFD.  A character that
 * needs a surrogate pair when only one unit is left is replaced by a zero
 * unit, which truncates the string.
 *
 * No more than s16len units are ever written.
 */
static void
g_gpt_utf8_to_utf16(const uint8_t *s8, uint16_t *s16, size_t s16len)
{
	size_t s16idx, s8idx;
	uint32_t utfchar;
	unsigned int c, utfbytes;

	s8idx = s16idx = 0;
	utfchar = 0;
	utfbytes = 0;
	bzero(s16, s16len << 1);
	while (s8[s8idx] != 0 && s16idx < s16len) {
		c = s8[s8idx++];
		if ((c & 0xc0) != 0x80) {
			/* Initial characters. */
			if (utfbytes != 0) {
				/* Incomplete encoding of previous char. */
				s16[s16idx++] = htole16(0xfffd);
				/*
				 * The replacement character may have taken
				 * the last free unit.  Stop here, otherwise
				 * the current character would be stored one
				 * element past the end of the buffer.
				 */
				if (s16idx >= s16len)
					break;
			}
			if ((c & 0xf8) == 0xf0) {
				utfchar = c & 0x07;
				utfbytes = 3;
			} else if ((c & 0xf0) == 0xe0) {
				utfchar = c & 0x0f;
				utfbytes = 2;
			} else if ((c & 0xe0) == 0xc0) {
				utfchar = c & 0x1f;
				utfbytes = 1;
			} else {
				utfchar = c & 0x7f;
				utfbytes = 0;
			}
		} else {
			/* Followup characters. */
			if (utfbytes > 0) {
				utfchar = (utfchar << 6) + (c & 0x3f);
				utfbytes--;
			} else if (utfbytes == 0) {
				/*
				 * Stray continuation byte: use a non-zero
				 * count to remember that the input is bad,
				 * so that U+FFFD is emitted later on.
				 */
				utfbytes = ~0;
			}
		}
		/*
		 * Write the complete Unicode character as UTF-16 when we
		 * have all the UTF-8 characters collected.
		 */
		if (utfbytes == 0) {
			/*
			 * If we need to write 2 UTF-16 characters, but
			 * we only have room for 1, then we truncate the
			 * string by writing a 0 instead.
			 */
			if (utfchar >= 0x10000 && s16idx < s16len - 1) {
				s16[s16idx++] =
				    htole16(0xd800 | ((utfchar >> 10) - 0x40));
				s16[s16idx++] =
				    htole16(0xdc00 | (utfchar & 0x3ff));
			} else
				s16[s16idx++] = (utfchar >= 0x10000) ? 0 :
				    htole16(utfchar);
		}
	}
	/*
	 * If our input string was truncated, append an invalid encoding
	 * character to the output string.
	 */
	if (utfbytes != 0 && s16idx < s16len)
		s16[s16idx++] = htole16(0xfffd);
}
1467