xref: /freebsd/sys/geom/part/g_part_ldm.c (revision c6ec7d31830ab1c80edae95ad5e4b9dba10c47ac)
1 /*-
2  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/diskmbr.h>
33 #include <sys/endian.h>
34 #include <sys/gpt.h>
35 #include <sys/kernel.h>
36 #include <sys/kobj.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/queue.h>
42 #include <sys/sbuf.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/uuid.h>
46 #include <geom/geom.h>
47 #include <geom/part/g_part.h>
48 
49 #include "g_part_if.h"
50 
FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");

/* Knobs below live under the kern.geom.part.ldm sysctl/tunable node. */
SYSCTL_DECL(_kern_geom_part);
static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
    "GEOM_PART_LDM Logical Disk Manager");

/* Verbosity threshold tested by the LDM_DEBUG() and LDM_DUMP() macros. */
static u_int ldm_debug = 0;
TUNABLE_INT("kern.geom.part.ldm.debug", &ldm_debug);
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_TUN, &ldm_debug, 0, "Debug level");

/*
 * This allows access to mirrored LDM volumes. Since we do not
 * do mirroring here, it is not enabled by default.
 */
static u_int show_mirrors = 0;
TUNABLE_INT("kern.geom.part.ldm.show_mirrors", &show_mirrors);
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
    CTLFLAG_RW | CTLFLAG_TUN, &show_mirrors, 0, "Show mirrored volumes");
70 
/*
 * Print a message prefixed with "GEOM_PART: " and terminated by a newline
 * when ldm_debug is at least lvl. The format string must not end with its
 * own newline, and at least one argument has to follow fmt because
 * __VA_ARGS__ is expanded after a comma.
 */
#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
	if (ldm_debug >= (lvl)) {					\
		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
	}								\
} while (0)
/* Hexdump size bytes starting at buf, but only when ldm_debug > 1. */
#define	LDM_DUMP(buf, size)	do {					\
	if (ldm_debug > 1) {						\
		hexdump(buf, size, NULL, 0);				\
	}								\
} while (0)
81 
/*
 * These are internal representations of LDM structures.
 *
 * We do not keep all fields of the on-disk structures, only the most
 * useful ones. All numbers in the on-disk structures are in big-endian
 * format.
 */

/*
 * Private header is 512 bytes long. There are three copies on each disk.
 * Offset and sizes are in sectors. Location of each copy:
 * - the first offset is relative to the disk start;
 * - the second and third offset are relative to the LDM database start.
 *
 * On a disk partitioned with GPT, the LDM has no first private header.
 */
#define	LDM_PH_MBRINDEX		0
#define	LDM_PH_GPTINDEX		2
/* Sector offsets of the three private header copies (see above). */
static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
/* Accepted values of the 32-bit version field (major << 16 | minor). */
#define	LDM_VERSION_2K		0x2000b
#define	LDM_VERSION_VISTA	0x2000c
/* Byte offsets of the fields inside the on-disk private header. */
#define	LDM_PH_VERSION_OFF	0x00c
#define	LDM_PH_DISKGUID_OFF	0x030
#define	LDM_PH_DGGUID_OFF	0x0b0
#define	LDM_PH_DGNAME_OFF	0x0f0
#define	LDM_PH_START_OFF	0x11b
#define	LDM_PH_SIZE_OFF		0x123
#define	LDM_PH_DB_OFF		0x12b
#define	LDM_PH_DBSIZE_OFF	0x133
#define	LDM_PH_TH1_OFF		0x13b
#define	LDM_PH_TH2_OFF		0x143
#define	LDM_PH_CONFSIZE_OFF	0x153
#define	LDM_PH_LOGSIZE_OFF	0x15b
/* Signature expected at the very beginning of the header sector. */
#define	LDM_PH_SIGN		"PRIVHEAD"
/*
 * In-memory copy of the private header. Whole structures are compared
 * with memcmp(), so the field layout must stay as is.
 */
struct ldm_privhdr {
	struct uuid	disk_guid;
	struct uuid	dg_guid;
	u_char		dg_name[32];
	uint64_t	start;		/* logical disk start */
	uint64_t	size;		/* logical disk size */
	uint64_t	db_offset;	/* LDM database start */
#define	LDM_DB_SIZE		2048
	uint64_t	db_size;	/* LDM database size */
#define	LDM_TH_COUNT		2
	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
	uint64_t	conf_size;	/* configuration size */
	uint64_t	log_size;	/* size of log */
};
129 
/*
 * Table of contents header is 512 bytes long.
 * There are two identical copies at offsets from the private header.
 * Offsets are relative to the LDM database start.
 */
/* Signature and the two section names expected in a valid TOC header. */
#define	LDM_TH_SIGN		"TOCBLOCK"
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields inside the on-disk TOC header. */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
/* In-memory subset of the TOC header; the sizes are validated, not kept. */
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};
148 
/*
 * LDM database header is 512 bytes long.
 */
#define	LDM_VMDB_SIGN		"VMDB"
/* Byte offsets of the fields inside the on-disk VMDB header. */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
/* In-memory subset of the VMDB header. */
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
163 
/*
 * The LDM database configuration section contains the VMDB header and
 * many VBLKs. Each VBLK represents a disk group, disk, partition,
 * component or volume.
 *
 * The most interesting for us are volumes: they represent
 * partitions in the GEOM_PART meaning. But a volume VBLK does not
 * contain all the information needed to create a GEOM provider, so we
 * have to get this information from the related VBLKs. This is how
 * VBLKs are related:
 *	Volumes <- Components <- Partitions -> Disks
 *
 * One volume can contain several components. In this case LDM
 * mirrors the volume data to each component.
 *
 * Also each component can contain several partitions (spanned or
 * striped volumes).
 */
182 
/* Parsed component VBLK; it is linked to its parent volume by vol_id. */
struct ldm_component {
	uint64_t	id;		/* object id */
	uint64_t	vol_id;		/* parent volume object id */

	/* NOTE(review): presumably the length of 'partitions' - confirm. */
	int		count;
	LIST_HEAD(, ldm_partition) partitions;
	LIST_ENTRY(ldm_component) entry;
};
191 
/* Parsed volume VBLK; volumes are kept on ldm_db.volumes sorted by number. */
struct ldm_volume {
	uint64_t	id;		/* object id */
	uint64_t	size;		/* volume size */
	uint8_t		number;		/* used for ordering */
	uint8_t		part_type;	/* partition type */

	/* NOTE(review): presumably the length of 'components' - confirm. */
	int		count;
	LIST_HEAD(, ldm_component) components;
	LIST_ENTRY(ldm_volume)	entry;
};
202 
/* Parsed disk VBLK (both the string-GUID and the binary-GUID flavour). */
struct ldm_disk {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk guid */

	LIST_ENTRY(ldm_disk) entry;
};

/* Disabled: disk group VBLKs are recognized but their data is not kept. */
#if 0
struct ldm_disk_group {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk group guid */
	u_char		name[32];	/* disk group name */

	LIST_ENTRY(ldm_disk_group) entry;
};
#endif
219 
/* Parsed partition VBLK; it refers to its disk and parent component by id. */
struct ldm_partition {
	uint64_t	id;		/* object id */
	uint64_t	disk_id;	/* disk object id */
	uint64_t	comp_id;	/* parent component object id */
	uint64_t	start;		/* offset relative to disk start */
	uint64_t	offset;		/* offset for spanned volumes */
	uint64_t	size;		/* partition size */

	LIST_ENTRY(ldm_partition) entry;
};
230 
/*
 * Each VBLK is 128 bytes long and has a standard 16 byte header.
 * Some of the VBLK's fields have a fixed size, but others have a variable
 * size. Fields with variable size are prefixed with a one byte length
 * marker. Strings can be of fixed or of variable size as well.
 * Strings with fixed size are NUL-terminated, others are not.
 * All VBLKs start with the same few fields:
 *	Offset		Size		Description
 *	---------------+---------------+--------------------------
 *	0x00		16		standard VBLK header
 *	0x10		2		update status
 *	0x13		1		VBLK type
 *	0x18		PS		object id
 *	0x18+		PN		object name
 *
 *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
 *  o 'P' in size column means 'prefixed' (variable-width),
 *    'S' - string, 'N' - number.
 */
#define	LDM_VBLK_SIGN		"VBLK"
/* Byte offsets of the fields inside an on-disk VBLK. */
#define	LDM_VBLK_SEQ_OFF	0x04
#define	LDM_VBLK_GROUP_OFF	0x08
#define	LDM_VBLK_INDEX_OFF	0x0c
#define	LDM_VBLK_COUNT_OFF	0x0e
#define	LDM_VBLK_TYPE_OFF	0x13
#define	LDM_VBLK_OID_OFF	0x18
/* Decoded fields of the standard VBLK header. */
struct ldm_vblkhdr {
	uint32_t	seq;		/* sequence number */
	uint32_t	group;		/* group number */
	uint16_t	index;		/* index in the group */
	uint16_t	count;		/* number of entries in the group */
};
263 
/* Values of the VBLK type byte (at LDM_VBLK_TYPE_OFF). */
#define	LDM_VBLK_T_COMPONENT	0x32
#define	LDM_VBLK_T_PARTITION	0x33
#define	LDM_VBLK_T_DISK		0x34
#define	LDM_VBLK_T_DISKGROUP	0x35
#define	LDM_VBLK_T_DISK4	0x44
#define	LDM_VBLK_T_DISKGROUP4	0x45
#define	LDM_VBLK_T_VOLUME	0x51
/*
 * One parsed VBLK. The union member in use is selected by 'type'. Every
 * member structure begins with a uint64_t object id, so u.id gives access
 * to the id regardless of the type.
 */
struct ldm_vblk {
	uint8_t		type;		/* VBLK type */
	union {
		uint64_t		id;
		struct ldm_volume	vol;
		struct ldm_component	comp;
		struct ldm_disk		disk;
		struct ldm_partition	part;
#if 0
		struct ldm_disk_group	disk_group;
#endif
	} u;
	LIST_ENTRY(ldm_vblk) entry;
};
285 
/*
 * Some VBLKs contain a bit more data than can fit into 128 bytes. These
 * VBLKs are called eXtended VBLKs. Before parsing, the data from these
 * VBLKs should be placed into a contiguous memory buffer. We can recognize
 * an xVBLK by the count field in the standard VBLK header (count > 1).
 */
struct ldm_xvblk {
	uint32_t	group;		/* xVBLK group number */
	uint32_t	size;		/* the total size of xVBLK */
	/* One bit per fragment index, hence at most 8 fragments per group. */
	uint8_t		map;		/* bitmask of currently saved VBLKs */
	u_char		*data;		/* xVBLK data */

	LIST_ENTRY(ldm_xvblk)	entry;
};
300 
/*
 * The internal representation of LDM database.
 *
 * The entries on 'volumes' and 'disks' are embedded in the ldm_vblk
 * objects kept on 'vblks' (they are inserted as &blk->u.vol and
 * &blk->u.disk), so they share the lifetime of those objects.
 */
struct ldm_db {
	struct ldm_privhdr		ph;	/* private header */
	struct ldm_tochdr		th;	/* TOC header */
	struct ldm_vmdbhdr		dh;	/* VMDB header */

	LIST_HEAD(, ldm_volume)		volumes;
	LIST_HEAD(, ldm_disk)		disks;
	LIST_HEAD(, ldm_vblk)		vblks;
	LIST_HEAD(, ldm_xvblk)		xvblks;
};
312 
/* GPT partition type GUID of the LDM metadata partition. */
static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;

/* Scheme specific table: the generic g_part table plus LDM state. */
struct g_part_ldm_table {
	struct g_part_table	base;
	uint64_t		db_offset;
	int			is_gpt;
};
/* Scheme specific entry: the generic g_part entry plus a type byte. */
struct g_part_ldm_entry {
	struct g_part_entry	base;
	uint8_t			type;
};
324 
/* Forward declarations of the g_part methods implemented by this scheme. */
static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
    struct sbuf *, const char *);
static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_setunset(struct g_part_table *, struct g_part_entry *,
    const char *, unsigned int);
static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_resize(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
346 
/* kobj dispatch table binding the g_part interface to the LDM methods. */
static kobj_method_t g_part_ldm_methods[] = {
	KOBJMETHOD(g_part_add,		g_part_ldm_add),
	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
	KOBJMETHOD(g_part_create,	g_part_ldm_create),
	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
	KOBJMETHOD(g_part_resize,	g_part_ldm_resize),
	KOBJMETHOD(g_part_name,		g_part_ldm_name),
	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
	KOBJMETHOD(g_part_read,		g_part_ldm_read),
	KOBJMETHOD(g_part_setunset,	g_part_ldm_setunset),
	KOBJMETHOD(g_part_type,		g_part_ldm_type),
	KOBJMETHOD(g_part_write,	g_part_ldm_write),
	{ 0, 0 }	/* terminator */
};
364 
/*
 * Scheme descriptor: name, method table and the sizes of the scheme
 * specific table and entry structures.
 */
static struct g_part_scheme g_part_ldm_scheme = {
	"LDM",
	g_part_ldm_methods,
	sizeof(struct g_part_ldm_table),
	.gps_entrysz = sizeof(struct g_part_ldm_entry)
};
G_PART_SCHEME_DECLARE(g_part_ldm);
372 
/* Pairs of an MBR-style (DOSPTYP_*) type byte and its g_part alias. */
static struct g_part_ldm_alias {
	u_char		typ;
	int		alias;
} ldm_alias_match[] = {
	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
};
386 
387 static u_char*
388 ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
389 {
390 	struct g_provider *pp;
391 	u_char *buf;
392 
393 	pp = cp->provider;
394 	buf = g_read_data(cp, off, pp->sectorsize, error);
395 	if (buf == NULL)
396 		return (NULL);
397 
398 	if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) {
399 		LDM_DEBUG(1, "%s: invalid LDM private header signature",
400 		    pp->name);
401 		g_free(buf);
402 		buf = NULL;
403 		*error = EINVAL;
404 	}
405 	return (buf);
406 }
407 
408 static int
409 ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
410     const u_char *buf)
411 {
412 	uint32_t version;
413 	int error;
414 
415 	memset(hdr, 0, sizeof(*hdr));
416 	version = be32dec(buf + LDM_PH_VERSION_OFF);
417 	if (version != LDM_VERSION_2K &&
418 	    version != LDM_VERSION_VISTA) {
419 		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
420 		    cp->provider->name, version >> 16,
421 		    version & 0xFFFF);
422 		return (ENXIO);
423 	}
424 	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
425 	if (error != 0)
426 		return (error);
427 	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
428 	if (error != 0)
429 		return (error);
430 	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
431 	hdr->start = be64dec(buf + LDM_PH_START_OFF);
432 	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
433 	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
434 	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
435 	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
436 	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
437 	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
438 	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
439 	return (0);
440 }
441 
/*
 * Find a usable private header and store it in db->ph.
 *
 * The copies listed in ldm_ph_off[] are visited starting at index is_gpt,
 * so a non-zero is_gpt skips the first copy. A copy that cannot be read
 * or parsed, or that holds out-of-range values, is skipped. The first
 * valid copy is saved; later valid copies must be identical to it, with
 * one exception: a mismatch seen at index 1 or below replaces the saved
 * header with the newer copy.
 *
 * NOTE(review): db->ph.db_offset is read before any header has been
 * stored, so the caller presumably passes a zeroed db - confirm.
 *
 * Returns 0 when a header was accepted, EINVAL when a copy with index
 * above 1 differs from the saved header, ENXIO when no valid copy exists.
 */
static int
ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
{
	struct g_consumer *cp2;
	struct g_provider *pp;
	struct ldm_privhdr hdr;
	uint64_t offset, last;
	int error, found, i;
	u_char *buf;

	pp = cp->provider;
	if (is_gpt) {
		/*
		 * The last LBA is used in several checks below, for the
		 * GPT case it should be calculated relative to the whole
		 * disk.
		 */
		cp2 = LIST_FIRST(&pp->geom->consumer);
		last =
		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
	} else
		last = pp->mediasize / pp->sectorsize - 1;
	for (found = 0, i = is_gpt;
	    i < sizeof(ldm_ph_off) / sizeof(ldm_ph_off[0]); i++) {
		offset = ldm_ph_off[i];
		/*
		 * In the GPT case the consumer is attached to the LDM
		 * metadata partition and we don't need to add db_offset.
		 */
		if (!is_gpt)
			offset += db->ph.db_offset;
		if (i == LDM_PH_MBRINDEX) {
			/*
			 * Prepare for errors and set up a new base offset
			 * to read the backup private headers. Assume that
			 * the LDM database is in the last 1Mbyte area.
			 */
			db->ph.db_offset = last - LDM_DB_SIZE;
		}
		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
		if (buf == NULL) {
			LDM_DEBUG(1, "%s: failed to read private header "
			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
			continue;
		}
		error = ldm_privhdr_parse(cp, &hdr, buf);
		if (error != 0) {
			LDM_DEBUG(1, "%s: failed to parse private "
			    "header %d", pp->name, i);
			LDM_DUMP(buf, pp->sectorsize);
			g_free(buf);
			continue;
		}
		g_free(buf);
		/*
		 * Sanity checks: the logical disk and the database must fit
		 * on the media, the data area must not run into the database
		 * (not checked for GPT), and everything that lives inside
		 * the database must stay below LDM_DB_SIZE sectors.
		 */
		if (hdr.start > last ||
		    hdr.start + hdr.size - 1 > last ||
		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
		    hdr.db_size != LDM_DB_SIZE ||
		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
		    hdr.th_offset[0] >= LDM_DB_SIZE ||
		    hdr.th_offset[1] >= LDM_DB_SIZE ||
		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
			LDM_DEBUG(1, "%s: invalid values in the "
			    "private header %d", pp->name, i);
			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
			    "last: %jd", pp->name, hdr.start, hdr.size,
			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
			    last);
			continue;
		}
		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
			LDM_DEBUG(0, "%s: private headers are not equal",
			    pp->name);
			if (i > 1) {
				/*
				 * We have different headers in the LDM.
				 * We can not trust this metadata.
				 */
				LDM_DEBUG(0, "%s: refuse LDM metadata",
				    pp->name);
				return (EINVAL);
			}
			/*
			 * We already have read primary private header
			 * and it differs from this backup one.
			 * Prefer the backup header and save it.
			 */
			found = 0;
		}
		/* Save the first valid copy (or the preferred backup). */
		if (found == 0)
			memcpy(&db->ph, &hdr, sizeof(hdr));
		found = 1;
	}
	if (found == 0) {
		LDM_DEBUG(1, "%s: valid LDM private header not found",
		    pp->name);
		return (ENXIO);
	}
	return (0);
}
545 
546 static int
547 ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
548 {
549 	struct g_part_table *gpt;
550 	struct g_part_entry *e;
551 	struct g_consumer *cp2;
552 	int error;
553 
554 	cp2 = LIST_NEXT(cp, consumer);
555 	g_topology_lock();
556 	gpt = cp->provider->geom->softc;
557 	error = 0;
558 	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
559 		if (cp->provider == e->gpe_pp) {
560 			/* ms-ldm-metadata partition */
561 			if (e->gpe_start != db->ph.db_offset ||
562 			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
563 				error++;
564 		} else if (cp2->provider == e->gpe_pp) {
565 			/* ms-ldm-data partition */
566 			if (e->gpe_start != db->ph.start ||
567 			    e->gpe_end != db->ph.start + db->ph.size - 1)
568 				error++;
569 		}
570 		if (error != 0) {
571 			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
572 			    "do not match with the LDM metadata",
573 			    e->gpe_pp->name, e->gpe_index);
574 			error = ENXIO;
575 			break;
576 		}
577 	}
578 	g_topology_unlock();
579 	return (error);
580 }
581 
582 static int
583 ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
584 {
585 	struct g_provider *pp;
586 	struct ldm_tochdr hdr;
587 	uint64_t offset, conf_size, log_size;
588 	int error, found, i;
589 	u_char *buf;
590 
591 	pp = cp->provider;
592 	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
593 		offset = db->ph.db_offset + db->ph.th_offset[i];
594 		buf = g_read_data(cp,
595 		    offset * pp->sectorsize, pp->sectorsize, &error);
596 		if (buf == NULL) {
597 			LDM_DEBUG(1, "%s: failed to read TOC header "
598 			    "at LBA %ju", pp->name, (uintmax_t)offset);
599 			continue;
600 		}
601 		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
602 		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
603 		    strlen(LDM_TH_NAME1)) != 0 ||
604 		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
605 		    strlen(LDM_TH_NAME2)) != 0) {
606 			LDM_DEBUG(1, "%s: failed to parse TOC header "
607 			    "at LBA %ju", pp->name, (uintmax_t)offset);
608 			LDM_DUMP(buf, pp->sectorsize);
609 			g_free(buf);
610 			continue;
611 		}
612 		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
613 		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
614 		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
615 		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
616 		if (conf_size != db->ph.conf_size ||
617 		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
618 		    log_size != db->ph.log_size ||
619 		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
620 			LDM_DEBUG(1, "%s: invalid values in the "
621 			    "TOC header at LBA %ju", pp->name,
622 			    (uintmax_t)offset);
623 			LDM_DUMP(buf, pp->sectorsize);
624 			g_free(buf);
625 			continue;
626 		}
627 		g_free(buf);
628 		if (found == 0)
629 			memcpy(&db->th, &hdr, sizeof(hdr));
630 		found = 1;
631 	}
632 	if (found == 0) {
633 		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
634 		    pp->name);
635 		return (ENXIO);
636 	}
637 	return (0);
638 }
639 
640 static int
641 ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
642 {
643 	struct g_provider *pp;
644 	struct uuid dg_guid;
645 	uint64_t offset;
646 	uint32_t version;
647 	int error;
648 	u_char *buf;
649 
650 	pp = cp->provider;
651 	offset = db->ph.db_offset + db->th.conf_offset;
652 	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
653 	    &error);
654 	if (buf == NULL) {
655 		LDM_DEBUG(0, "%s: failed to read VMDB header at "
656 		    "LBA %ju", pp->name, (uintmax_t)offset);
657 		return (error);
658 	}
659 	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
660 		g_free(buf);
661 		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
662 		    "LBA %ju", pp->name, (uintmax_t)offset);
663 		return (ENXIO);
664 	}
665 	/* Check version. */
666 	version = be32dec(buf + LDM_DB_VERSION_OFF);
667 	if (version != 0x4000A) {
668 		g_free(buf);
669 		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
670 		    pp->name, version >> 16, version & 0xFFFF);
671 		return (ENXIO);
672 	}
673 	/*
674 	 * Check VMDB update status:
675 	 *	1 - in a consistent state;
676 	 *	2 - in a creation phase;
677 	 *	3 - in a deletion phase;
678 	 */
679 	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
680 		g_free(buf);
681 		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
682 		    pp->name);
683 		return (ENXIO);
684 	}
685 	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
686 	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
687 	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
688 	/* Compare disk group name and guid from VMDB and private headers */
689 	if (error != 0 || db->dh.size == 0 ||
690 	    pp->sectorsize % db->dh.size != 0 ||
691 	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
692 	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
693 	    db->dh.size * db->dh.last_seq >
694 	    db->ph.conf_size * pp->sectorsize) {
695 		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
696 		    pp->name);
697 		LDM_DUMP(buf, pp->sectorsize);
698 		g_free(buf);
699 		return (EINVAL);
700 	}
701 	g_free(buf);
702 	return (0);
703 }
704 
705 static int
706 ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
707 {
708 	struct ldm_xvblk *blk;
709 	size_t size;
710 
711 	size = db->dh.size - 16;
712 	LIST_FOREACH(blk, &db->xvblks, entry)
713 		if (blk->group == vh->group)
714 			break;
715 	if (blk == NULL) {
716 		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
717 		blk->group = vh->group;
718 		blk->size = size * vh->count + 16;
719 		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
720 		blk->map = 0xFF << vh->count;
721 		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
722 	}
723 	if ((blk->map & (1 << vh->index)) != 0) {
724 		/* Block with given index has been already saved. */
725 		return (EINVAL);
726 	}
727 	/* Copy the data block to the place related to index. */
728 	memcpy(blk->data + size * vh->index + 16, p + 16, size);
729 	blk->map |= 1 << vh->index;
730 	return (0);
731 }
732 
/*
 * Read the variable-width numeric field and return new offset.
 *
 * The field consists of a one byte length followed by that many bytes of
 * a big-endian number. 'range' is the size of buf. On success the value
 * is stored in *result and the offset of the byte following the field is
 * returned. -1 is returned, and *result is left untouched, when the
 * length byte itself is outside of buf, the number is wider than 64 bits,
 * or the field does not end before the last byte of buf.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}
748 
/*
 * Read the variable-width string and return new offset.
 *
 * The field consists of a one byte length followed by that many
 * characters without a terminator. The characters are copied to result
 * and NUL-terminated there; maxlen is the size of result and range the
 * size of buf. Returns the offset of the byte following the field, or -1
 * when the length byte is outside of buf, the string plus terminator does
 * not fit into result, or the field does not end before the last byte of
 * buf.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}
763 
/*
 * Just skip the variable-width variable and return new offset.
 *
 * The field consists of a one byte length followed by that many bytes;
 * range is the size of buf. Returns the offset of the byte following the
 * field, or -1 when the length byte is outside of buf or the field does
 * not end before the last byte of buf.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
776 
777 static int
778 ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
779 {
780 	struct ldm_vblk *blk;
781 	struct ldm_volume *volume, *last;
782 	const char *errstr;
783 	u_char vstr[64];
784 	int error, offset;
785 
786 	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
787 	blk->type = p[LDM_VBLK_TYPE_OFF];
788 	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
789 	if (offset < 0) {
790 		errstr = "object id";
791 		goto fail;
792 	}
793 	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
794 	if (offset < 0) {
795 		errstr = "object name";
796 		goto fail;
797 	}
798 	switch (blk->type) {
799 	/*
800 	 * Component VBLK fields:
801 	 * Offset	Size	Description
802 	 * ------------+-------+------------------------
803 	 *  0x18+	PS	volume state
804 	 *  0x18+5	PN	component children count
805 	 *  0x1D+16	PN	parent's volume object id
806 	 *  0x2D+1	PN	stripe size
807 	 */
808 	case LDM_VBLK_T_COMPONENT:
809 		offset = ldm_vparm_skip(p, offset, size);
810 		if (offset < 0) {
811 			errstr = "volume state";
812 			goto fail;
813 		}
814 		offset = ldm_vparm_skip(p, offset + 5, size);
815 		if (offset < 0) {
816 			errstr = "children count";
817 			goto fail;
818 		}
819 		offset = ldm_vnum_get(p, offset + 16,
820 		    &blk->u.comp.vol_id, size);
821 		if (offset < 0) {
822 			errstr = "volume id";
823 			goto fail;
824 		}
825 		break;
826 	/*
827 	 * Partition VBLK fields:
828 	 * Offset	Size	Description
829 	 * ------------+-------+------------------------
830 	 *  0x18+12	8	partition start offset
831 	 *  0x18+20	8	volume offset
832 	 *  0x18+28	PN	partition size
833 	 *  0x34+	PN	parent's component object id
834 	 *  0x34+	PN	disk's object id
835 	 */
836 	case LDM_VBLK_T_PARTITION:
837 		if (offset + 28 >= size) {
838 			errstr = "too small buffer";
839 			goto fail;
840 		}
841 		blk->u.part.start = be64dec(p + offset + 12);
842 		blk->u.part.offset = be64dec(p + offset + 20);
843 		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
844 		if (offset < 0) {
845 			errstr = "partition size";
846 			goto fail;
847 		}
848 		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
849 		if (offset < 0) {
850 			errstr = "component id";
851 			goto fail;
852 		}
853 		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
854 		if (offset < 0) {
855 			errstr = "disk id";
856 			goto fail;
857 		}
858 		break;
859 	/*
860 	 * Disk VBLK fields:
861 	 * Offset	Size	Description
862 	 * ------------+-------+------------------------
863 	 *  0x18+	PS	disk GUID
864 	 */
865 	case LDM_VBLK_T_DISK:
866 		errstr = "disk guid";
867 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
868 		if (offset < 0)
869 			goto fail;
870 		error = parse_uuid(vstr, &blk->u.disk.guid);
871 		if (error != 0)
872 			goto fail;
873 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
874 		break;
875 	/*
876 	 * Disk group VBLK fields:
877 	 * Offset	Size	Description
878 	 * ------------+-------+------------------------
879 	 *  0x18+	PS	disk group GUID
880 	 */
881 	case LDM_VBLK_T_DISKGROUP:
882 #if 0
883 		strncpy(blk->u.disk_group.name, vstr,
884 		    sizeof(blk->u.disk_group.name));
885 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
886 		if (offset < 0) {
887 			errstr = "disk group guid";
888 			goto fail;
889 		}
890 		error = parse_uuid(name, &blk->u.disk_group.guid);
891 		if (error != 0) {
892 			errstr = "disk group guid";
893 			goto fail;
894 		}
895 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
896 #endif
897 		break;
898 	/*
899 	 * Disk VBLK fields:
900 	 * Offset	Size	Description
901 	 * ------------+-------+------------------------
902 	 *  0x18+	16	disk GUID
903 	 */
904 	case LDM_VBLK_T_DISK4:
905 		be_uuid_dec(p + offset, &blk->u.disk.guid);
906 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
907 		break;
908 	/*
909 	 * Disk group VBLK fields:
910 	 * Offset	Size	Description
911 	 * ------------+-------+------------------------
912 	 *  0x18+	16	disk GUID
913 	 */
914 	case LDM_VBLK_T_DISKGROUP4:
915 #if 0
916 		strncpy(blk->u.disk_group.name, vstr,
917 		    sizeof(blk->u.disk_group.name));
918 		be_uuid_dec(p + offset, &blk->u.disk.guid);
919 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
920 #endif
921 		break;
922 	/*
923 	 * Volume VBLK fields:
924 	 * Offset	Size	Description
925 	 * ------------+-------+------------------------
926 	 *  0x18+	PS	volume type
927 	 *  0x18+	PS	unknown
928 	 *  0x18+	14(S)	volume state
929 	 *  0x18+16	1	volume number
930 	 *  0x18+21	PN	volume children count
931 	 *  0x2D+16	PN	volume size
932 	 *  0x3D+4	1	partition type
933 	 */
934 	case LDM_VBLK_T_VOLUME:
935 		offset = ldm_vparm_skip(p, offset, size);
936 		if (offset < 0) {
937 			errstr = "volume type";
938 			goto fail;
939 		}
940 		offset = ldm_vparm_skip(p, offset, size);
941 		if (offset < 0) {
942 			errstr = "unknown param";
943 			goto fail;
944 		}
945 		if (offset + 21 >= size) {
946 			errstr = "too small buffer";
947 			goto fail;
948 		}
949 		blk->u.vol.number = p[offset + 16];
950 		offset = ldm_vparm_skip(p, offset + 21, size);
951 		if (offset < 0) {
952 			errstr = "children count";
953 			goto fail;
954 		}
955 		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
956 		if (offset < 0) {
957 			errstr = "volume size";
958 			goto fail;
959 		}
960 		if (offset + 4 >= size) {
961 			errstr = "too small buffer";
962 			goto fail;
963 		}
964 		blk->u.vol.part_type = p[offset + 4];
965 		/* keep volumes ordered by volume number */
966 		last = NULL;
967 		LIST_FOREACH(volume, &db->volumes, entry) {
968 			if (volume->number > blk->u.vol.number)
969 				break;
970 			last = volume;
971 		}
972 		if (last != NULL)
973 			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
974 		else
975 			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
976 		break;
977 	default:
978 		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
979 		LDM_DUMP(p, size);
980 	}
981 	LIST_INSERT_HEAD(&db->vblks, blk, entry);
982 	return (0);
983 fail:
984 	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
985 	    errstr, blk->type);
986 	LDM_DUMP(p, size);
987 	g_free(blk);
988 	return (EINVAL);
989 }
990 
991 static void
992 ldm_vmdb_free(struct ldm_db *db)
993 {
994 	struct ldm_vblk *vblk;
995 	struct ldm_xvblk *xvblk;
996 
997 	while (!LIST_EMPTY(&db->xvblks)) {
998 		xvblk = LIST_FIRST(&db->xvblks);
999 		LIST_REMOVE(xvblk, entry);
1000 		g_free(xvblk->data);
1001 		g_free(xvblk);
1002 	}
1003 	while (!LIST_EMPTY(&db->vblks)) {
1004 		vblk = LIST_FIRST(&db->vblks);
1005 		LIST_REMOVE(vblk, entry);
1006 		g_free(vblk);
1007 	}
1008 }
1009 
1010 static int
1011 ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
1012 {
1013 	struct g_provider *pp;
1014 	struct ldm_vblk *vblk;
1015 	struct ldm_xvblk *xvblk;
1016 	struct ldm_volume *volume;
1017 	struct ldm_component *comp;
1018 	struct ldm_vblkhdr vh;
1019 	u_char *buf, *p;
1020 	size_t size, n, sectors;
1021 	uint64_t offset;
1022 	int error;
1023 
1024 	pp = cp->provider;
1025 	size = (db->dh.last_seq * db->dh.size +
1026 	    pp->sectorsize - 1) / pp->sectorsize;
1027 	size -= 1; /* one sector takes vmdb header */
1028 	for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) {
1029 		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
1030 		sectors = (size - n) > (MAXPHYS / pp->sectorsize) ?
1031 		    MAXPHYS / pp->sectorsize: size - n;
1032 		/* read VBLKs */
1033 		buf = g_read_data(cp, offset * pp->sectorsize,
1034 		    sectors * pp->sectorsize, &error);
1035 		if (buf == NULL) {
1036 			LDM_DEBUG(0, "%s: failed to read VBLK\n",
1037 			    pp->name);
1038 			goto fail;
1039 		}
1040 		for (p = buf; p < buf + sectors * pp->sectorsize;
1041 		    p += db->dh.size) {
1042 			if (memcmp(p, LDM_VBLK_SIGN,
1043 			    strlen(LDM_VBLK_SIGN)) != 0) {
1044 				LDM_DEBUG(0, "%s: no VBLK signature\n",
1045 				    pp->name);
1046 				LDM_DUMP(p, db->dh.size);
1047 				goto fail;
1048 			}
1049 			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
1050 			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
1051 			/* skip empty blocks */
1052 			if (vh.seq == 0 || vh.group == 0)
1053 				continue;
1054 			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
1055 			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
1056 			if (vh.count == 0 || vh.count > 4 ||
1057 			    vh.seq > db->dh.last_seq) {
1058 				LDM_DEBUG(0, "%s: invalid values "
1059 				    "in the VBLK header\n", pp->name);
1060 				LDM_DUMP(p, db->dh.size);
1061 				goto fail;
1062 			}
1063 			if (vh.count > 1) {
1064 				error = ldm_xvblk_handle(db, &vh, p);
1065 				if (error != 0) {
1066 					LDM_DEBUG(0, "%s: xVBLK "
1067 					    "is corrupted\n", pp->name);
1068 					LDM_DUMP(p, db->dh.size);
1069 					goto fail;
1070 				}
1071 				continue;
1072 			}
1073 			if (be16dec(p + 16) != 0)
1074 				LDM_DEBUG(1, "%s: VBLK update"
1075 				    " status is %u\n", pp->name,
1076 				    be16dec(p + 16));
1077 			error = ldm_vblk_handle(db, p, db->dh.size);
1078 			if (error != 0)
1079 				goto fail;
1080 		}
1081 		g_free(buf);
1082 		buf = NULL;
1083 	}
1084 	/* Parse xVBLKs */
1085 	while (!LIST_EMPTY(&db->xvblks)) {
1086 		xvblk = LIST_FIRST(&db->xvblks);
1087 		if (xvblk->map == 0xFF) {
1088 			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
1089 			if (error != 0)
1090 				goto fail;
1091 		} else {
1092 			LDM_DEBUG(0, "%s: incomplete or corrupt "
1093 			    "xVBLK found\n", pp->name);
1094 			goto fail;
1095 		}
1096 		LIST_REMOVE(xvblk, entry);
1097 		g_free(xvblk->data);
1098 		g_free(xvblk);
1099 	}
1100 	/* construct all VBLKs relations */
1101 	LIST_FOREACH(volume, &db->volumes, entry) {
1102 		LIST_FOREACH(vblk, &db->vblks, entry)
1103 			if (vblk->type == LDM_VBLK_T_COMPONENT &&
1104 			    vblk->u.comp.vol_id == volume->id) {
1105 				LIST_INSERT_HEAD(&volume->components,
1106 				    &vblk->u.comp, entry);
1107 				volume->count++;
1108 			}
1109 		LIST_FOREACH(comp, &volume->components, entry)
1110 			LIST_FOREACH(vblk, &db->vblks, entry)
1111 				if (vblk->type == LDM_VBLK_T_PARTITION &&
1112 				    vblk->u.part.comp_id == comp->id) {
1113 					LIST_INSERT_HEAD(&comp->partitions,
1114 					    &vblk->u.part, entry);
1115 					comp->count++;
1116 				}
1117 	}
1118 	return (0);
1119 fail:
1120 	ldm_vmdb_free(db);
1121 	g_free(buf);
1122 	return (ENXIO);
1123 }
1124 
/*
 * Adding a partition is not implemented for the LDM scheme:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
    struct g_part_parms *gpp)
{

	(void)basetable;
	(void)baseentry;
	(void)gpp;
	return (ENOSYS);
}
1132 
/*
 * Installing boot code is not implemented for the LDM scheme:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
{

	(void)basetable;
	(void)gpp;
	return (ENOSYS);
}
1139 
/*
 * Creating a new LDM table is not implemented:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
{

	(void)basetable;
	(void)gpp;
	return (ENOSYS);
}
1146 
1147 static int
1148 g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
1149 {
1150 	struct g_part_ldm_table *table;
1151 	struct g_provider *pp;
1152 
1153 	table = (struct g_part_ldm_table *)basetable;
1154 	/*
1155 	 * To destroy LDM on a disk partitioned with GPT we should delete
1156 	 * ms-ldm-metadata partition, but we can't do this via standard
1157 	 * GEOM_PART method.
1158 	 */
1159 	if (table->is_gpt)
1160 		return (ENOSYS);
1161 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1162 	/*
1163 	 * To destroy LDM we should wipe MBR, first private header and
1164 	 * backup private headers.
1165 	 */
1166 	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
1167 	/*
1168 	 * Don't touch last backup private header when LDM database is
1169 	 * not located in the last 1MByte area.
1170 	 * XXX: can't remove all blocks.
1171 	 */
1172 	if (table->db_offset + LDM_DB_SIZE ==
1173 	    pp->mediasize / pp->sectorsize)
1174 		basetable->gpt_smtail = 1;
1175 	return (0);
1176 }
1177 
1178 static void
1179 g_part_ldm_dumpconf(struct g_part_table *basetable,
1180     struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
1181 {
1182 	struct g_part_ldm_entry *entry;
1183 
1184 	entry = (struct g_part_ldm_entry *)baseentry;
1185 	if (indent == NULL) {
1186 		/* conftxt: libdisk compatibility */
1187 		sbuf_printf(sb, " xs LDM xt %u", entry->type);
1188 	} else if (entry != NULL) {
1189 		/* confxml: partition entry information */
1190 		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
1191 		    entry->type);
1192 	} else {
1193 		/* confxml: scheme information */
1194 	}
1195 }
1196 
/*
 * Always returns 0, whatever entry is passed in.
 * NOTE(review): presumably this tells GEOM_PART that LDM entries are not
 * offered as kernel dump targets -- confirm against the g_part interface.
 */
static int
g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
{

	(void)table;
	(void)baseentry;
	return (0);
}
1203 
/*
 * Modifying a partition is not implemented for the LDM scheme:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{

	(void)basetable;
	(void)baseentry;
	(void)gpp;
	return (ENOSYS);
}
1211 
/*
 * Resizing a partition is not implemented for the LDM scheme:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_resize(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{

	(void)basetable;
	(void)baseentry;
	(void)gpp;
	return (ENOSYS);
}
1219 
1220 static const char *
1221 g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
1222     char *buf, size_t bufsz)
1223 {
1224 
1225 	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
1226 	return (buf);
1227 }
1228 
1229 static int
1230 ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
1231 {
1232 	struct g_part_ldm_table *table;
1233 	struct g_part_table *gpt;
1234 	struct g_part_entry *entry;
1235 	struct g_consumer *cp2;
1236 	struct gpt_ent *part;
1237 	u_char *buf;
1238 	int error;
1239 
1240 	/*
1241 	 * XXX: We use some knowlege about GEOM_PART_GPT internal
1242 	 * structures, but it is easier than parse GPT by himself.
1243 	 */
1244 	g_topology_lock();
1245 	gpt = cp->provider->geom->softc;
1246 	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
1247 		part = (struct gpt_ent *)(entry + 1);
1248 		/* Search ms-ldm-metadata partition */
1249 		if (memcmp(&part->ent_type,
1250 		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
1251 		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
1252 			continue;
1253 
1254 		/* Create new consumer and attach it to metadata partition */
1255 		cp2 = g_new_consumer(cp->geom);
1256 		error = g_attach(cp2, entry->gpe_pp);
1257 		if (error != 0) {
1258 			g_destroy_consumer(cp2);
1259 			g_topology_unlock();
1260 			return (ENXIO);
1261 		}
1262 		error = g_access(cp2, 1, 0, 0);
1263 		if (error != 0) {
1264 			g_detach(cp2);
1265 			g_destroy_consumer(cp2);
1266 			g_topology_unlock();
1267 			return (ENXIO);
1268 		}
1269 		g_topology_unlock();
1270 
1271 		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
1272 		    cp->provider->name, cp2->provider->name);
1273 		/* Read the LDM private header */
1274 		buf = ldm_privhdr_read(cp2,
1275 		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
1276 		    &error);
1277 		if (buf != NULL) {
1278 			table = (struct g_part_ldm_table *)basetable;
1279 			table->is_gpt = 1;
1280 			g_free(buf);
1281 			return (G_PART_PROBE_PRI_HIGH);
1282 		}
1283 
1284 		/* second consumer is no longer needed. */
1285 		g_topology_lock();
1286 		g_access(cp2, -1, 0, 0);
1287 		g_detach(cp2);
1288 		g_destroy_consumer(cp2);
1289 		break;
1290 	}
1291 	g_topology_unlock();
1292 	return (ENXIO);
1293 }
1294 
1295 static int
1296 g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
1297 {
1298 	struct g_provider *pp;
1299 	u_char *buf, type[64];
1300 	int error, idx;
1301 
1302 
1303 	pp = cp->provider;
1304 	if (pp->sectorsize != 512)
1305 		return (ENXIO);
1306 
1307 	error = g_getattr("PART::scheme", cp, &type);
1308 	if (error == 0 && strcmp(type, "GPT") == 0) {
1309 		if (g_getattr("PART::type", cp, &type) != 0 ||
1310 		    strcmp(type, "ms-ldm-data") != 0)
1311 			return (ENXIO);
1312 		error = ldm_gpt_probe(basetable, cp);
1313 		return (error);
1314 	}
1315 
1316 	if (basetable->gpt_depth != 0)
1317 		return (ENXIO);
1318 
1319 	/* LDM has 1M metadata area */
1320 	if (pp->mediasize <= 1024 * 1024)
1321 		return (ENOSPC);
1322 
1323 	/* Check that there's a MBR */
1324 	buf = g_read_data(cp, 0, pp->sectorsize, &error);
1325 	if (buf == NULL)
1326 		return (error);
1327 
1328 	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
1329 		g_free(buf);
1330 		return (ENXIO);
1331 	}
1332 	error = ENXIO;
1333 	/* Check that we have LDM partitions in the MBR */
1334 	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
1335 		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
1336 			error = 0;
1337 	}
1338 	g_free(buf);
1339 	if (error == 0) {
1340 		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
1341 		    pp->name);
1342 		/* Read the LDM private header */
1343 		buf = ldm_privhdr_read(cp,
1344 		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
1345 		if (buf == NULL)
1346 			return (error);
1347 		g_free(buf);
1348 		return (G_PART_PROBE_PRI_HIGH);
1349 	}
1350 	return (error);
1351 }
1352 
1353 static int
1354 g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
1355 {
1356 	struct g_part_ldm_table *table;
1357 	struct g_part_ldm_entry *entry;
1358 	struct g_consumer *cp2;
1359 	struct ldm_component *comp;
1360 	struct ldm_partition *part;
1361 	struct ldm_volume *vol;
1362 	struct ldm_disk *disk;
1363 	struct ldm_db db;
1364 	int error, index, skipped;
1365 
1366 	table = (struct g_part_ldm_table *)basetable;
1367 	memset(&db, 0, sizeof(db));
1368 	cp2 = cp;					/* ms-ldm-data */
1369 	if (table->is_gpt)
1370 		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
1371 	/* Read and parse LDM private headers. */
1372 	error = ldm_privhdr_check(&db, cp, table->is_gpt);
1373 	if (error != 0)
1374 		goto gpt_cleanup;
1375 	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
1376 	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
1377 	table->db_offset = db.ph.db_offset;
1378 	/* Make additional checks for GPT */
1379 	if (table->is_gpt) {
1380 		error = ldm_gpt_check(&db, cp);
1381 		if (error != 0)
1382 			goto gpt_cleanup;
1383 		/*
1384 		 * Now we should reset database offset to zero, because our
1385 		 * consumer cp is attached to the ms-ldm-metadata partition
1386 		 * and we don't need add db_offset to read from it.
1387 		 */
1388 		db.ph.db_offset = 0;
1389 	}
1390 	/* Read and parse LDM TOC headers. */
1391 	error = ldm_tochdr_check(&db, cp);
1392 	if (error != 0)
1393 		goto gpt_cleanup;
1394 	/* Read and parse LDM VMDB header. */
1395 	error = ldm_vmdbhdr_check(&db, cp);
1396 	if (error != 0)
1397 		goto gpt_cleanup;
1398 	error = ldm_vmdb_parse(&db, cp);
1399 	/*
1400 	 * For the GPT case we must detach and destroy
1401 	 * second consumer before return.
1402 	 */
1403 gpt_cleanup:
1404 	if (table->is_gpt) {
1405 		g_topology_lock();
1406 		g_access(cp, -1, 0, 0);
1407 		g_detach(cp);
1408 		g_destroy_consumer(cp);
1409 		g_topology_unlock();
1410 		cp = cp2;
1411 	}
1412 	if (error != 0)
1413 		return (error);
1414 	/* Search current disk in the disk list. */
1415 	LIST_FOREACH(disk, &db.disks, entry)
1416 	    if (memcmp(&disk->guid, &db.ph.disk_guid,
1417 		sizeof(struct uuid)) == 0)
1418 		    break;
1419 	if (disk == NULL) {
1420 		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
1421 		    cp->provider->name);
1422 		ldm_vmdb_free(&db);
1423 		return (ENXIO);
1424 	}
1425 	index = 1;
1426 	LIST_FOREACH(vol, &db.volumes, entry) {
1427 		LIST_FOREACH(comp, &vol->components, entry) {
1428 			/* Skip volumes from different disks. */
1429 			part = LIST_FIRST(&comp->partitions);
1430 			if (part->disk_id != disk->id)
1431 				continue;
1432 			skipped = 0;
1433 			/* We don't support spanned and striped volumes. */
1434 			if (comp->count > 1 || part->offset != 0) {
1435 				LDM_DEBUG(1, "%s: LDM volume component "
1436 				    "%ju has %u partitions. Skipped",
1437 				    cp->provider->name, (uintmax_t)comp->id,
1438 				    comp->count);
1439 				skipped = 1;
1440 			}
1441 			/*
1442 			 * Allow mirrored volumes only when they are explicitly
1443 			 * allowed with kern.geom.part.ldm.show_mirrors=1.
1444 			 */
1445 			if (vol->count > 1 && show_mirrors == 0) {
1446 				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
1447 				    "components. Skipped",
1448 				    cp->provider->name, (uintmax_t)vol->id,
1449 				    vol->count);
1450 				skipped = 1;
1451 			}
1452 			entry = (struct g_part_ldm_entry *)g_part_new_entry(
1453 			    basetable, index++,
1454 			    basetable->gpt_first + part->start,
1455 			    basetable->gpt_first + part->start +
1456 			    part->size - 1);
1457 			/*
1458 			 * Mark skipped partition as ms-ldm-data partition.
1459 			 * We do not support them, but it is better to show
1460 			 * that we have something there, than just show
1461 			 * free space.
1462 			 */
1463 			if (skipped == 0)
1464 				entry->type = vol->part_type;
1465 			else
1466 				entry->type = DOSPTYP_LDM;
1467 			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
1468 			    " end: %ju, type: 0x%02x\n", cp->provider->name,
1469 			    (uintmax_t)part->id,(uintmax_t)part->start +
1470 			    basetable->gpt_first, (uintmax_t)part->start +
1471 			    part->size + basetable->gpt_first - 1,
1472 			    vol->part_type);
1473 		}
1474 	}
1475 	ldm_vmdb_free(&db);
1476 	return (error);
1477 }
1478 
/*
 * Setting or clearing attributes is not implemented for the LDM scheme:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_setunset(struct g_part_table *table, struct g_part_entry *baseentry,
    const char *attrib, unsigned int set)
{

	(void)table;
	(void)baseentry;
	(void)attrib;
	(void)set;
	return (ENOSYS);
}
1486 
1487 static const char *
1488 g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1489     char *buf, size_t bufsz)
1490 {
1491 	struct g_part_ldm_entry *entry;
1492 	int i;
1493 
1494 	entry = (struct g_part_ldm_entry *)baseentry;
1495 	for (i = 0;
1496 	    i < sizeof(ldm_alias_match) / sizeof(ldm_alias_match[0]); i++) {
1497 		if (ldm_alias_match[i].typ == entry->type)
1498 			return (g_part_alias_name(ldm_alias_match[i].alias));
1499 	}
1500 	snprintf(buf, bufsz, "!%d", entry->type);
1501 	return (buf);
1502 }
1503 
/*
 * Writing LDM metadata is not implemented:
 * the arguments are ignored and ENOSYS is always returned.
 */
static int
g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
{

	(void)basetable;
	(void)cp;
	return (ENOSYS);
}
1510