xref: /freebsd/sys/geom/part/g_part_ldm.c (revision 031beb4e239bfce798af17f5fe8dba8bcaf13d99)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/diskmbr.h>
33 #include <sys/endian.h>
34 #include <sys/gpt.h>
35 #include <sys/kernel.h>
36 #include <sys/kobj.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/queue.h>
42 #include <sys/sbuf.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/uuid.h>
46 #include <geom/geom.h>
47 #include <geom/part/g_part.h>
48 
49 #include "g_part_if.h"
50 
FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");

/* The "ldm" sysctl node below hangs off the _kern_geom_part parent. */
SYSCTL_DECL(_kern_geom_part);
static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_PART_LDM Logical Disk Manager");

/* Verbosity threshold tested by the LDM_DEBUG() and LDM_DUMP() macros. */
static u_int ldm_debug = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
    CTLFLAG_RWTUN, &ldm_debug, 0, "Debug level");

/*
 * This allows access to mirrored LDM volumes. Since we do not
 * do mirroring here, it is not enabled by default.
 */
static u_int show_mirrors = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
    CTLFLAG_RWTUN, &show_mirrors, 0, "Show mirrored volumes");
69 
/*
 * Print a "GEOM_PART: "-prefixed message, followed by a newline, when
 * ldm_debug is at least lvl.  The macro always appends "\n" itself, and at
 * least one argument has to follow fmt because __VA_ARGS__ is pasted after
 * a comma.
 */
#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
	if (ldm_debug >= (lvl)) {					\
		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
	}								\
} while (0)
/* Hex-dump size bytes starting at buf when ldm_debug is greater than 1. */
#define	LDM_DUMP(buf, size)	do {					\
	if (ldm_debug > 1) {						\
		hexdump(buf, size, NULL, 0);				\
	}								\
} while (0)
80 
/*
 * These are the internal representations of LDM structures.
 *
 * We do not keep all fields of the on-disk structures, only the most
 * useful ones. All numbers in the on-disk structures are in big-endian
 * format.
 */
87 
/*
 * Private header is 512 bytes long. There are three copies on each disk.
 * Offsets and sizes are in sectors. Location of each copy:
 * - the first offset is relative to the disk start;
 * - the second and third offsets are relative to the LDM database start.
 *
 * On a disk partitioned with GPT, the LDM does not have the first private
 * header.
 */
#define	LDM_PH_MBRINDEX		0
#define	LDM_PH_GPTINDEX		2
/* Sector offsets of the three private header copies (see above). */
static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
#define	LDM_VERSION_2K		0x2000b
#define	LDM_VERSION_VISTA	0x2000c
/* Byte offsets of the fields inside the on-disk private header. */
#define	LDM_PH_VERSION_OFF	0x00c
#define	LDM_PH_DISKGUID_OFF	0x030
#define	LDM_PH_DGGUID_OFF	0x0b0
#define	LDM_PH_DGNAME_OFF	0x0f0
#define	LDM_PH_START_OFF	0x11b
#define	LDM_PH_SIZE_OFF		0x123
#define	LDM_PH_DB_OFF		0x12b
#define	LDM_PH_DBSIZE_OFF	0x133
#define	LDM_PH_TH1_OFF		0x13b
#define	LDM_PH_TH2_OFF		0x143
#define	LDM_PH_CONFSIZE_OFF	0x153
#define	LDM_PH_LOGSIZE_OFF	0x15b
#define	LDM_PH_SIGN		"PRIVHEAD"
/* In-memory copy of the private header fields that are kept. */
struct ldm_privhdr {
	struct uuid	disk_guid;
	struct uuid	dg_guid;
	u_char		dg_name[32];
	uint64_t	start;		/* logical disk start */
	uint64_t	size;		/* logical disk size */
	uint64_t	db_offset;	/* LDM database start */
#define	LDM_DB_SIZE		2048
	uint64_t	db_size;	/* LDM database size */
#define	LDM_TH_COUNT		2
	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
	uint64_t	conf_size;	/* configuration size */
	uint64_t	log_size;	/* size of log */
};
128 
/*
 * Table of contents header is 512 bytes long.
 * There are two identical copies at the offsets given in the private
 * header. Offsets are relative to the LDM database start.
 */
#define	LDM_TH_SIGN		"TOCBLOCK"
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields inside the on-disk TOC header. */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
/* Only the two section offsets are kept in memory. */
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};
147 
/*
 * LDM database (VMDB) header is 512 bytes long.
 */
#define	LDM_VMDB_SIGN		"VMDB"
/* Byte offsets of the fields inside the on-disk VMDB header. */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
/* In-memory copy of the VMDB header fields that are kept. */
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
162 
/*
 * The LDM database configuration section contains the VMDB header and
 * many VBLKs. Each VBLK represents a disk group, disk, partition,
 * component or volume.
 *
 * The most interesting for us are volumes; they represent partitions
 * in the GEOM_PART sense. But a volume VBLK does not contain all the
 * information needed to create a GEOM provider, so we have to get this
 * information from the related VBLKs. This is how VBLKs are related:
 *	Volumes <- Components <- Partitions -> Disks
 *
 * One volume can contain several components. In this case LDM
 * mirrors the volume data to each component.
 *
 * Also, each component can contain several partitions (spanned or
 * striped volumes).
 */
181 
/* Component object: belongs to one volume and groups partitions. */
struct ldm_component {
	uint64_t	id;		/* object id */
	uint64_t	vol_id;		/* parent volume object id */

	int		count;		/* presumably the length of `partitions' -- confirm */
	LIST_HEAD(, ldm_partition) partitions;
	LIST_ENTRY(ldm_component) entry;
};
190 
/* Volume object: groups the components that carry its data. */
struct ldm_volume {
	uint64_t	id;		/* object id */
	uint64_t	size;		/* volume size */
	uint8_t		number;		/* used for ordering */
	uint8_t		part_type;	/* partition type */

	int		count;		/* presumably the length of `components' -- confirm */
	LIST_HEAD(, ldm_component) components;
	LIST_ENTRY(ldm_volume)	entry;
};
201 
/* Disk object: maps an object id to the GUID of a disk. */
struct ldm_disk {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk guid */

	LIST_ENTRY(ldm_disk) entry;
};
208 
/* Disk group objects are not tracked; the definition is compiled out. */
#if 0
struct ldm_disk_group {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk group guid */
	u_char		name[32];	/* disk group name */

	LIST_ENTRY(ldm_disk_group) entry;
};
#endif
218 
/* Partition object: an extent of one disk that belongs to one component. */
struct ldm_partition {
	uint64_t	id;		/* object id */
	uint64_t	disk_id;	/* disk object id */
	uint64_t	comp_id;	/* parent component object id */
	uint64_t	start;		/* offset relative to disk start */
	uint64_t	offset;		/* offset for spanned volumes */
	uint64_t	size;		/* partition size */

	LIST_ENTRY(ldm_partition) entry;
};
229 
/*
 * Each VBLK is 128 bytes long and has a standard 16-byte header.
 * Some of a VBLK's fields have a fixed size, but others have a variable
 * size. Fields with a variable size are prefixed with a one-byte length
 * marker. Some fields are strings, and these can also be either fixed or
 * variable in size. Strings with a fixed size are NUL-terminated, others
 * are not.
 * All VBLKs share the same first several fields:
 *	Offset		Size		Description
 *	---------------+---------------+--------------------------
 *	0x00		16		standard VBLK header
 *	0x10		2		update status
 *	0x13		1		VBLK type
 *	0x18		PS		object id
 *	0x18+		PN		object name
 *
 *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
 *  o 'P' in size column means 'prefixed' (variable-width),
 *    'S' - string, 'N' - number.
 */
#define	LDM_VBLK_SIGN		"VBLK"
/* Byte offsets of the fields inside an on-disk VBLK. */
#define	LDM_VBLK_SEQ_OFF	0x04
#define	LDM_VBLK_GROUP_OFF	0x08
#define	LDM_VBLK_INDEX_OFF	0x0c
#define	LDM_VBLK_COUNT_OFF	0x0e
#define	LDM_VBLK_TYPE_OFF	0x13
#define	LDM_VBLK_OID_OFF	0x18
/* In-memory copy of the standard VBLK header fields that are kept. */
struct ldm_vblkhdr {
	uint32_t	seq;		/* sequence number */
	uint32_t	group;		/* group number */
	uint16_t	index;		/* index in the group */
	uint16_t	count;		/* number of entries in the group */
};
262 
/* Values of the VBLK type byte found at LDM_VBLK_TYPE_OFF. */
#define	LDM_VBLK_T_COMPONENT	0x32
#define	LDM_VBLK_T_PARTITION	0x33
#define	LDM_VBLK_T_DISK		0x34
#define	LDM_VBLK_T_DISKGROUP	0x35
#define	LDM_VBLK_T_DISK4	0x44
#define	LDM_VBLK_T_DISKGROUP4	0x45
#define	LDM_VBLK_T_VOLUME	0x51
/*
 * One parsed VBLK.  `type' selects the valid member of `u'; every member
 * structure starts with the 64-bit object id, which is why `u.id' can be
 * filled in before the type-specific fields are parsed.
 */
struct ldm_vblk {
	uint8_t		type;		/* VBLK type */
	union {
		uint64_t		id;
		struct ldm_volume	vol;
		struct ldm_component	comp;
		struct ldm_disk		disk;
		struct ldm_partition	part;
#if 0
		struct ldm_disk_group	disk_group;
#endif
	} u;
	LIST_ENTRY(ldm_vblk) entry;
};
284 
/*
 * Some VBLKs contain a bit more data than can fit into 128 bytes. These
 * VBLKs are called eXtended VBLKs (xVBLK). Before parsing, the data from
 * such VBLKs should be placed into a contiguous memory buffer. We can
 * recognize an xVBLK by the count field in the standard VBLK header
 * (count > 1).
 */
struct ldm_xvblk {
	uint32_t	group;		/* xVBLK group number */
	uint32_t	size;		/* the total size of xVBLK */
	uint8_t		map;		/* bitmask of currently saved VBLKs */
	u_char		*data;		/* xVBLK data */

	LIST_ENTRY(ldm_xvblk)	entry;
};
299 
/*
 * The internal representation of the LDM database: the three validated
 * headers plus the lists of objects collected while parsing VBLKs.
 */
struct ldm_db {
	struct ldm_privhdr		ph;	/* private header */
	struct ldm_tochdr		th;	/* TOC header */
	struct ldm_vmdbhdr		dh;	/* VMDB header */

	LIST_HEAD(, ldm_volume)		volumes;
	LIST_HEAD(, ldm_disk)		disks;
	LIST_HEAD(, ldm_vblk)		vblks;
	LIST_HEAD(, ldm_xvblk)		xvblks;
};
311 
/* GPT partition type UUID of the MS LDM metadata partition. */
static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;

/* Scheme-private table: the generic g_part table plus LDM specifics. */
struct g_part_ldm_table {
	struct g_part_table	base;
	uint64_t		db_offset;
	int			is_gpt;
};
/* Scheme-private entry: the generic g_part entry plus a one-byte type. */
struct g_part_ldm_entry {
	struct g_part_entry	base;
	uint8_t			type;
};
323 
/* Forward declarations of the methods listed in g_part_ldm_methods[]. */
static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
    struct sbuf *, const char *);
static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
341 
/* kobj method table binding each g_part interface method to this scheme. */
static kobj_method_t g_part_ldm_methods[] = {
	KOBJMETHOD(g_part_add,		g_part_ldm_add),
	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
	KOBJMETHOD(g_part_create,	g_part_ldm_create),
	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
	KOBJMETHOD(g_part_name,		g_part_ldm_name),
	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
	KOBJMETHOD(g_part_read,		g_part_ldm_read),
	KOBJMETHOD(g_part_type,		g_part_ldm_type),
	KOBJMETHOD(g_part_write,	g_part_ldm_write),
	{ 0, 0 }	/* terminator */
};
357 
/*
 * Scheme descriptor: the name "LDM", the method table, the size of the
 * per-table private structure and the size of the per-entry structure.
 */
static struct g_part_scheme g_part_ldm_scheme = {
	"LDM",
	g_part_ldm_methods,
	sizeof(struct g_part_ldm_table),
	.gps_entrysz = sizeof(struct g_part_ldm_entry)
};
G_PART_SCHEME_DECLARE(g_part_ldm);
MODULE_VERSION(geom_part_ldm, 0);
366 
/* Pairs of MBR-style partition type byte and the matching g_part alias. */
static struct g_part_ldm_alias {
	u_char		typ;	/* DOSPTYP_* partition type */
	int		alias;	/* G_PART_ALIAS_* value */
} ldm_alias_match[] = {
	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
	{ DOSPTYP_FAT32LBA,	G_PART_ALIAS_MS_FAT32LBA },
	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
};
381 
382 static u_char*
383 ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
384 {
385 	struct g_provider *pp;
386 	u_char *buf;
387 
388 	pp = cp->provider;
389 	buf = g_read_data(cp, off, pp->sectorsize, error);
390 	if (buf == NULL)
391 		return (NULL);
392 
393 	if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) {
394 		LDM_DEBUG(1, "%s: invalid LDM private header signature",
395 		    pp->name);
396 		g_free(buf);
397 		buf = NULL;
398 		*error = EINVAL;
399 	}
400 	return (buf);
401 }
402 
403 static int
404 ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
405     const u_char *buf)
406 {
407 	uint32_t version;
408 	int error;
409 
410 	memset(hdr, 0, sizeof(*hdr));
411 	version = be32dec(buf + LDM_PH_VERSION_OFF);
412 	if (version != LDM_VERSION_2K &&
413 	    version != LDM_VERSION_VISTA) {
414 		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
415 		    cp->provider->name, version >> 16,
416 		    version & 0xFFFF);
417 		return (ENXIO);
418 	}
419 	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
420 	if (error != 0)
421 		return (error);
422 	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
423 	if (error != 0)
424 		return (error);
425 	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
426 	hdr->start = be64dec(buf + LDM_PH_START_OFF);
427 	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
428 	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
429 	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
430 	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
431 	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
432 	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
433 	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
434 	return (0);
435 }
436 
437 static int
438 ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
439 {
440 	struct g_consumer *cp2;
441 	struct g_provider *pp;
442 	struct ldm_privhdr hdr;
443 	uint64_t offset, last;
444 	int error, found, i;
445 	u_char *buf;
446 
447 	pp = cp->provider;
448 	if (is_gpt) {
449 		/*
450 		 * The last LBA is used in several checks below, for the
451 		 * GPT case it should be calculated relative to the whole
452 		 * disk.
453 		 */
454 		cp2 = LIST_FIRST(&pp->geom->consumer);
455 		last =
456 		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
457 	} else
458 		last = pp->mediasize / pp->sectorsize - 1;
459 	for (found = 0, i = is_gpt; i < nitems(ldm_ph_off); i++) {
460 		offset = ldm_ph_off[i];
461 		/*
462 		 * In the GPT case consumer is attached to the LDM metadata
463 		 * partition and we don't need add db_offset.
464 		 */
465 		if (!is_gpt)
466 			offset += db->ph.db_offset;
467 		if (i == LDM_PH_MBRINDEX) {
468 			/*
469 			 * Prepare to errors and setup new base offset
470 			 * to read backup private headers. Assume that LDM
471 			 * database is in the last 1Mbyte area.
472 			 */
473 			db->ph.db_offset = last - LDM_DB_SIZE;
474 		}
475 		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
476 		if (buf == NULL) {
477 			LDM_DEBUG(1, "%s: failed to read private header "
478 			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
479 			continue;
480 		}
481 		error = ldm_privhdr_parse(cp, &hdr, buf);
482 		if (error != 0) {
483 			LDM_DEBUG(1, "%s: failed to parse private "
484 			    "header %d", pp->name, i);
485 			LDM_DUMP(buf, pp->sectorsize);
486 			g_free(buf);
487 			continue;
488 		}
489 		g_free(buf);
490 		if (hdr.start > last ||
491 		    hdr.start + hdr.size - 1 > last ||
492 		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
493 		    hdr.db_size != LDM_DB_SIZE ||
494 		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
495 		    hdr.th_offset[0] >= LDM_DB_SIZE ||
496 		    hdr.th_offset[1] >= LDM_DB_SIZE ||
497 		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
498 			LDM_DEBUG(1, "%s: invalid values in the "
499 			    "private header %d", pp->name, i);
500 			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
501 			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
502 			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
503 			    "last: %jd", pp->name, hdr.start, hdr.size,
504 			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
505 			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
506 			    last);
507 			continue;
508 		}
509 		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
510 			LDM_DEBUG(0, "%s: private headers are not equal",
511 			    pp->name);
512 			if (i > 1) {
513 				/*
514 				 * We have different headers in the LDM.
515 				 * We can not trust this metadata.
516 				 */
517 				LDM_DEBUG(0, "%s: refuse LDM metadata",
518 				    pp->name);
519 				return (EINVAL);
520 			}
521 			/*
522 			 * We already have read primary private header
523 			 * and it differs from this backup one.
524 			 * Prefer the backup header and save it.
525 			 */
526 			found = 0;
527 		}
528 		if (found == 0)
529 			memcpy(&db->ph, &hdr, sizeof(hdr));
530 		found = 1;
531 	}
532 	if (found == 0) {
533 		LDM_DEBUG(1, "%s: valid LDM private header not found",
534 		    pp->name);
535 		return (ENXIO);
536 	}
537 	return (0);
538 }
539 
540 static int
541 ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
542 {
543 	struct g_part_table *gpt;
544 	struct g_part_entry *e;
545 	struct g_consumer *cp2;
546 	int error;
547 
548 	cp2 = LIST_NEXT(cp, consumer);
549 	g_topology_lock();
550 	gpt = cp->provider->geom->softc;
551 	error = 0;
552 	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
553 		if (cp->provider == e->gpe_pp) {
554 			/* ms-ldm-metadata partition */
555 			if (e->gpe_start != db->ph.db_offset ||
556 			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
557 				error++;
558 		} else if (cp2->provider == e->gpe_pp) {
559 			/* ms-ldm-data partition */
560 			if (e->gpe_start != db->ph.start ||
561 			    e->gpe_end != db->ph.start + db->ph.size - 1)
562 				error++;
563 		}
564 		if (error != 0) {
565 			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
566 			    "do not match with the LDM metadata",
567 			    e->gpe_pp->name, e->gpe_index);
568 			error = ENXIO;
569 			break;
570 		}
571 	}
572 	g_topology_unlock();
573 	return (error);
574 }
575 
576 static int
577 ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
578 {
579 	struct g_provider *pp;
580 	struct ldm_tochdr hdr;
581 	uint64_t offset, conf_size, log_size;
582 	int error, found, i;
583 	u_char *buf;
584 
585 	pp = cp->provider;
586 	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
587 		offset = db->ph.db_offset + db->ph.th_offset[i];
588 		buf = g_read_data(cp,
589 		    offset * pp->sectorsize, pp->sectorsize, &error);
590 		if (buf == NULL) {
591 			LDM_DEBUG(1, "%s: failed to read TOC header "
592 			    "at LBA %ju", pp->name, (uintmax_t)offset);
593 			continue;
594 		}
595 		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
596 		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
597 		    strlen(LDM_TH_NAME1)) != 0 ||
598 		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
599 		    strlen(LDM_TH_NAME2)) != 0) {
600 			LDM_DEBUG(1, "%s: failed to parse TOC header "
601 			    "at LBA %ju", pp->name, (uintmax_t)offset);
602 			LDM_DUMP(buf, pp->sectorsize);
603 			g_free(buf);
604 			continue;
605 		}
606 		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
607 		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
608 		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
609 		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
610 		if (conf_size != db->ph.conf_size ||
611 		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
612 		    log_size != db->ph.log_size ||
613 		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
614 			LDM_DEBUG(1, "%s: invalid values in the "
615 			    "TOC header at LBA %ju", pp->name,
616 			    (uintmax_t)offset);
617 			LDM_DUMP(buf, pp->sectorsize);
618 			g_free(buf);
619 			continue;
620 		}
621 		g_free(buf);
622 		if (found == 0)
623 			memcpy(&db->th, &hdr, sizeof(hdr));
624 		found = 1;
625 	}
626 	if (found == 0) {
627 		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
628 		    pp->name);
629 		return (ENXIO);
630 	}
631 	return (0);
632 }
633 
634 static int
635 ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
636 {
637 	struct g_provider *pp;
638 	struct uuid dg_guid;
639 	uint64_t offset;
640 	uint32_t version;
641 	int error;
642 	u_char *buf;
643 
644 	pp = cp->provider;
645 	offset = db->ph.db_offset + db->th.conf_offset;
646 	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
647 	    &error);
648 	if (buf == NULL) {
649 		LDM_DEBUG(0, "%s: failed to read VMDB header at "
650 		    "LBA %ju", pp->name, (uintmax_t)offset);
651 		return (error);
652 	}
653 	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
654 		g_free(buf);
655 		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
656 		    "LBA %ju", pp->name, (uintmax_t)offset);
657 		return (ENXIO);
658 	}
659 	/* Check version. */
660 	version = be32dec(buf + LDM_DB_VERSION_OFF);
661 	if (version != 0x4000A) {
662 		g_free(buf);
663 		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
664 		    pp->name, version >> 16, version & 0xFFFF);
665 		return (ENXIO);
666 	}
667 	/*
668 	 * Check VMDB update status:
669 	 *	1 - in a consistent state;
670 	 *	2 - in a creation phase;
671 	 *	3 - in a deletion phase;
672 	 */
673 	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
674 		g_free(buf);
675 		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
676 		    pp->name);
677 		return (ENXIO);
678 	}
679 	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
680 	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
681 	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
682 	/* Compare disk group name and guid from VMDB and private headers */
683 	if (error != 0 || db->dh.size == 0 ||
684 	    pp->sectorsize % db->dh.size != 0 ||
685 	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
686 	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
687 	    db->dh.size * db->dh.last_seq >
688 	    db->ph.conf_size * pp->sectorsize) {
689 		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
690 		    pp->name);
691 		LDM_DUMP(buf, pp->sectorsize);
692 		g_free(buf);
693 		return (EINVAL);
694 	}
695 	g_free(buf);
696 	return (0);
697 }
698 
699 static int
700 ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
701 {
702 	struct ldm_xvblk *blk;
703 	size_t size;
704 
705 	size = db->dh.size - 16;
706 	LIST_FOREACH(blk, &db->xvblks, entry)
707 		if (blk->group == vh->group)
708 			break;
709 	if (blk == NULL) {
710 		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
711 		blk->group = vh->group;
712 		blk->size = size * vh->count + 16;
713 		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
714 		blk->map = 0xFF << vh->count;
715 		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
716 	}
717 	if ((blk->map & (1 << vh->index)) != 0) {
718 		/* Block with given index has been already saved. */
719 		return (EINVAL);
720 	}
721 	/* Copy the data block to the place related to index. */
722 	memcpy(blk->data + size * vh->index + 16, p + 16, size);
723 	blk->map |= 1 << vh->index;
724 	return (0);
725 }
726 
/*
 * Read the variable-width numeric field and return new offset.
 *
 * The field at buf[offset] is a one-byte length followed by that many
 * big-endian bytes.  `range' is the number of valid bytes in buf.  On
 * success the decoded number is stored in *result and the offset of the
 * byte following the field is returned.  -1 is returned, with *result
 * untouched, when the length exceeds 8 bytes or the field does not end
 * strictly before `range'.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* The length byte itself has to be inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}
742 
/*
 * Read the variable-width string and return new offset.
 *
 * The field at buf[offset] is a one-byte length followed by that many
 * characters, without a terminator.  The characters are copied to
 * `result' (capacity `maxlen') and NUL-terminated.  `range' is the number
 * of valid bytes in buf.  Returns the offset of the byte following the
 * field, or -1 when the string does not fit into `result' or does not end
 * strictly before `range'.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* The length byte itself has to be inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}
757 
/*
 * Just skip the variable-width variable and return new offset.
 *
 * The field at buf[offset] is a one-byte length followed by that many
 * bytes.  `range' is the number of valid bytes in buf.  Returns the offset
 * of the byte following the field, or -1 when the field does not end
 * strictly before `range'.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* The length byte itself has to be inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
770 
771 static int
772 ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
773 {
774 	struct ldm_vblk *blk;
775 	struct ldm_volume *volume, *last;
776 	const char *errstr;
777 	u_char vstr[64];
778 	int error, offset;
779 
780 	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
781 	blk->type = p[LDM_VBLK_TYPE_OFF];
782 	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
783 	if (offset < 0) {
784 		errstr = "object id";
785 		goto fail;
786 	}
787 	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
788 	if (offset < 0) {
789 		errstr = "object name";
790 		goto fail;
791 	}
792 	switch (blk->type) {
793 	/*
794 	 * Component VBLK fields:
795 	 * Offset	Size	Description
796 	 * ------------+-------+------------------------
797 	 *  0x18+	PS	volume state
798 	 *  0x18+5	PN	component children count
799 	 *  0x1D+16	PN	parent's volume object id
800 	 *  0x2D+1	PN	stripe size
801 	 */
802 	case LDM_VBLK_T_COMPONENT:
803 		offset = ldm_vparm_skip(p, offset, size);
804 		if (offset < 0) {
805 			errstr = "volume state";
806 			goto fail;
807 		}
808 		offset = ldm_vparm_skip(p, offset + 5, size);
809 		if (offset < 0) {
810 			errstr = "children count";
811 			goto fail;
812 		}
813 		offset = ldm_vnum_get(p, offset + 16,
814 		    &blk->u.comp.vol_id, size);
815 		if (offset < 0) {
816 			errstr = "volume id";
817 			goto fail;
818 		}
819 		break;
820 	/*
821 	 * Partition VBLK fields:
822 	 * Offset	Size	Description
823 	 * ------------+-------+------------------------
824 	 *  0x18+12	8	partition start offset
825 	 *  0x18+20	8	volume offset
826 	 *  0x18+28	PN	partition size
827 	 *  0x34+	PN	parent's component object id
828 	 *  0x34+	PN	disk's object id
829 	 */
830 	case LDM_VBLK_T_PARTITION:
831 		if (offset + 28 >= size) {
832 			errstr = "too small buffer";
833 			goto fail;
834 		}
835 		blk->u.part.start = be64dec(p + offset + 12);
836 		blk->u.part.offset = be64dec(p + offset + 20);
837 		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
838 		if (offset < 0) {
839 			errstr = "partition size";
840 			goto fail;
841 		}
842 		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
843 		if (offset < 0) {
844 			errstr = "component id";
845 			goto fail;
846 		}
847 		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
848 		if (offset < 0) {
849 			errstr = "disk id";
850 			goto fail;
851 		}
852 		break;
853 	/*
854 	 * Disk VBLK fields:
855 	 * Offset	Size	Description
856 	 * ------------+-------+------------------------
857 	 *  0x18+	PS	disk GUID
858 	 */
859 	case LDM_VBLK_T_DISK:
860 		errstr = "disk guid";
861 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
862 		if (offset < 0)
863 			goto fail;
864 		error = parse_uuid(vstr, &blk->u.disk.guid);
865 		if (error != 0)
866 			goto fail;
867 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
868 		break;
869 	/*
870 	 * Disk group VBLK fields:
871 	 * Offset	Size	Description
872 	 * ------------+-------+------------------------
873 	 *  0x18+	PS	disk group GUID
874 	 */
875 	case LDM_VBLK_T_DISKGROUP:
876 #if 0
877 		strncpy(blk->u.disk_group.name, vstr,
878 		    sizeof(blk->u.disk_group.name));
879 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
880 		if (offset < 0) {
881 			errstr = "disk group guid";
882 			goto fail;
883 		}
884 		error = parse_uuid(name, &blk->u.disk_group.guid);
885 		if (error != 0) {
886 			errstr = "disk group guid";
887 			goto fail;
888 		}
889 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
890 #endif
891 		break;
892 	/*
893 	 * Disk VBLK fields:
894 	 * Offset	Size	Description
895 	 * ------------+-------+------------------------
896 	 *  0x18+	16	disk GUID
897 	 */
898 	case LDM_VBLK_T_DISK4:
899 		be_uuid_dec(p + offset, &blk->u.disk.guid);
900 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
901 		break;
902 	/*
903 	 * Disk group VBLK fields:
904 	 * Offset	Size	Description
905 	 * ------------+-------+------------------------
906 	 *  0x18+	16	disk GUID
907 	 */
908 	case LDM_VBLK_T_DISKGROUP4:
909 #if 0
910 		strncpy(blk->u.disk_group.name, vstr,
911 		    sizeof(blk->u.disk_group.name));
912 		be_uuid_dec(p + offset, &blk->u.disk.guid);
913 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
914 #endif
915 		break;
916 	/*
917 	 * Volume VBLK fields:
918 	 * Offset	Size	Description
919 	 * ------------+-------+------------------------
920 	 *  0x18+	PS	volume type
921 	 *  0x18+	PS	unknown
922 	 *  0x18+	14(S)	volume state
923 	 *  0x18+16	1	volume number
924 	 *  0x18+21	PN	volume children count
925 	 *  0x2D+16	PN	volume size
926 	 *  0x3D+4	1	partition type
927 	 */
928 	case LDM_VBLK_T_VOLUME:
929 		offset = ldm_vparm_skip(p, offset, size);
930 		if (offset < 0) {
931 			errstr = "volume type";
932 			goto fail;
933 		}
934 		offset = ldm_vparm_skip(p, offset, size);
935 		if (offset < 0) {
936 			errstr = "unknown param";
937 			goto fail;
938 		}
939 		if (offset + 21 >= size) {
940 			errstr = "too small buffer";
941 			goto fail;
942 		}
943 		blk->u.vol.number = p[offset + 16];
944 		offset = ldm_vparm_skip(p, offset + 21, size);
945 		if (offset < 0) {
946 			errstr = "children count";
947 			goto fail;
948 		}
949 		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
950 		if (offset < 0) {
951 			errstr = "volume size";
952 			goto fail;
953 		}
954 		if (offset + 4 >= size) {
955 			errstr = "too small buffer";
956 			goto fail;
957 		}
958 		blk->u.vol.part_type = p[offset + 4];
959 		/* keep volumes ordered by volume number */
960 		last = NULL;
961 		LIST_FOREACH(volume, &db->volumes, entry) {
962 			if (volume->number > blk->u.vol.number)
963 				break;
964 			last = volume;
965 		}
966 		if (last != NULL)
967 			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
968 		else
969 			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
970 		break;
971 	default:
972 		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
973 		LDM_DUMP(p, size);
974 	}
975 	LIST_INSERT_HEAD(&db->vblks, blk, entry);
976 	return (0);
977 fail:
978 	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
979 	    errstr, blk->type);
980 	LDM_DUMP(p, size);
981 	g_free(blk);
982 	return (EINVAL);
983 }
984 
985 static void
986 ldm_vmdb_free(struct ldm_db *db)
987 {
988 	struct ldm_vblk *vblk;
989 	struct ldm_xvblk *xvblk;
990 
991 	while (!LIST_EMPTY(&db->xvblks)) {
992 		xvblk = LIST_FIRST(&db->xvblks);
993 		LIST_REMOVE(xvblk, entry);
994 		g_free(xvblk->data);
995 		g_free(xvblk);
996 	}
997 	while (!LIST_EMPTY(&db->vblks)) {
998 		vblk = LIST_FIRST(&db->vblks);
999 		LIST_REMOVE(vblk, entry);
1000 		g_free(vblk);
1001 	}
1002 }
1003 
1004 static int
1005 ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
1006 {
1007 	struct g_provider *pp;
1008 	struct ldm_vblk *vblk;
1009 	struct ldm_xvblk *xvblk;
1010 	struct ldm_volume *volume;
1011 	struct ldm_component *comp;
1012 	struct ldm_vblkhdr vh;
1013 	u_char *buf, *p;
1014 	size_t size, n, sectors;
1015 	uint64_t offset;
1016 	int error;
1017 
1018 	pp = cp->provider;
1019 	size = howmany(db->dh.last_seq * db->dh.size, pp->sectorsize);
1020 	size -= 1; /* one sector takes vmdb header */
1021 	for (n = 0; n < size; n += maxphys / pp->sectorsize) {
1022 		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
1023 		sectors = (size - n) > (maxphys / pp->sectorsize) ?
1024 		    maxphys / pp->sectorsize : size - n;
1025 		/* read VBLKs */
1026 		buf = g_read_data(cp, offset * pp->sectorsize,
1027 		    sectors * pp->sectorsize, &error);
1028 		if (buf == NULL) {
1029 			LDM_DEBUG(0, "%s: failed to read VBLK\n",
1030 			    pp->name);
1031 			goto fail;
1032 		}
1033 		for (p = buf; p < buf + sectors * pp->sectorsize;
1034 		    p += db->dh.size) {
1035 			if (memcmp(p, LDM_VBLK_SIGN,
1036 			    strlen(LDM_VBLK_SIGN)) != 0) {
1037 				LDM_DEBUG(0, "%s: no VBLK signature\n",
1038 				    pp->name);
1039 				LDM_DUMP(p, db->dh.size);
1040 				goto fail;
1041 			}
1042 			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
1043 			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
1044 			/* skip empty blocks */
1045 			if (vh.seq == 0 || vh.group == 0)
1046 				continue;
1047 			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
1048 			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
1049 			if (vh.count == 0 || vh.count > 4 ||
1050 			    vh.seq > db->dh.last_seq) {
1051 				LDM_DEBUG(0, "%s: invalid values "
1052 				    "in the VBLK header\n", pp->name);
1053 				LDM_DUMP(p, db->dh.size);
1054 				goto fail;
1055 			}
1056 			if (vh.count > 1) {
1057 				error = ldm_xvblk_handle(db, &vh, p);
1058 				if (error != 0) {
1059 					LDM_DEBUG(0, "%s: xVBLK "
1060 					    "is corrupted\n", pp->name);
1061 					LDM_DUMP(p, db->dh.size);
1062 					goto fail;
1063 				}
1064 				continue;
1065 			}
1066 			if (be16dec(p + 16) != 0)
1067 				LDM_DEBUG(1, "%s: VBLK update"
1068 				    " status is %u\n", pp->name,
1069 				    be16dec(p + 16));
1070 			error = ldm_vblk_handle(db, p, db->dh.size);
1071 			if (error != 0)
1072 				goto fail;
1073 		}
1074 		g_free(buf);
1075 		buf = NULL;
1076 	}
1077 	/* Parse xVBLKs */
1078 	while (!LIST_EMPTY(&db->xvblks)) {
1079 		xvblk = LIST_FIRST(&db->xvblks);
1080 		if (xvblk->map == 0xFF) {
1081 			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
1082 			if (error != 0)
1083 				goto fail;
1084 		} else {
1085 			LDM_DEBUG(0, "%s: incomplete or corrupt "
1086 			    "xVBLK found\n", pp->name);
1087 			goto fail;
1088 		}
1089 		LIST_REMOVE(xvblk, entry);
1090 		g_free(xvblk->data);
1091 		g_free(xvblk);
1092 	}
1093 	/* construct all VBLKs relations */
1094 	LIST_FOREACH(volume, &db->volumes, entry) {
1095 		LIST_FOREACH(vblk, &db->vblks, entry)
1096 			if (vblk->type == LDM_VBLK_T_COMPONENT &&
1097 			    vblk->u.comp.vol_id == volume->id) {
1098 				LIST_INSERT_HEAD(&volume->components,
1099 				    &vblk->u.comp, entry);
1100 				volume->count++;
1101 			}
1102 		LIST_FOREACH(comp, &volume->components, entry)
1103 			LIST_FOREACH(vblk, &db->vblks, entry)
1104 				if (vblk->type == LDM_VBLK_T_PARTITION &&
1105 				    vblk->u.part.comp_id == comp->id) {
1106 					LIST_INSERT_HEAD(&comp->partitions,
1107 					    &vblk->u.part, entry);
1108 					comp->count++;
1109 				}
1110 	}
1111 	return (0);
1112 fail:
1113 	ldm_vmdb_free(db);
1114 	g_free(buf);
1115 	return (ENXIO);
1116 }
1117 
/*
 * Adding a partition is not implemented for LDM: the request is always
 * rejected with ENOSYS and none of the arguments is examined.
 */
static int
g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
    struct g_part_parms *gpp)
{
	(void)basetable;
	(void)baseentry;
	(void)gpp;
	return (ENOSYS);
}
1125 
/*
 * Installing boot code is not implemented for LDM: the request is always
 * rejected with ENOSYS and none of the arguments is examined.
 */
static int
g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
{
	(void)basetable;
	(void)gpp;
	return (ENOSYS);
}
1132 
/*
 * Creating a new LDM database is not implemented: the request is always
 * rejected with ENOSYS and none of the arguments is examined.
 */
static int
g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
{
	(void)basetable;
	(void)gpp;
	return (ENOSYS);
}
1139 
1140 static int
1141 g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
1142 {
1143 	struct g_part_ldm_table *table;
1144 	struct g_provider *pp;
1145 
1146 	table = (struct g_part_ldm_table *)basetable;
1147 	/*
1148 	 * To destroy LDM on a disk partitioned with GPT we should delete
1149 	 * ms-ldm-metadata partition, but we can't do this via standard
1150 	 * GEOM_PART method.
1151 	 */
1152 	if (table->is_gpt)
1153 		return (ENOSYS);
1154 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1155 	/*
1156 	 * To destroy LDM we should wipe MBR, first private header and
1157 	 * backup private headers.
1158 	 */
1159 	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
1160 	/*
1161 	 * Don't touch last backup private header when LDM database is
1162 	 * not located in the last 1MByte area.
1163 	 * XXX: can't remove all blocks.
1164 	 */
1165 	if (table->db_offset + LDM_DB_SIZE ==
1166 	    pp->mediasize / pp->sectorsize)
1167 		basetable->gpt_smtail = 1;
1168 	return (0);
1169 }
1170 
1171 static void
1172 g_part_ldm_dumpconf(struct g_part_table *basetable,
1173     struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
1174 {
1175 	struct g_part_ldm_entry *entry;
1176 
1177 	entry = (struct g_part_ldm_entry *)baseentry;
1178 	if (indent == NULL) {
1179 		/* conftxt: libdisk compatibility */
1180 		sbuf_printf(sb, " xs LDM xt %u", entry->type);
1181 	} else if (entry != NULL) {
1182 		/* confxml: partition entry information */
1183 		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
1184 		    entry->type);
1185 	} else {
1186 		/* confxml: scheme information */
1187 	}
1188 }
1189 
/*
 * Always returns 0 without looking at the arguments.
 * NOTE(review): presumably this tells GEOM_PART that LDM partitions are
 * not suitable as kernel dump devices -- confirm against the dumpto
 * method contract.
 */
static int
g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
{
	(void)table;
	(void)baseentry;
	return (0);
}
1196 
/*
 * Modifying a partition is not implemented for LDM: the request is always
 * rejected with ENOSYS and none of the arguments is examined.
 */
static int
g_part_ldm_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{
	(void)basetable;
	(void)baseentry;
	(void)gpp;
	return (ENOSYS);
}
1204 
1205 static const char *
1206 g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
1207     char *buf, size_t bufsz)
1208 {
1209 
1210 	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
1211 	return (buf);
1212 }
1213 
1214 static int
1215 ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
1216 {
1217 	struct g_part_ldm_table *table;
1218 	struct g_part_table *gpt;
1219 	struct g_part_entry *entry;
1220 	struct g_consumer *cp2;
1221 	struct gpt_ent *part;
1222 	u_char *buf;
1223 	int error;
1224 
1225 	/*
1226 	 * XXX: We use some knowledge about GEOM_PART_GPT internal
1227 	 * structures, but it is easier than parse GPT by himself.
1228 	 */
1229 	g_topology_lock();
1230 	gpt = cp->provider->geom->softc;
1231 	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
1232 		part = (struct gpt_ent *)(entry + 1);
1233 		/* Search ms-ldm-metadata partition */
1234 		if (memcmp(&part->ent_type,
1235 		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
1236 		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
1237 			continue;
1238 
1239 		/* Create new consumer and attach it to metadata partition */
1240 		cp2 = g_new_consumer(cp->geom);
1241 		error = g_attach(cp2, entry->gpe_pp);
1242 		if (error != 0) {
1243 			g_destroy_consumer(cp2);
1244 			g_topology_unlock();
1245 			return (ENXIO);
1246 		}
1247 		error = g_access(cp2, 1, 0, 0);
1248 		if (error != 0) {
1249 			g_detach(cp2);
1250 			g_destroy_consumer(cp2);
1251 			g_topology_unlock();
1252 			return (ENXIO);
1253 		}
1254 		g_topology_unlock();
1255 
1256 		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
1257 		    cp->provider->name, cp2->provider->name);
1258 		/* Read the LDM private header */
1259 		buf = ldm_privhdr_read(cp2,
1260 		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
1261 		    &error);
1262 		if (buf != NULL) {
1263 			table = (struct g_part_ldm_table *)basetable;
1264 			table->is_gpt = 1;
1265 			g_free(buf);
1266 			return (G_PART_PROBE_PRI_HIGH);
1267 		}
1268 
1269 		/* second consumer is no longer needed. */
1270 		g_topology_lock();
1271 		g_access(cp2, -1, 0, 0);
1272 		g_detach(cp2);
1273 		g_destroy_consumer(cp2);
1274 		break;
1275 	}
1276 	g_topology_unlock();
1277 	return (ENXIO);
1278 }
1279 
1280 static int
1281 g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
1282 {
1283 	struct g_provider *pp;
1284 	u_char *buf, type[64];
1285 	int error, idx;
1286 
1287 	pp = cp->provider;
1288 	if (pp->sectorsize != 512)
1289 		return (ENXIO);
1290 
1291 	error = g_getattr("PART::scheme", cp, &type);
1292 	if (error == 0 && strcmp(type, "GPT") == 0) {
1293 		if (g_getattr("PART::type", cp, &type) != 0 ||
1294 		    strcmp(type, "ms-ldm-data") != 0)
1295 			return (ENXIO);
1296 		error = ldm_gpt_probe(basetable, cp);
1297 		return (error);
1298 	}
1299 
1300 	if (basetable->gpt_depth != 0)
1301 		return (ENXIO);
1302 
1303 	/* LDM has 1M metadata area */
1304 	if (pp->mediasize <= 1024 * 1024)
1305 		return (ENOSPC);
1306 
1307 	/* Check that there's a MBR */
1308 	buf = g_read_data(cp, 0, pp->sectorsize, &error);
1309 	if (buf == NULL)
1310 		return (error);
1311 
1312 	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
1313 		g_free(buf);
1314 		return (ENXIO);
1315 	}
1316 	error = ENXIO;
1317 	/* Check that we have LDM partitions in the MBR */
1318 	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
1319 		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
1320 			error = 0;
1321 	}
1322 	g_free(buf);
1323 	if (error == 0) {
1324 		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
1325 		    pp->name);
1326 		/* Read the LDM private header */
1327 		buf = ldm_privhdr_read(cp,
1328 		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
1329 		if (buf == NULL)
1330 			return (error);
1331 		g_free(buf);
1332 		return (G_PART_PROBE_PRI_HIGH);
1333 	}
1334 	return (error);
1335 }
1336 
1337 static int
1338 g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
1339 {
1340 	struct g_part_ldm_table *table;
1341 	struct g_part_ldm_entry *entry;
1342 	struct g_consumer *cp2;
1343 	struct ldm_component *comp;
1344 	struct ldm_partition *part;
1345 	struct ldm_volume *vol;
1346 	struct ldm_disk *disk;
1347 	struct ldm_db db;
1348 	int error, index, skipped;
1349 
1350 	table = (struct g_part_ldm_table *)basetable;
1351 	memset(&db, 0, sizeof(db));
1352 	cp2 = cp;					/* ms-ldm-data */
1353 	if (table->is_gpt)
1354 		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
1355 	/* Read and parse LDM private headers. */
1356 	error = ldm_privhdr_check(&db, cp, table->is_gpt);
1357 	if (error != 0)
1358 		goto gpt_cleanup;
1359 	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
1360 	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
1361 	table->db_offset = db.ph.db_offset;
1362 	/* Make additional checks for GPT */
1363 	if (table->is_gpt) {
1364 		error = ldm_gpt_check(&db, cp);
1365 		if (error != 0)
1366 			goto gpt_cleanup;
1367 		/*
1368 		 * Now we should reset database offset to zero, because our
1369 		 * consumer cp is attached to the ms-ldm-metadata partition
1370 		 * and we don't need add db_offset to read from it.
1371 		 */
1372 		db.ph.db_offset = 0;
1373 	}
1374 	/* Read and parse LDM TOC headers. */
1375 	error = ldm_tochdr_check(&db, cp);
1376 	if (error != 0)
1377 		goto gpt_cleanup;
1378 	/* Read and parse LDM VMDB header. */
1379 	error = ldm_vmdbhdr_check(&db, cp);
1380 	if (error != 0)
1381 		goto gpt_cleanup;
1382 	error = ldm_vmdb_parse(&db, cp);
1383 	/*
1384 	 * For the GPT case we must detach and destroy
1385 	 * second consumer before return.
1386 	 */
1387 gpt_cleanup:
1388 	if (table->is_gpt) {
1389 		g_topology_lock();
1390 		g_access(cp, -1, 0, 0);
1391 		g_detach(cp);
1392 		g_destroy_consumer(cp);
1393 		g_topology_unlock();
1394 		cp = cp2;
1395 	}
1396 	if (error != 0)
1397 		return (error);
1398 	/* Search current disk in the disk list. */
1399 	LIST_FOREACH(disk, &db.disks, entry)
1400 	    if (memcmp(&disk->guid, &db.ph.disk_guid,
1401 		sizeof(struct uuid)) == 0)
1402 		    break;
1403 	if (disk == NULL) {
1404 		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
1405 		    cp->provider->name);
1406 		ldm_vmdb_free(&db);
1407 		return (ENXIO);
1408 	}
1409 	index = 1;
1410 	LIST_FOREACH(vol, &db.volumes, entry) {
1411 		LIST_FOREACH(comp, &vol->components, entry) {
1412 			/* Skip volumes from different disks. */
1413 			part = LIST_FIRST(&comp->partitions);
1414 			if (part->disk_id != disk->id)
1415 				continue;
1416 			skipped = 0;
1417 			/* We don't support spanned and striped volumes. */
1418 			if (comp->count > 1 || part->offset != 0) {
1419 				LDM_DEBUG(1, "%s: LDM volume component "
1420 				    "%ju has %u partitions. Skipped",
1421 				    cp->provider->name, (uintmax_t)comp->id,
1422 				    comp->count);
1423 				skipped = 1;
1424 			}
1425 			/*
1426 			 * Allow mirrored volumes only when they are explicitly
1427 			 * allowed with kern.geom.part.ldm.show_mirrors=1.
1428 			 */
1429 			if (vol->count > 1 && show_mirrors == 0) {
1430 				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
1431 				    "components. Skipped",
1432 				    cp->provider->name, (uintmax_t)vol->id,
1433 				    vol->count);
1434 				skipped = 1;
1435 			}
1436 			entry = (struct g_part_ldm_entry *)g_part_new_entry(
1437 			    basetable, index++,
1438 			    basetable->gpt_first + part->start,
1439 			    basetable->gpt_first + part->start +
1440 			    part->size - 1);
1441 			/*
1442 			 * Mark skipped partition as ms-ldm-data partition.
1443 			 * We do not support them, but it is better to show
1444 			 * that we have something there, than just show
1445 			 * free space.
1446 			 */
1447 			if (skipped == 0)
1448 				entry->type = vol->part_type;
1449 			else
1450 				entry->type = DOSPTYP_LDM;
1451 			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
1452 			    " end: %ju, type: 0x%02x\n", cp->provider->name,
1453 			    (uintmax_t)part->id,(uintmax_t)part->start +
1454 			    basetable->gpt_first, (uintmax_t)part->start +
1455 			    part->size + basetable->gpt_first - 1,
1456 			    vol->part_type);
1457 		}
1458 	}
1459 	ldm_vmdb_free(&db);
1460 	return (error);
1461 }
1462 
1463 static const char *
1464 g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1465     char *buf, size_t bufsz)
1466 {
1467 	struct g_part_ldm_entry *entry;
1468 	int i;
1469 
1470 	entry = (struct g_part_ldm_entry *)baseentry;
1471 	for (i = 0; i < nitems(ldm_alias_match); i++) {
1472 		if (ldm_alias_match[i].typ == entry->type)
1473 			return (g_part_alias_name(ldm_alias_match[i].alias));
1474 	}
1475 	snprintf(buf, bufsz, "!%d", entry->type);
1476 	return (buf);
1477 }
1478 
/*
 * Writing LDM metadata is not implemented: the request is always rejected
 * with ENOSYS and none of the arguments is examined.
 */
static int
g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
{
	(void)basetable;
	(void)cp;
	return (ENOSYS);
}
1485