xref: /freebsd/sys/geom/part/g_part_ldm.c (revision 97549c34ecaf74580941fdc9c5bd1050e4b1f6ce)
1 /*-
2  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/diskmbr.h>
33 #include <sys/endian.h>
34 #include <sys/gpt.h>
35 #include <sys/kernel.h>
36 #include <sys/kobj.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/queue.h>
42 #include <sys/sbuf.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/uuid.h>
46 #include <geom/geom.h>
47 #include <geom/part/g_part.h>
48 
49 #include "g_part_if.h"
50 
/* Register "geom_part_ldm" as a kernel feature with a short description. */
FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");

/* Knobs of this scheme live in the "ldm" node under _kern_geom_part. */
SYSCTL_DECL(_kern_geom_part);
static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
    "GEOM_PART_LDM Logical Disk Manager");

/*
 * Diagnostic verbosity: LDM_DEBUG(lvl, ...) prints when ldm_debug >= lvl
 * and LDM_DUMP() produces a hexdump when ldm_debug > 1.
 */
static u_int ldm_debug = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug,
    CTLFLAG_RWTUN, &ldm_debug, 0, "Debug level");

/*
 * This allows access to mirrored LDM volumes. Since we do not
 * do mirroring here, it is not enabled by default.
 */
static u_int show_mirrors = 0;
SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors,
    CTLFLAG_RWTUN, &show_mirrors, 0, "Show mirrored volumes");
68 
/*
 * Print a diagnostic message when the debug level is at least 'lvl'.
 * The "GEOM_PART: " prefix and a trailing newline are added here, so
 * 'fmt' should not end with "\n".  At least one argument must follow
 * 'fmt' because __VA_ARGS__ is used without a fallback.
 */
#define	LDM_DEBUG(lvl, fmt, ...)	do {				\
	if (ldm_debug >= (lvl)) {					\
		printf("GEOM_PART: " fmt "\n", __VA_ARGS__);		\
	}								\
} while (0)
/* Hexdump 'size' bytes at 'buf' when the debug level is above 1. */
#define	LDM_DUMP(buf, size)	do {					\
	if (ldm_debug > 1) {						\
		hexdump(buf, size, NULL, 0);				\
	}								\
} while (0)
79 
/*
 * These are internal representations of LDM structures.
 *
 * We do not keep all fields of the on-disk structures, only the most
 * useful ones. All numbers in the on-disk structures are big-endian.
 */
86 
/*
 * Private header is 512 bytes long. There are three copies on each disk.
 * Offset and sizes are in sectors. Location of each copy:
 * - the first offset is relative to the disk start;
 * - the second and third offset are relative to the LDM database start.
 *
 * On a disk partitioned with GPT, there is no first private header.
 */
/*
 * LDM_PH_MBRINDEX is the index in ldm_ph_off[] of the primary copy.
 * NOTE(review): LDM_PH_GPTINDEX is not referenced in the code seen here.
 */
#define	LDM_PH_MBRINDEX		0
#define	LDM_PH_GPTINDEX		2
/* Sector offsets of the three private header copies (see above). */
static const uint64_t	ldm_ph_off[] = {6, 1856, 2047};
/* Header versions accepted by ldm_privhdr_parse(). */
#define	LDM_VERSION_2K		0x2000b
#define	LDM_VERSION_VISTA	0x2000c
/* Byte offsets of the fields inside the on-disk private header. */
#define	LDM_PH_VERSION_OFF	0x00c
#define	LDM_PH_DISKGUID_OFF	0x030
#define	LDM_PH_DGGUID_OFF	0x0b0
#define	LDM_PH_DGNAME_OFF	0x0f0
#define	LDM_PH_START_OFF	0x11b
#define	LDM_PH_SIZE_OFF		0x123
#define	LDM_PH_DB_OFF		0x12b
#define	LDM_PH_DBSIZE_OFF	0x133
#define	LDM_PH_TH1_OFF		0x13b
#define	LDM_PH_TH2_OFF		0x143
#define	LDM_PH_CONFSIZE_OFF	0x153
#define	LDM_PH_LOGSIZE_OFF	0x15b
/* Signature expected at the very start of the header sector. */
#define	LDM_PH_SIGN		"PRIVHEAD"
/* In-memory copy of the private header fields filled by ldm_privhdr_parse(). */
struct ldm_privhdr {
	struct uuid	disk_guid;	/* parsed from the text at DISKGUID_OFF */
	struct uuid	dg_guid;	/* parsed from the text at DGGUID_OFF */
	u_char		dg_name[32];	/* disk group name */
	uint64_t	start;		/* logical disk start */
	uint64_t	size;		/* logical disk size */
	uint64_t	db_offset;	/* LDM database start */
/* The only database size (in sectors) accepted by ldm_privhdr_check(). */
#define	LDM_DB_SIZE		2048
	uint64_t	db_size;	/* LDM database size */
#define	LDM_TH_COUNT		2
	uint64_t	th_offset[LDM_TH_COUNT]; /* TOC header offsets */
	uint64_t	conf_size;	/* configuration size */
	uint64_t	log_size;	/* size of log */
};
127 
/*
 * Table of contents header is 512 bytes long.
 * There are two identical copies at offsets from the private header.
 * Offsets are relative to the LDM database start.
 */
/* Signature at the start of the sector and the two section names. */
#define	LDM_TH_SIGN		"TOCBLOCK"
#define	LDM_TH_NAME1		"config"
#define	LDM_TH_NAME2		"log"
/* Byte offsets of the fields checked or decoded by ldm_tochdr_check(). */
#define	LDM_TH_NAME1_OFF	0x024
#define	LDM_TH_CONF_OFF		0x02e
#define	LDM_TH_CONFSIZE_OFF	0x036
#define	LDM_TH_NAME2_OFF	0x046
#define	LDM_TH_LOG_OFF		0x050
#define	LDM_TH_LOGSIZE_OFF	0x058
/*
 * Only the two section offsets are kept; the sizes found in the TOC are
 * merely compared with those of the private header.
 */
struct ldm_tochdr {
	uint64_t	conf_offset;	/* configuration offset */
	uint64_t	log_offset;	/* log offset */
};
146 
/*
 * LDM database header is 512 bytes long.
 */
/* Signature at the start of the VMDB header sector. */
#define	LDM_VMDB_SIGN		"VMDB"
/* Byte offsets of the fields read by ldm_vmdbhdr_check(). */
#define	LDM_DB_LASTSEQ_OFF	0x004
#define	LDM_DB_SIZE_OFF		0x008
#define	LDM_DB_STATUS_OFF	0x010
#define	LDM_DB_VERSION_OFF	0x012
#define	LDM_DB_DGNAME_OFF	0x016
#define	LDM_DB_DGGUID_OFF	0x035
/* The part of the VMDB header that is kept after validation. */
struct ldm_vmdbhdr {
	uint32_t	last_seq;	/* sequence number of last VBLK */
	uint32_t	size;		/* size of VBLK */
};
161 
/*
 * The LDM database configuration section contains the VMDB header and
 * many VBLKs. Each VBLK represents a disk group, disk partition,
 * component or volume.
 *
 * The most interesting for us are volumes: they represent
 * partitions in the GEOM_PART sense. But a volume VBLK does not
 * contain all the information needed to create a GEOM provider, so
 * we have to get this information from the related VBLKs. This is how
 * the VBLKs are related:
 *	Volumes <- Components <- Partitions -> Disks
 *
 * One volume can contain several components. In this case LDM
 * mirrors the volume data to each component.
 *
 * Also each component can contain several partitions (spanned or
 * striped volumes).
 */
180 
/*
 * A component of a volume, as described by a component VBLK.
 * ldm_vblk_handle() fills in 'id' and 'vol_id' only.
 */
struct ldm_component {
	uint64_t	id;		/* object id */
	uint64_t	vol_id;		/* parent volume object id */

	int		count;		/* presumably the length of 'partitions' -- TODO confirm */
	LIST_HEAD(, ldm_partition) partitions;	/* partitions of this component */
	LIST_ENTRY(ldm_component) entry;	/* link in ldm_volume.components */
};
189 
/*
 * A volume, as described by a volume VBLK.  ldm_vblk_handle() keeps
 * the ldm_db.volumes list sorted by ascending 'number'.
 */
struct ldm_volume {
	uint64_t	id;		/* object id */
	uint64_t	size;		/* volume size */
	uint8_t		number;		/* used for ordering */
	uint8_t		part_type;	/* partition type */

	int		count;		/* presumably the length of 'components' -- TODO confirm */
	LIST_HEAD(, ldm_component) components;	/* components of this volume */
	LIST_ENTRY(ldm_volume)	entry;	/* link in ldm_db.volumes */
};
200 
/* A disk, as described by a disk VBLK (either of the two disk types). */
struct ldm_disk {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk guid */

	LIST_ENTRY(ldm_disk) entry;	/* link in ldm_db.disks */
};
207 
/*
 * Disk group objects are currently compiled out; the matching parsing
 * code in ldm_vblk_handle() is under "#if 0" as well.
 */
#if 0
struct ldm_disk_group {
	uint64_t	id;		/* object id */
	struct uuid	guid;		/* disk group guid */
	u_char		name[32];	/* disk group name */

	LIST_ENTRY(ldm_disk_group) entry;
};
#endif
217 
/* A partition (a piece of a component), as described by a partition VBLK. */
struct ldm_partition {
	uint64_t	id;		/* object id */
	uint64_t	disk_id;	/* disk object id */
	uint64_t	comp_id;	/* parent component object id */
	uint64_t	start;		/* offset relative to disk start */
	uint64_t	offset;		/* offset for spanned volumes */
	uint64_t	size;		/* partition size */

	LIST_ENTRY(ldm_partition) entry;	/* link in ldm_component.partitions */
};
228 
/*
 * Each VBLK is 128 bytes long and has a standard 16-byte header.
 * Some of the VBLK fields have a fixed size, others have a variable size.
 * Fields with variable size are prefixed with a one-byte length marker.
 * Strings can likewise be of fixed or variable size.
 * Strings with fixed size are NUL-terminated, others are not.
 * All VBLKs have the same several first fields:
 *	Offset		Size		Description
 *	---------------+---------------+--------------------------
 *	0x00		16		standard VBLK header
 *	0x10		2		update status
 *	0x13		1		VBLK type
 *	0x18		PN		object id
 *	0x18+		PS		object name
 *
 *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
 *  o 'P' in size column means 'prefixed' (variable-width),
 *    'S' - string, 'N' - number.
 */
/* Signature at the start of every VBLK. */
#define	LDM_VBLK_SIGN		"VBLK"
/* Byte offsets of the fixed-position fields of a VBLK. */
#define	LDM_VBLK_SEQ_OFF	0x04
#define	LDM_VBLK_GROUP_OFF	0x08
#define	LDM_VBLK_INDEX_OFF	0x0c
#define	LDM_VBLK_COUNT_OFF	0x0e
#define	LDM_VBLK_TYPE_OFF	0x13
#define	LDM_VBLK_OID_OFF	0x18
/* Decoded standard VBLK header. */
struct ldm_vblkhdr {
	uint32_t	seq;		/* sequence number */
	uint32_t	group;		/* group number */
	uint16_t	index;		/* index in the group */
	uint16_t	count;		/* number of entries in the group */
};
261 
/* Values of the VBLK type byte handled by ldm_vblk_handle(). */
#define	LDM_VBLK_T_COMPONENT	0x32
#define	LDM_VBLK_T_PARTITION	0x33
#define	LDM_VBLK_T_DISK		0x34
#define	LDM_VBLK_T_DISKGROUP	0x35
#define	LDM_VBLK_T_DISK4	0x44
#define	LDM_VBLK_T_DISKGROUP4	0x45
#define	LDM_VBLK_T_VOLUME	0x51
/*
 * One parsed VBLK.  Every object structure in the union starts with a
 * uint64_t 'id', so 'u.id' aliases the object id whatever the type is;
 * ldm_vblk_handle() stores the id through it before looking at the type.
 */
struct ldm_vblk {
	uint8_t		type;		/* VBLK type */
	union {
		uint64_t		id;
		struct ldm_volume	vol;
		struct ldm_component	comp;
		struct ldm_disk		disk;
		struct ldm_partition	part;
#if 0
		struct ldm_disk_group	disk_group;
#endif
	} u;
	LIST_ENTRY(ldm_vblk) entry;	/* link in ldm_db.vblks */
};
283 
/*
 * Some VBLKs contain a bit more data than can fit into 128 bytes. These
 * VBLKs are called eXtended VBLKs. Before parsing, the data from these
 * VBLKs should be placed into a contiguous memory buffer. We can recognize
 * an xVBLK by the count field in the standard VBLK header (count > 1).
 *
 * ldm_xvblk_handle() starts 'map' as 0xFF << count and sets bit 'index'
 * each time a fragment is stored.
 */
struct ldm_xvblk {
	uint32_t	group;		/* xVBLK group number */
	uint32_t	size;		/* the total size of xVBLK */
	uint8_t		map;		/* bitmask of currently saved VBLKs */
	u_char		*data;		/* xVBLK data */

	LIST_ENTRY(ldm_xvblk)	entry;	/* link in ldm_db.xvblks */
};
298 
/*
 * The internal representation of LDM database.
 *
 * The entries of 'volumes' and 'disks' are embedded in the ldm_vblk
 * records linked on 'vblks' (see ldm_vblk_handle()), so they are released
 * together with those records.
 */
struct ldm_db {
	struct ldm_privhdr		ph;	/* private header */
	struct ldm_tochdr		th;	/* TOC header */
	struct ldm_vmdbhdr		dh;	/* VMDB header */

	LIST_HEAD(, ldm_volume)		volumes;	/* sorted by volume number */
	LIST_HEAD(, ldm_disk)		disks;
	LIST_HEAD(, ldm_vblk)		vblks;		/* all parsed VBLKs */
	LIST_HEAD(, ldm_xvblk)		xvblks;		/* xVBLKs being assembled */
};
310 
/* GPT partition type of the partition that holds the LDM metadata. */
static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;

/* Scheme-private table state; 'base' must stay the first member. */
struct g_part_ldm_table {
	struct g_part_table	base;
	uint64_t		db_offset;	/* presumably the LDM database start -- TODO confirm */
	int			is_gpt;		/* non-zero when LDM sits on top of GPT */
};
/* Scheme-private partition entry; 'base' must stay the first member. */
struct g_part_ldm_entry {
	struct g_part_entry	base;
	uint8_t			type;	/* partition type byte */
};
322 
/*
 * Forward declarations of the g_part method implementations that are
 * wired into g_part_ldm_methods[] below.
 */
static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
    struct sbuf *, const char *);
static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
    struct g_part_parms *);
static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *,
    char *, size_t);
static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
340 
/*
 * kobj method table: binds each g_part interface method to its LDM
 * implementation.  The all-zero entry terminates the table.
 */
static kobj_method_t g_part_ldm_methods[] = {
	KOBJMETHOD(g_part_add,		g_part_ldm_add),
	KOBJMETHOD(g_part_bootcode,	g_part_ldm_bootcode),
	KOBJMETHOD(g_part_create,	g_part_ldm_create),
	KOBJMETHOD(g_part_destroy,	g_part_ldm_destroy),
	KOBJMETHOD(g_part_dumpconf,	g_part_ldm_dumpconf),
	KOBJMETHOD(g_part_dumpto,	g_part_ldm_dumpto),
	KOBJMETHOD(g_part_modify,	g_part_ldm_modify),
	KOBJMETHOD(g_part_name,		g_part_ldm_name),
	KOBJMETHOD(g_part_probe,	g_part_ldm_probe),
	KOBJMETHOD(g_part_read,		g_part_ldm_read),
	KOBJMETHOD(g_part_type,		g_part_ldm_type),
	KOBJMETHOD(g_part_write,	g_part_ldm_write),
	{ 0, 0 }
};
356 
/*
 * Scheme descriptor: the scheme name, its method table, the size of the
 * per-table structure and (via gps_entrysz) of the per-entry structure.
 */
static struct g_part_scheme g_part_ldm_scheme = {
	"LDM",
	g_part_ldm_methods,
	sizeof(struct g_part_ldm_table),
	.gps_entrysz = sizeof(struct g_part_ldm_entry)
};
G_PART_SCHEME_DECLARE(g_part_ldm);
364 
/*
 * Pairs of a DOSPTYP_* partition type byte and the G_PART_ALIAS_* value
 * it corresponds to.
 */
static struct g_part_ldm_alias {
	u_char		typ;	/* DOSPTYP_* partition type */
	int		alias;	/* G_PART_ALIAS_* value */
} ldm_alias_match[] = {
	{ DOSPTYP_NTFS,		G_PART_ALIAS_MS_NTFS },
	{ DOSPTYP_FAT32,	G_PART_ALIAS_MS_FAT32 },
	{ DOSPTYP_386BSD,	G_PART_ALIAS_FREEBSD },
	{ DOSPTYP_LDM,		G_PART_ALIAS_MS_LDM_DATA },
	{ DOSPTYP_LINSWP,	G_PART_ALIAS_LINUX_SWAP },
	{ DOSPTYP_LINUX,	G_PART_ALIAS_LINUX_DATA },
	{ DOSPTYP_LINLVM,	G_PART_ALIAS_LINUX_LVM },
	{ DOSPTYP_LINRAID,	G_PART_ALIAS_LINUX_RAID },
};
378 
379 static u_char*
380 ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
381 {
382 	struct g_provider *pp;
383 	u_char *buf;
384 
385 	pp = cp->provider;
386 	buf = g_read_data(cp, off, pp->sectorsize, error);
387 	if (buf == NULL)
388 		return (NULL);
389 
390 	if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) {
391 		LDM_DEBUG(1, "%s: invalid LDM private header signature",
392 		    pp->name);
393 		g_free(buf);
394 		buf = NULL;
395 		*error = EINVAL;
396 	}
397 	return (buf);
398 }
399 
400 static int
401 ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
402     const u_char *buf)
403 {
404 	uint32_t version;
405 	int error;
406 
407 	memset(hdr, 0, sizeof(*hdr));
408 	version = be32dec(buf + LDM_PH_VERSION_OFF);
409 	if (version != LDM_VERSION_2K &&
410 	    version != LDM_VERSION_VISTA) {
411 		LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
412 		    cp->provider->name, version >> 16,
413 		    version & 0xFFFF);
414 		return (ENXIO);
415 	}
416 	error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
417 	if (error != 0)
418 		return (error);
419 	error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
420 	if (error != 0)
421 		return (error);
422 	strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name));
423 	hdr->start = be64dec(buf + LDM_PH_START_OFF);
424 	hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
425 	hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
426 	hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
427 	hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
428 	hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
429 	hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
430 	hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
431 	return (0);
432 }
433 
434 static int
435 ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
436 {
437 	struct g_consumer *cp2;
438 	struct g_provider *pp;
439 	struct ldm_privhdr hdr;
440 	uint64_t offset, last;
441 	int error, found, i;
442 	u_char *buf;
443 
444 	pp = cp->provider;
445 	if (is_gpt) {
446 		/*
447 		 * The last LBA is used in several checks below, for the
448 		 * GPT case it should be calculated relative to the whole
449 		 * disk.
450 		 */
451 		cp2 = LIST_FIRST(&pp->geom->consumer);
452 		last =
453 		    cp2->provider->mediasize / cp2->provider->sectorsize - 1;
454 	} else
455 		last = pp->mediasize / pp->sectorsize - 1;
456 	for (found = 0, i = is_gpt; i < nitems(ldm_ph_off); i++) {
457 		offset = ldm_ph_off[i];
458 		/*
459 		 * In the GPT case consumer is attached to the LDM metadata
460 		 * partition and we don't need add db_offset.
461 		 */
462 		if (!is_gpt)
463 			offset += db->ph.db_offset;
464 		if (i == LDM_PH_MBRINDEX) {
465 			/*
466 			 * Prepare to errors and setup new base offset
467 			 * to read backup private headers. Assume that LDM
468 			 * database is in the last 1Mbyte area.
469 			 */
470 			db->ph.db_offset = last - LDM_DB_SIZE;
471 		}
472 		buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
473 		if (buf == NULL) {
474 			LDM_DEBUG(1, "%s: failed to read private header "
475 			    "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
476 			continue;
477 		}
478 		error = ldm_privhdr_parse(cp, &hdr, buf);
479 		if (error != 0) {
480 			LDM_DEBUG(1, "%s: failed to parse private "
481 			    "header %d", pp->name, i);
482 			LDM_DUMP(buf, pp->sectorsize);
483 			g_free(buf);
484 			continue;
485 		}
486 		g_free(buf);
487 		if (hdr.start > last ||
488 		    hdr.start + hdr.size - 1 > last ||
489 		    (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
490 		    hdr.db_size != LDM_DB_SIZE ||
491 		    hdr.db_offset + LDM_DB_SIZE - 1 > last ||
492 		    hdr.th_offset[0] >= LDM_DB_SIZE ||
493 		    hdr.th_offset[1] >= LDM_DB_SIZE ||
494 		    hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
495 			LDM_DEBUG(1, "%s: invalid values in the "
496 			    "private header %d", pp->name, i);
497 			LDM_DEBUG(2, "%s: start: %jd, size: %jd, "
498 			    "db_offset: %jd, db_size: %jd, th_offset0: %jd, "
499 			    "th_offset1: %jd, conf_size: %jd, log_size: %jd, "
500 			    "last: %jd", pp->name, hdr.start, hdr.size,
501 			    hdr.db_offset, hdr.db_size, hdr.th_offset[0],
502 			    hdr.th_offset[1], hdr.conf_size, hdr.log_size,
503 			    last);
504 			continue;
505 		}
506 		if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
507 			LDM_DEBUG(0, "%s: private headers are not equal",
508 			    pp->name);
509 			if (i > 1) {
510 				/*
511 				 * We have different headers in the LDM.
512 				 * We can not trust this metadata.
513 				 */
514 				LDM_DEBUG(0, "%s: refuse LDM metadata",
515 				    pp->name);
516 				return (EINVAL);
517 			}
518 			/*
519 			 * We already have read primary private header
520 			 * and it differs from this backup one.
521 			 * Prefer the backup header and save it.
522 			 */
523 			found = 0;
524 		}
525 		if (found == 0)
526 			memcpy(&db->ph, &hdr, sizeof(hdr));
527 		found = 1;
528 	}
529 	if (found == 0) {
530 		LDM_DEBUG(1, "%s: valid LDM private header not found",
531 		    pp->name);
532 		return (ENXIO);
533 	}
534 	return (0);
535 }
536 
537 static int
538 ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
539 {
540 	struct g_part_table *gpt;
541 	struct g_part_entry *e;
542 	struct g_consumer *cp2;
543 	int error;
544 
545 	cp2 = LIST_NEXT(cp, consumer);
546 	g_topology_lock();
547 	gpt = cp->provider->geom->softc;
548 	error = 0;
549 	LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) {
550 		if (cp->provider == e->gpe_pp) {
551 			/* ms-ldm-metadata partition */
552 			if (e->gpe_start != db->ph.db_offset ||
553 			    e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1)
554 				error++;
555 		} else if (cp2->provider == e->gpe_pp) {
556 			/* ms-ldm-data partition */
557 			if (e->gpe_start != db->ph.start ||
558 			    e->gpe_end != db->ph.start + db->ph.size - 1)
559 				error++;
560 		}
561 		if (error != 0) {
562 			LDM_DEBUG(0, "%s: GPT partition %d boundaries "
563 			    "do not match with the LDM metadata",
564 			    e->gpe_pp->name, e->gpe_index);
565 			error = ENXIO;
566 			break;
567 		}
568 	}
569 	g_topology_unlock();
570 	return (error);
571 }
572 
573 static int
574 ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
575 {
576 	struct g_provider *pp;
577 	struct ldm_tochdr hdr;
578 	uint64_t offset, conf_size, log_size;
579 	int error, found, i;
580 	u_char *buf;
581 
582 	pp = cp->provider;
583 	for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
584 		offset = db->ph.db_offset + db->ph.th_offset[i];
585 		buf = g_read_data(cp,
586 		    offset * pp->sectorsize, pp->sectorsize, &error);
587 		if (buf == NULL) {
588 			LDM_DEBUG(1, "%s: failed to read TOC header "
589 			    "at LBA %ju", pp->name, (uintmax_t)offset);
590 			continue;
591 		}
592 		if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
593 		    memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
594 		    strlen(LDM_TH_NAME1)) != 0 ||
595 		    memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
596 		    strlen(LDM_TH_NAME2)) != 0) {
597 			LDM_DEBUG(1, "%s: failed to parse TOC header "
598 			    "at LBA %ju", pp->name, (uintmax_t)offset);
599 			LDM_DUMP(buf, pp->sectorsize);
600 			g_free(buf);
601 			continue;
602 		}
603 		hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
604 		hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
605 		conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
606 		log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
607 		if (conf_size != db->ph.conf_size ||
608 		    hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
609 		    log_size != db->ph.log_size ||
610 		    hdr.log_offset + log_size >= LDM_DB_SIZE) {
611 			LDM_DEBUG(1, "%s: invalid values in the "
612 			    "TOC header at LBA %ju", pp->name,
613 			    (uintmax_t)offset);
614 			LDM_DUMP(buf, pp->sectorsize);
615 			g_free(buf);
616 			continue;
617 		}
618 		g_free(buf);
619 		if (found == 0)
620 			memcpy(&db->th, &hdr, sizeof(hdr));
621 		found = 1;
622 	}
623 	if (found == 0) {
624 		LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
625 		    pp->name);
626 		return (ENXIO);
627 	}
628 	return (0);
629 }
630 
631 static int
632 ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
633 {
634 	struct g_provider *pp;
635 	struct uuid dg_guid;
636 	uint64_t offset;
637 	uint32_t version;
638 	int error;
639 	u_char *buf;
640 
641 	pp = cp->provider;
642 	offset = db->ph.db_offset + db->th.conf_offset;
643 	buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
644 	    &error);
645 	if (buf == NULL) {
646 		LDM_DEBUG(0, "%s: failed to read VMDB header at "
647 		    "LBA %ju", pp->name, (uintmax_t)offset);
648 		return (error);
649 	}
650 	if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
651 		g_free(buf);
652 		LDM_DEBUG(0, "%s: failed to parse VMDB header at "
653 		    "LBA %ju", pp->name, (uintmax_t)offset);
654 		return (ENXIO);
655 	}
656 	/* Check version. */
657 	version = be32dec(buf + LDM_DB_VERSION_OFF);
658 	if (version != 0x4000A) {
659 		g_free(buf);
660 		LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
661 		    pp->name, version >> 16, version & 0xFFFF);
662 		return (ENXIO);
663 	}
664 	/*
665 	 * Check VMDB update status:
666 	 *	1 - in a consistent state;
667 	 *	2 - in a creation phase;
668 	 *	3 - in a deletion phase;
669 	 */
670 	if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
671 		g_free(buf);
672 		LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
673 		    pp->name);
674 		return (ENXIO);
675 	}
676 	db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
677 	db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
678 	error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
679 	/* Compare disk group name and guid from VMDB and private headers */
680 	if (error != 0 || db->dh.size == 0 ||
681 	    pp->sectorsize % db->dh.size != 0 ||
682 	    strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
683 	    memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
684 	    db->dh.size * db->dh.last_seq >
685 	    db->ph.conf_size * pp->sectorsize) {
686 		LDM_DEBUG(0, "%s: invalid values in the VMDB header",
687 		    pp->name);
688 		LDM_DUMP(buf, pp->sectorsize);
689 		g_free(buf);
690 		return (EINVAL);
691 	}
692 	g_free(buf);
693 	return (0);
694 }
695 
696 static int
697 ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
698 {
699 	struct ldm_xvblk *blk;
700 	size_t size;
701 
702 	size = db->dh.size - 16;
703 	LIST_FOREACH(blk, &db->xvblks, entry)
704 		if (blk->group == vh->group)
705 			break;
706 	if (blk == NULL) {
707 		blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
708 		blk->group = vh->group;
709 		blk->size = size * vh->count + 16;
710 		blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
711 		blk->map = 0xFF << vh->count;
712 		LIST_INSERT_HEAD(&db->xvblks, blk, entry);
713 	}
714 	if ((blk->map & (1 << vh->index)) != 0) {
715 		/* Block with given index has been already saved. */
716 		return (EINVAL);
717 	}
718 	/* Copy the data block to the place related to index. */
719 	memcpy(blk->data + size * vh->index + 16, p + 16, size);
720 	blk->map |= 1 << vh->index;
721 	return (0);
722 }
723 
/*
 * Read the variable-width numeric field and return new offset.
 *
 * The field at buf[offset] is a one-byte length followed by that many
 * bytes of a big-endian number.  'range' is the size of 'buf'.  On success
 * the value is stored in *result and the offset of the byte following the
 * field is returned.  -1 is returned, and *result is left untouched, when
 * 'offset' lies outside of the buffer, when the length exceeds 8 bytes, or
 * when the field does not end before the last byte of the buffer.
 */
static int
ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
{
	uint64_t num;
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len > sizeof(uint64_t) || len + offset >= range)
		return (-1);
	for (num = 0; len > 0; len--)
		num = (num << 8) | buf[offset++];
	*result = num;
	return (offset);
}
739 
/*
 * Read the variable-width string and return new offset.
 *
 * The field at buf[offset] is a one-byte length followed by that many
 * characters without a terminator.  'range' is the size of 'buf' and
 * 'maxlen' is the size of 'result'.  On success the string is copied to
 * 'result', NUL-terminated, and the offset of the byte following the
 * field is returned.  -1 is returned when 'offset' lies outside of the
 * buffer, when the string plus terminator does not fit into 'result', or
 * when the field does not end before the last byte of the buffer.
 */
static int
ldm_vstr_get(const u_char *buf, int offset, u_char *result,
    size_t maxlen, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (len >= maxlen || len + offset >= range)
		return (-1);
	memcpy(result, buf + offset, len);
	result[len] = '\0';
	return (offset + len);
}
754 
/*
 * Just skip the variable-width variable and return new offset.
 *
 * The field at buf[offset] is a one-byte length followed by that many
 * bytes of data; 'range' is the size of 'buf'.  Returns the offset of the
 * byte following the field, or -1 when 'offset' lies outside of the buffer
 * or the field does not end before the last byte of the buffer.
 */
static int
ldm_vparm_skip(const u_char *buf, int offset, size_t range)
{
	uint8_t len;

	/* Do not touch the length byte unless it is inside the buffer. */
	if (offset < 0 || (size_t)offset >= range)
		return (-1);
	len = buf[offset++];
	if (offset + len >= range)
		return (-1);

	return (offset + len);
}
767 
768 static int
769 ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
770 {
771 	struct ldm_vblk *blk;
772 	struct ldm_volume *volume, *last;
773 	const char *errstr;
774 	u_char vstr[64];
775 	int error, offset;
776 
777 	blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
778 	blk->type = p[LDM_VBLK_TYPE_OFF];
779 	offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
780 	if (offset < 0) {
781 		errstr = "object id";
782 		goto fail;
783 	}
784 	offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
785 	if (offset < 0) {
786 		errstr = "object name";
787 		goto fail;
788 	}
789 	switch (blk->type) {
790 	/*
791 	 * Component VBLK fields:
792 	 * Offset	Size	Description
793 	 * ------------+-------+------------------------
794 	 *  0x18+	PS	volume state
795 	 *  0x18+5	PN	component children count
796 	 *  0x1D+16	PN	parent's volume object id
797 	 *  0x2D+1	PN	stripe size
798 	 */
799 	case LDM_VBLK_T_COMPONENT:
800 		offset = ldm_vparm_skip(p, offset, size);
801 		if (offset < 0) {
802 			errstr = "volume state";
803 			goto fail;
804 		}
805 		offset = ldm_vparm_skip(p, offset + 5, size);
806 		if (offset < 0) {
807 			errstr = "children count";
808 			goto fail;
809 		}
810 		offset = ldm_vnum_get(p, offset + 16,
811 		    &blk->u.comp.vol_id, size);
812 		if (offset < 0) {
813 			errstr = "volume id";
814 			goto fail;
815 		}
816 		break;
817 	/*
818 	 * Partition VBLK fields:
819 	 * Offset	Size	Description
820 	 * ------------+-------+------------------------
821 	 *  0x18+12	8	partition start offset
822 	 *  0x18+20	8	volume offset
823 	 *  0x18+28	PN	partition size
824 	 *  0x34+	PN	parent's component object id
825 	 *  0x34+	PN	disk's object id
826 	 */
827 	case LDM_VBLK_T_PARTITION:
828 		if (offset + 28 >= size) {
829 			errstr = "too small buffer";
830 			goto fail;
831 		}
832 		blk->u.part.start = be64dec(p + offset + 12);
833 		blk->u.part.offset = be64dec(p + offset + 20);
834 		offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
835 		if (offset < 0) {
836 			errstr = "partition size";
837 			goto fail;
838 		}
839 		offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
840 		if (offset < 0) {
841 			errstr = "component id";
842 			goto fail;
843 		}
844 		offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
845 		if (offset < 0) {
846 			errstr = "disk id";
847 			goto fail;
848 		}
849 		break;
850 	/*
851 	 * Disk VBLK fields:
852 	 * Offset	Size	Description
853 	 * ------------+-------+------------------------
854 	 *  0x18+	PS	disk GUID
855 	 */
856 	case LDM_VBLK_T_DISK:
857 		errstr = "disk guid";
858 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
859 		if (offset < 0)
860 			goto fail;
861 		error = parse_uuid(vstr, &blk->u.disk.guid);
862 		if (error != 0)
863 			goto fail;
864 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
865 		break;
866 	/*
867 	 * Disk group VBLK fields:
868 	 * Offset	Size	Description
869 	 * ------------+-------+------------------------
870 	 *  0x18+	PS	disk group GUID
871 	 */
872 	case LDM_VBLK_T_DISKGROUP:
873 #if 0
874 		strncpy(blk->u.disk_group.name, vstr,
875 		    sizeof(blk->u.disk_group.name));
876 		offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
877 		if (offset < 0) {
878 			errstr = "disk group guid";
879 			goto fail;
880 		}
881 		error = parse_uuid(name, &blk->u.disk_group.guid);
882 		if (error != 0) {
883 			errstr = "disk group guid";
884 			goto fail;
885 		}
886 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
887 #endif
888 		break;
889 	/*
890 	 * Disk VBLK fields:
891 	 * Offset	Size	Description
892 	 * ------------+-------+------------------------
893 	 *  0x18+	16	disk GUID
894 	 */
895 	case LDM_VBLK_T_DISK4:
896 		be_uuid_dec(p + offset, &blk->u.disk.guid);
897 		LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
898 		break;
899 	/*
900 	 * Disk group VBLK fields:
901 	 * Offset	Size	Description
902 	 * ------------+-------+------------------------
903 	 *  0x18+	16	disk GUID
904 	 */
905 	case LDM_VBLK_T_DISKGROUP4:
906 #if 0
907 		strncpy(blk->u.disk_group.name, vstr,
908 		    sizeof(blk->u.disk_group.name));
909 		be_uuid_dec(p + offset, &blk->u.disk.guid);
910 		LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
911 #endif
912 		break;
913 	/*
914 	 * Volume VBLK fields:
915 	 * Offset	Size	Description
916 	 * ------------+-------+------------------------
917 	 *  0x18+	PS	volume type
918 	 *  0x18+	PS	unknown
919 	 *  0x18+	14(S)	volume state
920 	 *  0x18+16	1	volume number
921 	 *  0x18+21	PN	volume children count
922 	 *  0x2D+16	PN	volume size
923 	 *  0x3D+4	1	partition type
924 	 */
925 	case LDM_VBLK_T_VOLUME:
926 		offset = ldm_vparm_skip(p, offset, size);
927 		if (offset < 0) {
928 			errstr = "volume type";
929 			goto fail;
930 		}
931 		offset = ldm_vparm_skip(p, offset, size);
932 		if (offset < 0) {
933 			errstr = "unknown param";
934 			goto fail;
935 		}
936 		if (offset + 21 >= size) {
937 			errstr = "too small buffer";
938 			goto fail;
939 		}
940 		blk->u.vol.number = p[offset + 16];
941 		offset = ldm_vparm_skip(p, offset + 21, size);
942 		if (offset < 0) {
943 			errstr = "children count";
944 			goto fail;
945 		}
946 		offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
947 		if (offset < 0) {
948 			errstr = "volume size";
949 			goto fail;
950 		}
951 		if (offset + 4 >= size) {
952 			errstr = "too small buffer";
953 			goto fail;
954 		}
955 		blk->u.vol.part_type = p[offset + 4];
956 		/* keep volumes ordered by volume number */
957 		last = NULL;
958 		LIST_FOREACH(volume, &db->volumes, entry) {
959 			if (volume->number > blk->u.vol.number)
960 				break;
961 			last = volume;
962 		}
963 		if (last != NULL)
964 			LIST_INSERT_AFTER(last, &blk->u.vol, entry);
965 		else
966 			LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
967 		break;
968 	default:
969 		LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
970 		LDM_DUMP(p, size);
971 	}
972 	LIST_INSERT_HEAD(&db->vblks, blk, entry);
973 	return (0);
974 fail:
975 	LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
976 	    errstr, blk->type);
977 	LDM_DUMP(p, size);
978 	g_free(blk);
979 	return (EINVAL);
980 }
981 
982 static void
983 ldm_vmdb_free(struct ldm_db *db)
984 {
985 	struct ldm_vblk *vblk;
986 	struct ldm_xvblk *xvblk;
987 
988 	while (!LIST_EMPTY(&db->xvblks)) {
989 		xvblk = LIST_FIRST(&db->xvblks);
990 		LIST_REMOVE(xvblk, entry);
991 		g_free(xvblk->data);
992 		g_free(xvblk);
993 	}
994 	while (!LIST_EMPTY(&db->vblks)) {
995 		vblk = LIST_FIRST(&db->vblks);
996 		LIST_REMOVE(vblk, entry);
997 		g_free(vblk);
998 	}
999 }
1000 
1001 static int
1002 ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp)
1003 {
1004 	struct g_provider *pp;
1005 	struct ldm_vblk *vblk;
1006 	struct ldm_xvblk *xvblk;
1007 	struct ldm_volume *volume;
1008 	struct ldm_component *comp;
1009 	struct ldm_vblkhdr vh;
1010 	u_char *buf, *p;
1011 	size_t size, n, sectors;
1012 	uint64_t offset;
1013 	int error;
1014 
1015 	pp = cp->provider;
1016 	size = howmany(db->dh.last_seq * db->dh.size, pp->sectorsize);
1017 	size -= 1; /* one sector takes vmdb header */
1018 	for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) {
1019 		offset = db->ph.db_offset + db->th.conf_offset + n + 1;
1020 		sectors = (size - n) > (MAXPHYS / pp->sectorsize) ?
1021 		    MAXPHYS / pp->sectorsize: size - n;
1022 		/* read VBLKs */
1023 		buf = g_read_data(cp, offset * pp->sectorsize,
1024 		    sectors * pp->sectorsize, &error);
1025 		if (buf == NULL) {
1026 			LDM_DEBUG(0, "%s: failed to read VBLK\n",
1027 			    pp->name);
1028 			goto fail;
1029 		}
1030 		for (p = buf; p < buf + sectors * pp->sectorsize;
1031 		    p += db->dh.size) {
1032 			if (memcmp(p, LDM_VBLK_SIGN,
1033 			    strlen(LDM_VBLK_SIGN)) != 0) {
1034 				LDM_DEBUG(0, "%s: no VBLK signature\n",
1035 				    pp->name);
1036 				LDM_DUMP(p, db->dh.size);
1037 				goto fail;
1038 			}
1039 			vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF);
1040 			vh.group = be32dec(p + LDM_VBLK_GROUP_OFF);
1041 			/* skip empty blocks */
1042 			if (vh.seq == 0 || vh.group == 0)
1043 				continue;
1044 			vh.index = be16dec(p + LDM_VBLK_INDEX_OFF);
1045 			vh.count = be16dec(p + LDM_VBLK_COUNT_OFF);
1046 			if (vh.count == 0 || vh.count > 4 ||
1047 			    vh.seq > db->dh.last_seq) {
1048 				LDM_DEBUG(0, "%s: invalid values "
1049 				    "in the VBLK header\n", pp->name);
1050 				LDM_DUMP(p, db->dh.size);
1051 				goto fail;
1052 			}
1053 			if (vh.count > 1) {
1054 				error = ldm_xvblk_handle(db, &vh, p);
1055 				if (error != 0) {
1056 					LDM_DEBUG(0, "%s: xVBLK "
1057 					    "is corrupted\n", pp->name);
1058 					LDM_DUMP(p, db->dh.size);
1059 					goto fail;
1060 				}
1061 				continue;
1062 			}
1063 			if (be16dec(p + 16) != 0)
1064 				LDM_DEBUG(1, "%s: VBLK update"
1065 				    " status is %u\n", pp->name,
1066 				    be16dec(p + 16));
1067 			error = ldm_vblk_handle(db, p, db->dh.size);
1068 			if (error != 0)
1069 				goto fail;
1070 		}
1071 		g_free(buf);
1072 		buf = NULL;
1073 	}
1074 	/* Parse xVBLKs */
1075 	while (!LIST_EMPTY(&db->xvblks)) {
1076 		xvblk = LIST_FIRST(&db->xvblks);
1077 		if (xvblk->map == 0xFF) {
1078 			error = ldm_vblk_handle(db, xvblk->data, xvblk->size);
1079 			if (error != 0)
1080 				goto fail;
1081 		} else {
1082 			LDM_DEBUG(0, "%s: incomplete or corrupt "
1083 			    "xVBLK found\n", pp->name);
1084 			goto fail;
1085 		}
1086 		LIST_REMOVE(xvblk, entry);
1087 		g_free(xvblk->data);
1088 		g_free(xvblk);
1089 	}
1090 	/* construct all VBLKs relations */
1091 	LIST_FOREACH(volume, &db->volumes, entry) {
1092 		LIST_FOREACH(vblk, &db->vblks, entry)
1093 			if (vblk->type == LDM_VBLK_T_COMPONENT &&
1094 			    vblk->u.comp.vol_id == volume->id) {
1095 				LIST_INSERT_HEAD(&volume->components,
1096 				    &vblk->u.comp, entry);
1097 				volume->count++;
1098 			}
1099 		LIST_FOREACH(comp, &volume->components, entry)
1100 			LIST_FOREACH(vblk, &db->vblks, entry)
1101 				if (vblk->type == LDM_VBLK_T_PARTITION &&
1102 				    vblk->u.part.comp_id == comp->id) {
1103 					LIST_INSERT_HEAD(&comp->partitions,
1104 					    &vblk->u.part, entry);
1105 					comp->count++;
1106 				}
1107 	}
1108 	return (0);
1109 fail:
1110 	ldm_vmdb_free(db);
1111 	g_free(buf);
1112 	return (ENXIO);
1113 }
1114 
/*
 * Adding partitions to an LDM table is not supported: the request is
 * refused unconditionally with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry,
    struct g_part_parms *gpp)
{

	/* Unsupported operation. */
	return (ENOSYS);
}
1122 
/*
 * Installing boot code is not supported for LDM: the request is refused
 * unconditionally with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp)
{

	/* Unsupported operation. */
	return (ENOSYS);
}
1129 
/*
 * Creating a new LDM table is not supported: the request is refused
 * unconditionally with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp)
{

	/* Unsupported operation. */
	return (ENOSYS);
}
1136 
1137 static int
1138 g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp)
1139 {
1140 	struct g_part_ldm_table *table;
1141 	struct g_provider *pp;
1142 
1143 	table = (struct g_part_ldm_table *)basetable;
1144 	/*
1145 	 * To destroy LDM on a disk partitioned with GPT we should delete
1146 	 * ms-ldm-metadata partition, but we can't do this via standard
1147 	 * GEOM_PART method.
1148 	 */
1149 	if (table->is_gpt)
1150 		return (ENOSYS);
1151 	pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider;
1152 	/*
1153 	 * To destroy LDM we should wipe MBR, first private header and
1154 	 * backup private headers.
1155 	 */
1156 	basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1;
1157 	/*
1158 	 * Don't touch last backup private header when LDM database is
1159 	 * not located in the last 1MByte area.
1160 	 * XXX: can't remove all blocks.
1161 	 */
1162 	if (table->db_offset + LDM_DB_SIZE ==
1163 	    pp->mediasize / pp->sectorsize)
1164 		basetable->gpt_smtail = 1;
1165 	return (0);
1166 }
1167 
1168 static void
1169 g_part_ldm_dumpconf(struct g_part_table *basetable,
1170     struct g_part_entry *baseentry, struct sbuf *sb, const char *indent)
1171 {
1172 	struct g_part_ldm_entry *entry;
1173 
1174 	entry = (struct g_part_ldm_entry *)baseentry;
1175 	if (indent == NULL) {
1176 		/* conftxt: libdisk compatibility */
1177 		sbuf_printf(sb, " xs LDM xt %u", entry->type);
1178 	} else if (entry != NULL) {
1179 		/* confxml: partition entry information */
1180 		sbuf_printf(sb, "%s<rawtype>%u</rawtype>\n", indent,
1181 		    entry->type);
1182 	} else {
1183 		/* confxml: scheme information */
1184 	}
1185 }
1186 
/*
 * Always returns 0, whatever entry is passed in.
 * NOTE(review): presumably this tells GEOM_PART that LDM partitions are
 * not to be used as kernel dump targets -- confirm against the g_part
 * dumpto contract.
 */
static int
g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry)
{

	/* Neither argument is examined. */
	return (0);
}
1193 
/*
 * Modifying partitions of an LDM table is not supported: the request is
 * refused unconditionally with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{

	/* Unsupported operation. */
	return (ENOSYS);
}
1201 
1202 static const char *
1203 g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry,
1204     char *buf, size_t bufsz)
1205 {
1206 
1207 	snprintf(buf, bufsz, "s%d", baseentry->gpe_index);
1208 	return (buf);
1209 }
1210 
1211 static int
1212 ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp)
1213 {
1214 	struct g_part_ldm_table *table;
1215 	struct g_part_table *gpt;
1216 	struct g_part_entry *entry;
1217 	struct g_consumer *cp2;
1218 	struct gpt_ent *part;
1219 	u_char *buf;
1220 	int error;
1221 
1222 	/*
1223 	 * XXX: We use some knowledge about GEOM_PART_GPT internal
1224 	 * structures, but it is easier than parse GPT by himself.
1225 	 */
1226 	g_topology_lock();
1227 	gpt = cp->provider->geom->softc;
1228 	LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) {
1229 		part = (struct gpt_ent *)(entry + 1);
1230 		/* Search ms-ldm-metadata partition */
1231 		if (memcmp(&part->ent_type,
1232 		    &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 ||
1233 		    entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1)
1234 			continue;
1235 
1236 		/* Create new consumer and attach it to metadata partition */
1237 		cp2 = g_new_consumer(cp->geom);
1238 		error = g_attach(cp2, entry->gpe_pp);
1239 		if (error != 0) {
1240 			g_destroy_consumer(cp2);
1241 			g_topology_unlock();
1242 			return (ENXIO);
1243 		}
1244 		error = g_access(cp2, 1, 0, 0);
1245 		if (error != 0) {
1246 			g_detach(cp2);
1247 			g_destroy_consumer(cp2);
1248 			g_topology_unlock();
1249 			return (ENXIO);
1250 		}
1251 		g_topology_unlock();
1252 
1253 		LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT",
1254 		    cp->provider->name, cp2->provider->name);
1255 		/* Read the LDM private header */
1256 		buf = ldm_privhdr_read(cp2,
1257 		    ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize,
1258 		    &error);
1259 		if (buf != NULL) {
1260 			table = (struct g_part_ldm_table *)basetable;
1261 			table->is_gpt = 1;
1262 			g_free(buf);
1263 			return (G_PART_PROBE_PRI_HIGH);
1264 		}
1265 
1266 		/* second consumer is no longer needed. */
1267 		g_topology_lock();
1268 		g_access(cp2, -1, 0, 0);
1269 		g_detach(cp2);
1270 		g_destroy_consumer(cp2);
1271 		break;
1272 	}
1273 	g_topology_unlock();
1274 	return (ENXIO);
1275 }
1276 
1277 static int
1278 g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp)
1279 {
1280 	struct g_provider *pp;
1281 	u_char *buf, type[64];
1282 	int error, idx;
1283 
1284 
1285 	pp = cp->provider;
1286 	if (pp->sectorsize != 512)
1287 		return (ENXIO);
1288 
1289 	error = g_getattr("PART::scheme", cp, &type);
1290 	if (error == 0 && strcmp(type, "GPT") == 0) {
1291 		if (g_getattr("PART::type", cp, &type) != 0 ||
1292 		    strcmp(type, "ms-ldm-data") != 0)
1293 			return (ENXIO);
1294 		error = ldm_gpt_probe(basetable, cp);
1295 		return (error);
1296 	}
1297 
1298 	if (basetable->gpt_depth != 0)
1299 		return (ENXIO);
1300 
1301 	/* LDM has 1M metadata area */
1302 	if (pp->mediasize <= 1024 * 1024)
1303 		return (ENOSPC);
1304 
1305 	/* Check that there's a MBR */
1306 	buf = g_read_data(cp, 0, pp->sectorsize, &error);
1307 	if (buf == NULL)
1308 		return (error);
1309 
1310 	if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) {
1311 		g_free(buf);
1312 		return (ENXIO);
1313 	}
1314 	error = ENXIO;
1315 	/* Check that we have LDM partitions in the MBR */
1316 	for (idx = 0; idx < NDOSPART && error != 0; idx++) {
1317 		if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM)
1318 			error = 0;
1319 	}
1320 	g_free(buf);
1321 	if (error == 0) {
1322 		LDM_DEBUG(2, "%s: LDM data partitions found in MBR",
1323 		    pp->name);
1324 		/* Read the LDM private header */
1325 		buf = ldm_privhdr_read(cp,
1326 		    ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error);
1327 		if (buf == NULL)
1328 			return (error);
1329 		g_free(buf);
1330 		return (G_PART_PROBE_PRI_HIGH);
1331 	}
1332 	return (error);
1333 }
1334 
1335 static int
1336 g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp)
1337 {
1338 	struct g_part_ldm_table *table;
1339 	struct g_part_ldm_entry *entry;
1340 	struct g_consumer *cp2;
1341 	struct ldm_component *comp;
1342 	struct ldm_partition *part;
1343 	struct ldm_volume *vol;
1344 	struct ldm_disk *disk;
1345 	struct ldm_db db;
1346 	int error, index, skipped;
1347 
1348 	table = (struct g_part_ldm_table *)basetable;
1349 	memset(&db, 0, sizeof(db));
1350 	cp2 = cp;					/* ms-ldm-data */
1351 	if (table->is_gpt)
1352 		cp = LIST_FIRST(&cp->geom->consumer);	/* ms-ldm-metadata */
1353 	/* Read and parse LDM private headers. */
1354 	error = ldm_privhdr_check(&db, cp, table->is_gpt);
1355 	if (error != 0)
1356 		goto gpt_cleanup;
1357 	basetable->gpt_first = table->is_gpt ? 0: db.ph.start;
1358 	basetable->gpt_last = basetable->gpt_first + db.ph.size - 1;
1359 	table->db_offset = db.ph.db_offset;
1360 	/* Make additional checks for GPT */
1361 	if (table->is_gpt) {
1362 		error = ldm_gpt_check(&db, cp);
1363 		if (error != 0)
1364 			goto gpt_cleanup;
1365 		/*
1366 		 * Now we should reset database offset to zero, because our
1367 		 * consumer cp is attached to the ms-ldm-metadata partition
1368 		 * and we don't need add db_offset to read from it.
1369 		 */
1370 		db.ph.db_offset = 0;
1371 	}
1372 	/* Read and parse LDM TOC headers. */
1373 	error = ldm_tochdr_check(&db, cp);
1374 	if (error != 0)
1375 		goto gpt_cleanup;
1376 	/* Read and parse LDM VMDB header. */
1377 	error = ldm_vmdbhdr_check(&db, cp);
1378 	if (error != 0)
1379 		goto gpt_cleanup;
1380 	error = ldm_vmdb_parse(&db, cp);
1381 	/*
1382 	 * For the GPT case we must detach and destroy
1383 	 * second consumer before return.
1384 	 */
1385 gpt_cleanup:
1386 	if (table->is_gpt) {
1387 		g_topology_lock();
1388 		g_access(cp, -1, 0, 0);
1389 		g_detach(cp);
1390 		g_destroy_consumer(cp);
1391 		g_topology_unlock();
1392 		cp = cp2;
1393 	}
1394 	if (error != 0)
1395 		return (error);
1396 	/* Search current disk in the disk list. */
1397 	LIST_FOREACH(disk, &db.disks, entry)
1398 	    if (memcmp(&disk->guid, &db.ph.disk_guid,
1399 		sizeof(struct uuid)) == 0)
1400 		    break;
1401 	if (disk == NULL) {
1402 		LDM_DEBUG(1, "%s: no LDM volumes on this disk",
1403 		    cp->provider->name);
1404 		ldm_vmdb_free(&db);
1405 		return (ENXIO);
1406 	}
1407 	index = 1;
1408 	LIST_FOREACH(vol, &db.volumes, entry) {
1409 		LIST_FOREACH(comp, &vol->components, entry) {
1410 			/* Skip volumes from different disks. */
1411 			part = LIST_FIRST(&comp->partitions);
1412 			if (part->disk_id != disk->id)
1413 				continue;
1414 			skipped = 0;
1415 			/* We don't support spanned and striped volumes. */
1416 			if (comp->count > 1 || part->offset != 0) {
1417 				LDM_DEBUG(1, "%s: LDM volume component "
1418 				    "%ju has %u partitions. Skipped",
1419 				    cp->provider->name, (uintmax_t)comp->id,
1420 				    comp->count);
1421 				skipped = 1;
1422 			}
1423 			/*
1424 			 * Allow mirrored volumes only when they are explicitly
1425 			 * allowed with kern.geom.part.ldm.show_mirrors=1.
1426 			 */
1427 			if (vol->count > 1 && show_mirrors == 0) {
1428 				LDM_DEBUG(1, "%s: LDM volume %ju has %u "
1429 				    "components. Skipped",
1430 				    cp->provider->name, (uintmax_t)vol->id,
1431 				    vol->count);
1432 				skipped = 1;
1433 			}
1434 			entry = (struct g_part_ldm_entry *)g_part_new_entry(
1435 			    basetable, index++,
1436 			    basetable->gpt_first + part->start,
1437 			    basetable->gpt_first + part->start +
1438 			    part->size - 1);
1439 			/*
1440 			 * Mark skipped partition as ms-ldm-data partition.
1441 			 * We do not support them, but it is better to show
1442 			 * that we have something there, than just show
1443 			 * free space.
1444 			 */
1445 			if (skipped == 0)
1446 				entry->type = vol->part_type;
1447 			else
1448 				entry->type = DOSPTYP_LDM;
1449 			LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju,"
1450 			    " end: %ju, type: 0x%02x\n", cp->provider->name,
1451 			    (uintmax_t)part->id,(uintmax_t)part->start +
1452 			    basetable->gpt_first, (uintmax_t)part->start +
1453 			    part->size + basetable->gpt_first - 1,
1454 			    vol->part_type);
1455 		}
1456 	}
1457 	ldm_vmdb_free(&db);
1458 	return (error);
1459 }
1460 
1461 static const char *
1462 g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry,
1463     char *buf, size_t bufsz)
1464 {
1465 	struct g_part_ldm_entry *entry;
1466 	int i;
1467 
1468 	entry = (struct g_part_ldm_entry *)baseentry;
1469 	for (i = 0; i < nitems(ldm_alias_match); i++) {
1470 		if (ldm_alias_match[i].typ == entry->type)
1471 			return (g_part_alias_name(ldm_alias_match[i].alias));
1472 	}
1473 	snprintf(buf, bufsz, "!%d", entry->type);
1474 	return (buf);
1475 }
1476 
/*
 * Writing LDM metadata is not supported: the request is refused
 * unconditionally with ENOSYS and no argument is examined.
 */
static int
g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp)
{

	/* Unsupported operation. */
	return (ENOSYS);
}
1483