xref: /titanic_52/usr/src/uts/sun4v/io/vdsk_common.c (revision b02e9a2d4d2071d770e5aa9ae8f83f2bbe1f2ced)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/crc32.h>
30 #include <sys/cred.h>
31 #include <sys/ddi.h>
32 #include <sys/dkio.h>
33 #include <sys/file.h>
34 #include <sys/kmem.h>
35 #include <sys/sunddi.h>
36 #include <sys/sunldi.h>
37 #include <sys/types.h>
38 #include <sys/varargs.h>
39 #include <sys/vtoc.h>
40 
41 #include <sys/vdsk_common.h>
42 
43 /*
44  * Hooks for EFI support
45  */
46 
47 /*
48  * This code is a port of the functions efi_alloc_read() and efi_free() from
49  * the libefi userland library to the kernel so that the vDisk drivers (vdc
50  * and vds) can read EFI data. We will certainly be able to remove that code
51  * once RFE 6213117 is implemented.
52  */
53 
/*
 * Mode flags passed with every ioctl issued from this file.
 */
#define	VD_IOCTL_FLAGS  (FEXCL | FREAD | FWRITE | FKIOCTL)

/*
 * Debug trace helper: VD_EFI_DEBUG(fmt, ...) calls vd_efi_print() only
 * when vd_efi_debug is non-zero.
 *
 * The test is inverted and carries its own "else" so that the macro can be
 * used as the body of an unbraced "if" without capturing an "else" that
 * belongs to the caller's statement.
 */
#define	VD_EFI_DEBUG	if (!vd_efi_debug) { } else vd_efi_print
57 
/*
 * NBLOCKS(p, l): number of blocks of l bytes taken up by an EFI label
 * holding p partition entries, i.e. one block plus the array of p
 * efi_gpe_t entries rounded up to a whole number of blocks.
 */
#define	NBLOCKS(p, l) \
	(1 + ((((p) * (int)sizeof (efi_gpe_t))  + ((l) - 1)) / (l)))

/* number of partitions -- limited by what we can malloc */
#define	MAX_PARTS \
	((4294967295UL - sizeof (struct dk_gpt)) / sizeof (struct dk_part))
67 
68 /*
69  * The vd_efi_alloc_and_read() function will use some ioctls to get EFI data
70  * but the way we issue ioctl is different depending if we are on the vDisk
71  * server side (vds) or on the vDisk client side.
72  *
73  * On the server side (vds), we reference a layered device (ldi_handle_t) so we
74  * will use the LDI interface to execute ioctls (ldi_ioctl()). On the client
75  * side (vdc), we reference a vdc device (with a dev_t) so we directly invoke
76  * the function of the vdc driver implementing ioctls (vd_process_ioctl()).
77  */
78 #define	VD_EFI_CALLER_VDS	0
79 #define	VD_EFI_CALLER_VDC	1
80 
81 typedef struct vd_efi_dev {
82 	int caller;
83 	union {
84 		ldi_handle_t vds;
85 		dev_t vdc;
86 	} ioctl_dev;
87 } vd_efi_dev_t;
88 
89 static int (*vdc_ioctl_func)(dev_t dev, int cmd, caddr_t arg, int mode) = NULL;
90 
91 static int vd_efi_debug = 1;
92 
93 static struct uuid_to_ptag {
94 	struct uuid	uuid;
95 } conversion_array[] = {
96 	{ EFI_UNUSED },
97 	{ EFI_BOOT },
98 	{ EFI_ROOT },
99 	{ EFI_SWAP },
100 	{ EFI_USR },
101 	{ EFI_BACKUP },
102 	{ 0 },			/* STAND is never used */
103 	{ EFI_VAR },
104 	{ EFI_HOME },
105 	{ EFI_ALTSCTR },
106 	{ 0 },			/* CACHE (cachefs) is never used */
107 	{ EFI_RESERVED },
108 	{ EFI_SYSTEM },
109 	{ EFI_LEGACY_MBR },
110 	{ EFI_RESV3 },
111 	{ EFI_RESV4 },
112 	{ EFI_MSFT_RESV },
113 	{ EFI_DELL_BASIC },
114 	{ EFI_DELL_RAID },
115 	{ EFI_DELL_SWAP },
116 	{ EFI_DELL_LVM },
117 	{ EFI_DELL_RESV }
118 };
119 
120 static void
121 vd_efi_print(const char *format, ...)
122 {
123 	va_list args;
124 
125 	va_start(args, format);
126 	vcmn_err(CE_CONT, format, args);
127 	va_end(args);
128 }
129 
130 /*
131  * Return a 32-bit CRC of the contents of the buffer.
132  *
133  * The seed is 0xffffffff and the result is XORed with 0xffffffff
134  * because this is what the Itanium firmware expects.
135  */
136 unsigned int
137 vd_efi_crc32(const unsigned char *s, unsigned int len)
138 {
139 	unsigned int crc32val;
140 
141 	CRC32(crc32val, s, len, -1U, crc32_table);
142 
143 	return (crc32val ^ -1U);
144 }
145 
146 static int
147 vd_ioctl(vd_efi_dev_t *dev, int cmd, void *arg, int flag,
148     cred_t *cred, int *rvalp)
149 {
150 	int error;
151 
152 	if (dev->caller == VD_EFI_CALLER_VDS) {
153 		error = ldi_ioctl(dev->ioctl_dev.vds, cmd,
154 		    (intptr_t)arg, flag, cred, rvalp);
155 	} else {
156 		ASSERT(vdc_ioctl_func != NULL);
157 		error = (*vdc_ioctl_func)(dev->ioctl_dev.vdc, cmd,
158 		    arg, flag);
159 	}
160 
161 	return (error);
162 }
163 
164 static int
165 vd_efi_ioctl(vd_efi_dev_t *dev, int cmd, dk_efi_t *dk_ioc)
166 {
167 	void *data = dk_ioc->dki_data;
168 	int error;
169 
170 	dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
171 	error = vd_ioctl(dev, cmd, (caddr_t)dk_ioc, VD_IOCTL_FLAGS,
172 	    kcred, NULL);
173 	dk_ioc->dki_data = data;
174 
175 	return (error);
176 }
177 
178 static int
179 vd_efi_check_label(vd_efi_dev_t *dev, dk_efi_t *dk_ioc)
180 {
181 	efi_gpt_t *efi;
182 	uint_t crc;
183 	int status;
184 
185 	if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, dk_ioc)) != 0)
186 		return (status);
187 
188 	efi = dk_ioc->dki_data;
189 	if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
190 		VD_EFI_DEBUG("Bad EFI signature: 0x%llx != 0x%llx\n",
191 		    (long long)efi->efi_gpt_Signature,
192 		    (long long)LE_64(EFI_SIGNATURE));
193 		return (EINVAL);
194 	}
195 
196 	/*
197 	 * check CRC of the header; the size of the header should
198 	 * never be larger than one block
199 	 */
200 	crc = efi->efi_gpt_HeaderCRC32;
201 	efi->efi_gpt_HeaderCRC32 = 0;
202 
203 	if (((len_t)LE_32(efi->efi_gpt_HeaderSize) > dk_ioc->dki_length) ||
204 	    crc != LE_32(vd_efi_crc32((unsigned char *)efi,
205 	    LE_32(efi->efi_gpt_HeaderSize)))) {
206 		VD_EFI_DEBUG("Bad EFI CRC: 0x%x != 0x%x\n",
207 		    crc, LE_32(vd_efi_crc32((unsigned char *)efi,
208 		    sizeof (struct efi_gpt))));
209 		return (EINVAL);
210 	}
211 
212 	return (0);
213 }
214 
215 static int
216 vd_efi_read(vd_efi_dev_t *dev, struct dk_gpt *vtoc)
217 {
218 	int			i, j, status;
219 	int			label_len;
220 	int			md_flag = 0;
221 	struct dk_minfo		disk_info;
222 	dk_efi_t		dk_ioc;
223 	efi_gpt_t		*efi;
224 	efi_gpe_t		*efi_parts;
225 	struct dk_cinfo		dki_info;
226 	uint32_t		user_length;
227 
228 	/*
229 	 * get the partition number for this file descriptor.
230 	 */
231 	if ((status = vd_ioctl(dev, DKIOCINFO, &dki_info, VD_IOCTL_FLAGS,
232 	    kcred, NULL)) != 0) {
233 		VD_EFI_DEBUG("DKIOCINFO error 0x%x\n", status);
234 		return (status);
235 	}
236 	if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
237 	    (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
238 		md_flag++;
239 	}
240 	/* get the LBA size */
241 	if ((status = vd_ioctl(dev, DKIOCGMEDIAINFO, &disk_info, VD_IOCTL_FLAGS,
242 	    kcred, NULL)) != 0) {
243 		VD_EFI_DEBUG("assuming LBA 512 bytes %d\n", status);
244 		disk_info.dki_lbsize = DEV_BSIZE;
245 	}
246 	if (disk_info.dki_lbsize == 0) {
247 		VD_EFI_DEBUG("efi_read: assuming LBA 512 bytes\n");
248 		disk_info.dki_lbsize = DEV_BSIZE;
249 	}
250 	/*
251 	 * Read the EFI GPT to figure out how many partitions we need
252 	 * to deal with.
253 	 */
254 	dk_ioc.dki_lba = 1;
255 	if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) {
256 		label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize;
257 	} else {
258 		label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) +
259 				    disk_info.dki_lbsize;
260 		if (label_len % disk_info.dki_lbsize) {
261 			/* pad to physical sector size */
262 			label_len += disk_info.dki_lbsize;
263 			label_len &= ~(disk_info.dki_lbsize - 1);
264 		}
265 	}
266 
267 	dk_ioc.dki_data = kmem_alloc(label_len, KM_SLEEP);
268 	dk_ioc.dki_length = label_len;
269 	user_length = vtoc->efi_nparts;
270 	efi = dk_ioc.dki_data;
271 	if (md_flag) {
272 		if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_ioc)) != 0)
273 			return (status);
274 	} else if ((status = vd_efi_check_label(dev, &dk_ioc)) == EINVAL) {
275 		/* no valid label here; try the alternate */
276 		dk_ioc.dki_lba = disk_info.dki_capacity - 1;
277 		dk_ioc.dki_length = disk_info.dki_lbsize;
278 		if (vd_efi_check_label(dev, &dk_ioc) == 0) {
279 			VD_EFI_DEBUG("efi_read: primary label corrupt; "
280 			    "using backup\n");
281 			dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
282 			vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT;
283 			vtoc->efi_nparts =
284 			    LE_32(efi->efi_gpt_NumberOfPartitionEntries);
285 			/*
286 			 * partitions are between last usable LBA and
287 			 * backup partition header
288 			 */
289 			dk_ioc.dki_data++;
290 			dk_ioc.dki_length = disk_info.dki_capacity -
291 						    dk_ioc.dki_lba - 1;
292 			dk_ioc.dki_length *= disk_info.dki_lbsize;
293 			if (dk_ioc.dki_length > (len_t)label_len) {
294 				status = EINVAL;
295 			} else {
296 				status = vd_efi_ioctl(dev, DKIOCGETEFI,
297 				    &dk_ioc);
298 			}
299 		}
300 	}
301 	if (status != 0) {
302 		kmem_free(efi, label_len);
303 		return (status);
304 	}
305 
306 	/* partitions start in the next block */
307 	/* LINTED -- always longlong aligned */
308 	efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize);
309 
310 	/*
311 	 * Assemble this into a "dk_gpt" struct for easier
312 	 * digestibility by applications.
313 	 */
314 	vtoc->efi_version = LE_32(efi->efi_gpt_Revision);
315 	vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries);
316 	vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry);
317 	vtoc->efi_lbasize = disk_info.dki_lbsize;
318 	vtoc->efi_last_lba = disk_info.dki_capacity - 1;
319 	vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA);
320 	vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA);
321 	UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID);
322 
323 	/*
324 	 * If the array the user passed in is too small, set the length
325 	 * to what it needs to be and return
326 	 */
327 	if (user_length < vtoc->efi_nparts) {
328 		kmem_free(efi, label_len);
329 		return (EINVAL);
330 	}
331 
332 	for (i = 0; i < vtoc->efi_nparts; i++) {
333 
334 	    UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid,
335 		efi_parts[i].efi_gpe_PartitionTypeGUID);
336 
337 	    for (j = 0;
338 		j < sizeof (conversion_array) / sizeof (struct uuid_to_ptag);
339 		j++) {
340 
341 		    if (bcmp(&vtoc->efi_parts[i].p_guid,
342 			&conversion_array[j].uuid,
343 			sizeof (struct uuid)) == 0) {
344 			    vtoc->efi_parts[i].p_tag = j;
345 			    break;
346 		    }
347 	    }
348 	    if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
349 		    continue;
350 	    vtoc->efi_parts[i].p_flag =
351 		LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs);
352 	    vtoc->efi_parts[i].p_start =
353 		LE_64(efi_parts[i].efi_gpe_StartingLBA);
354 	    vtoc->efi_parts[i].p_size =
355 		LE_64(efi_parts[i].efi_gpe_EndingLBA) -
356 		    vtoc->efi_parts[i].p_start + 1;
357 	    for (j = 0; j < EFI_PART_NAME_LEN; j++) {
358 		vtoc->efi_parts[i].p_name[j] =
359 		    (uchar_t)LE_16(efi_parts[i].efi_gpe_PartitionName[j]);
360 	    }
361 
362 	    UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid,
363 		efi_parts[i].efi_gpe_UniquePartitionGUID);
364 	}
365 	kmem_free(efi, label_len);
366 
367 	return (0);
368 }
369 
370 /*
371  * Read EFI - return 0 upon success.
372  */
373 static int
374 vd_efi_alloc_and_read(vd_efi_dev_t *dev, struct dk_gpt **vtoc, size_t *vtoc_len)
375 {
376 	int status;
377 	uint32_t nparts;
378 	int length;
379 
380 	/* figure out the number of entries that would fit into 16K */
381 	nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t);
382 	length = (int) sizeof (struct dk_gpt) +
383 	    (int) sizeof (struct dk_part) * (nparts - 1);
384 
385 	*vtoc = kmem_zalloc(length, KM_SLEEP);
386 	(*vtoc)->efi_nparts = nparts;
387 	status = vd_efi_read(dev, *vtoc);
388 
389 	if ((status == EINVAL) && (*vtoc)->efi_nparts > nparts) {
390 		kmem_free(*vtoc, length);
391 		length = (int) sizeof (struct dk_gpt) +
392 				(int) sizeof (struct dk_part) *
393 				((*vtoc)->efi_nparts - 1);
394 		nparts = (*vtoc)->efi_nparts;
395 		*vtoc = kmem_alloc(length, KM_SLEEP);
396 		status = vd_efi_read(dev, *vtoc);
397 	}
398 
399 	if (status != 0) {
400 		VD_EFI_DEBUG("read of EFI table failed with error=%d\n",
401 		    status);
402 		kmem_free(*vtoc, length);
403 		*vtoc = NULL;
404 		*vtoc_len = 0;
405 		return (status);
406 	}
407 
408 	*vtoc_len = length;
409 	return (0);
410 }
411 
412 int
413 vdc_efi_alloc_and_read(dev_t dev, struct dk_gpt **vtoc, size_t *vtoc_len)
414 {
415 	vd_efi_dev_t efi_dev;
416 
417 	ASSERT(vdc_ioctl_func != NULL);
418 
419 	efi_dev.caller = VD_EFI_CALLER_VDC;
420 	efi_dev.ioctl_dev.vdc = dev;
421 
422 	return (vd_efi_alloc_and_read(&efi_dev, vtoc, vtoc_len));
423 }
424 
425 int
426 vds_efi_alloc_and_read(ldi_handle_t dev, struct dk_gpt **vtoc, size_t *vtoc_len)
427 {
428 	vd_efi_dev_t efi_dev;
429 
430 	efi_dev.caller = VD_EFI_CALLER_VDS;
431 	efi_dev.ioctl_dev.vds = dev;
432 
433 	return (vd_efi_alloc_and_read(&efi_dev, vtoc, vtoc_len));
434 }
435 
/*
 * Release a dk_gpt structure.  ptr and length are handed unchanged to
 * kmem_free(), so length must be the size the structure was allocated
 * with (the *vtoc_len value reported by the alloc_and_read functions).
 */
void
vd_efi_free(struct dk_gpt *ptr, size_t length)
{
	kmem_free(ptr, length);
}
441 
442 void
443 vdc_efi_init(int (*func)(dev_t, int, caddr_t, int))
444 {
445 	vdc_ioctl_func = func;
446 }
447 
448 void
449 vdc_efi_fini(void)
450 {
451 	vdc_ioctl_func = NULL;
452 }
453 
454 /*
455  * This function stores EFI data (as returned by efi_alloc_and_read()) into
456  * a vtoc structure. The vDisk driver uses a vtoc structure to store generic
457  * information about disk partitions.
458  */
459 void
460 vd_efi_to_vtoc(struct dk_gpt *efi, struct vtoc *vtoc)
461 {
462 	int i, nparts;
463 
464 	bzero(vtoc, sizeof (struct vtoc));
465 
466 	vtoc->v_sanity = VTOC_SANE;
467 
468 	nparts = efi->efi_nparts;
469 	for (i = 0; i < nparts; i++) {
470 		if (efi->efi_parts[i].p_tag != V_RESERVED)
471 			continue;
472 		bcopy(efi->efi_parts[i].p_name, vtoc->v_volume,
473 		    LEN_DKL_VVOL);
474 		bcopy(efi->efi_parts[i].p_name, vtoc->v_asciilabel,
475 		    EFI_PART_NAME_LEN);
476 		break;
477 	}
478 
479 	vtoc->v_sectorsz = efi->efi_lbasize;
480 	vtoc->v_nparts = nparts;
481 	for (i = 0; i < nparts; i++) {
482 		/*
483 		 * EFI can have more than 8 partitions. However the current
484 		 * implementation of EFI on Solaris only support 7 partitions
485 		 * (s0 to s6). There is no partition s7 but the minor number
486 		 * corresponding to slice 7 is used to represent the whole
487 		 * disk which data are stored in the "Sun Reserved" partition.
488 		 * So we use the entry 7 of the vtoc structure to store
489 		 * information about the whole disk.
490 		 */
491 		if (efi->efi_parts[i].p_tag == V_RESERVED) {
492 			vtoc->v_part[VD_EFI_WD_SLICE].p_tag =
493 				efi->efi_parts[i].p_tag;
494 			vtoc->v_part[VD_EFI_WD_SLICE].p_flag =
495 				efi->efi_parts[i].p_flag;
496 			vtoc->v_part[VD_EFI_WD_SLICE].p_start =
497 				efi->efi_parts[i].p_start;
498 			vtoc->v_part[VD_EFI_WD_SLICE].p_size =
499 				efi->efi_parts[i].p_size;
500 			continue;
501 		}
502 
503 		if (i >= VD_EFI_WD_SLICE) {
504 			continue;
505 		}
506 
507 		vtoc->v_part[i].p_tag = efi->efi_parts[i].p_tag;
508 		if (efi->efi_parts[i].p_tag != V_UNASSIGNED) {
509 			vtoc->v_part[i].p_flag = efi->efi_parts[i].p_flag;
510 			vtoc->v_part[i].p_start = efi->efi_parts[i].p_start;
511 			vtoc->v_part[i].p_size = efi->efi_parts[i].p_size;
512 		}
513 	}
514 }
515