xref: /titanic_50/usr/src/uts/sun4v/io/vdsk_common.c (revision d84f0041660230c140a3895b8bfc7e428c7ccb1d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/crc32.h>
28 #include <sys/cred.h>
29 #include <sys/ddi.h>
30 #include <sys/dkio.h>
31 #include <sys/file.h>
32 #include <sys/kmem.h>
33 #include <sys/sunddi.h>
34 #include <sys/sunldi.h>
35 #include <sys/types.h>
36 #include <sys/varargs.h>
37 #include <sys/vtoc.h>
38 
39 #include <sys/vdsk_common.h>
40 
41 /*
42  * Hooks for EFI support
43  */
44 
/*
 * This code provides generic functions to the vds and vdc drivers to read
 * EFI labels from the disk backend and to get the EFI GPT and GPE. It is
 * inspired by the libefi userland library and the cmlb driver. We will
 * certainly be able to remove this code if RFE 6213117 is ever implemented.
 */
51 
#ifdef DEBUG

/* Set to a non-zero value to enable the VD_EFI_DEBUG() messages. */
static int vd_efi_debug = 0;

/*
 * Print a debug message through vd_efi_print() when vd_efi_debug is set.
 *
 * The macro is function-like in both the DEBUG and non-DEBUG builds, and the
 * do/while wrapper makes it expand to exactly one statement so that it cannot
 * capture the "else" of an enclosing if/else at the call site.
 */
#define	VD_EFI_DEBUG(...)					\
	do {							\
		if (vd_efi_debug)				\
			vd_efi_print(__VA_ARGS__);		\
	} while (0)

#else

#define	VD_EFI_DEBUG(...)

#endif

/*
 * Size in bytes of a buffer holding nparts GPE entries, rounded up to a whole
 * number of blocks of (vdisk)->block_size bytes.
 *
 * nparts must be at least 1: sizeof yields an unsigned value, so with
 * nparts == 0 the "- 1" wraps around instead of producing a small length.
 */
#define	VD_EFI_GPE_LEN(vdisk, nparts) \
	((((sizeof (efi_gpe_t) * (nparts) - 1) / (vdisk)->block_size) + 1) * \
	(vdisk)->block_size)
67 
68 static void
vd_efi_print(const char * format,...)69 vd_efi_print(const char *format, ...)
70 {
71 	va_list args;
72 
73 	va_start(args, format);
74 	vcmn_err(CE_CONT, format, args);
75 	va_end(args);
76 }
77 
78 /*
79  * Return a 32-bit CRC of the contents of the buffer.
80  *
81  * The seed is 0xffffffff and the result is XORed with 0xffffffff
82  * because this is what the Itanium firmware expects.
83  */
84 unsigned int
vd_efi_crc32(const unsigned char * s,unsigned int len)85 vd_efi_crc32(const unsigned char *s, unsigned int len)
86 {
87 	unsigned int crc32val;
88 
89 	CRC32(crc32val, s, len, -1U, crc32_table);
90 
91 	return (crc32val ^ -1U);
92 }
93 
94 static int
vd_efi_ioctl(vd_efi_dev_t * dev,int cmd,void * arg)95 vd_efi_ioctl(vd_efi_dev_t *dev, int cmd, void *arg)
96 {
97 	int status;
98 
99 	ASSERT(dev->vdisk_ioctl != NULL);
100 	ASSERT(dev->vdisk != NULL);
101 	status = (*dev->vdisk_ioctl)(dev->vdisk, cmd, (uintptr_t)arg);
102 
103 	return (status);
104 }
105 
106 /*
107  * Swap GPT data to match with the system endianness.
108  */
109 static void
vd_efi_swap_gpt(efi_gpt_t * gpt)110 vd_efi_swap_gpt(efi_gpt_t *gpt)
111 {
112 	gpt->efi_gpt_Signature = LE_64(gpt->efi_gpt_Signature);
113 	gpt->efi_gpt_Revision = LE_32(gpt->efi_gpt_Revision);
114 	gpt->efi_gpt_HeaderSize = LE_32(gpt->efi_gpt_HeaderSize);
115 	gpt->efi_gpt_HeaderCRC32 = LE_32(gpt->efi_gpt_HeaderCRC32);
116 	gpt->efi_gpt_MyLBA = LE_64(gpt->efi_gpt_MyLBA);
117 	gpt->efi_gpt_AlternateLBA = LE_64(gpt->efi_gpt_AlternateLBA);
118 	gpt->efi_gpt_FirstUsableLBA = LE_64(gpt->efi_gpt_FirstUsableLBA);
119 	gpt->efi_gpt_LastUsableLBA = LE_64(gpt->efi_gpt_LastUsableLBA);
120 	UUID_LE_CONVERT(gpt->efi_gpt_DiskGUID, gpt->efi_gpt_DiskGUID);
121 	gpt->efi_gpt_PartitionEntryLBA = LE_64(gpt->efi_gpt_PartitionEntryLBA);
122 	gpt->efi_gpt_NumberOfPartitionEntries =
123 	    LE_32(gpt->efi_gpt_NumberOfPartitionEntries);
124 	gpt->efi_gpt_SizeOfPartitionEntry =
125 	    LE_32(gpt->efi_gpt_SizeOfPartitionEntry);
126 	gpt->efi_gpt_PartitionEntryArrayCRC32 =
127 	    LE_32(gpt->efi_gpt_PartitionEntryArrayCRC32);
128 }
129 
130 /*
131  * Swap GPE data to match with the system endianness.
132  */
133 static void
vd_efi_swap_gpe(efi_gpe_t * gpe,int nparts)134 vd_efi_swap_gpe(efi_gpe_t *gpe, int nparts)
135 {
136 	int i, j;
137 
138 	for (i = 0; i < nparts; i++) {
139 		UUID_LE_CONVERT(gpe[i].efi_gpe_PartitionTypeGUID,
140 		    gpe[i].efi_gpe_PartitionTypeGUID);
141 		UUID_LE_CONVERT(gpe[i].efi_gpe_UniquePartitionGUID,
142 		    gpe[i].efi_gpe_UniquePartitionGUID);
143 		gpe[i].efi_gpe_StartingLBA = LE_64(gpe[i].efi_gpe_StartingLBA);
144 		gpe[i].efi_gpe_EndingLBA = LE_64(gpe[i].efi_gpe_EndingLBA);
145 		gpe[i].efi_gpe_Attributes.PartitionAttrs =
146 		    LE_16(gpe[i].efi_gpe_Attributes.PartitionAttrs);
147 		for (j = 0; j < EFI_PART_NAME_LEN; j++) {
148 			gpe[i].efi_gpe_PartitionName[j] =
149 			    LE_16(gpe[i].efi_gpe_PartitionName[j]);
150 		}
151 	}
152 }
153 
154 /*
155  * Check that an EFI GPT is valid. This function should be called with a raw
156  * EFI GPT i.e. GPT data should be in little endian format as indicated in the
157  * EFI specification and they should not have been swapped to match with the
158  * system endianness.
159  */
160 static int
vd_efi_check_gpt(vd_efi_dev_t * dev,efi_gpt_t * gpt)161 vd_efi_check_gpt(vd_efi_dev_t *dev, efi_gpt_t *gpt)
162 {
163 	uint_t crc_stored, crc_computed;
164 
165 	if (gpt->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
166 		VD_EFI_DEBUG("Bad EFI signature: 0x%llx != 0x%llx\n",
167 		    (long long)gpt->efi_gpt_Signature,
168 		    (long long)LE_64(EFI_SIGNATURE));
169 		return (EINVAL);
170 	}
171 
172 	/*
173 	 * check CRC of the header; the size of the header should
174 	 * never be larger than one block
175 	 */
176 	if (LE_32(gpt->efi_gpt_HeaderSize) > dev->block_size) {
177 		VD_EFI_DEBUG("Header size (%u bytes) larger than one block"
178 		    "(%u bytes)\n", LE_32(gpt->efi_gpt_HeaderSize),
179 		    dev->block_size);
180 		return (EINVAL);
181 	}
182 
183 	crc_stored = LE_32(gpt->efi_gpt_HeaderCRC32);
184 	gpt->efi_gpt_HeaderCRC32 = LE_32(0);
185 	crc_computed = vd_efi_crc32((unsigned char *)gpt,
186 	    LE_32(gpt->efi_gpt_HeaderSize));
187 	gpt->efi_gpt_HeaderCRC32 = LE_32(crc_stored);
188 
189 	if (crc_stored != crc_computed) {
190 		VD_EFI_DEBUG("Bad EFI CRC: 0x%x != 0x%x\n",
191 		    crc_stored, crc_computed);
192 			return (EINVAL);
193 	}
194 
195 	return (0);
196 }
197 
198 /*
199  * Allocate and read the EFI GPT and GPE from the disk backend. Note that the
200  * on-disk GPT and GPE are stored in little endian format but this function
201  * returns them using the endianness of the system so that any field in the
202  * GPT/GPE structures can be directly accessible without any further conversion.
203  * The caller is responsible for freeing the allocated structures by calling
204  * vd_efi_free().
205  */
206 int
vd_efi_alloc_and_read(vd_efi_dev_t * dev,efi_gpt_t ** efi_gpt,efi_gpe_t ** efi_gpe)207 vd_efi_alloc_and_read(vd_efi_dev_t *dev, efi_gpt_t **efi_gpt,
208     efi_gpe_t **efi_gpe)
209 {
210 	dk_efi_t		dk_efi;
211 	efi_gpt_t		*gpt = NULL;
212 	efi_gpe_t		*gpe = NULL;
213 	efi_gpt_t		*data = NULL;
214 	size_t			gpt_len, gpe_len, data_len;
215 	int 			nparts, status;
216 
217 	ASSERT(dev->block_size >= sizeof (efi_gpt_t));
218 	gpt_len = dev->block_size;
219 	gpt = kmem_zalloc(gpt_len, KM_SLEEP);
220 
221 	/*
222 	 * Read the EFI GPT.
223 	 */
224 	dk_efi.dki_lba = 1;
225 	dk_efi.dki_data = gpt;
226 	dk_efi.dki_length = gpt_len;
227 
228 	status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi);
229 
230 	if (status == EINVAL) {
231 		/*
232 		 * Because the DKIOCGETEFI ioctl was initially incorrectly
233 		 * implemented for a ZFS volume, the ioctl can fail with
234 		 * EINVAL if it is done on a ZFS volume managed by an old
235 		 * version of Solaris. This can happen if a ZFS volume is
236 		 * exported as a single-slice disk by a service domain
237 		 * running Solaris older than Solaris 10 Update 6.
238 		 *
239 		 * So we retry the ioctl to read both the GPT and the GPE at
240 		 * the same time accordingly to the old implementation.
241 		 */
242 		data_len = sizeof (efi_gpt_t) + sizeof (efi_gpe_t);
243 		data = kmem_zalloc(data_len, KM_SLEEP);
244 
245 		dk_efi.dki_lba = 1;
246 		dk_efi.dki_data = data;
247 		dk_efi.dki_length = data_len;
248 		status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi);
249 
250 		if (status == 0)
251 			bcopy(data, gpt, sizeof (efi_gpt_t));
252 	}
253 
254 	if (status != 0) {
255 		VD_EFI_DEBUG("DKIOCGETEFI (GPT, LBA=1) error %d\n", status);
256 		goto errdone;
257 	}
258 
259 	if ((status = vd_efi_check_gpt(dev, gpt)) != 0) {
260 		/*
261 		 * No valid label here; try the alternate. The alternate GPT is
262 		 * located in the last block of the disk.
263 		 */
264 		dk_efi.dki_lba = dev->disk_size - 1;
265 		dk_efi.dki_data = gpt;
266 		dk_efi.dki_length = gpt_len;
267 
268 		if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi)) != 0) {
269 			VD_EFI_DEBUG("DKIOCGETEFI (LBA=%lu) error %d\n",
270 			    dev->disk_size - 1, status);
271 			goto errdone;
272 		}
273 
274 		if ((status = vd_efi_check_gpt(dev, gpt)) != 0)
275 			goto errdone;
276 
277 		VD_EFI_DEBUG("efi_read: primary label corrupt; using backup\n");
278 	}
279 
280 	/* swap GPT data after checking the GPT is valid */
281 	vd_efi_swap_gpt(gpt);
282 
283 	/*
284 	 * Read the EFI GPE.
285 	 */
286 	nparts = gpt->efi_gpt_NumberOfPartitionEntries;
287 
288 	if (nparts > NDKMAP + 1) {
289 		VD_EFI_DEBUG("Too many EFI partitions (%u)", nparts);
290 		status = EINVAL;
291 		goto errdone;
292 	}
293 
294 	if (nparts == 0) {
295 		VD_EFI_DEBUG("No partition defined");
296 		status = EINVAL;
297 		goto errdone;
298 	}
299 
300 	gpe_len = VD_EFI_GPE_LEN(dev, nparts);
301 	gpe = kmem_zalloc(gpe_len, KM_SLEEP);
302 
303 	if (data != NULL) {
304 		/*
305 		 * The data variable is not NULL if we have used the old ioctl
306 		 * implementation for a ZFS volume. In that case, we only expect
307 		 * one partition and GPE data are already available in the data
308 		 * buffer, right after GPT data.
309 		 */
310 		if (nparts != 1) {
311 			VD_EFI_DEBUG("Unexpected number of partitions (%u)",
312 			    nparts);
313 			status = EINVAL;
314 			goto errdone;
315 		}
316 
317 		bcopy(data + 1, gpe, sizeof (efi_gpe_t));
318 
319 	} else {
320 		dk_efi.dki_lba = gpt->efi_gpt_PartitionEntryLBA;
321 		dk_efi.dki_data = (efi_gpt_t *)gpe;
322 		dk_efi.dki_length = gpe_len;
323 
324 		if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi)) != 0) {
325 			VD_EFI_DEBUG("DKIOCGETEFI (GPE, LBA=%lu) error %d\n",
326 			    gpt->efi_gpt_PartitionEntryLBA, status);
327 			goto errdone;
328 		}
329 	}
330 
331 	vd_efi_swap_gpe(gpe, nparts);
332 
333 	*efi_gpt = gpt;
334 	*efi_gpe = gpe;
335 
336 errdone:
337 
338 	if (data != NULL)
339 		kmem_free(data, data_len);
340 
341 	if (status != 0) {
342 		if (gpe != NULL)
343 			kmem_free(gpe, gpe_len);
344 		if (gpt != NULL)
345 			kmem_free(gpt, gpt_len);
346 	}
347 
348 	return (status);
349 }
350 
351 /*
352  * Free the EFI GPE and GPT structures returned by vd_efi_alloc_and_read().
353  */
354 void
vd_efi_free(vd_efi_dev_t * dev,efi_gpt_t * gpt,efi_gpe_t * gpe)355 vd_efi_free(vd_efi_dev_t *dev, efi_gpt_t *gpt, efi_gpe_t *gpe)
356 {
357 	kmem_free(gpe, VD_EFI_GPE_LEN(dev,
358 	    gpt->efi_gpt_NumberOfPartitionEntries));
359 	kmem_free(gpt, dev->block_size);
360 }
361