1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
26 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27 * Copyright (c) 2018 Datto Inc.
28 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
29 * Copyright (c) 2017, Intel Corporation.
30 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
31 */
32
33 #include <errno.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <libgen.h>
40 #include <zone.h>
41 #include <sys/stat.h>
42 #include <sys/efi_partition.h>
43 #include <sys/systeminfo.h>
44 #include <sys/zfs_ioctl.h>
45 #include <sys/vdev_disk.h>
46 #include <dlfcn.h>
47 #include <libzutil.h>
48
49 #include "zfs_namecheck.h"
50 #include "zfs_prop.h"
51 #include "../../libzfs_impl.h"
52 #include "zfs_comutil.h"
53 #include "zfeature_common.h"
54
55 /*
56 * If the device has being dynamically expanded then we need to relabel
57 * the disk to use the new unallocated space.
58 */
59 int
zpool_relabel_disk(libzfs_handle_t * hdl,const char * path,const char * msg)60 zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
61 {
62 int fd, error;
63
64 if ((fd = open(path, O_RDWR|O_DIRECT|O_CLOEXEC)) < 0) {
65 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
66 "relabel '%s': unable to open device: %d"), path, errno);
67 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
68 }
69
70 /*
71 * It's possible that we might encounter an error if the device
72 * does not have any unallocated space left. If so, we simply
73 * ignore that error and continue on.
74 */
75 error = efi_use_whole_disk(fd);
76
77 /* Flush the buffers to disk and invalidate the page cache. */
78 (void) fsync(fd);
79 (void) ioctl(fd, BLKFLSBUF);
80
81 (void) close(fd);
82 if (error && error != VT_ENOSPC) {
83 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
84 "relabel '%s': unable to read disk capacity"), path);
85 return (zfs_error(hdl, EZFS_NOCAP, msg));
86 }
87 return (0);
88 }
89
90 /*
91 * Read the EFI label from the config, if a label does not exist then
92 * pass back the error to the caller. If the caller has passed a non-NULL
93 * diskaddr argument then we set it to the starting address of the EFI
94 * partition.
95 */
96 static int
read_efi_label(nvlist_t * config,diskaddr_t * sb)97 read_efi_label(nvlist_t *config, diskaddr_t *sb)
98 {
99 const char *path;
100 int fd;
101 char diskname[MAXPATHLEN];
102 int err = -1;
103
104 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
105 return (err);
106
107 (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
108 strrchr(path, '/'));
109 if ((fd = open(diskname, O_RDONLY|O_DIRECT|O_CLOEXEC)) >= 0) {
110 struct dk_gpt *vtoc;
111
112 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
113 if (sb != NULL)
114 *sb = vtoc->efi_parts[0].p_start;
115 efi_free(vtoc);
116 }
117 (void) close(fd);
118 }
119 return (err);
120 }
121
122 /*
123 * determine where a partition starts on a disk in the current
124 * configuration
125 */
126 static diskaddr_t
find_start_block(nvlist_t * config)127 find_start_block(nvlist_t *config)
128 {
129 nvlist_t **child;
130 uint_t c, children;
131 diskaddr_t sb = MAXOFFSET_T;
132 uint64_t wholedisk;
133
134 if (nvlist_lookup_nvlist_array(config,
135 ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
136 if (nvlist_lookup_uint64(config,
137 ZPOOL_CONFIG_WHOLE_DISK,
138 &wholedisk) != 0 || !wholedisk) {
139 return (MAXOFFSET_T);
140 }
141 if (read_efi_label(config, &sb) < 0)
142 sb = MAXOFFSET_T;
143 return (sb);
144 }
145
146 for (c = 0; c < children; c++) {
147 sb = find_start_block(child[c]);
148 if (sb != MAXOFFSET_T) {
149 return (sb);
150 }
151 }
152 return (MAXOFFSET_T);
153 }
154
155 static int
zpool_label_disk_check(char * path)156 zpool_label_disk_check(char *path)
157 {
158 struct dk_gpt *vtoc;
159 int fd, err;
160
161 if ((fd = open(path, O_RDONLY|O_DIRECT|O_CLOEXEC)) < 0)
162 return (errno);
163
164 if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
165 (void) close(fd);
166 return (err);
167 }
168
169 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
170 efi_free(vtoc);
171 (void) close(fd);
172 return (EIDRM);
173 }
174
175 efi_free(vtoc);
176 (void) close(fd);
177 return (0);
178 }
179
180 /*
181 * Generate a unique partition name for the ZFS member. Partitions must
182 * have unique names to ensure udev will be able to create symlinks under
183 * /dev/disk/by-partlabel/ for all pool members. The partition names are
184 * of the form <pool>-<unique-id>.
185 */
186 static void
zpool_label_name(char * label_name,int label_size)187 zpool_label_name(char *label_name, int label_size)
188 {
189 uint64_t id = 0;
190 int fd;
191
192 fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
193 if (fd >= 0) {
194 if (read(fd, &id, sizeof (id)) != sizeof (id))
195 id = 0;
196
197 close(fd);
198 }
199
200 if (id == 0)
201 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
202
203 snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
204 }
205
206 /*
207 * Label an individual disk. The name provided is the short name,
208 * stripped of any leading /dev path.
209 */
210 int
zpool_label_disk(libzfs_handle_t * hdl,zpool_handle_t * zhp,const char * name)211 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
212 {
213 char path[MAXPATHLEN];
214 struct dk_gpt *vtoc;
215 int rval, fd;
216 size_t resv = EFI_MIN_RESV_SIZE;
217 uint64_t slice_size;
218 diskaddr_t start_block;
219 char errbuf[ERRBUFLEN];
220
221 /* prepare an error message just in case */
222 (void) snprintf(errbuf, sizeof (errbuf),
223 dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
224
225 if (zhp) {
226 nvlist_t *nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
227 ZPOOL_CONFIG_VDEV_TREE);
228
229 if (zhp->zpool_start_block == 0)
230 start_block = find_start_block(nvroot);
231 else
232 start_block = zhp->zpool_start_block;
233 zhp->zpool_start_block = start_block;
234 } else {
235 /* new pool */
236 start_block = NEW_START_BLOCK;
237 }
238
239 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
240
241 if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL|O_CLOEXEC)) < 0) {
242 /*
243 * This shouldn't happen. We've long since verified that this
244 * is a valid device.
245 */
246 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
247 "label '%s': unable to open device: %d"), path, errno);
248 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
249 }
250
251 if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
252 /*
253 * The only way this can fail is if we run out of memory, or we
254 * were unable to read the disk's capacity
255 */
256 if (errno == ENOMEM)
257 (void) no_memory(hdl);
258
259 (void) close(fd);
260 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
261 "label '%s': unable to read disk capacity"), path);
262
263 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
264 }
265
266 slice_size = vtoc->efi_last_u_lba + 1;
267 slice_size -= EFI_MIN_RESV_SIZE;
268 if (start_block == MAXOFFSET_T)
269 start_block = NEW_START_BLOCK;
270 slice_size -= start_block;
271 slice_size = P2ALIGN_TYPED(slice_size, PARTITION_END_ALIGNMENT,
272 uint64_t);
273
274 vtoc->efi_parts[0].p_start = start_block;
275 vtoc->efi_parts[0].p_size = slice_size;
276
277 if (vtoc->efi_parts[0].p_size * vtoc->efi_lbasize < SPA_MINDEVSIZE) {
278 (void) close(fd);
279 efi_free(vtoc);
280
281 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
282 "label '%s': partition would be less than the minimum "
283 "device size (64M)"), path);
284 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
285 }
286
287 /*
288 * Why we use V_USR: V_BACKUP confuses users, and is considered
289 * disposable by some EFI utilities (since EFI doesn't have a backup
290 * slice). V_UNASSIGNED is supposed to be used only for zero size
291 * partitions, and efi_write() will fail if we use it.
292 * Other available types were all pretty specific.
293 * V_USR is as close to reality as we
294 * can get, in the absence of V_OTHER.
295 */
296 vtoc->efi_parts[0].p_tag = V_USR;
297 zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
298
299 vtoc->efi_parts[8].p_start = slice_size + start_block;
300 vtoc->efi_parts[8].p_size = resv;
301 vtoc->efi_parts[8].p_tag = V_RESERVED;
302
303 rval = efi_write(fd, vtoc);
304
305 /* Flush the buffers to disk and invalidate the page cache. */
306 (void) fsync(fd);
307 (void) ioctl(fd, BLKFLSBUF);
308
309 if (rval == 0)
310 rval = efi_rescan(fd);
311
312 /*
313 * Some block drivers (like pcata) may not support EFI GPT labels.
314 * Print out a helpful error message directing the user to manually
315 * label the disk and give a specific slice.
316 */
317 if (rval != 0) {
318 (void) close(fd);
319 efi_free(vtoc);
320
321 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
322 "parted(8) and then provide a specific slice: %d"), rval);
323 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
324 }
325
326 (void) close(fd);
327 efi_free(vtoc);
328
329 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
330 (void) zfs_append_partition(path, MAXPATHLEN);
331
332 /* Wait to udev to signal use the device has settled. */
333 rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
334 if (rval) {
335 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
336 "detect device partitions on '%s': %d"), path, rval);
337 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
338 }
339
340 /* We can't be to paranoid. Read the label back and verify it. */
341 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
342 rval = zpool_label_disk_check(path);
343 if (rval) {
344 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
345 "EFI label on '%s' is damaged. Ensure\nthis device "
346 "is not in use, and is functioning properly: %d"),
347 path, rval);
348 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
349 }
350 return (0);
351 }
352