xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_import.c (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * Pool import support functions.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */
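
/*
 * Illustrative sketch (an assumption about the in-memory layout, not code
 * from this file): the hierarchy above can be pictured as nested nvlists
 * keyed by printable guid/txg strings:
 *
 *	pools = {
 *		"<pool guid>" = {
 *			"<toplevel vdev guid>" = {
 *				"<label txg>" = <config nvlist>
 *			}
 *		}
 *	}
 *
 * The device scan and config assembly themselves live in libzutil; this file
 * supplies the libzfs-specific callbacks (config refresh and pool-active
 * checks) that libzutil invokes through libzfs_config_ops below.
 */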

#include <ctype.h>
#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <thread_pool.h>

#include <sys/vdev_impl.h>
#include <libzutil.h>

#include "libzfs.h"
#include "libzfs_impl.h"

/*
 * Check whether the named pool is currently active and whether its pool guid
 * matches the given guid.  The result is returned in *isactive; the return
 * value is 0 on success, or -1 on error.
 */
static int
pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
    boolean_t *isactive)
{
	zpool_handle_t *zhp;
	uint64_t theguid;

	if (zpool_open_silent(hdl, name, &zhp) != 0)
		return (-1);

	if (zhp == NULL) {
		*isactive = B_FALSE;
		return (0);
	}

	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
	    &theguid) == 0);

	zpool_close(zhp);

	*isactive = (theguid == guid);
	return (0);
}
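
/*
 * Example use (illustrative; the pool name is hypothetical):
 *
 *	boolean_t active;
 *
 *	if (pool_active(hdl, "tank", guid, &active) == 0 && active)
 *		... guid belongs to the currently imported pool "tank" ...
 */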

static nvlist_t *
refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
{
	nvlist_t *nvl;
	zfs_cmd_t zc = {"\0"};
	int err, dstbuf_size;

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
		return (NULL);

	/*
	 * The nvlist we get back from a tryimport can be considerably larger
	 * than the config we pass in, so start with a generous destination
	 * buffer and grow it on ENOMEM below.
	 */
	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 4);

	if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	/* Retry the tryimport ioctl, expanding the buffer, until it fits. */
	while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
	    &zc)) != 0 && errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (NULL);
		}
	}

	if (err) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	zcmd_free_nvlists(&zc);
	return (nvl);
}

/*
 * Thin adapters giving the functions above the generic (void *) handle
 * signatures that libzutil's config ops expect.
 */
static nvlist_t *
refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
{
	return (refresh_config((libzfs_handle_t *)handle, tryconfig));
}

static int
pool_active_libzfs(void *handle, const char *name, uint64_t guid,
    boolean_t *isactive)
{
	return (pool_active((libzfs_handle_t *)handle, name, guid, isactive));
}

const pool_config_ops_t libzfs_config_ops = {
	.pco_refresh_config = refresh_config_libzfs,
	.pco_pool_active = pool_active_libzfs,
};
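
/*
 * Illustrative use (a sketch against libzutil's import-search interface):
 *
 *	importargs_t args = { 0 };
 *	nvlist_t *pools;
 *
 *	pools = zpool_search_import(hdl, &args, &libzfs_config_ops);
 */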

/*
 * Return the offset of the given label.
 */
static uint64_t
label_offset(uint64_t size, int l)
{
	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}
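
/*
 * Worked example (illustrative): with VDEV_LABELS == 4 and writing L for
 * sizeof (vdev_label_t), a device of aligned size S holds its labels at
 *
 *	label 0: 0
 *	label 1: L
 *	label 2: S - 2L
 *	label 3: S - L
 *
 * i.e. two labels at the front of the device and two at the back.
 */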

/*
 * Given a file descriptor, clear (zero) the label information.
 */
int
zpool_clear_label(int fd)
{
	struct stat64 statbuf;
	int l;
	vdev_label_t *label;
	uint64_t size;

	if (fstat64(fd, &statbuf) == -1)
		return (0);
	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

	if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		if (pwrite64(fd, label, sizeof (vdev_label_t),
		    label_offset(size, l)) != sizeof (vdev_label_t)) {
			free(label);
			return (-1);
		}
	}

	free(label);
	return (0);
}

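/*
 * Example use (illustrative; the device path is hypothetical):
 *
 *	int fd = open("/dev/rdsk/c0t0d0s0", O_RDWR);
 *
 *	if (fd >= 0 && zpool_clear_label(fd) != 0)
 *		... failed to zero one or more labels ...
 */
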
/*
 * Determine whether the vdev configuration rooted at 'nv' contains a vdev
 * (searched recursively through all children) whose guid matches the given
 * guid.
 */
boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
	uint64_t tmp;
	nvlist_t **child;
	uint_t c, children;

	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
	if (tmp == guid)
		return (B_TRUE);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			if (find_guid(child[c], guid))
				return (B_TRUE);
	}

	return (B_FALSE);
}

typedef struct aux_cbdata {
	const char	*cb_type;
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} aux_cbdata_t;

/*
 * zpool_iter() callback: check whether the pool's aux vdev list of type
 * cb_type (spares or l2cache) contains the guid in cb_guid.  On a match,
 * keep the pool handle open in cb_zhp and stop iterating; otherwise close
 * the handle and continue.
 */
static int
find_aux(zpool_handle_t *zhp, void *data)
{
	aux_cbdata_t *cbp = data;
	nvlist_t **list;
	uint_t i, count;
	uint64_t guid;
	nvlist_t *nvroot;

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
	    &list, &count) == 0) {
		for (i = 0; i < count; i++) {
			verify(nvlist_lookup_uint64(list[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (guid == cbp->cb_guid) {
				cbp->cb_zhp = zhp;
				return (1);
			}
		}
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Determines whether the device is in use by a pool.  If so, *inuse is set
 * to B_TRUE, and the pool's state and name are returned in *state and
 * *namestr.  The name string is allocated and must be freed by the caller.
 * Returns 0 on success (whether or not the device is in use) and -1 on
 * error.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	if (zpool_read_label(fd, &config, NULL) != 0 && errno == ENOMEM) {
		(void) no_memory(hdl);
		return (-1);
	}

	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		/*
		 * A pool with an exported state may in fact be imported
		 * read-only, so check the in-core state to see if it's
		 * active and imported read-only.  If it is, set
		 * its state to active.
		 */
		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
				stateval = POOL_STATE_ACTIVE;

			/*
			 * All we needed the zpool handle for is the
			 * readonly prop check.
			 */
			zpool_close(zhp);
		}

		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that is
			 * currently in use within another pool.  Since we
			 * return B_TRUE, libdiskmgt will continue to prevent
			 * generic consumers from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools;
		 * disallowing that would complicate the code considerably, and
		 * we end up having to handle the shared case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:
		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}

	if (ret) {
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
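
/*
 * Example use (illustrative; the device path is hypothetical):
 *
 *	pool_state_t state;
 *	char *name = NULL;
 *	boolean_t inuse;
 *	int fd = open("/dev/rdsk/c0t0d0s0", O_RDONLY);
 *
 *	if (fd >= 0 && zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 &&
 *	    inuse) {
 *		(void) printf("device is part of pool '%s'\n", name);
 *		free(name);
 *	}
 */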
425