xref: /freebsd/sys/contrib/openzfs/lib/libzfs/libzfs_import.c (revision da5137abdf463bb5fee85061958a14dd12bc043e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25  * Copyright 2015 RackTop Systems.
26  * Copyright (c) 2016, Intel Corporation.
27  */
28 
29 #include <errno.h>
30 #include <libintl.h>
31 #include <libgen.h>
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sys/stat.h>
36 #include <unistd.h>
37 #include <sys/vdev_impl.h>
38 #include <libzfs.h>
39 #include "libzfs_impl.h"
40 #include <libzutil.h>
41 #include <sys/arc_impl.h>
42 
43 /*
44  * Returns true if the named pool matches the given GUID.
45  */
46 static int
47 pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
48     boolean_t *isactive)
49 {
50 	zpool_handle_t *zhp;
51 
52 	if (zpool_open_silent(hdl, name, &zhp) != 0)
53 		return (-1);
54 
55 	if (zhp == NULL) {
56 		*isactive = B_FALSE;
57 		return (0);
58 	}
59 
60 	uint64_t theguid = fnvlist_lookup_uint64(zhp->zpool_config,
61 	    ZPOOL_CONFIG_POOL_GUID);
62 
63 	zpool_close(zhp);
64 
65 	*isactive = (theguid == guid);
66 	return (0);
67 }
68 
69 static nvlist_t *
70 refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
71 {
72 	nvlist_t *nvl;
73 	zfs_cmd_t zc = {"\0"};
74 	int err, dstbuf_size;
75 
76 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
77 		return (NULL);
78 
79 	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 32);
80 
81 	if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) {
82 		zcmd_free_nvlists(&zc);
83 		return (NULL);
84 	}
85 
86 	while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
87 	    &zc)) != 0 && errno == ENOMEM) {
88 		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
89 			zcmd_free_nvlists(&zc);
90 			return (NULL);
91 		}
92 	}
93 
94 	if (err) {
95 		zcmd_free_nvlists(&zc);
96 		return (NULL);
97 	}
98 
99 	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
100 		zcmd_free_nvlists(&zc);
101 		return (NULL);
102 	}
103 
104 	zcmd_free_nvlists(&zc);
105 	return (nvl);
106 }
107 
108 static nvlist_t *
109 refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
110 {
111 	return (refresh_config((libzfs_handle_t *)handle, tryconfig));
112 }
113 
114 static int
115 pool_active_libzfs(void *handle, const char *name, uint64_t guid,
116     boolean_t *isactive)
117 {
118 	return (pool_active((libzfs_handle_t *)handle, name, guid, isactive));
119 }
120 
/*
 * Config operations vector handed to libzutil's pool-import code so it
 * can refresh configs and test pool activity through libzfs.
 */
const pool_config_ops_t libzfs_config_ops = {
	.pco_refresh_config = refresh_config_libzfs,
	.pco_pool_active = pool_active_libzfs,
};
125 
126 /*
127  * Return the offset of the given label.
128  */
129 static uint64_t
130 label_offset(uint64_t size, int l)
131 {
132 	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
133 	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
134 	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
135 }
136 
137 /*
138  * Given a file descriptor, clear (zero) the label information.  This function
139  * is used in the appliance stack as part of the ZFS sysevent module and
140  * to implement the "zpool labelclear" command.
141  */
142 int
143 zpool_clear_label(int fd)
144 {
145 	struct stat64 statbuf;
146 	int l;
147 	vdev_label_t *label;
148 	l2arc_dev_hdr_phys_t *l2dhdr;
149 	uint64_t size;
150 	int labels_cleared = 0, header_cleared = 0;
151 	boolean_t clear_l2arc_header = B_FALSE;
152 
153 	if (fstat64_blk(fd, &statbuf) == -1)
154 		return (0);
155 
156 	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
157 
158 	if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
159 		return (-1);
160 
161 	if ((l2dhdr = calloc(1, sizeof (l2arc_dev_hdr_phys_t))) == NULL) {
162 		free(label);
163 		return (-1);
164 	}
165 
166 	for (l = 0; l < VDEV_LABELS; l++) {
167 		uint64_t state, guid, l2cache;
168 		nvlist_t *config;
169 
170 		if (pread64(fd, label, sizeof (vdev_label_t),
171 		    label_offset(size, l)) != sizeof (vdev_label_t)) {
172 			continue;
173 		}
174 
175 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
176 		    sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) {
177 			continue;
178 		}
179 
180 		/* Skip labels which do not have a valid guid. */
181 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
182 		    &guid) != 0 || guid == 0) {
183 			nvlist_free(config);
184 			continue;
185 		}
186 
187 		/* Skip labels which are not in a known valid state. */
188 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
189 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
190 			nvlist_free(config);
191 			continue;
192 		}
193 
194 		/* If the device is a cache device clear the header. */
195 		if (!clear_l2arc_header) {
196 			if (nvlist_lookup_uint64(config,
197 			    ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
198 			    l2cache == POOL_STATE_L2CACHE) {
199 				clear_l2arc_header = B_TRUE;
200 			}
201 		}
202 
203 		nvlist_free(config);
204 
205 		/*
206 		 * A valid label was found, overwrite this label's nvlist
207 		 * and uberblocks with zeros on disk.  This is done to prevent
208 		 * system utilities, like blkid, from incorrectly detecting a
209 		 * partial label.  The leading pad space is left untouched.
210 		 */
211 		memset(label, 0, sizeof (vdev_label_t));
212 		size_t label_size = sizeof (vdev_label_t) - (2 * VDEV_PAD_SIZE);
213 
214 		if (pwrite64(fd, label, label_size, label_offset(size, l) +
215 		    (2 * VDEV_PAD_SIZE)) == label_size) {
216 			labels_cleared++;
217 		}
218 	}
219 
220 	/* Clear the L2ARC header. */
221 	if (clear_l2arc_header) {
222 		memset(l2dhdr, 0, sizeof (l2arc_dev_hdr_phys_t));
223 		if (pwrite64(fd, l2dhdr, sizeof (l2arc_dev_hdr_phys_t),
224 		    VDEV_LABEL_START_SIZE) == sizeof (l2arc_dev_hdr_phys_t)) {
225 			header_cleared++;
226 		}
227 	}
228 
229 	free(label);
230 	free(l2dhdr);
231 
232 	if (labels_cleared == 0)
233 		return (-1);
234 
235 	return (0);
236 }
237 
238 static boolean_t
239 find_guid(nvlist_t *nv, uint64_t guid)
240 {
241 	nvlist_t **child;
242 	uint_t c, children;
243 
244 	if (fnvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID) == guid)
245 		return (B_TRUE);
246 
247 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
248 	    &child, &children) == 0) {
249 		for (c = 0; c < children; c++)
250 			if (find_guid(child[c], guid))
251 				return (B_TRUE);
252 	}
253 
254 	return (B_FALSE);
255 }
256 
/*
 * Callback state for find_aux(): which auxiliary vdev class to search,
 * the vdev guid to match, and (on a match) the still-open pool handle.
 */
typedef struct aux_cbdata {
	const char	*cb_type;	/* ZPOOL_CONFIG_SPARES or _L2CACHE */
	uint64_t	cb_guid;	/* vdev guid to look for */
	zpool_handle_t	*cb_zhp;	/* matching pool; caller must close */
} aux_cbdata_t;
262 
263 static int
264 find_aux(zpool_handle_t *zhp, void *data)
265 {
266 	aux_cbdata_t *cbp = data;
267 	nvlist_t **list;
268 	uint_t count;
269 
270 	nvlist_t *nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
271 	    ZPOOL_CONFIG_VDEV_TREE);
272 
273 	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
274 	    &list, &count) == 0) {
275 		for (uint_t i = 0; i < count; i++) {
276 			uint64_t guid = fnvlist_lookup_uint64(list[i],
277 			    ZPOOL_CONFIG_GUID);
278 			if (guid == cbp->cb_guid) {
279 				cbp->cb_zhp = zhp;
280 				return (1);
281 			}
282 		}
283 	}
284 
285 	zpool_close(zhp);
286 	return (0);
287 }
288 
289 /*
290  * Determines if the pool is in use.  If so, it returns true and the state of
291  * the pool as well as the name of the pool.  Name string is allocated and
292  * must be freed by the caller.
293  */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name = NULL;
	boolean_t ret;
	uint64_t guid = 0, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	/* A nonzero return here indicates allocation failure, not "no label". */
	if (zpool_read_label(fd, &config, NULL) != 0) {
		(void) no_memory(hdl);
		return (-1);
	}

	/* No label on the device: it is not in use by any pool. */
	if (config == NULL)
		return (0);

	stateval = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
	vdev_guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID);

	/*
	 * Spare and l2cache labels carry no pool name/guid; only fetch
	 * these for regular pool states.  Note 'name' points into
	 * 'config' and is only valid until the nvlist is freed.
	 */
	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
		guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		/*
		 * A pool with an exported state may in fact be imported
		 * read-only, so check the in-core state to see if it's
		 * active and imported read-only.  If it is, set
		 * its state to active.
		 */
		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
				stateval = POOL_STATE_ACTIVE;

			/*
			 * All we needed the zpool handle for is the
			 * readonly prop check.
			 */
			zpool_close(zhp);
		}

		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot = fnvlist_lookup_nvlist(
				    pool_config, ZPOOL_CONFIG_VDEV_TREE);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that currently
			 * in use within another pool.  Since we return B_TRUE,
			 * libdiskmgt will continue to prevent generic consumers
			 * from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated, and
		 * we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			/* find_aux left the matching pool handle open */
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:

		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			/* find_aux left the matching pool handle open */
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}


	if (ret) {
		/* Copy the name before 'config' (which may own it) is freed. */
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	/* Close the handle find_aux may have handed back to us. */
	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
461