xref: /freebsd/sys/contrib/openzfs/lib/libzfs/libzfs_import.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
26  * Copyright 2015 RackTop Systems.
27  * Copyright (c) 2016, Intel Corporation.
28  */
29 
30 #include <errno.h>
31 #include <libintl.h>
32 #include <libgen.h>
33 #include <stddef.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <sys/stat.h>
37 #include <unistd.h>
38 #include <sys/vdev_impl.h>
39 #include <libzfs.h>
40 #include "libzfs_impl.h"
41 #include <libzutil.h>
42 #include <sys/arc_impl.h>
43 
44 /*
45  * Returns true if the named pool matches the given GUID.
46  */
47 static int
pool_active(libzfs_handle_t * hdl,const char * name,uint64_t guid,boolean_t * isactive)48 pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
49     boolean_t *isactive)
50 {
51 	zpool_handle_t *zhp;
52 
53 	if (zpool_open_silent(hdl, name, &zhp) != 0)
54 		return (-1);
55 
56 	if (zhp == NULL) {
57 		*isactive = B_FALSE;
58 		return (0);
59 	}
60 
61 	uint64_t theguid = fnvlist_lookup_uint64(zhp->zpool_config,
62 	    ZPOOL_CONFIG_POOL_GUID);
63 
64 	zpool_close(zhp);
65 
66 	*isactive = (theguid == guid);
67 	return (0);
68 }
69 
70 static nvlist_t *
refresh_config(libzfs_handle_t * hdl,nvlist_t * config)71 refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
72 {
73 	nvlist_t *nvl;
74 	zfs_cmd_t zc = {"\0"};
75 	int err, dstbuf_size;
76 
77 	zcmd_write_conf_nvlist(hdl, &zc, config);
78 
79 	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 32);
80 
81 	zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size);
82 
83 	while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
84 	    &zc)) != 0 && errno == ENOMEM)
85 		zcmd_expand_dst_nvlist(hdl, &zc);
86 
87 	if (err) {
88 		zcmd_free_nvlists(&zc);
89 		return (NULL);
90 	}
91 
92 	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
93 		zcmd_free_nvlists(&zc);
94 		return (NULL);
95 	}
96 
97 	zcmd_free_nvlists(&zc);
98 	return (nvl);
99 }
100 
101 static nvlist_t *
refresh_config_libzfs(void * handle,nvlist_t * tryconfig)102 refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
103 {
104 	return (refresh_config((libzfs_handle_t *)handle, tryconfig));
105 }
106 
107 static int
pool_active_libzfs(void * handle,const char * name,uint64_t guid,boolean_t * isactive)108 pool_active_libzfs(void *handle, const char *name, uint64_t guid,
109     boolean_t *isactive)
110 {
111 	return (pool_active((libzfs_handle_t *)handle, name, guid, isactive));
112 }
113 
114 const pool_config_ops_t libzfs_config_ops = {
115 	.pco_refresh_config = refresh_config_libzfs,
116 	.pco_pool_active = pool_active_libzfs,
117 };
118 
119 /*
120  * Return the offset of the given label.
121  */
122 static uint64_t
label_offset(uint64_t size,int l)123 label_offset(uint64_t size, int l)
124 {
125 	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
126 	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
127 	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
128 }
129 
130 /*
131  * Given a file descriptor, clear (zero) the label information.  This function
132  * is used in the appliance stack as part of the ZFS sysevent module and
133  * to implement the "zpool labelclear" command.
134  */
135 int
zpool_clear_label(int fd)136 zpool_clear_label(int fd)
137 {
138 	struct stat64 statbuf;
139 	int l;
140 	vdev_label_t *label;
141 	uint64_t size;
142 	boolean_t labels_cleared = B_FALSE, clear_l2arc_header = B_FALSE,
143 	    header_cleared = B_FALSE;
144 
145 	if (fstat64_blk(fd, &statbuf) == -1)
146 		return (0);
147 
148 	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
149 
150 	if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
151 		return (-1);
152 
153 	for (l = 0; l < VDEV_LABELS; l++) {
154 		uint64_t state, guid, l2cache;
155 		nvlist_t *config;
156 
157 		if (pread64(fd, label, sizeof (vdev_label_t),
158 		    label_offset(size, l)) != sizeof (vdev_label_t)) {
159 			continue;
160 		}
161 
162 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
163 		    sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) {
164 			continue;
165 		}
166 
167 		/* Skip labels which do not have a valid guid. */
168 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
169 		    &guid) != 0 || guid == 0) {
170 			nvlist_free(config);
171 			continue;
172 		}
173 
174 		/* Skip labels which are not in a known valid state. */
175 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
176 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
177 			nvlist_free(config);
178 			continue;
179 		}
180 
181 		/* If the device is a cache device clear the header. */
182 		if (!clear_l2arc_header) {
183 			if (nvlist_lookup_uint64(config,
184 			    ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
185 			    l2cache == POOL_STATE_L2CACHE) {
186 				clear_l2arc_header = B_TRUE;
187 			}
188 		}
189 
190 		nvlist_free(config);
191 
192 		/*
193 		 * A valid label was found, overwrite this label's nvlist
194 		 * and uberblocks with zeros on disk.  This is done to prevent
195 		 * system utilities, like blkid, from incorrectly detecting a
196 		 * partial label.  The leading pad space is left untouched.
197 		 */
198 		memset(label, 0, sizeof (vdev_label_t));
199 		size_t label_size = sizeof (vdev_label_t) - (2 * VDEV_PAD_SIZE);
200 
201 		if (pwrite64(fd, label, label_size, label_offset(size, l) +
202 		    (2 * VDEV_PAD_SIZE)) == label_size)
203 			labels_cleared = B_TRUE;
204 	}
205 
206 	if (clear_l2arc_header) {
207 		_Static_assert(sizeof (*label) >= sizeof (l2arc_dev_hdr_phys_t),
208 		    "label < l2arc_dev_hdr_phys_t");
209 		memset(label, 0, sizeof (l2arc_dev_hdr_phys_t));
210 		if (pwrite64(fd, label, sizeof (l2arc_dev_hdr_phys_t),
211 		    VDEV_LABEL_START_SIZE) == sizeof (l2arc_dev_hdr_phys_t))
212 			header_cleared = B_TRUE;
213 	}
214 
215 	free(label);
216 
217 	if (!labels_cleared || (clear_l2arc_header && !header_cleared))
218 		return (-1);
219 
220 	return (0);
221 }
222 
223 static boolean_t
find_guid(nvlist_t * nv,uint64_t guid)224 find_guid(nvlist_t *nv, uint64_t guid)
225 {
226 	nvlist_t **child;
227 	uint_t c, children;
228 
229 	if (fnvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID) == guid)
230 		return (B_TRUE);
231 
232 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
233 	    &child, &children) == 0) {
234 		for (c = 0; c < children; c++)
235 			if (find_guid(child[c], guid))
236 				return (B_TRUE);
237 	}
238 
239 	return (B_FALSE);
240 }
241 
242 typedef struct aux_cbdata {
243 	const char	*cb_type;
244 	uint64_t	cb_guid;
245 	zpool_handle_t	*cb_zhp;
246 } aux_cbdata_t;
247 
248 static int
find_aux(zpool_handle_t * zhp,void * data)249 find_aux(zpool_handle_t *zhp, void *data)
250 {
251 	aux_cbdata_t *cbp = data;
252 	nvlist_t **list;
253 	uint_t count;
254 
255 	nvlist_t *nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
256 	    ZPOOL_CONFIG_VDEV_TREE);
257 
258 	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
259 	    &list, &count) == 0) {
260 		for (uint_t i = 0; i < count; i++) {
261 			uint64_t guid = fnvlist_lookup_uint64(list[i],
262 			    ZPOOL_CONFIG_GUID);
263 			if (guid == cbp->cb_guid) {
264 				cbp->cb_zhp = zhp;
265 				return (1);
266 			}
267 		}
268 	}
269 
270 	zpool_close(zhp);
271 	return (0);
272 }
273 
274 /*
275  * Determines if the pool is in use.  If so, it returns true and the state of
276  * the pool as well as the name of the pool.  Name string is allocated and
277  * must be freed by the caller.
278  */
279 int
zpool_in_use(libzfs_handle_t * hdl,int fd,pool_state_t * state,char ** namestr,boolean_t * inuse)280 zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
281     boolean_t *inuse)
282 {
283 	nvlist_t *config;
284 	const char *name = NULL;
285 	boolean_t ret;
286 	uint64_t guid = 0, vdev_guid;
287 	zpool_handle_t *zhp;
288 	nvlist_t *pool_config;
289 	uint64_t stateval, isspare;
290 	aux_cbdata_t cb = { 0 };
291 	boolean_t isactive;
292 
293 	*inuse = B_FALSE;
294 
295 	if (zpool_read_label(fd, &config, NULL) != 0)
296 		return (-1);
297 
298 	if (config == NULL)
299 		return (0);
300 
301 	stateval = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
302 	vdev_guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID);
303 
304 	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
305 		name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
306 		guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID);
307 	}
308 
309 	switch (stateval) {
310 	case POOL_STATE_EXPORTED:
311 		/*
312 		 * A pool with an exported state may in fact be imported
313 		 * read-only, so check the in-core state to see if it's
314 		 * active and imported read-only.  If it is, set
315 		 * its state to active.
316 		 */
317 		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
318 		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
319 			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
320 				stateval = POOL_STATE_ACTIVE;
321 
322 			/*
323 			 * All we needed the zpool handle for is the
324 			 * readonly prop check.
325 			 */
326 			zpool_close(zhp);
327 		}
328 
329 		ret = B_TRUE;
330 		break;
331 
332 	case POOL_STATE_ACTIVE:
333 		/*
334 		 * For an active pool, we have to determine if it's really part
335 		 * of a currently active pool (in which case the pool will exist
336 		 * and the guid will be the same), or whether it's part of an
337 		 * active pool that was disconnected without being explicitly
338 		 * exported.
339 		 */
340 		if (pool_active(hdl, name, guid, &isactive) != 0) {
341 			nvlist_free(config);
342 			return (-1);
343 		}
344 
345 		if (isactive) {
346 			/*
347 			 * Because the device may have been removed while
348 			 * offlined, we only report it as active if the vdev is
349 			 * still present in the config.  Otherwise, pretend like
350 			 * it's not in use.
351 			 */
352 			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
353 			    (pool_config = zpool_get_config(zhp, NULL))
354 			    != NULL) {
355 				nvlist_t *nvroot = fnvlist_lookup_nvlist(
356 				    pool_config, ZPOOL_CONFIG_VDEV_TREE);
357 				ret = find_guid(nvroot, vdev_guid);
358 			} else {
359 				ret = B_FALSE;
360 			}
361 
362 			/*
363 			 * If this is an active spare within another pool, we
364 			 * treat it like an unused hot spare.  This allows the
365 			 * user to create a pool with a hot spare that currently
366 			 * in use within another pool.  Since we return B_TRUE,
367 			 * libdiskmgt will continue to prevent generic consumers
368 			 * from using the device.
369 			 */
370 			if (ret && nvlist_lookup_uint64(config,
371 			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
372 				stateval = POOL_STATE_SPARE;
373 
374 			if (zhp != NULL)
375 				zpool_close(zhp);
376 		} else {
377 			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
378 			ret = B_TRUE;
379 		}
380 		break;
381 
382 	case POOL_STATE_SPARE:
383 		/*
384 		 * For a hot spare, it can be either definitively in use, or
385 		 * potentially active.  To determine if it's in use, we iterate
386 		 * over all pools in the system and search for one with a spare
387 		 * with a matching guid.
388 		 *
389 		 * Due to the shared nature of spares, we don't actually report
390 		 * the potentially active case as in use.  This means the user
391 		 * can freely create pools on the hot spares of exported pools,
392 		 * but to do otherwise makes the resulting code complicated, and
393 		 * we end up having to deal with this case anyway.
394 		 */
395 		cb.cb_zhp = NULL;
396 		cb.cb_guid = vdev_guid;
397 		cb.cb_type = ZPOOL_CONFIG_SPARES;
398 		if (zpool_iter(hdl, find_aux, &cb) == 1) {
399 			name = (char *)zpool_get_name(cb.cb_zhp);
400 			ret = B_TRUE;
401 		} else {
402 			ret = B_FALSE;
403 		}
404 		break;
405 
406 	case POOL_STATE_L2CACHE:
407 
408 		/*
409 		 * Check if any pool is currently using this l2cache device.
410 		 */
411 		cb.cb_zhp = NULL;
412 		cb.cb_guid = vdev_guid;
413 		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
414 		if (zpool_iter(hdl, find_aux, &cb) == 1) {
415 			name = (char *)zpool_get_name(cb.cb_zhp);
416 			ret = B_TRUE;
417 		} else {
418 			ret = B_FALSE;
419 		}
420 		break;
421 
422 	default:
423 		ret = B_FALSE;
424 	}
425 
426 
427 	if (ret) {
428 		*namestr = zfs_strdup(hdl, name);
429 		*state = (pool_state_t)stateval;
430 	}
431 
432 	if (cb.cb_zhp)
433 		zpool_close(cb.cb_zhp);
434 
435 	nvlist_free(config);
436 	*inuse = ret;
437 	return (0);
438 }
439