// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright (c) 2016, Intel Corporation.
 */

#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/vdev_impl.h>
#include <libzfs.h>
#include "libzfs_impl.h"
#include <libzutil.h>
#include <sys/arc_impl.h>

/*
 * Returns true (through *isactive) if the named pool matches the given GUID.
 */
static int
pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
    boolean_t *isactive)
{
        zpool_handle_t *zhp;

        if (zpool_open_silent(hdl, name, &zhp) != 0)
                return (-1);

        if (zhp == NULL) {
                *isactive = B_FALSE;
                return (0);
        }

        uint64_t theguid = fnvlist_lookup_uint64(zhp->zpool_config,
            ZPOOL_CONFIG_POOL_GUID);

        zpool_close(zhp);

        *isactive = (theguid == guid);
        return (0);
}

static nvlist_t *
refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
{
        nvlist_t *nvl;
        zfs_cmd_t zc = {"\0"};
        int err, dstbuf_size;

        zcmd_write_conf_nvlist(hdl, &zc, config);

        dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 32);

        zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size);

        while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
            &zc)) != 0 && errno == ENOMEM)
                zcmd_expand_dst_nvlist(hdl, &zc);

        if (err) {
                zcmd_free_nvlists(&zc);
                return (NULL);
        }

        if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
                zcmd_free_nvlists(&zc);
                return (NULL);
        }

        zcmd_free_nvlists(&zc);
        return (nvl);
}

static nvlist_t *
refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
{
        return (refresh_config((libzfs_handle_t *)handle, tryconfig));
}

static int
pool_active_libzfs(void *handle, const char *name, uint64_t guid,
    boolean_t *isactive)
{
        return (pool_active((libzfs_handle_t *)handle, name, guid, isactive));
}

const pool_config_ops_t libzfs_config_ops = {
        .pco_refresh_config = refresh_config_libzfs,
        .pco_pool_active = pool_active_libzfs,
};

/*
 * Return the offset of the given label.
 */
static uint64_t
label_offset(uint64_t size, int l)
{
        ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
        return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
            0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}
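
/*
 * Layout illustration (descriptive comment only, nothing here is used
 * by the code): the four labels (VDEV_LABELS == 4) are split between
 * the two ends of the device, so for an aligned device size `size':
 *
 *	label_offset(size, 0) == 0
 *	label_offset(size, 1) == sizeof (vdev_label_t)
 *	label_offset(size, 2) == size - 2 * sizeof (vdev_label_t)
 *	label_offset(size, 3) == size - sizeof (vdev_label_t)
 *
 * Keeping copies at both ends lets at least one label survive damage
 * or truncation at either end of the device.
 */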

/*
 * Given a file descriptor, clear (zero) the label information.  This function
 * is used in the appliance stack as part of the ZFS sysevent module and
 * to implement the "zpool labelclear" command.
 */
int
zpool_clear_label(int fd)
{
        struct stat64 statbuf;
        int l;
        vdev_label_t *label;
        uint64_t size;
        boolean_t labels_cleared = B_FALSE, clear_l2arc_header = B_FALSE,
            header_cleared = B_FALSE;

        if (fstat64_blk(fd, &statbuf) == -1)
                return (0);

        size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

        if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
                return (-1);

        for (l = 0; l < VDEV_LABELS; l++) {
                uint64_t state, guid, l2cache;
                nvlist_t *config;

                if (pread64(fd, label, sizeof (vdev_label_t),
                    label_offset(size, l)) != sizeof (vdev_label_t)) {
                        continue;
                }

                if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
                    sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) {
                        continue;
                }

                /* Skip labels which do not have a valid guid. */
                if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
                    &guid) != 0 || guid == 0) {
                        nvlist_free(config);
                        continue;
                }

                /* Skip labels which are not in a known valid state. */
                if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
                    &state) != 0 || state > POOL_STATE_L2CACHE) {
                        nvlist_free(config);
                        continue;
                }

                /* If the device is a cache device, clear the header. */
                if (!clear_l2arc_header) {
                        if (nvlist_lookup_uint64(config,
                            ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
                            l2cache == POOL_STATE_L2CACHE) {
                                clear_l2arc_header = B_TRUE;
                        }
                }

                nvlist_free(config);

                /*
                 * A valid label was found, overwrite this label's nvlist
                 * and uberblocks with zeros on disk.  This is done to prevent
                 * system utilities, like blkid, from incorrectly detecting a
                 * partial label.  The leading pad space is left untouched.
                 */
                memset(label, 0, sizeof (vdev_label_t));
                size_t label_size = sizeof (vdev_label_t) - (2 * VDEV_PAD_SIZE);

                if (pwrite64(fd, label, label_size, label_offset(size, l) +
                    (2 * VDEV_PAD_SIZE)) == label_size)
                        labels_cleared = B_TRUE;
        }

        if (clear_l2arc_header) {
                _Static_assert(sizeof (*label) >= sizeof (l2arc_dev_hdr_phys_t),
                    "label < l2arc_dev_hdr_phys_t");
                memset(label, 0, sizeof (l2arc_dev_hdr_phys_t));
                if (pwrite64(fd, label, sizeof (l2arc_dev_hdr_phys_t),
                    VDEV_LABEL_START_SIZE) == sizeof (l2arc_dev_hdr_phys_t))
                        header_cleared = B_TRUE;
        }

        free(label);

        if (!labels_cleared || (clear_l2arc_header && !header_cleared))
                return (-1);

        return (0);
}
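
/*
 * Usage sketch (illustrative only; the device path and open flags
 * below are assumptions, not part of this file):
 *
 *	int fd = open("/dev/sdX", O_RDWR | O_EXCL);
 *	if (fd >= 0) {
 *		if (zpool_clear_label(fd) != 0)
 *			(void) fprintf(stderr, "label clear failed\n");
 *		(void) close(fd);
 *	}
 *
 * Broadly, a return of 0 means at least one valid label was zeroed
 * (and the L2ARC header as well, when the device carried a cache
 * device label); -1 means no label could be cleared or a required
 * write failed.
 */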

static boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
        nvlist_t **child;
        uint_t c, children;

        if (fnvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID) == guid)
                return (B_TRUE);

        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) == 0) {
                for (c = 0; c < children; c++)
                        if (find_guid(child[c], guid))
                                return (B_TRUE);
        }

        return (B_FALSE);
}

typedef struct aux_cbdata {
        const char *cb_type;
        uint64_t cb_guid;
        zpool_handle_t *cb_zhp;
} aux_cbdata_t;

static int
find_aux(zpool_handle_t *zhp, void *data)
{
        aux_cbdata_t *cbp = data;
        nvlist_t **list;
        uint_t count;

        nvlist_t *nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
            ZPOOL_CONFIG_VDEV_TREE);

        if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
            &list, &count) == 0) {
                for (uint_t i = 0; i < count; i++) {
                        uint64_t guid = fnvlist_lookup_uint64(list[i],
                            ZPOOL_CONFIG_GUID);
                        if (guid == cbp->cb_guid) {
                                cbp->cb_zhp = zhp;
                                return (1);
                        }
                }
        }

        zpool_close(zhp);
        return (0);
}
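
/*
 * Note on the iteration contract above: zpool_iter() stops and
 * propagates the first nonzero callback return, so when find_aux()
 * matches it returns 1 and intentionally leaves the matching handle
 * open in cb_zhp for the caller, while closing every non-matching
 * handle itself.  A caller sketch (aux_guid is illustrative):
 *
 *	aux_cbdata_t cb = { ZPOOL_CONFIG_SPARES, aux_guid, NULL };
 *	if (zpool_iter(hdl, find_aux, &cb) == 1) {
 *		... device is a spare of zpool_get_name(cb.cb_zhp) ...
 *		zpool_close(cb.cb_zhp);
 *	}
 */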

/*
 * Determines if the pool is in use.  If so, it returns true and the state of
 * the pool as well as the name of the pool.  The name string is allocated and
 * must be freed by the caller.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
        nvlist_t *config;
        const char *name = NULL;
        boolean_t ret;
        uint64_t guid = 0, vdev_guid;
        zpool_handle_t *zhp;
        nvlist_t *pool_config;
        uint64_t stateval, isspare;
        aux_cbdata_t cb = { 0 };
        boolean_t isactive;

        *inuse = B_FALSE;

        if (zpool_read_label(fd, &config, NULL) != 0)
                return (-1);

        if (config == NULL)
                return (0);

        stateval = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
        vdev_guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID);

        if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
                name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
                guid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID);
        }

        switch (stateval) {
        case POOL_STATE_EXPORTED:
                /*
                 * A pool with an exported state may in fact be imported
                 * read-only, so check the in-core state to see if it's
                 * active and imported read-only.  If it is, set
                 * its state to active.
                 */
                if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
                    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
                        if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
                                stateval = POOL_STATE_ACTIVE;

                        /*
                         * All we needed the zpool handle for is the
                         * readonly prop check.
                         */
                        zpool_close(zhp);
                }

                ret = B_TRUE;
                break;

        case POOL_STATE_ACTIVE:
                /*
                 * For an active pool, we have to determine if it's really part
                 * of a currently active pool (in which case the pool will
                 * exist and the guid will be the same), or whether it's part
                 * of an active pool that was disconnected without being
                 * explicitly exported.
                 */
                if (pool_active(hdl, name, guid, &isactive) != 0) {
                        nvlist_free(config);
                        return (-1);
                }

                if (isactive) {
                        /*
                         * Because the device may have been removed while
                         * offlined, we only report it as active if the vdev is
                         * still present in the config.  Otherwise, pretend
                         * like it's not in use.
                         */
                        if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
                            (pool_config = zpool_get_config(zhp, NULL))
                            != NULL) {
                                nvlist_t *nvroot = fnvlist_lookup_nvlist(
                                    pool_config, ZPOOL_CONFIG_VDEV_TREE);
                                ret = find_guid(nvroot, vdev_guid);
                        } else {
                                ret = B_FALSE;
                        }

                        /*
                         * If this is an active spare within another pool, we
                         * treat it like an unused hot spare.  This allows the
                         * user to create a pool with a hot spare that is
                         * currently in use within another pool.  Since we
                         * return B_TRUE, libdiskmgt will continue to prevent
                         * generic consumers from using the device.
                         */
                        if (ret && nvlist_lookup_uint64(config,
                            ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
                                stateval = POOL_STATE_SPARE;

                        if (zhp != NULL)
                                zpool_close(zhp);
                } else {
                        stateval = POOL_STATE_POTENTIALLY_ACTIVE;
                        ret = B_TRUE;
                }
                break;

        case POOL_STATE_SPARE:
                /*
                 * For a hot spare, it can be either definitively in use, or
                 * potentially active.  To determine if it's in use, we iterate
                 * over all pools in the system and search for one with a spare
                 * with a matching guid.
                 *
                 * Due to the shared nature of spares, we don't actually report
                 * the potentially active case as in use.  This means the user
                 * can freely create pools on the hot spares of exported pools,
                 * but to do otherwise makes the resulting code complicated, and
                 * we end up having to deal with this case anyway.
                 */
                cb.cb_zhp = NULL;
                cb.cb_guid = vdev_guid;
                cb.cb_type = ZPOOL_CONFIG_SPARES;
                if (zpool_iter(hdl, find_aux, &cb) == 1) {
                        name = zpool_get_name(cb.cb_zhp);
                        ret = B_TRUE;
                } else {
                        ret = B_FALSE;
                }
                break;

        case POOL_STATE_L2CACHE:
                /*
                 * Check if any pool is currently using this l2cache device.
                 */
                cb.cb_zhp = NULL;
                cb.cb_guid = vdev_guid;
                cb.cb_type = ZPOOL_CONFIG_L2CACHE;
                if (zpool_iter(hdl, find_aux, &cb) == 1) {
                        name = zpool_get_name(cb.cb_zhp);
                        ret = B_TRUE;
                } else {
                        ret = B_FALSE;
                }
                break;

        default:
                ret = B_FALSE;
        }

        if (ret) {
                *namestr = zfs_strdup(hdl, name);
                *state = (pool_state_t)stateval;
        }

        if (cb.cb_zhp)
                zpool_close(cb.cb_zhp);

        nvlist_free(config);
        *inuse = ret;
        return (0);
}
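
/*
 * Usage sketch for zpool_in_use() (illustrative; error handling
 * abbreviated, `hdl' and `fd' assumed to already exist):
 *
 *	pool_state_t state;
 *	char *name = NULL;
 *	boolean_t inuse;
 *
 *	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
 *		(void) printf("in use by pool '%s' (state %d)\n",
 *		    name, (int)state);
 *		free(name);
 *	}
 *
 * The returned name is allocated with zfs_strdup() and must be released
 * with free(), per the contract in the comment above zpool_in_use().
 */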