1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
26 * Copyright (c) 2016, 2017 Intel Corporation.
27 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
28 */
29
30 /*
31 * Functions to convert between a list of vdevs and an nvlist representing the
32 * configuration. Each entry in the list can be one of:
33 *
34 * Device vdevs
35 * disk=(path=..., devid=...)
36 * file=(path=...)
37 *
38 * Group vdevs
39 * raidz[1|2]=(...)
40 * mirror=(...)
41 *
42 * Hot spares
43 *
44 * While the underlying implementation supports it, group vdevs cannot contain
45 * other group vdevs. All userland verification of devices is contained within
46 * this file. If successful, the nvlist returned can be passed directly to the
47 * kernel; we've done as much verification as possible in userland.
48 *
49 * Hot spares are a special case, and passed down as an array of disk vdevs, at
50 * the same level as the root of the vdev tree.
51 *
52 * The only function exported by this file is 'make_root_vdev'. The
53 * function performs several passes:
54 *
55 * 1. Construct the vdev specification. Performs syntax validation and
56 * makes sure each device is valid.
57 * 2. Check for devices in use. Using libblkid to make sure that no
58 * devices are also in use. Some can be overridden using the 'force'
59 * flag, others cannot.
 * 	3. Check for replication errors if the 'force' flag is not specified.
 * 	   Validates that the replication level is consistent across the
 * 	   entire pool.
63 * 4. Call libzfs to label any whole disks with an EFI label.
64 */
65
66 #include <assert.h>
67 #include <ctype.h>
68 #include <errno.h>
69 #include <fcntl.h>
70 #include <libintl.h>
71 #include <libnvpair.h>
72 #include <libzutil.h>
73 #include <limits.h>
74 #include <sys/spa.h>
75 #include <stdio.h>
76 #include <string.h>
77 #include <unistd.h>
78 #include "zpool_util.h"
79 #include <sys/zfs_context.h>
80
81 #include <scsi/scsi.h>
82 #include <scsi/sg.h>
83 #include <sys/efi_partition.h>
84 #include <sys/stat.h>
85 #include <sys/mntent.h>
86 #include <uuid/uuid.h>
87 #include <blkid/blkid.h>
88
/*
 * One row of the sector size override table: the identification text a
 * device reports, paired with the sector size that should be used for it.
 */
typedef struct vdev_disk_db_entry
{
	/* 24 byte name + 1 byte NULL terminator to make GCC happy */
	char id[25];
	int sector_size;
} vdev_disk_db_entry_t;

/*
 * Database of block devices that lie about physical sector sizes.
 *
 * The lookup compares exactly 24 bytes of the INQUIRY reply, starting at
 * byte 8, against 'id'.  In the standard INQUIRY data layout that range is
 * the 8 byte vendor identification followed by the 16 byte product
 * identification, each left-justified and padded with spaces.  Every
 * identification string below must therefore be precisely 24 characters,
 * with the vendor padded to 8 and the product padded to 16; a shorter string
 * would be NUL padded by the compiler and could never match.
 */
static const vdev_disk_db_entry_t vdev_disk_database[] = {
	{"ATA     ADATA SSD S396 3", 8192},
	{"ATA     APPLE SSD SM128E", 8192},
	{"ATA     APPLE SSD SM256E", 8192},
	{"ATA     APPLE SSD SM512E", 8192},
	{"ATA     APPLE SSD SM768E", 8192},
	{"ATA     C400-MTFDDAC064M", 8192},
	{"ATA     C400-MTFDDAC128M", 8192},
	{"ATA     C400-MTFDDAC256M", 8192},
	{"ATA     C400-MTFDDAC512M", 8192},
	{"ATA     Corsair Force 3 ", 8192},
	{"ATA     Corsair Force GS", 8192},
	{"ATA     INTEL SSDSA2CT04", 8192},
	{"ATA     INTEL SSDSA2BZ10", 8192},
	{"ATA     INTEL SSDSA2BZ20", 8192},
	{"ATA     INTEL SSDSA2BZ30", 8192},
	{"ATA     INTEL SSDSA2CW04", 8192},
	{"ATA     INTEL SSDSA2CW08", 8192},
	{"ATA     INTEL SSDSA2CW12", 8192},
	{"ATA     INTEL SSDSA2CW16", 8192},
	{"ATA     INTEL SSDSA2CW30", 8192},
	{"ATA     INTEL SSDSA2CW60", 8192},
	{"ATA     INTEL SSDSC2CT06", 8192},
	{"ATA     INTEL SSDSC2CT12", 8192},
	{"ATA     INTEL SSDSC2CT18", 8192},
	{"ATA     INTEL SSDSC2CT24", 8192},
	{"ATA     INTEL SSDSC2CW06", 8192},
	{"ATA     INTEL SSDSC2CW12", 8192},
	{"ATA     INTEL SSDSC2CW18", 8192},
	{"ATA     INTEL SSDSC2CW24", 8192},
	{"ATA     INTEL SSDSC2CW48", 8192},
	{"ATA     KINGSTON SH100S3", 8192},
	{"ATA     KINGSTON SH103S3", 8192},
	{"ATA     M4-CT064M4SSD2  ", 8192},
	{"ATA     M4-CT128M4SSD2  ", 8192},
	{"ATA     M4-CT256M4SSD2  ", 8192},
	{"ATA     M4-CT512M4SSD2  ", 8192},
	{"ATA     OCZ-AGILITY2    ", 8192},
	{"ATA     OCZ-AGILITY3    ", 8192},
	{"ATA     OCZ-VERTEX2 3.5 ", 8192},
	{"ATA     OCZ-VERTEX3     ", 8192},
	{"ATA     OCZ-VERTEX3 LT  ", 8192},
	{"ATA     OCZ-VERTEX3 MI  ", 8192},
	{"ATA     OCZ-VERTEX4     ", 8192},
	{"ATA     SAMSUNG MZ7WD120", 8192},
	{"ATA     SAMSUNG MZ7WD240", 8192},
	{"ATA     SAMSUNG MZ7WD480", 8192},
	{"ATA     SAMSUNG MZ7WD960", 8192},
	{"ATA     SAMSUNG SSD 830 ", 8192},
	{"ATA     Samsung SSD 840 ", 8192},
	{"ATA     SanDisk SSD U100", 8192},
	{"ATA     TOSHIBA THNSNH06", 8192},
	{"ATA     TOSHIBA THNSNH12", 8192},
	{"ATA     TOSHIBA THNSNH25", 8192},
	{"ATA     TOSHIBA THNSNH51", 8192},
	{"ATA     APPLE SSD TS064C", 4096},
	{"ATA     APPLE SSD TS128C", 4096},
	{"ATA     APPLE SSD TS256C", 4096},
	{"ATA     APPLE SSD TS512C", 4096},
	{"ATA     INTEL SSDSA2M040", 4096},
	{"ATA     INTEL SSDSA2M080", 4096},
	{"ATA     INTEL SSDSA2M160", 4096},
	{"ATA     INTEL SSDSC2MH12", 4096},
	{"ATA     INTEL SSDSC2MH25", 4096},
	{"ATA     OCZ CORE_SSD    ", 4096},
	{"ATA     OCZ-VERTEX      ", 4096},
	{"ATA     SAMSUNG MCCOE32G", 4096},
	{"ATA     SAMSUNG MCCOE64G", 4096},
	{"ATA     SAMSUNG SSD PM80", 4096},
	/* Flash drives optimized for 4KB IOs on larger pages */
	{"ATA     INTEL SSDSC2BA10", 4096},
	{"ATA     INTEL SSDSC2BA20", 4096},
	{"ATA     INTEL SSDSC2BA40", 4096},
	{"ATA     INTEL SSDSC2BA80", 4096},
	{"ATA     INTEL SSDSC2BB08", 4096},
	{"ATA     INTEL SSDSC2BB12", 4096},
	{"ATA     INTEL SSDSC2BB16", 4096},
	{"ATA     INTEL SSDSC2BB24", 4096},
	{"ATA     INTEL SSDSC2BB30", 4096},
	{"ATA     INTEL SSDSC2BB40", 4096},
	{"ATA     INTEL SSDSC2BB48", 4096},
	{"ATA     INTEL SSDSC2BB60", 4096},
	{"ATA     INTEL SSDSC2BB80", 4096},
	{"ATA     INTEL SSDSC2BW24", 4096},
	{"ATA     INTEL SSDSC2BW48", 4096},
	{"ATA     INTEL SSDSC2BP24", 4096},
	{"ATA     INTEL SSDSC2BP48", 4096},
	{"NA      SmrtStorSDLKAE9W", 4096},
	{"NVMe    Amazon EC2 NVMe ", 4096},
	/* Imported from Open Solaris */
	{"ATA     MARVELL SD88SA02", 4096},
	/* Advanced format Hard drives */
	{"ATA     Hitachi HDS5C303", 4096},
	{"ATA     SAMSUNG HD204UI ", 4096},
	{"ATA     ST2000DL004 HD20", 4096},
	{"ATA     WDC WD10EARS-00M", 4096},
	{"ATA     WDC WD10EARS-00S", 4096},
	{"ATA     WDC WD10EARS-00Z", 4096},
	{"ATA     WDC WD15EARS-00M", 4096},
	{"ATA     WDC WD15EARS-00S", 4096},
	{"ATA     WDC WD15EARS-00Z", 4096},
	{"ATA     WDC WD20EARS-00M", 4096},
	{"ATA     WDC WD20EARS-00S", 4096},
	{"ATA     WDC WD20EARS-00Z", 4096},
	{"ATA     WDC WD1600BEVT-0", 4096},
	{"ATA     WDC WD2500BEVT-0", 4096},
	{"ATA     WDC WD3200BEVT-0", 4096},
	{"ATA     WDC WD5000BEVT-0", 4096},
};
210
211
212 #define INQ_REPLY_LEN 96
213 #define INQ_CMD_LEN 6
214
215 static const int vdev_disk_database_size =
216 sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
217
218 boolean_t
check_sector_size_database(char * path,int * sector_size)219 check_sector_size_database(char *path, int *sector_size)
220 {
221 unsigned char inq_buff[INQ_REPLY_LEN];
222 unsigned char sense_buffer[32];
223 unsigned char inq_cmd_blk[INQ_CMD_LEN] =
224 {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
225 sg_io_hdr_t io_hdr;
226 int error;
227 int fd;
228 int i;
229
230 /* Prepare INQUIRY command */
231 memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
232 io_hdr.interface_id = 'S';
233 io_hdr.cmd_len = sizeof (inq_cmd_blk);
234 io_hdr.mx_sb_len = sizeof (sense_buffer);
235 io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
236 io_hdr.dxfer_len = INQ_REPLY_LEN;
237 io_hdr.dxferp = inq_buff;
238 io_hdr.cmdp = inq_cmd_blk;
239 io_hdr.sbp = sense_buffer;
240 io_hdr.timeout = 10; /* 10 milliseconds is ample time */
241
242 if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
243 return (B_FALSE);
244
245 error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
246
247 (void) close(fd);
248
249 if (error < 0)
250 return (B_FALSE);
251
252 if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
253 return (B_FALSE);
254
255 for (i = 0; i < vdev_disk_database_size; i++) {
256 if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
257 continue;
258
259 *sector_size = vdev_disk_database[i].sector_size;
260 return (B_TRUE);
261 }
262
263 return (B_FALSE);
264 }
265
266 static int
check_slice(const char * path,blkid_cache cache,int force,boolean_t isspare)267 check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
268 {
269 int err;
270 char *value;
271
272 /* No valid type detected device is safe to use */
273 value = blkid_get_tag_value(cache, "TYPE", path);
274 if (value == NULL)
275 return (0);
276
277 /*
278 * If libblkid detects a ZFS device, we check the device
279 * using check_file() to see if it's safe. The one safe
280 * case is a spare device shared between multiple pools.
281 */
282 if (strcmp(value, "zfs_member") == 0) {
283 err = check_file(path, force, isspare);
284 } else {
285 if (force) {
286 err = 0;
287 } else {
288 err = -1;
289 vdev_error(gettext("%s contains a filesystem of "
290 "type '%s'\n"), path, value);
291 }
292 }
293
294 free(value);
295
296 return (err);
297 }
298
299 /*
300 * Validate that a disk including all partitions are safe to use.
301 *
302 * For EFI labeled disks this can done relatively easily with the libefi
303 * library. The partition numbers are extracted from the label and used
304 * to generate the expected /dev/ paths. Each partition can then be
305 * checked for conflicts.
306 *
307 * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
308 * but due to the lack of a readily available libraries this scanning is
309 * not implemented. Instead only the device path as given is checked.
310 */
311 static int
check_disk(const char * path,blkid_cache cache,int force,boolean_t isspare,boolean_t iswholedisk)312 check_disk(const char *path, blkid_cache cache, int force,
313 boolean_t isspare, boolean_t iswholedisk)
314 {
315 struct dk_gpt *vtoc;
316 char slice_path[MAXPATHLEN];
317 int err = 0;
318 int fd, i;
319 int flags = O_RDONLY|O_DIRECT;
320
321 if (!iswholedisk)
322 return (check_slice(path, cache, force, isspare));
323
324 /* only spares can be shared, other devices require exclusive access */
325 if (!isspare)
326 flags |= O_EXCL;
327
328 if ((fd = open(path, flags)) < 0) {
329 char *value = blkid_get_tag_value(cache, "TYPE", path);
330 (void) fprintf(stderr, gettext("%s is in use and contains "
331 "a %s filesystem.\n"), path, value ? value : "unknown");
332 free(value);
333 return (-1);
334 }
335
336 /*
337 * Expected to fail for non-EFI labeled disks. Just check the device
338 * as given and do not attempt to detect and scan partitions.
339 */
340 err = efi_alloc_and_read(fd, &vtoc);
341 if (err) {
342 (void) close(fd);
343 return (check_slice(path, cache, force, isspare));
344 }
345
346 /*
347 * The primary efi partition label is damaged however the secondary
348 * label at the end of the device is intact. Rather than use this
349 * label we should play it safe and treat this as a non efi device.
350 */
351 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
352 efi_free(vtoc);
353 (void) close(fd);
354
355 if (force) {
356 /* Partitions will now be created using the backup */
357 return (0);
358 } else {
359 vdev_error(gettext("%s contains a corrupt primary "
360 "EFI label.\n"), path);
361 return (-1);
362 }
363 }
364
365 for (i = 0; i < vtoc->efi_nparts; i++) {
366
367 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
368 uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
369 continue;
370
371 if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
372 (void) snprintf(slice_path, sizeof (slice_path),
373 "%s%s%d", path, "-part", i+1);
374 else
375 (void) snprintf(slice_path, sizeof (slice_path),
376 "%s%s%d", path, isdigit(path[strlen(path)-1]) ?
377 "p" : "", i+1);
378
379 err = check_slice(slice_path, cache, force, isspare);
380 if (err)
381 break;
382 }
383
384 efi_free(vtoc);
385 (void) close(fd);
386
387 return (err);
388 }
389
390 int
check_device(const char * path,boolean_t force,boolean_t isspare,boolean_t iswholedisk)391 check_device(const char *path, boolean_t force,
392 boolean_t isspare, boolean_t iswholedisk)
393 {
394 blkid_cache cache;
395 int error;
396
397 error = blkid_get_cache(&cache, NULL);
398 if (error != 0) {
399 (void) fprintf(stderr, gettext("unable to access the blkid "
400 "cache.\n"));
401 return (-1);
402 }
403
404 error = check_disk(path, cache, force, isspare, iswholedisk);
405 blkid_put_cache(cache);
406
407 return (error);
408 }
409
410 void
after_zpool_upgrade(zpool_handle_t * zhp)411 after_zpool_upgrade(zpool_handle_t *zhp)
412 {
413 (void) zhp;
414 }
415
416 int
check_file(const char * file,boolean_t force,boolean_t isspare)417 check_file(const char *file, boolean_t force, boolean_t isspare)
418 {
419 return (check_file_generic(file, force, isspare));
420 }
421
/*
 * Read the contents of a sysfs file into a newly allocated string.  If the
 * data read ends with a newline, that newline is removed.
 *
 * The buffer is sized from the file's reported st_size plus one byte for
 * the terminator, and a single read() is issued.
 *
 * Returns the string on success (the caller must free it), or NULL if the
 * file cannot be opened, stat'ed or read, or if allocation fails.
 */
static char *
zpool_sysfs_gets(char *path)
{
	struct stat st;
	char *contents = NULL;
	ssize_t nread;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return (NULL);

	if (fstat(fd, &st) != 0)
		goto out;

	/* Zero-filled, so the result is terminated however much is read */
	contents = calloc(st.st_size + 1, sizeof (*contents));
	if (contents == NULL)
		goto out;

	/*
	 * Reading fewer bytes than st_size is fine.  Sysfs files report a
	 * 4k size even when they only hold a short string.
	 */
	nread = read(fd, contents, st.st_size);
	if (nread < 0) {
		free(contents);
		contents = NULL;
		goto out;
	}

	/* Drop one trailing newline, if present */
	if (nread > 0 && contents[nread - 1] == '\n')
		contents[nread - 1] = '\0';

out:
	close(fd);

	return (contents);
}
470
/*
 * Write a string to a sysfs file.
 *
 * The file is opened for writing through stdio, 'str' is written and the
 * stream is closed.  Because stdio buffers output, a write that the kernel
 * rejects may only be reported when the stream is flushed by fclose(), so
 * the fclose() result is part of the success check.
 *
 * Returns 0 on success, non-zero otherwise:
 *	-1 - the file could not be opened for writing
 *	-2 - writing or flushing the string failed
 */
static int
zpool_sysfs_puts(const char *path, const char *str)
{
	FILE *file;
	int rc = 0;

	file = fopen(path, "w");
	if (file == NULL)
		return (-1);

	if (fputs(str, file) < 0)
		rc = -2;

	/* Always close the stream; a failed flush counts as a failed write */
	if (fclose(file) != 0 && rc == 0)
		rc = -2;

	return (rc);
}
492
493 /* Given a vdev nvlist_t, rescan its enclosure sysfs path */
494 static void
rescan_vdev_config_dev_sysfs_path(nvlist_t * vdev_nv)495 rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv)
496 {
497 update_vdev_config_dev_sysfs_path(vdev_nv,
498 fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH),
499 ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
500 }
501
/*
 * Translate a power string into a state.
 *
 * "off" and "0" map to 0, "on" and "1" map to 1.  The comparison is exact,
 * so any other text (including different case or surrounding whitespace)
 * yields -1.
 */
static int
zpool_power_parse_value(char *str)
{
	static const struct {
		const char *text;
		int state;
	} known[] = {
		{ "off", 0 },
		{ "0", 0 },
		{ "on", 1 },
		{ "1", 1 },
	};

	for (size_t k = 0; k < sizeof (known) / sizeof (known[0]); k++) {
		if (strcmp(str, known[k].text) == 0)
			return (known[k].state);
	}

	return (-1);
}
516
517 /*
518 * Given a vdev string return an allocated string containing the sysfs path to
519 * its power control file. Also do a check if the power control file really
520 * exists and has correct permissions.
521 *
522 * Example returned strings:
523 *
524 * /sys/class/enclosure/0:0:122:0/10/power_status
525 * /sys/bus/pci/slots/10/power
526 *
527 * Returns allocated string on success (which must be freed), NULL on failure.
528 */
529 static char *
zpool_power_sysfs_path(zpool_handle_t * zhp,char * vdev)530 zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev)
531 {
532 const char *enc_sysfs_dir = NULL;
533 char *path = NULL;
534 nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);
535
536 if (vdev_nv == NULL) {
537 return (NULL);
538 }
539
540 /* Make sure we're getting the updated enclosure sysfs path */
541 rescan_vdev_config_dev_sysfs_path(vdev_nv);
542
543 if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
544 &enc_sysfs_dir) != 0) {
545 return (NULL);
546 }
547
548 if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1)
549 return (NULL);
550
551 if (access(path, W_OK) != 0) {
552 free(path);
553 path = NULL;
554 /* No HDD 'power_control' file, maybe it's NVMe? */
555 if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) {
556 return (NULL);
557 }
558
559 if (access(path, R_OK | W_OK) != 0) {
560 /* Not NVMe either */
561 free(path);
562 return (NULL);
563 }
564 }
565
566 return (path);
567 }
568
569 /*
570 * Given a path to a sysfs power control file, return B_TRUE if you should use
571 * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control).
572 */
573 static boolean_t
zpool_power_use_word(char * sysfs_path)574 zpool_power_use_word(char *sysfs_path)
575 {
576 if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")],
577 "power_status") == 0) {
578 return (B_TRUE);
579 }
580 return (B_FALSE);
581 }
582
583 /*
584 * Check the sysfs power control value for a vdev.
585 *
586 * Returns:
587 * 0 - Power is off
588 * 1 - Power is on
589 * -1 - Error or unsupported
590 */
591 int
zpool_power_current_state(zpool_handle_t * zhp,char * vdev)592 zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
593 {
594 char *val;
595 int rc;
596
597 char *path = zpool_power_sysfs_path(zhp, vdev);
598 if (path == NULL)
599 return (-1);
600
601 val = zpool_sysfs_gets(path);
602 if (val == NULL) {
603 free(path);
604 return (-1);
605 }
606
607 rc = zpool_power_parse_value(val);
608 free(val);
609 free(path);
610 return (rc);
611 }
612
613 /*
614 * Turn on or off the slot to a device
615 *
616 * Device path is the full path to the device (like /dev/sda or /dev/sda1).
617 *
618 * Return code:
619 * 0: Success
620 * ENOTSUP: Power control not supported for OS
621 * EBADSLT: Couldn't read current power state
622 * ENOENT: No sysfs path to power control
623 * EIO: Couldn't write sysfs power value
624 * EBADE: Sysfs power value didn't change
625 */
626 int
zpool_power(zpool_handle_t * zhp,char * vdev,boolean_t turn_on)627 zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
628 {
629 char *sysfs_path;
630 const char *val;
631 int rc;
632 int timeout_ms;
633
634 rc = zpool_power_current_state(zhp, vdev);
635 if (rc == -1) {
636 return (EBADSLT);
637 }
638
639 /* Already correct value? */
640 if (rc == (int)turn_on)
641 return (0);
642
643 sysfs_path = zpool_power_sysfs_path(zhp, vdev);
644 if (sysfs_path == NULL)
645 return (ENOENT);
646
647 if (zpool_power_use_word(sysfs_path)) {
648 val = turn_on ? "on" : "off";
649 } else {
650 val = turn_on ? "1" : "0";
651 }
652
653 rc = zpool_sysfs_puts(sysfs_path, (char *)val);
654
655 free(sysfs_path);
656 if (rc != 0) {
657 return (EIO);
658 }
659
660 /*
661 * Wait up to 30 seconds for sysfs power value to change after
662 * writing it.
663 */
664 timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000);
665 for (int i = 0; i < MAX(1, timeout_ms / 200); i++) {
666 rc = zpool_power_current_state(zhp, vdev);
667 if (rc == (int)turn_on)
668 return (0); /* success */
669
670 fsleep(0.200); /* 200ms */
671 }
672
673 /* sysfs value never changed */
674 return (EBADE);
675 }
676