1fa9e4066Sahrens /*
2fa9e4066Sahrens * CDDL HEADER START
3fa9e4066Sahrens *
4fa9e4066Sahrens * The contents of this file are subject to the terms of the
5bef6b7d2Swebaker * Common Development and Distribution License (the "License").
6bef6b7d2Swebaker * You may not use this file except in compliance with the License.
7fa9e4066Sahrens *
8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens * See the License for the specific language governing permissions
11fa9e4066Sahrens * and limitations under the License.
12fa9e4066Sahrens *
13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens *
19fa9e4066Sahrens * CDDL HEADER END
20fa9e4066Sahrens */
21fa9e4066Sahrens /*
22f13665b7Sbo zhou - Sun Microsystems - Beijing China * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
236fe4f300SPavel Zakharov * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24295438baSHans Rosenfeld * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
251b500975SMike Gerdts * Copyright 2020 Joyent, Inc.
2630c304d9SJoshua M. Clulow * Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
2729621f01SHans Rosenfeld * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
28fa9e4066Sahrens */
29fa9e4066Sahrens
30fa9e4066Sahrens #include <sys/zfs_context.h>
31dcba9f3fSGeorge Wilson #include <sys/spa_impl.h>
32e7cbe64fSgw25295 #include <sys/refcount.h>
33fa9e4066Sahrens #include <sys/vdev_impl.h>
34084fd14fSBrian Behlendorf #include <sys/vdev_trim.h>
35770499e1SDan Kimmel #include <sys/abd.h>
36fa9e4066Sahrens #include <sys/fs/zfs.h>
37fa9e4066Sahrens #include <sys/zio.h>
38afefbcddSeschrock #include <sys/sunldi.h>
394263d13fSGeorge Wilson #include <sys/efi_partition.h>
4051ece835Seschrock #include <sys/fm/fs/zfs.h>
41ac04831dSMike Gerdts #include <sys/ddi.h>
42fa9e4066Sahrens
43fa9e4066Sahrens /*
44fc5c75cfSJerry Jelinek * Tunable to disable TRIM in case we're using a problematic SSD.
45fb05b94aSJerry Jelinek */
46fc5c75cfSJerry Jelinek uint_t zfs_no_trim = 0;
47fb05b94aSJerry Jelinek
48fb05b94aSJerry Jelinek /*
49f8fdf681SPrakash Surya * Tunable parameter for debugging or performance analysis. Setting this
50f8fdf681SPrakash Surya * will cause pool corruption on power loss if a volatile out-of-order
51f8fdf681SPrakash Surya * write cache is enabled.
52f8fdf681SPrakash Surya */
53f8fdf681SPrakash Surya boolean_t zfs_nocacheflush = B_FALSE;
54f8fdf681SPrakash Surya
55f8fdf681SPrakash Surya /*
56fa9e4066Sahrens * Virtual device vector for disks.
57fa9e4066Sahrens */
58fa9e4066Sahrens
59fa9e4066Sahrens extern ldi_ident_t zfs_li;
60fa9e4066Sahrens
6139cddb10SJoshua M. Clulow static void vdev_disk_close(vdev_t *);
6239cddb10SJoshua M. Clulow
63ac04831dSMike Gerdts typedef struct vdev_disk {
64ac04831dSMike Gerdts ddi_devid_t vd_devid;
65ac04831dSMike Gerdts char *vd_minor;
66ac04831dSMike Gerdts ldi_handle_t vd_lh;
67ac04831dSMike Gerdts list_t vd_ldi_cbs;
68ac04831dSMike Gerdts boolean_t vd_ldi_offline;
69ac04831dSMike Gerdts } vdev_disk_t;
70ac04831dSMike Gerdts
71ac04831dSMike Gerdts typedef struct vdev_disk_buf {
72ac04831dSMike Gerdts buf_t vdb_buf;
73ac04831dSMike Gerdts zio_t *vdb_io;
74ac04831dSMike Gerdts } vdev_disk_buf_t;
75ac04831dSMike Gerdts
7639cddb10SJoshua M. Clulow typedef struct vdev_disk_ldi_cb {
7739cddb10SJoshua M. Clulow list_node_t lcb_next;
7839cddb10SJoshua M. Clulow ldi_callback_id_t lcb_id;
7939cddb10SJoshua M. Clulow } vdev_disk_ldi_cb_t;
8039cddb10SJoshua M. Clulow
816fe4f300SPavel Zakharov /*
826fe4f300SPavel Zakharov * Bypass the devid when opening a disk vdev.
836fe4f300SPavel Zakharov * There have been issues where the devids of several devices were shuffled,
846fe4f300SPavel Zakharov * causing pool open failures. Note, that this flag is intended to be used
856fe4f300SPavel Zakharov * for pool recovery only.
866fe4f300SPavel Zakharov *
876fe4f300SPavel Zakharov * Note that if a pool is imported with the devids bypassed, all its vdevs will
886fe4f300SPavel Zakharov * cease storing devid information permanently. In practice, the devid is rarely
896fe4f300SPavel Zakharov * useful as vdev paths do not tend to change unless the hardware is
906fe4f300SPavel Zakharov * reconfigured. That said, if the paths do change and a pool fails to open
916fe4f300SPavel Zakharov * automatically at boot, a simple zpool import should re-scan the paths and fix
926fe4f300SPavel Zakharov * the issue.
936fe4f300SPavel Zakharov */
946fe4f300SPavel Zakharov boolean_t vdev_disk_bypass_devid = B_FALSE;
956fe4f300SPavel Zakharov
9639cddb10SJoshua M. Clulow static void
vdev_disk_alloc(vdev_t * vd)9739cddb10SJoshua M. Clulow vdev_disk_alloc(vdev_t *vd)
9839cddb10SJoshua M. Clulow {
9939cddb10SJoshua M. Clulow vdev_disk_t *dvd;
10039cddb10SJoshua M. Clulow
10139cddb10SJoshua M. Clulow dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
10239cddb10SJoshua M. Clulow /*
10339cddb10SJoshua M. Clulow * Create the LDI event callback list.
10439cddb10SJoshua M. Clulow */
10539cddb10SJoshua M. Clulow list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
10639cddb10SJoshua M. Clulow offsetof(vdev_disk_ldi_cb_t, lcb_next));
10739cddb10SJoshua M. Clulow }
10839cddb10SJoshua M. Clulow
10939cddb10SJoshua M. Clulow static void
vdev_disk_free(vdev_t * vd)11039cddb10SJoshua M. Clulow vdev_disk_free(vdev_t *vd)
11139cddb10SJoshua M. Clulow {
11239cddb10SJoshua M. Clulow vdev_disk_t *dvd = vd->vdev_tsd;
11339cddb10SJoshua M. Clulow vdev_disk_ldi_cb_t *lcb;
11439cddb10SJoshua M. Clulow
11539cddb10SJoshua M. Clulow if (dvd == NULL)
11639cddb10SJoshua M. Clulow return;
11739cddb10SJoshua M. Clulow
11839cddb10SJoshua M. Clulow /*
11939cddb10SJoshua M. Clulow * We have already closed the LDI handle. Clean up the LDI event
12039cddb10SJoshua M. Clulow * callbacks and free vd->vdev_tsd.
12139cddb10SJoshua M. Clulow */
12239cddb10SJoshua M. Clulow while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
12339cddb10SJoshua M. Clulow list_remove(&dvd->vd_ldi_cbs, lcb);
12439cddb10SJoshua M. Clulow (void) ldi_ev_remove_callbacks(lcb->lcb_id);
12539cddb10SJoshua M. Clulow kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
12639cddb10SJoshua M. Clulow }
12739cddb10SJoshua M. Clulow list_destroy(&dvd->vd_ldi_cbs);
12839cddb10SJoshua M. Clulow kmem_free(dvd, sizeof (vdev_disk_t));
12939cddb10SJoshua M. Clulow vd->vdev_tsd = NULL;
13039cddb10SJoshua M. Clulow }
13139cddb10SJoshua M. Clulow
13239cddb10SJoshua M. Clulow static int
vdev_disk_off_notify(ldi_handle_t lh __unused,ldi_ev_cookie_t ecookie,void * arg,void * ev_data __unused)1331b500975SMike Gerdts vdev_disk_off_notify(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie,
1341b500975SMike Gerdts void *arg, void *ev_data __unused)
13539cddb10SJoshua M. Clulow {
13639cddb10SJoshua M. Clulow vdev_t *vd = (vdev_t *)arg;
13739cddb10SJoshua M. Clulow vdev_disk_t *dvd = vd->vdev_tsd;
13839cddb10SJoshua M. Clulow
13939cddb10SJoshua M. Clulow /*
14039cddb10SJoshua M. Clulow * Ignore events other than offline.
14139cddb10SJoshua M. Clulow */
14239cddb10SJoshua M. Clulow if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
14339cddb10SJoshua M. Clulow return (LDI_EV_SUCCESS);
14439cddb10SJoshua M. Clulow
14539cddb10SJoshua M. Clulow /*
1461b500975SMike Gerdts * Tell any new threads that stumble upon this vdev that they should not
1471b500975SMike Gerdts * try to do I/O.
14839cddb10SJoshua M. Clulow */
14939cddb10SJoshua M. Clulow dvd->vd_ldi_offline = B_TRUE;
15039cddb10SJoshua M. Clulow
15139cddb10SJoshua M. Clulow /*
1521b500975SMike Gerdts * Request that the spa_async_thread mark the device as REMOVED and
1531b500975SMike Gerdts * notify FMA of the removal. This should also trigger a vdev_close()
1541b500975SMike Gerdts * in the async thread.
15539cddb10SJoshua M. Clulow */
15639cddb10SJoshua M. Clulow zfs_post_remove(vd->vdev_spa, vd);
15739cddb10SJoshua M. Clulow vd->vdev_remove_wanted = B_TRUE;
15839cddb10SJoshua M. Clulow spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
15939cddb10SJoshua M. Clulow
16039cddb10SJoshua M. Clulow return (LDI_EV_SUCCESS);
16139cddb10SJoshua M. Clulow }
16239cddb10SJoshua M. Clulow
16339cddb10SJoshua M. Clulow static void
vdev_disk_off_finalize(ldi_handle_t lh __unused,ldi_ev_cookie_t ecookie,int ldi_result,void * arg,void * ev_data __unused)1641b500975SMike Gerdts vdev_disk_off_finalize(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie,
1651b500975SMike Gerdts int ldi_result, void *arg, void *ev_data __unused)
16639cddb10SJoshua M. Clulow {
16739cddb10SJoshua M. Clulow vdev_t *vd = (vdev_t *)arg;
16839cddb10SJoshua M. Clulow
16939cddb10SJoshua M. Clulow /*
17039cddb10SJoshua M. Clulow * Ignore events other than offline.
17139cddb10SJoshua M. Clulow */
17239cddb10SJoshua M. Clulow if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
17339cddb10SJoshua M. Clulow return;
17439cddb10SJoshua M. Clulow
17539cddb10SJoshua M. Clulow /*
17639cddb10SJoshua M. Clulow * Request that the vdev be reopened if the offline state change was
17739cddb10SJoshua M. Clulow * unsuccessful.
17839cddb10SJoshua M. Clulow */
17939cddb10SJoshua M. Clulow if (ldi_result != LDI_EV_SUCCESS) {
18039cddb10SJoshua M. Clulow vd->vdev_probe_wanted = B_TRUE;
18139cddb10SJoshua M. Clulow spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE);
18239cddb10SJoshua M. Clulow }
18339cddb10SJoshua M. Clulow }
18439cddb10SJoshua M. Clulow
18539cddb10SJoshua M. Clulow static ldi_ev_callback_t vdev_disk_off_callb = {
18639cddb10SJoshua M. Clulow .cb_vers = LDI_EV_CB_VERS,
18739cddb10SJoshua M. Clulow .cb_notify = vdev_disk_off_notify,
18839cddb10SJoshua M. Clulow .cb_finalize = vdev_disk_off_finalize
18939cddb10SJoshua M. Clulow };
19039cddb10SJoshua M. Clulow
19139cddb10SJoshua M. Clulow static void
vdev_disk_dgrd_finalize(ldi_handle_t lh __unused,ldi_ev_cookie_t ecookie,int ldi_result,void * arg,void * ev_data __unused)1921b500975SMike Gerdts vdev_disk_dgrd_finalize(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie,
1931b500975SMike Gerdts int ldi_result, void *arg, void *ev_data __unused)
19439cddb10SJoshua M. Clulow {
19539cddb10SJoshua M. Clulow vdev_t *vd = (vdev_t *)arg;
19639cddb10SJoshua M. Clulow
19739cddb10SJoshua M. Clulow /*
19839cddb10SJoshua M. Clulow * Ignore events other than degrade.
19939cddb10SJoshua M. Clulow */
20039cddb10SJoshua M. Clulow if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0)
20139cddb10SJoshua M. Clulow return;
20239cddb10SJoshua M. Clulow
20339cddb10SJoshua M. Clulow /*
20439cddb10SJoshua M. Clulow * Degrade events always succeed. Mark the vdev as degraded.
20539cddb10SJoshua M. Clulow * This status is purely informative for the user.
20639cddb10SJoshua M. Clulow */
20739cddb10SJoshua M. Clulow (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0);
20839cddb10SJoshua M. Clulow }
20939cddb10SJoshua M. Clulow
21039cddb10SJoshua M. Clulow static ldi_ev_callback_t vdev_disk_dgrd_callb = {
21139cddb10SJoshua M. Clulow .cb_vers = LDI_EV_CB_VERS,
21239cddb10SJoshua M. Clulow .cb_notify = NULL,
21339cddb10SJoshua M. Clulow .cb_finalize = vdev_disk_dgrd_finalize
21439cddb10SJoshua M. Clulow };
21539cddb10SJoshua M. Clulow
216dcba9f3fSGeorge Wilson static void
vdev_disk_hold(vdev_t * vd)217dcba9f3fSGeorge Wilson vdev_disk_hold(vdev_t *vd)
218dcba9f3fSGeorge Wilson {
219dcba9f3fSGeorge Wilson ddi_devid_t devid;
220dcba9f3fSGeorge Wilson char *minor;
221dcba9f3fSGeorge Wilson
222dcba9f3fSGeorge Wilson ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
223dcba9f3fSGeorge Wilson
224dcba9f3fSGeorge Wilson /*
225dcba9f3fSGeorge Wilson * We must have a pathname, and it must be absolute.
226dcba9f3fSGeorge Wilson */
227dcba9f3fSGeorge Wilson if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
228dcba9f3fSGeorge Wilson return;
229dcba9f3fSGeorge Wilson
230dcba9f3fSGeorge Wilson /*
231dcba9f3fSGeorge Wilson * Only prefetch path and devid info if the device has
232dcba9f3fSGeorge Wilson * never been opened.
233dcba9f3fSGeorge Wilson */
234dcba9f3fSGeorge Wilson if (vd->vdev_tsd != NULL)
235dcba9f3fSGeorge Wilson return;
236dcba9f3fSGeorge Wilson
237dcba9f3fSGeorge Wilson if (vd->vdev_wholedisk == -1ULL) {
238dcba9f3fSGeorge Wilson size_t len = strlen(vd->vdev_path) + 3;
239dcba9f3fSGeorge Wilson char *buf = kmem_alloc(len, KM_SLEEP);
240dcba9f3fSGeorge Wilson
241dcba9f3fSGeorge Wilson (void) snprintf(buf, len, "%ss0", vd->vdev_path);
242dcba9f3fSGeorge Wilson
243dcba9f3fSGeorge Wilson (void) ldi_vp_from_name(buf, &vd->vdev_name_vp);
244dcba9f3fSGeorge Wilson kmem_free(buf, len);
245dcba9f3fSGeorge Wilson }
246dcba9f3fSGeorge Wilson
247dcba9f3fSGeorge Wilson if (vd->vdev_name_vp == NULL)
248dcba9f3fSGeorge Wilson (void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp);
249dcba9f3fSGeorge Wilson
250dcba9f3fSGeorge Wilson if (vd->vdev_devid != NULL &&
251dcba9f3fSGeorge Wilson ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) {
252dcba9f3fSGeorge Wilson (void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp);
253dcba9f3fSGeorge Wilson ddi_devid_str_free(minor);
254dcba9f3fSGeorge Wilson ddi_devid_free(devid);
255dcba9f3fSGeorge Wilson }
256dcba9f3fSGeorge Wilson }
257dcba9f3fSGeorge Wilson
258dcba9f3fSGeorge Wilson static void
vdev_disk_rele(vdev_t * vd)259dcba9f3fSGeorge Wilson vdev_disk_rele(vdev_t *vd)
260dcba9f3fSGeorge Wilson {
261dcba9f3fSGeorge Wilson ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
262dcba9f3fSGeorge Wilson
263dcba9f3fSGeorge Wilson if (vd->vdev_name_vp) {
264dcba9f3fSGeorge Wilson VN_RELE_ASYNC(vd->vdev_name_vp,
265dcba9f3fSGeorge Wilson dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
266dcba9f3fSGeorge Wilson vd->vdev_name_vp = NULL;
267dcba9f3fSGeorge Wilson }
268dcba9f3fSGeorge Wilson if (vd->vdev_devid_vp) {
269dcba9f3fSGeorge Wilson VN_RELE_ASYNC(vd->vdev_devid_vp,
270dcba9f3fSGeorge Wilson dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
271dcba9f3fSGeorge Wilson vd->vdev_devid_vp = NULL;
272dcba9f3fSGeorge Wilson }
273dcba9f3fSGeorge Wilson }
274dcba9f3fSGeorge Wilson
/*
 * We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when
 * even a fallback to DKIOCGMEDIAINFO fails.
 *
 * In DEBUG builds VDEV_DEBUG() forwards its arguments to
 * cmn_err(CE_NOTE, ...).  In non-DEBUG builds it expands to nothing, so its
 * arguments are not evaluated.
 */
#ifdef DEBUG
#define	VDEV_DEBUG(...)	cmn_err(CE_NOTE, __VA_ARGS__)
#else
#define	VDEV_DEBUG(...)	/* Nothing... */
#endif
284a5b57771SDan McDonald
285fa9e4066Sahrens static int
vdev_disk_open(vdev_t * vd,uint64_t * psize,uint64_t * max_psize,uint64_t * ashift)2864263d13fSGeorge Wilson vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
2874263d13fSGeorge Wilson uint64_t *ashift)
288fa9e4066Sahrens {
2898ad4d6ddSJeff Bonwick spa_t *spa = vd->vdev_spa;
29039cddb10SJoshua M. Clulow vdev_disk_t *dvd = vd->vdev_tsd;
29139cddb10SJoshua M. Clulow ldi_ev_cookie_t ecookie;
29239cddb10SJoshua M. Clulow vdev_disk_ldi_cb_t *lcb;
293a5b57771SDan McDonald union {
294a5b57771SDan McDonald struct dk_minfo_ext ude;
295a5b57771SDan McDonald struct dk_minfo ud;
296a5b57771SDan McDonald } dks;
297a5b57771SDan McDonald struct dk_minfo_ext *dkmext = &dks.ude;
298a5b57771SDan McDonald struct dk_minfo *dkm = &dks.ud;
299084fd14fSBrian Behlendorf int error, can_free;
300e14bb325SJeff Bonwick dev_t dev;
301e14bb325SJeff Bonwick int otyp;
302fb02ae02SGeorge Wilson boolean_t validate_devid = B_FALSE;
303a5b57771SDan McDonald uint64_t capacity = 0, blksz = 0, pbsize;
304*8b26092dSJoshua M. Clulow const char *rdpath = vdev_disk_preroot_force_path();
305fa9e4066Sahrens
306fa9e4066Sahrens /*
307fa9e4066Sahrens * We must have a pathname, and it must be absolute.
308fa9e4066Sahrens */
309fa9e4066Sahrens if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
310fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
311be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL));
312fa9e4066Sahrens }
313fa9e4066Sahrens
314095bcd66SGeorge Wilson /*
315095bcd66SGeorge Wilson * Reopen the device if it's not currently open. Otherwise,
316095bcd66SGeorge Wilson * just update the physical size of the device.
317095bcd66SGeorge Wilson */
31839cddb10SJoshua M. Clulow if (dvd != NULL) {
319095bcd66SGeorge Wilson ASSERT(vd->vdev_reopening);
320095bcd66SGeorge Wilson goto skip_open;
321095bcd66SGeorge Wilson }
322095bcd66SGeorge Wilson
32339cddb10SJoshua M. Clulow /*
32439cddb10SJoshua M. Clulow * Create vd->vdev_tsd.
32539cddb10SJoshua M. Clulow */
32639cddb10SJoshua M. Clulow vdev_disk_alloc(vd);
32739cddb10SJoshua M. Clulow dvd = vd->vdev_tsd;
328fa9e4066Sahrens
329fa9e4066Sahrens /*
3306fe4f300SPavel Zakharov * Allow bypassing the devid.
3316fe4f300SPavel Zakharov */
332*8b26092dSJoshua M. Clulow if (vd->vdev_devid != NULL &&
333*8b26092dSJoshua M. Clulow (vdev_disk_bypass_devid || rdpath != NULL)) {
3346fe4f300SPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open, devid %s bypassed",
3356fe4f300SPavel Zakharov vd->vdev_devid);
3366fe4f300SPavel Zakharov spa_strfree(vd->vdev_devid);
3376fe4f300SPavel Zakharov vd->vdev_devid = NULL;
3386fe4f300SPavel Zakharov }
3396fe4f300SPavel Zakharov
3406fe4f300SPavel Zakharov /*
341fa9e4066Sahrens * When opening a disk device, we want to preserve the user's original
342fa9e4066Sahrens * intent. We always want to open the device by the path the user gave
3431724dc7bSJoshua M. Clulow * us, even if it is one of multiple paths to the same device. But we
344fa9e4066Sahrens * also want to be able to survive disks being removed/recabled.
345fa9e4066Sahrens * Therefore the sequence of opening devices is:
346fa9e4066Sahrens *
347afefbcddSeschrock * 1. Try opening the device by path. For legacy pools without the
348afefbcddSeschrock * 'whole_disk' property, attempt to fix the path by appending 's0'.
349fa9e4066Sahrens *
350fa9e4066Sahrens * 2. If the devid of the device matches the stored value, return
351fa9e4066Sahrens * success.
352fa9e4066Sahrens *
353fa9e4066Sahrens * 3. Otherwise, the device may have moved. Try opening the device
354fa9e4066Sahrens * by the devid instead.
355fa9e4066Sahrens */
356fa9e4066Sahrens if (vd->vdev_devid != NULL) {
357fa9e4066Sahrens if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
358fa9e4066Sahrens &dvd->vd_minor) != 0) {
35929621f01SHans Rosenfeld vdev_dbgmsg(vd,
36029621f01SHans Rosenfeld "vdev_disk_open, invalid devid %s bypassed",
36129621f01SHans Rosenfeld vd->vdev_devid);
36229621f01SHans Rosenfeld spa_strfree(vd->vdev_devid);
36329621f01SHans Rosenfeld vd->vdev_devid = NULL;
364fa9e4066Sahrens }
365fa9e4066Sahrens }
366fa9e4066Sahrens
367fa9e4066Sahrens error = EINVAL; /* presume failure */
368fa9e4066Sahrens
369*8b26092dSJoshua M. Clulow if (rdpath != NULL) {
370*8b26092dSJoshua M. Clulow /*
371*8b26092dSJoshua M. Clulow * We have been asked to open only a specific root device, and
372*8b26092dSJoshua M. Clulow * to fail otherwise.
373*8b26092dSJoshua M. Clulow */
374*8b26092dSJoshua M. Clulow error = ldi_open_by_name((char *)rdpath, spa_mode(spa), kcred,
375*8b26092dSJoshua M. Clulow &dvd->vd_lh, zfs_li);
376*8b26092dSJoshua M. Clulow validate_devid = B_TRUE;
377*8b26092dSJoshua M. Clulow goto rootdisk_only;
378*8b26092dSJoshua M. Clulow }
379*8b26092dSJoshua M. Clulow
380095bcd66SGeorge Wilson if (vd->vdev_path != NULL) {
381afefbcddSeschrock if (vd->vdev_wholedisk == -1ULL) {
382fa9e4066Sahrens size_t len = strlen(vd->vdev_path) + 3;
383fa9e4066Sahrens char *buf = kmem_alloc(len, KM_SLEEP);
384fa9e4066Sahrens
385fa9e4066Sahrens (void) snprintf(buf, len, "%ss0", vd->vdev_path);
386fa9e4066Sahrens
38739cddb10SJoshua M. Clulow error = ldi_open_by_name(buf, spa_mode(spa), kcred,
38839cddb10SJoshua M. Clulow &dvd->vd_lh, zfs_li);
38939cddb10SJoshua M. Clulow if (error == 0) {
390afefbcddSeschrock spa_strfree(vd->vdev_path);
391afefbcddSeschrock vd->vdev_path = buf;
392afefbcddSeschrock vd->vdev_wholedisk = 1ULL;
393afefbcddSeschrock } else {
394fa9e4066Sahrens kmem_free(buf, len);
395afefbcddSeschrock }
396afefbcddSeschrock }
397afefbcddSeschrock
39839cddb10SJoshua M. Clulow /*
39939cddb10SJoshua M. Clulow * If we have not yet opened the device, try to open it by the
40039cddb10SJoshua M. Clulow * specified path.
40139cddb10SJoshua M. Clulow */
40239cddb10SJoshua M. Clulow if (error != 0) {
40339cddb10SJoshua M. Clulow error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
40439cddb10SJoshua M. Clulow kcred, &dvd->vd_lh, zfs_li);
40539cddb10SJoshua M. Clulow }
406fa9e4066Sahrens
407fa9e4066Sahrens /*
408fa9e4066Sahrens * Compare the devid to the stored value.
409fa9e4066Sahrens */
4106af23589SJoshua M. Clulow if (error == 0 && vd->vdev_devid != NULL) {
4116af23589SJoshua M. Clulow ddi_devid_t devid = NULL;
4126af23589SJoshua M. Clulow
4136af23589SJoshua M. Clulow if (ldi_get_devid(dvd->vd_lh, &devid) != 0) {
4146af23589SJoshua M. Clulow /*
4156af23589SJoshua M. Clulow * We expected a devid on this device but it no
4166af23589SJoshua M. Clulow * longer appears to have one. The validation
4176af23589SJoshua M. Clulow * step may need to remove it from the
4186af23589SJoshua M. Clulow * configuration.
4196af23589SJoshua M. Clulow */
4206af23589SJoshua M. Clulow validate_devid = B_TRUE;
4216af23589SJoshua M. Clulow
4226af23589SJoshua M. Clulow } else if (ddi_devid_compare(devid, dvd->vd_devid) !=
4236af23589SJoshua M. Clulow 0) {
4246fe4f300SPavel Zakharov /*
4256fe4f300SPavel Zakharov * A mismatch here is unexpected, log it.
4266fe4f300SPavel Zakharov */
4276fe4f300SPavel Zakharov char *devid_str = ddi_devid_str_encode(devid,
4286fe4f300SPavel Zakharov dvd->vd_minor);
4296fe4f300SPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: devid "
4306fe4f300SPavel Zakharov "mismatch: %s != %s", vd->vdev_devid,
4316fe4f300SPavel Zakharov devid_str);
4326fe4f300SPavel Zakharov cmn_err(CE_NOTE, "vdev_disk_open %s: devid "
4336fe4f300SPavel Zakharov "mismatch: %s != %s", vd->vdev_path,
4346fe4f300SPavel Zakharov vd->vdev_devid, devid_str);
4356fe4f300SPavel Zakharov ddi_devid_str_free(devid_str);
4366fe4f300SPavel Zakharov
437be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL);
4388ad4d6ddSJeff Bonwick (void) ldi_close(dvd->vd_lh, spa_mode(spa),
4398ad4d6ddSJeff Bonwick kcred);
440fa9e4066Sahrens dvd->vd_lh = NULL;
441fa9e4066Sahrens }
4426af23589SJoshua M. Clulow
4436af23589SJoshua M. Clulow if (devid != NULL) {
444fa9e4066Sahrens ddi_devid_free(devid);
445fa9e4066Sahrens }
4466af23589SJoshua M. Clulow }
447afefbcddSeschrock
448afefbcddSeschrock /*
449afefbcddSeschrock * If we succeeded in opening the device, but 'vdev_wholedisk'
450afefbcddSeschrock * is not yet set, then this must be a slice.
451afefbcddSeschrock */
452afefbcddSeschrock if (error == 0 && vd->vdev_wholedisk == -1ULL)
453afefbcddSeschrock vd->vdev_wholedisk = 0;
454fa9e4066Sahrens }
455fa9e4066Sahrens
456fa9e4066Sahrens /*
457fa9e4066Sahrens * If we were unable to open by path, or the devid check fails, open by
458fa9e4066Sahrens * devid instead.
459fa9e4066Sahrens */
460fb02ae02SGeorge Wilson if (error != 0 && vd->vdev_devid != NULL) {
461fa9e4066Sahrens error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
4628ad4d6ddSJeff Bonwick spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
4636fe4f300SPavel Zakharov if (error != 0) {
4646fe4f300SPavel Zakharov vdev_dbgmsg(vd, "Failed to open by devid (%s)",
4656fe4f300SPavel Zakharov vd->vdev_devid);
4666fe4f300SPavel Zakharov }
467fb02ae02SGeorge Wilson }
468fa9e4066Sahrens
4693d7072f8Seschrock /*
4703d7072f8Seschrock * If all else fails, then try opening by physical path (if available)
4713d7072f8Seschrock * or the logical path (if we failed due to the devid check). While not
4723d7072f8Seschrock * as reliable as the devid, this will give us something, and the higher
4733d7072f8Seschrock * level vdev validation will prevent us from opening the wrong device.
4743d7072f8Seschrock */
4756af23589SJoshua M. Clulow if (error != 0) {
476fb02ae02SGeorge Wilson validate_devid = B_TRUE;
477fb02ae02SGeorge Wilson
4783d7072f8Seschrock if (vd->vdev_physpath != NULL &&
4796af23589SJoshua M. Clulow (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) {
4808ad4d6ddSJeff Bonwick error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
4813d7072f8Seschrock kcred, &dvd->vd_lh, zfs_li);
4826af23589SJoshua M. Clulow }
4833d7072f8Seschrock
4843d7072f8Seschrock /*
4853d7072f8Seschrock * Note that we don't support the legacy auto-wholedisk support
4863d7072f8Seschrock * as above. This hasn't been used in a very long time and we
4873d7072f8Seschrock * don't need to propagate its oddities to this edge condition.
4883d7072f8Seschrock */
4896af23589SJoshua M. Clulow if (error != 0 && vd->vdev_path != NULL) {
4908ad4d6ddSJeff Bonwick error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
4918ad4d6ddSJeff Bonwick kcred, &dvd->vd_lh, zfs_li);
4923d7072f8Seschrock }
4936af23589SJoshua M. Clulow }
4943d7072f8Seschrock
49530c304d9SJoshua M. Clulow /*
49630c304d9SJoshua M. Clulow * If this is early in boot, a sweep of available block devices may
49730c304d9SJoshua M. Clulow * locate an alternative path that we can try.
49830c304d9SJoshua M. Clulow */
49930c304d9SJoshua M. Clulow if (error != 0) {
50030c304d9SJoshua M. Clulow const char *altdevpath = vdev_disk_preroot_lookup(
50130c304d9SJoshua M. Clulow spa_guid(spa), vd->vdev_guid);
50230c304d9SJoshua M. Clulow
50330c304d9SJoshua M. Clulow if (altdevpath != NULL) {
50430c304d9SJoshua M. Clulow vdev_dbgmsg(vd, "Trying alternate preroot path (%s)",
50530c304d9SJoshua M. Clulow altdevpath);
50630c304d9SJoshua M. Clulow
50730c304d9SJoshua M. Clulow validate_devid = B_TRUE;
50830c304d9SJoshua M. Clulow
50930c304d9SJoshua M. Clulow if ((error = ldi_open_by_name((char *)altdevpath,
51030c304d9SJoshua M. Clulow spa_mode(spa), kcred, &dvd->vd_lh, zfs_li)) != 0) {
51130c304d9SJoshua M. Clulow vdev_dbgmsg(vd, "Failed to open by preroot "
51230c304d9SJoshua M. Clulow "path (%s)", altdevpath);
51330c304d9SJoshua M. Clulow }
51430c304d9SJoshua M. Clulow }
51530c304d9SJoshua M. Clulow }
51630c304d9SJoshua M. Clulow
517*8b26092dSJoshua M. Clulow rootdisk_only:
5186af23589SJoshua M. Clulow if (error != 0) {
519fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
5203ee8c80cSPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]",
5213ee8c80cSPavel Zakharov error);
522fa9e4066Sahrens return (error);
523fa9e4066Sahrens }
524fa9e4066Sahrens
525fa9e4066Sahrens /*
526fb02ae02SGeorge Wilson * Now that the device has been successfully opened, update the devid
527fb02ae02SGeorge Wilson * if necessary.
528fb02ae02SGeorge Wilson */
5296af23589SJoshua M. Clulow if (validate_devid) {
5306af23589SJoshua M. Clulow ddi_devid_t devid = NULL;
5316af23589SJoshua M. Clulow char *minorname = NULL;
5326af23589SJoshua M. Clulow char *vd_devid = NULL;
5336af23589SJoshua M. Clulow boolean_t remove = B_FALSE, update = B_FALSE;
534fb02ae02SGeorge Wilson
5356af23589SJoshua M. Clulow /*
5366af23589SJoshua M. Clulow * Get the current devid and minor name for the device we
5376af23589SJoshua M. Clulow * opened.
5386af23589SJoshua M. Clulow */
5396af23589SJoshua M. Clulow if (ldi_get_devid(dvd->vd_lh, &devid) != 0 ||
5406af23589SJoshua M. Clulow ldi_get_minor_name(dvd->vd_lh, &minorname) != 0) {
5416af23589SJoshua M. Clulow /*
5426af23589SJoshua M. Clulow * If we are unable to get the devid or the minor name
5436af23589SJoshua M. Clulow * for the device, we need to remove them from the
5446af23589SJoshua M. Clulow * configuration to prevent potential inconsistencies.
5456af23589SJoshua M. Clulow */
5466af23589SJoshua M. Clulow if (dvd->vd_minor != NULL || dvd->vd_devid != NULL ||
5476af23589SJoshua M. Clulow vd->vdev_devid != NULL) {
5486af23589SJoshua M. Clulow /*
5496af23589SJoshua M. Clulow * We only need to remove the devid if one
5506af23589SJoshua M. Clulow * exists.
5516af23589SJoshua M. Clulow */
5526af23589SJoshua M. Clulow remove = B_TRUE;
553fb02ae02SGeorge Wilson }
5546af23589SJoshua M. Clulow
5556af23589SJoshua M. Clulow } else if (dvd->vd_devid == NULL || dvd->vd_minor == NULL) {
5566af23589SJoshua M. Clulow /*
5576af23589SJoshua M. Clulow * There was previously no devid at all so we need to
5586af23589SJoshua M. Clulow * add one.
5596af23589SJoshua M. Clulow */
5606af23589SJoshua M. Clulow update = B_TRUE;
5616af23589SJoshua M. Clulow
5626af23589SJoshua M. Clulow } else if (ddi_devid_compare(devid, dvd->vd_devid) != 0 ||
5636af23589SJoshua M. Clulow strcmp(minorname, dvd->vd_minor) != 0) {
5646af23589SJoshua M. Clulow /*
5656af23589SJoshua M. Clulow * The devid or minor name on file does not match the
5666af23589SJoshua M. Clulow * one from the opened device.
5676af23589SJoshua M. Clulow */
5686af23589SJoshua M. Clulow update = B_TRUE;
5696af23589SJoshua M. Clulow }
5706af23589SJoshua M. Clulow
5716af23589SJoshua M. Clulow if (update) {
5726af23589SJoshua M. Clulow /*
5736af23589SJoshua M. Clulow * Render the new devid and minor name as a string for
5746af23589SJoshua M. Clulow * logging and to store in the vdev configuration.
5756af23589SJoshua M. Clulow */
5766af23589SJoshua M. Clulow vd_devid = ddi_devid_str_encode(devid, minorname);
5776af23589SJoshua M. Clulow }
5786af23589SJoshua M. Clulow
5796af23589SJoshua M. Clulow if (update || remove) {
5806af23589SJoshua M. Clulow vdev_dbgmsg(vd, "vdev_disk_open: update devid from "
5816af23589SJoshua M. Clulow "'%s' to '%s'",
5826af23589SJoshua M. Clulow vd->vdev_devid != NULL ? vd->vdev_devid : "<none>",
5836af23589SJoshua M. Clulow vd_devid != NULL ? vd_devid : "<none>");
5846af23589SJoshua M. Clulow cmn_err(CE_NOTE, "vdev_disk_open %s: update devid "
5856af23589SJoshua M. Clulow "from '%s' to '%s'",
5866af23589SJoshua M. Clulow vd->vdev_path != NULL ? vd->vdev_path : "?",
5876af23589SJoshua M. Clulow vd->vdev_devid != NULL ? vd->vdev_devid : "<none>",
5886af23589SJoshua M. Clulow vd_devid != NULL ? vd_devid : "<none>");
5896af23589SJoshua M. Clulow
5906af23589SJoshua M. Clulow /*
5916af23589SJoshua M. Clulow * Remove and free any existing values.
5926af23589SJoshua M. Clulow */
5936af23589SJoshua M. Clulow if (dvd->vd_minor != NULL) {
5946af23589SJoshua M. Clulow ddi_devid_str_free(dvd->vd_minor);
5956af23589SJoshua M. Clulow dvd->vd_minor = NULL;
5966af23589SJoshua M. Clulow }
5976af23589SJoshua M. Clulow if (dvd->vd_devid != NULL) {
5986af23589SJoshua M. Clulow ddi_devid_free(dvd->vd_devid);
5996af23589SJoshua M. Clulow dvd->vd_devid = NULL;
6006af23589SJoshua M. Clulow }
6016af23589SJoshua M. Clulow if (vd->vdev_devid != NULL) {
6026af23589SJoshua M. Clulow spa_strfree(vd->vdev_devid);
6036af23589SJoshua M. Clulow vd->vdev_devid = NULL;
6046af23589SJoshua M. Clulow }
6056af23589SJoshua M. Clulow }
6066af23589SJoshua M. Clulow
6076af23589SJoshua M. Clulow if (update) {
6086af23589SJoshua M. Clulow /*
6096af23589SJoshua M. Clulow * Install the new values.
6106af23589SJoshua M. Clulow */
6116af23589SJoshua M. Clulow vd->vdev_devid = vd_devid;
6126af23589SJoshua M. Clulow dvd->vd_minor = minorname;
6136af23589SJoshua M. Clulow dvd->vd_devid = devid;
6146af23589SJoshua M. Clulow
6156af23589SJoshua M. Clulow } else {
6166af23589SJoshua M. Clulow if (devid != NULL) {
617fb02ae02SGeorge Wilson ddi_devid_free(devid);
618fb02ae02SGeorge Wilson }
6196af23589SJoshua M. Clulow if (minorname != NULL) {
6206af23589SJoshua M. Clulow kmem_free(minorname, strlen(minorname) + 1);
6216af23589SJoshua M. Clulow }
6226af23589SJoshua M. Clulow }
6236af23589SJoshua M. Clulow }
624fb02ae02SGeorge Wilson
625fb02ae02SGeorge Wilson /*
6263d7072f8Seschrock * Once a device is opened, verify that the physical device path (if
6273d7072f8Seschrock * available) is up to date.
6283d7072f8Seschrock */
6293d7072f8Seschrock if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
6303d7072f8Seschrock ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
6310a4e9518Sgw25295 char *physpath, *minorname;
6320a4e9518Sgw25295
6333d7072f8Seschrock physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6343d7072f8Seschrock minorname = NULL;
6353d7072f8Seschrock if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
6363d7072f8Seschrock ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
6373d7072f8Seschrock (vd->vdev_physpath == NULL ||
6383d7072f8Seschrock strcmp(vd->vdev_physpath, physpath) != 0)) {
6393d7072f8Seschrock if (vd->vdev_physpath)
6403d7072f8Seschrock spa_strfree(vd->vdev_physpath);
6413d7072f8Seschrock (void) strlcat(physpath, ":", MAXPATHLEN);
6423d7072f8Seschrock (void) strlcat(physpath, minorname, MAXPATHLEN);
6433d7072f8Seschrock vd->vdev_physpath = spa_strdup(physpath);
6443d7072f8Seschrock }
6453d7072f8Seschrock if (minorname)
6463d7072f8Seschrock kmem_free(minorname, strlen(minorname) + 1);
6473d7072f8Seschrock kmem_free(physpath, MAXPATHLEN);
6483d7072f8Seschrock }
6493d7072f8Seschrock
65039cddb10SJoshua M. Clulow /*
65139cddb10SJoshua M. Clulow * Register callbacks for the LDI offline event.
65239cddb10SJoshua M. Clulow */
65339cddb10SJoshua M. Clulow if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
65439cddb10SJoshua M. Clulow LDI_EV_SUCCESS) {
65539cddb10SJoshua M. Clulow lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
65639cddb10SJoshua M. Clulow list_insert_tail(&dvd->vd_ldi_cbs, lcb);
65739cddb10SJoshua M. Clulow (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
65839cddb10SJoshua M. Clulow &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
65939cddb10SJoshua M. Clulow }
66039cddb10SJoshua M. Clulow
66139cddb10SJoshua M. Clulow /*
66239cddb10SJoshua M. Clulow * Register callbacks for the LDI degrade event.
66339cddb10SJoshua M. Clulow */
66439cddb10SJoshua M. Clulow if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
66539cddb10SJoshua M. Clulow LDI_EV_SUCCESS) {
66639cddb10SJoshua M. Clulow lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
66739cddb10SJoshua M. Clulow list_insert_tail(&dvd->vd_ldi_cbs, lcb);
66839cddb10SJoshua M. Clulow (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
66939cddb10SJoshua M. Clulow &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
67039cddb10SJoshua M. Clulow }
671084fd14fSBrian Behlendorf
672095bcd66SGeorge Wilson skip_open:
6733d7072f8Seschrock /*
674fa9e4066Sahrens * Determine the actual size of the device.
675fa9e4066Sahrens */
676fa9e4066Sahrens if (ldi_get_size(dvd->vd_lh, psize) != 0) {
677fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
6783ee8c80cSPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: failed to get size");
679be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL));
680fa9e4066Sahrens }
681fa9e4066Sahrens
682a5b57771SDan McDonald *max_psize = *psize;
683a5b57771SDan McDonald
684ecc2d604Sbonwick /*
685ecc2d604Sbonwick * Determine the device's minimum transfer size.
686ecc2d604Sbonwick * If the ioctl isn't supported, assume DEV_BSIZE.
687bef6b7d2Swebaker */
688a5b57771SDan McDonald if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT,
689a5b57771SDan McDonald (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) {
690a5b57771SDan McDonald capacity = dkmext->dki_capacity - 1;
691a5b57771SDan McDonald blksz = dkmext->dki_lbsize;
692a5b57771SDan McDonald pbsize = dkmext->dki_pbsize;
693a5b57771SDan McDonald } else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO,
694a5b57771SDan McDonald (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) {
695a5b57771SDan McDonald VDEV_DEBUG(
696a5b57771SDan McDonald "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n",
697a5b57771SDan McDonald vd->vdev_path);
698a5b57771SDan McDonald capacity = dkm->dki_capacity - 1;
699a5b57771SDan McDonald blksz = dkm->dki_lbsize;
700a5b57771SDan McDonald pbsize = blksz;
701a5b57771SDan McDonald } else {
702a5b57771SDan McDonald VDEV_DEBUG("vdev_disk_open(\"%s\"): "
703a5b57771SDan McDonald "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
704a5b57771SDan McDonald vd->vdev_path, error);
705a5b57771SDan McDonald pbsize = DEV_BSIZE;
706a5b57771SDan McDonald }
707bef6b7d2Swebaker
708bf16b11eSMatthew Ahrens *ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;
709bef6b7d2Swebaker
7104263d13fSGeorge Wilson if (vd->vdev_wholedisk == 1) {
7114263d13fSGeorge Wilson int wce = 1;
7124263d13fSGeorge Wilson
713a5b57771SDan McDonald if (error == 0) {
7144263d13fSGeorge Wilson /*
715a5b57771SDan McDonald * If we have the capability to expand, we'd have
716a5b57771SDan McDonald * found out via success from DKIOCGMEDIAINFO{,EXT}.
717a5b57771SDan McDonald * Adjust max_psize upward accordingly since we know
718a5b57771SDan McDonald * we own the whole disk now.
719a5b57771SDan McDonald */
720c39a2aaeSGeorge Wilson *max_psize = capacity * blksz;
721a5b57771SDan McDonald }
722a5b57771SDan McDonald
723a5b57771SDan McDonald /*
724a5b57771SDan McDonald * Since we own the whole disk, try to enable disk write
725a5b57771SDan McDonald * caching. We ignore errors because it's OK if we can't do it.
7264263d13fSGeorge Wilson */
7274263d13fSGeorge Wilson (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
7284263d13fSGeorge Wilson FKIOCTL, kcred, NULL);
7294263d13fSGeorge Wilson }
7304263d13fSGeorge Wilson
731b468a217Seschrock /*
732b468a217Seschrock * Clear the nowritecache bit, so that on a vdev_reopen() we will
733b468a217Seschrock * try again.
734b468a217Seschrock */
735b468a217Seschrock vd->vdev_nowritecache = B_FALSE;
736b468a217Seschrock
737084fd14fSBrian Behlendorf if (ldi_ioctl(dvd->vd_lh, DKIOC_CANFREE, (intptr_t)&can_free, FKIOCTL,
738084fd14fSBrian Behlendorf kcred, NULL) == 0 && can_free == 1) {
739084fd14fSBrian Behlendorf vd->vdev_has_trim = B_TRUE;
740084fd14fSBrian Behlendorf } else {
741084fd14fSBrian Behlendorf vd->vdev_has_trim = B_FALSE;
742084fd14fSBrian Behlendorf }
743084fd14fSBrian Behlendorf
744fb05b94aSJerry Jelinek if (zfs_no_trim == 1)
745fb05b94aSJerry Jelinek vd->vdev_has_trim = B_FALSE;
746fb05b94aSJerry Jelinek
747084fd14fSBrian Behlendorf /* Currently only supported for ZoL. */
748084fd14fSBrian Behlendorf vd->vdev_has_securetrim = B_FALSE;
749084fd14fSBrian Behlendorf
75012a8814cSTom Caputi /* Inform the ZIO pipeline that we are non-rotational */
75112a8814cSTom Caputi vd->vdev_nonrot = B_FALSE;
75212a8814cSTom Caputi if (ldi_prop_exists(dvd->vd_lh, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
75312a8814cSTom Caputi "device-solid-state")) {
75412a8814cSTom Caputi if (ldi_prop_get_int(dvd->vd_lh,
75512a8814cSTom Caputi LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
75612a8814cSTom Caputi "device-solid-state", B_FALSE) != 0)
75712a8814cSTom Caputi vd->vdev_nonrot = B_TRUE;
75812a8814cSTom Caputi }
75912a8814cSTom Caputi
760fa9e4066Sahrens return (0);
761fa9e4066Sahrens }
762fa9e4066Sahrens
763fa9e4066Sahrens static void
vdev_disk_close(vdev_t * vd)764fa9e4066Sahrens vdev_disk_close(vdev_t *vd)
765fa9e4066Sahrens {
766fa9e4066Sahrens vdev_disk_t *dvd = vd->vdev_tsd;
767fa9e4066Sahrens
768095bcd66SGeorge Wilson if (vd->vdev_reopening || dvd == NULL)
769fa9e4066Sahrens return;
770fa9e4066Sahrens
77139cddb10SJoshua M. Clulow if (dvd->vd_minor != NULL) {
772fa9e4066Sahrens ddi_devid_str_free(dvd->vd_minor);
77339cddb10SJoshua M. Clulow dvd->vd_minor = NULL;
77439cddb10SJoshua M. Clulow }
775fa9e4066Sahrens
77639cddb10SJoshua M. Clulow if (dvd->vd_devid != NULL) {
777fa9e4066Sahrens ddi_devid_free(dvd->vd_devid);
77839cddb10SJoshua M. Clulow dvd->vd_devid = NULL;
77939cddb10SJoshua M. Clulow }
780fa9e4066Sahrens
78139cddb10SJoshua M. Clulow if (dvd->vd_lh != NULL) {
7828ad4d6ddSJeff Bonwick (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
78339cddb10SJoshua M. Clulow dvd->vd_lh = NULL;
78439cddb10SJoshua M. Clulow }
785fa9e4066Sahrens
78698d1cbfeSGeorge Wilson vd->vdev_delayed_close = B_FALSE;
78739cddb10SJoshua M. Clulow vdev_disk_free(vd);
788fa9e4066Sahrens }
789fa9e4066Sahrens
790ac04831dSMike Gerdts static int
vdev_disk_ldi_physio(ldi_handle_t vd_lh,caddr_t data,size_t size,uint64_t offset,int flags)791810e43b2SBill Pijewski vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
792810e43b2SBill Pijewski size_t size, uint64_t offset, int flags)
793e7cbe64fSgw25295 {
794e7cbe64fSgw25295 buf_t *bp;
795e7cbe64fSgw25295 int error = 0;
796e7cbe64fSgw25295
797e7cbe64fSgw25295 if (vd_lh == NULL)
798be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL));
799e7cbe64fSgw25295
800e7cbe64fSgw25295 ASSERT(flags & B_READ || flags & B_WRITE);
801e7cbe64fSgw25295
802e7cbe64fSgw25295 bp = getrbuf(KM_SLEEP);
803e7cbe64fSgw25295 bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
804e7cbe64fSgw25295 bp->b_bcount = size;
805e7cbe64fSgw25295 bp->b_un.b_addr = (void *)data;
806e7cbe64fSgw25295 bp->b_lblkno = lbtodb(offset);
807e7cbe64fSgw25295 bp->b_bufsize = size;
808e7cbe64fSgw25295
809e7cbe64fSgw25295 error = ldi_strategy(vd_lh, bp);
810e7cbe64fSgw25295 ASSERT(error == 0);
811e7cbe64fSgw25295 if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
812be6fd75aSMatthew Ahrens error = SET_ERROR(EIO);
813e7cbe64fSgw25295 freerbuf(bp);
814e7cbe64fSgw25295
815e7cbe64fSgw25295 return (error);
816e7cbe64fSgw25295 }
817e7cbe64fSgw25295
818c62757b2SToomas Soome static int
vdev_disk_dumpio(vdev_t * vd,caddr_t data,size_t size,uint64_t offset,uint64_t origoffset __unused,boolean_t doread,boolean_t isdump)819ac04831dSMike Gerdts vdev_disk_dumpio(vdev_t *vd, caddr_t data, size_t size,
8201b500975SMike Gerdts uint64_t offset, uint64_t origoffset __unused, boolean_t doread,
8211b500975SMike Gerdts boolean_t isdump)
822ac04831dSMike Gerdts {
823ac04831dSMike Gerdts vdev_disk_t *dvd = vd->vdev_tsd;
824ac04831dSMike Gerdts int flags = doread ? B_READ : B_WRITE;
825ac04831dSMike Gerdts
826ac04831dSMike Gerdts /*
827ac04831dSMike Gerdts * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
828ac04831dSMike Gerdts * Nothing to be done here but return failure.
829ac04831dSMike Gerdts */
830ac04831dSMike Gerdts if (dvd == NULL || dvd->vd_ldi_offline) {
8311b500975SMike Gerdts return (SET_ERROR(ENXIO));
832ac04831dSMike Gerdts }
833ac04831dSMike Gerdts
834ac04831dSMike Gerdts ASSERT(vd->vdev_ops == &vdev_disk_ops);
835ac04831dSMike Gerdts
836ac04831dSMike Gerdts offset += VDEV_LABEL_START_SIZE;
837ac04831dSMike Gerdts
838ac04831dSMike Gerdts /*
839ac04831dSMike Gerdts * If in the context of an active crash dump, use the ldi_dump(9F)
840ac04831dSMike Gerdts * call instead of ldi_strategy(9F) as usual.
841ac04831dSMike Gerdts */
842ac04831dSMike Gerdts if (isdump) {
843ac04831dSMike Gerdts ASSERT3P(dvd, !=, NULL);
844ac04831dSMike Gerdts return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
845ac04831dSMike Gerdts lbtodb(size)));
846ac04831dSMike Gerdts }
847ac04831dSMike Gerdts
848ac04831dSMike Gerdts return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
849ac04831dSMike Gerdts }
850ac04831dSMike Gerdts
851ac04831dSMike Gerdts static int
vdev_disk_io_intr(buf_t * bp)852fa9e4066Sahrens vdev_disk_io_intr(buf_t *bp)
853fa9e4066Sahrens {
85431d7e8faSGeorge Wilson vdev_buf_t *vb = (vdev_buf_t *)bp;
85531d7e8faSGeorge Wilson zio_t *zio = vb->vb_io;
856fa9e4066Sahrens
85751ece835Seschrock /*
85851ece835Seschrock * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
85951ece835Seschrock * Rather than teach the rest of the stack about other error
86051ece835Seschrock * possibilities (EFAULT, etc), we normalize the error value here.
86151ece835Seschrock */
86251ece835Seschrock zio->io_error = (geterror(bp) != 0 ? EIO : 0);
86351ece835Seschrock
86451ece835Seschrock if (zio->io_error == 0 && bp->b_resid != 0)
865be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(EIO);
866fa9e4066Sahrens
867770499e1SDan Kimmel if (zio->io_type == ZIO_TYPE_READ) {
868770499e1SDan Kimmel abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, zio->io_size);
869770499e1SDan Kimmel } else {
870770499e1SDan Kimmel abd_return_buf(zio->io_abd, bp->b_un.b_addr, zio->io_size);
871770499e1SDan Kimmel }
872770499e1SDan Kimmel
87331d7e8faSGeorge Wilson kmem_free(vb, sizeof (vdev_buf_t));
874fa9e4066Sahrens
87597e81309SPrakash Surya zio_delay_interrupt(zio);
876c62757b2SToomas Soome return (0);
877fa9e4066Sahrens }
878fa9e4066Sahrens
879fa9e4066Sahrens static void
vdev_disk_ioctl_free(zio_t * zio)880f4a72450SJeff Bonwick vdev_disk_ioctl_free(zio_t *zio)
881f4a72450SJeff Bonwick {
882f4a72450SJeff Bonwick kmem_free(zio->io_vsd, sizeof (struct dk_callback));
883f4a72450SJeff Bonwick }
884f4a72450SJeff Bonwick
88522fe2c88SJonathan Adams static const zio_vsd_ops_t vdev_disk_vsd_ops = {
88622fe2c88SJonathan Adams vdev_disk_ioctl_free,
88722fe2c88SJonathan Adams zio_vsd_default_cksum_report
88822fe2c88SJonathan Adams };
88922fe2c88SJonathan Adams
890f4a72450SJeff Bonwick static void
vdev_disk_ioctl_done(void * zio_arg,int error)891fa9e4066Sahrens vdev_disk_ioctl_done(void *zio_arg, int error)
892fa9e4066Sahrens {
893fa9e4066Sahrens zio_t *zio = zio_arg;
894fa9e4066Sahrens
895fa9e4066Sahrens zio->io_error = error;
896fa9e4066Sahrens
897e05725b1Sbonwick zio_interrupt(zio);
898fa9e4066Sahrens }
899fa9e4066Sahrens
900738f37bcSGeorge Wilson static void
vdev_disk_io_start(zio_t * zio)901fa9e4066Sahrens vdev_disk_io_start(zio_t *zio)
902fa9e4066Sahrens {
903fa9e4066Sahrens vdev_t *vd = zio->io_vd;
904fa9e4066Sahrens vdev_disk_t *dvd = vd->vdev_tsd;
905084fd14fSBrian Behlendorf unsigned long trim_flags = 0;
90631d7e8faSGeorge Wilson vdev_buf_t *vb;
907e14bb325SJeff Bonwick struct dk_callback *dkc;
908fa9e4066Sahrens buf_t *bp;
909e14bb325SJeff Bonwick int error;
910fa9e4066Sahrens
91139cddb10SJoshua M. Clulow /*
91239cddb10SJoshua M. Clulow * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
91339cddb10SJoshua M. Clulow * Nothing to be done here but return failure.
91439cddb10SJoshua M. Clulow */
9151b500975SMike Gerdts if (dvd == NULL || dvd->vd_ldi_offline) {
91639cddb10SJoshua M. Clulow zio->io_error = ENXIO;
917738f37bcSGeorge Wilson zio_interrupt(zio);
918738f37bcSGeorge Wilson return;
91939cddb10SJoshua M. Clulow }
92039cddb10SJoshua M. Clulow
921084fd14fSBrian Behlendorf switch (zio->io_type) {
922084fd14fSBrian Behlendorf case ZIO_TYPE_IOCTL:
923fa9e4066Sahrens /* XXPOLICY */
9240a4e9518Sgw25295 if (!vdev_readable(vd)) {
925be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(ENXIO);
926738f37bcSGeorge Wilson zio_interrupt(zio);
927738f37bcSGeorge Wilson return;
928fa9e4066Sahrens }
929fa9e4066Sahrens
930fa9e4066Sahrens switch (zio->io_cmd) {
931fa9e4066Sahrens
932fa9e4066Sahrens case DKIOCFLUSHWRITECACHE:
933fa9e4066Sahrens
934a2eea2e1Sahrens if (zfs_nocacheflush)
935a2eea2e1Sahrens break;
936a2eea2e1Sahrens
937b468a217Seschrock if (vd->vdev_nowritecache) {
938be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(ENOTSUP);
939b468a217Seschrock break;
940b468a217Seschrock }
941b468a217Seschrock
942e14bb325SJeff Bonwick zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
94322fe2c88SJonathan Adams zio->io_vsd_ops = &vdev_disk_vsd_ops;
944e14bb325SJeff Bonwick
945e14bb325SJeff Bonwick dkc->dkc_callback = vdev_disk_ioctl_done;
946e14bb325SJeff Bonwick dkc->dkc_flag = FLUSH_VOLATILE;
947e14bb325SJeff Bonwick dkc->dkc_cookie = zio;
948fa9e4066Sahrens
949fa9e4066Sahrens error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
950e14bb325SJeff Bonwick (uintptr_t)dkc, FKIOCTL, kcred, NULL);
951fa9e4066Sahrens
952fa9e4066Sahrens if (error == 0) {
953fa9e4066Sahrens /*
954fa9e4066Sahrens * The ioctl will be done asychronously,
955fa9e4066Sahrens * and will call vdev_disk_ioctl_done()
956fa9e4066Sahrens * upon completion.
957fa9e4066Sahrens */
958738f37bcSGeorge Wilson return;
959e05725b1Sbonwick }
960e05725b1Sbonwick
961fa9e4066Sahrens zio->io_error = error;
962b468a217Seschrock
963fa9e4066Sahrens break;
964fa9e4066Sahrens
965fa9e4066Sahrens default:
966be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(ENOTSUP);
967fa9e4066Sahrens }
968fa9e4066Sahrens
969738f37bcSGeorge Wilson zio_execute(zio);
970738f37bcSGeorge Wilson return;
971084fd14fSBrian Behlendorf
972084fd14fSBrian Behlendorf case ZIO_TYPE_TRIM:
973fb05b94aSJerry Jelinek if (zfs_no_trim == 1 || !vd->vdev_has_trim) {
974084fd14fSBrian Behlendorf zio->io_error = SET_ERROR(ENOTSUP);
975084fd14fSBrian Behlendorf zio_execute(zio);
976084fd14fSBrian Behlendorf return;
977084fd14fSBrian Behlendorf }
978084fd14fSBrian Behlendorf /* Currently only supported on ZoL. */
979084fd14fSBrian Behlendorf ASSERT0(zio->io_trim_flags & ZIO_TRIM_SECURE);
980084fd14fSBrian Behlendorf
981084fd14fSBrian Behlendorf /* dkioc_free_list_t is already declared to hold one entry */
982084fd14fSBrian Behlendorf dkioc_free_list_t dfl;
983084fd14fSBrian Behlendorf dfl.dfl_flags = 0;
984084fd14fSBrian Behlendorf dfl.dfl_num_exts = 1;
985d0562c10SJerry Jelinek dfl.dfl_offset = 0;
986084fd14fSBrian Behlendorf dfl.dfl_exts[0].dfle_start = zio->io_offset;
987084fd14fSBrian Behlendorf dfl.dfl_exts[0].dfle_length = zio->io_size;
988084fd14fSBrian Behlendorf
989084fd14fSBrian Behlendorf zio->io_error = ldi_ioctl(dvd->vd_lh, DKIOCFREE,
990084fd14fSBrian Behlendorf (uintptr_t)&dfl, FKIOCTL, kcred, NULL);
991084fd14fSBrian Behlendorf
992084fd14fSBrian Behlendorf if (zio->io_error == ENOTSUP || zio->io_error == ENOTTY) {
993084fd14fSBrian Behlendorf /*
994084fd14fSBrian Behlendorf * The device must have changed and now TRIM is
995084fd14fSBrian Behlendorf * no longer supported.
996084fd14fSBrian Behlendorf */
997084fd14fSBrian Behlendorf vd->vdev_has_trim = B_FALSE;
998084fd14fSBrian Behlendorf }
999084fd14fSBrian Behlendorf
1000084fd14fSBrian Behlendorf zio_interrupt(zio);
1001084fd14fSBrian Behlendorf return;
1002fa9e4066Sahrens }
1003fa9e4066Sahrens
1004f693d300SSteven Hartland ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
100597e81309SPrakash Surya zio->io_target_timestamp = zio_handle_io_delay(zio);
1006f693d300SSteven Hartland
100731d7e8faSGeorge Wilson vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
1008fa9e4066Sahrens
100931d7e8faSGeorge Wilson vb->vb_io = zio;
101031d7e8faSGeorge Wilson bp = &vb->vb_buf;
1011fa9e4066Sahrens
1012fa9e4066Sahrens bioinit(bp);
1013e14bb325SJeff Bonwick bp->b_flags = B_BUSY | B_NOCACHE |
10148956713aSEric Schrock (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
10158956713aSEric Schrock if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
10168956713aSEric Schrock bp->b_flags |= B_FAILFAST;
1017fa9e4066Sahrens bp->b_bcount = zio->io_size;
1018770499e1SDan Kimmel
1019770499e1SDan Kimmel if (zio->io_type == ZIO_TYPE_READ) {
1020770499e1SDan Kimmel bp->b_un.b_addr =
1021770499e1SDan Kimmel abd_borrow_buf(zio->io_abd, zio->io_size);
1022770499e1SDan Kimmel } else {
1023770499e1SDan Kimmel bp->b_un.b_addr =
1024770499e1SDan Kimmel abd_borrow_buf_copy(zio->io_abd, zio->io_size);
1025770499e1SDan Kimmel }
1026770499e1SDan Kimmel
1027fa9e4066Sahrens bp->b_lblkno = lbtodb(zio->io_offset);
1028fa9e4066Sahrens bp->b_bufsize = zio->io_size;
1029c62757b2SToomas Soome bp->b_iodone = vdev_disk_io_intr;
1030fa9e4066Sahrens
1031fa88c70fSJerry Jelinek /*
1032fa88c70fSJerry Jelinek * In general we would expect ldi_strategy() to return non-zero only
1033fa88c70fSJerry Jelinek * because of programming errors, but we've also seen this fail shortly
1034fa88c70fSJerry Jelinek * after a disk dies.
1035fa88c70fSJerry Jelinek */
1036fa88c70fSJerry Jelinek if (ldi_strategy(dvd->vd_lh, bp) != 0) {
1037fa88c70fSJerry Jelinek zio->io_error = ENXIO;
1038fa88c70fSJerry Jelinek zio_interrupt(zio);
1039fa88c70fSJerry Jelinek }
1040fa9e4066Sahrens }
1041fa9e4066Sahrens
1042e14bb325SJeff Bonwick static void
vdev_disk_io_done(zio_t * zio)1043fa9e4066Sahrens vdev_disk_io_done(zio_t *zio)
1044fa9e4066Sahrens {
1045e14bb325SJeff Bonwick vdev_t *vd = zio->io_vd;
1046ea8dc4b6Seschrock
10473d7072f8Seschrock /*
10483d7072f8Seschrock * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
10493d7072f8Seschrock * the device has been removed. If this is the case, then we trigger an
10500a4e9518Sgw25295 * asynchronous removal of the device. Otherwise, probe the device and
10511f7ad2e1Sgw25295 * make sure it's still accessible.
10523d7072f8Seschrock */
10531d713200SEric Schrock if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
10540a4e9518Sgw25295 vdev_disk_t *dvd = vd->vdev_tsd;
1055e14bb325SJeff Bonwick int state = DKIO_NONE;
10560a4e9518Sgw25295
1057e14bb325SJeff Bonwick if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
1058e14bb325SJeff Bonwick FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
10591d713200SEric Schrock /*
10601d713200SEric Schrock * We post the resource as soon as possible, instead of
10611d713200SEric Schrock * when the async removal actually happens, because the
10621d713200SEric Schrock * DE is using this information to discard previous I/O
10631d713200SEric Schrock * errors.
10641d713200SEric Schrock */
10651d713200SEric Schrock zfs_post_remove(zio->io_spa, vd);
10663d7072f8Seschrock vd->vdev_remove_wanted = B_TRUE;
10673d7072f8Seschrock spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
106898d1cbfeSGeorge Wilson } else if (!vd->vdev_delayed_close) {
106998d1cbfeSGeorge Wilson vd->vdev_delayed_close = B_TRUE;
107051ece835Seschrock }
10713d7072f8Seschrock }
10723d7072f8Seschrock }
10733d7072f8Seschrock
1074fa9e4066Sahrens vdev_ops_t vdev_disk_ops = {
1075a3874b8bSToomas Soome .vdev_op_open = vdev_disk_open,
1076a3874b8bSToomas Soome .vdev_op_close = vdev_disk_close,
1077a3874b8bSToomas Soome .vdev_op_asize = vdev_default_asize,
1078a3874b8bSToomas Soome .vdev_op_io_start = vdev_disk_io_start,
1079a3874b8bSToomas Soome .vdev_op_io_done = vdev_disk_io_done,
1080a3874b8bSToomas Soome .vdev_op_state_change = NULL,
1081a3874b8bSToomas Soome .vdev_op_need_resilver = NULL,
1082a3874b8bSToomas Soome .vdev_op_hold = vdev_disk_hold,
1083a3874b8bSToomas Soome .vdev_op_rele = vdev_disk_rele,
1084a3874b8bSToomas Soome .vdev_op_remap = NULL,
1085a3874b8bSToomas Soome .vdev_op_xlate = vdev_default_xlate,
1086ac04831dSMike Gerdts .vdev_op_dumpio = vdev_disk_dumpio,
1087a3874b8bSToomas Soome .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */
1088a3874b8bSToomas Soome .vdev_op_leaf = B_TRUE /* leaf vdev */
1089fa9e4066Sahrens };
1090e7cbe64fSgw25295
1091e7cbe64fSgw25295 /*
1092051aabe6Staylor * Given the root disk device devid or pathname, read the label from
1093051aabe6Staylor * the device, and construct a configuration nvlist.
1094e7cbe64fSgw25295 */
1095f940fbb1SLin Ling int
vdev_disk_read_rootlabel(const char * devpath,const char * devid,nvlist_t ** config)109630c304d9SJoshua M. Clulow vdev_disk_read_rootlabel(const char *devpath, const char *devid,
109730c304d9SJoshua M. Clulow nvlist_t **config)
1098e7cbe64fSgw25295 {
1099e7cbe64fSgw25295 ldi_handle_t vd_lh;
1100e7cbe64fSgw25295 vdev_label_t *label;
1101e7cbe64fSgw25295 uint64_t s, size;
1102e7cbe64fSgw25295 int l;
1103051aabe6Staylor ddi_devid_t tmpdevid;
1104f4565e39SLin Ling int error = -1;
1105051aabe6Staylor char *minor_name;
1106e7cbe64fSgw25295
1107e7cbe64fSgw25295 /*
1108e7cbe64fSgw25295 * Read the device label and build the nvlist.
1109e7cbe64fSgw25295 */
111030c304d9SJoshua M. Clulow if (devid != NULL && ddi_devid_str_decode((char *)devid, &tmpdevid,
1111051aabe6Staylor &minor_name) == 0) {
1112051aabe6Staylor error = ldi_open_by_devid(tmpdevid, minor_name,
11138ad4d6ddSJeff Bonwick FREAD, kcred, &vd_lh, zfs_li);
1114051aabe6Staylor ddi_devid_free(tmpdevid);
1115051aabe6Staylor ddi_devid_str_free(minor_name);
1116051aabe6Staylor }
1117051aabe6Staylor
111830c304d9SJoshua M. Clulow if (error != 0 && (error = ldi_open_by_name((char *)devpath, FREAD,
111930c304d9SJoshua M. Clulow kcred, &vd_lh, zfs_li)) != 0) {
1120f940fbb1SLin Ling return (error);
112130c304d9SJoshua M. Clulow }
1122e7cbe64fSgw25295
1123bf82a41bSeschrock if (ldi_get_size(vd_lh, &s)) {
1124bf82a41bSeschrock (void) ldi_close(vd_lh, FREAD, kcred);
1125be6fd75aSMatthew Ahrens return (SET_ERROR(EIO));
1126bf82a41bSeschrock }
1127e7cbe64fSgw25295
1128e7cbe64fSgw25295 size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
1129e7cbe64fSgw25295 label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);
1130e7cbe64fSgw25295
113117f1e64aSEric Taylor *config = NULL;
1132e7cbe64fSgw25295 for (l = 0; l < VDEV_LABELS; l++) {
1133e7cbe64fSgw25295 uint64_t offset, state, txg = 0;
1134e7cbe64fSgw25295
1135e7cbe64fSgw25295 /* read vdev label */
1136e7cbe64fSgw25295 offset = vdev_label_offset(size, l, 0);
1137810e43b2SBill Pijewski if (vdev_disk_ldi_physio(vd_lh, (caddr_t)label,
11382264ca7fSLin Ling VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
1139e7cbe64fSgw25295 continue;
1140e7cbe64fSgw25295
1141e7cbe64fSgw25295 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
1142f940fbb1SLin Ling sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
1143f940fbb1SLin Ling *config = NULL;
1144e7cbe64fSgw25295 continue;
1145e7cbe64fSgw25295 }
1146e7cbe64fSgw25295
1147f940fbb1SLin Ling if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
1148e7cbe64fSgw25295 &state) != 0 || state >= POOL_STATE_DESTROYED) {
1149f940fbb1SLin Ling nvlist_free(*config);
1150f940fbb1SLin Ling *config = NULL;
1151e7cbe64fSgw25295 continue;
1152e7cbe64fSgw25295 }
1153e7cbe64fSgw25295
1154f940fbb1SLin Ling if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
1155e7cbe64fSgw25295 &txg) != 0 || txg == 0) {
1156f940fbb1SLin Ling nvlist_free(*config);
1157f940fbb1SLin Ling *config = NULL;
1158e7cbe64fSgw25295 continue;
1159e7cbe64fSgw25295 }
1160e7cbe64fSgw25295
1161e7cbe64fSgw25295 break;
1162e7cbe64fSgw25295 }
1163e7cbe64fSgw25295
1164e7cbe64fSgw25295 kmem_free(label, sizeof (vdev_label_t));
1165bf82a41bSeschrock (void) ldi_close(vd_lh, FREAD, kcred);
116617f1e64aSEric Taylor if (*config == NULL)
1167be6fd75aSMatthew Ahrens error = SET_ERROR(EIDRM);
1168bf82a41bSeschrock
1169f940fbb1SLin Ling return (error);
1170e7cbe64fSgw25295 }
117130c304d9SJoshua M. Clulow
117230c304d9SJoshua M. Clulow struct veb {
117330c304d9SJoshua M. Clulow list_t veb_ents;
117430c304d9SJoshua M. Clulow boolean_t veb_scanned;
1175*8b26092dSJoshua M. Clulow char *veb_force_path;
117630c304d9SJoshua M. Clulow };
117730c304d9SJoshua M. Clulow
117830c304d9SJoshua M. Clulow struct veb_ent {
117930c304d9SJoshua M. Clulow uint64_t vebe_pool_guid;
118030c304d9SJoshua M. Clulow uint64_t vebe_vdev_guid;
118130c304d9SJoshua M. Clulow
118230c304d9SJoshua M. Clulow char *vebe_devpath;
118330c304d9SJoshua M. Clulow
118430c304d9SJoshua M. Clulow list_node_t vebe_link;
118530c304d9SJoshua M. Clulow };
118630c304d9SJoshua M. Clulow
118730c304d9SJoshua M. Clulow static kmutex_t veb_lock;
118830c304d9SJoshua M. Clulow static struct veb *veb;
118930c304d9SJoshua M. Clulow
119030c304d9SJoshua M. Clulow static int
vdev_disk_preroot_scan_walk(const char * devpath,void * arg)119130c304d9SJoshua M. Clulow vdev_disk_preroot_scan_walk(const char *devpath, void *arg)
119230c304d9SJoshua M. Clulow {
119330c304d9SJoshua M. Clulow int r;
119430c304d9SJoshua M. Clulow nvlist_t *cfg = NULL;
119530c304d9SJoshua M. Clulow uint64_t pguid = 0, vguid = 0;
119630c304d9SJoshua M. Clulow
119730c304d9SJoshua M. Clulow /*
119830c304d9SJoshua M. Clulow * Attempt to read the label from this block device.
119930c304d9SJoshua M. Clulow */
120030c304d9SJoshua M. Clulow if ((r = vdev_disk_read_rootlabel(devpath, NULL, &cfg)) != 0) {
120130c304d9SJoshua M. Clulow /*
120230c304d9SJoshua M. Clulow * Many of the available block devices will represent slices or
120330c304d9SJoshua M. Clulow * partitions of disks, or may represent disks that are not at
120430c304d9SJoshua M. Clulow * all initialised with ZFS. As this is a best effort
120530c304d9SJoshua M. Clulow * mechanism to locate an alternate path to a particular vdev,
120630c304d9SJoshua M. Clulow * we will ignore any failures and keep scanning.
120730c304d9SJoshua M. Clulow */
120830c304d9SJoshua M. Clulow return (PREROOT_WALK_BLOCK_DEVICES_NEXT);
120930c304d9SJoshua M. Clulow }
121030c304d9SJoshua M. Clulow
121130c304d9SJoshua M. Clulow /*
121230c304d9SJoshua M. Clulow * Determine the pool and vdev GUID read from the label for this
121330c304d9SJoshua M. Clulow * device. Both values must be present and have a non-zero value.
121430c304d9SJoshua M. Clulow */
121530c304d9SJoshua M. Clulow if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pguid) != 0 ||
121630c304d9SJoshua M. Clulow nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_GUID, &vguid) != 0 ||
121730c304d9SJoshua M. Clulow pguid == 0 || vguid == 0) {
121830c304d9SJoshua M. Clulow /*
121930c304d9SJoshua M. Clulow * This label was not complete.
122030c304d9SJoshua M. Clulow */
122130c304d9SJoshua M. Clulow goto out;
122230c304d9SJoshua M. Clulow }
122330c304d9SJoshua M. Clulow
122430c304d9SJoshua M. Clulow /*
122530c304d9SJoshua M. Clulow * Keep track of all of the GUID-to-devpath mappings we find so that
122630c304d9SJoshua M. Clulow * vdev_disk_preroot_lookup() can search them.
122730c304d9SJoshua M. Clulow */
122830c304d9SJoshua M. Clulow struct veb_ent *vebe = kmem_zalloc(sizeof (*vebe), KM_SLEEP);
122930c304d9SJoshua M. Clulow vebe->vebe_pool_guid = pguid;
123030c304d9SJoshua M. Clulow vebe->vebe_vdev_guid = vguid;
123130c304d9SJoshua M. Clulow vebe->vebe_devpath = spa_strdup(devpath);
123230c304d9SJoshua M. Clulow
123330c304d9SJoshua M. Clulow list_insert_tail(&veb->veb_ents, vebe);
123430c304d9SJoshua M. Clulow
123530c304d9SJoshua M. Clulow out:
123630c304d9SJoshua M. Clulow nvlist_free(cfg);
123730c304d9SJoshua M. Clulow return (PREROOT_WALK_BLOCK_DEVICES_NEXT);
123830c304d9SJoshua M. Clulow }
123930c304d9SJoshua M. Clulow
124030c304d9SJoshua M. Clulow const char *
vdev_disk_preroot_lookup(uint64_t pool_guid,uint64_t vdev_guid)124130c304d9SJoshua M. Clulow vdev_disk_preroot_lookup(uint64_t pool_guid, uint64_t vdev_guid)
124230c304d9SJoshua M. Clulow {
124330c304d9SJoshua M. Clulow if (pool_guid == 0 || vdev_guid == 0) {
124430c304d9SJoshua M. Clulow /*
124530c304d9SJoshua M. Clulow * If we aren't provided both a pool and a vdev GUID, we cannot
124630c304d9SJoshua M. Clulow * perform a lookup.
124730c304d9SJoshua M. Clulow */
124830c304d9SJoshua M. Clulow return (NULL);
124930c304d9SJoshua M. Clulow }
125030c304d9SJoshua M. Clulow
125130c304d9SJoshua M. Clulow mutex_enter(&veb_lock);
125230c304d9SJoshua M. Clulow if (veb == NULL) {
125330c304d9SJoshua M. Clulow /*
125430c304d9SJoshua M. Clulow * If vdev_disk_preroot_fini() has been called already, there
125530c304d9SJoshua M. Clulow * is nothing we can do.
125630c304d9SJoshua M. Clulow */
125730c304d9SJoshua M. Clulow mutex_exit(&veb_lock);
125830c304d9SJoshua M. Clulow return (NULL);
125930c304d9SJoshua M. Clulow }
126030c304d9SJoshua M. Clulow
126130c304d9SJoshua M. Clulow /*
126230c304d9SJoshua M. Clulow * We want to perform at most one scan of all block devices per boot.
126330c304d9SJoshua M. Clulow */
126430c304d9SJoshua M. Clulow if (!veb->veb_scanned) {
126530c304d9SJoshua M. Clulow cmn_err(CE_NOTE, "Performing full ZFS device scan!");
126630c304d9SJoshua M. Clulow
126730c304d9SJoshua M. Clulow preroot_walk_block_devices(vdev_disk_preroot_scan_walk, NULL);
126830c304d9SJoshua M. Clulow
126930c304d9SJoshua M. Clulow veb->veb_scanned = B_TRUE;
127030c304d9SJoshua M. Clulow }
127130c304d9SJoshua M. Clulow
127230c304d9SJoshua M. Clulow const char *path = NULL;
127330c304d9SJoshua M. Clulow for (struct veb_ent *vebe = list_head(&veb->veb_ents); vebe != NULL;
127430c304d9SJoshua M. Clulow vebe = list_next(&veb->veb_ents, vebe)) {
127530c304d9SJoshua M. Clulow if (vebe->vebe_pool_guid == pool_guid &&
127630c304d9SJoshua M. Clulow vebe->vebe_vdev_guid == vdev_guid) {
127730c304d9SJoshua M. Clulow path = vebe->vebe_devpath;
127830c304d9SJoshua M. Clulow break;
127930c304d9SJoshua M. Clulow }
128030c304d9SJoshua M. Clulow }
128130c304d9SJoshua M. Clulow
128230c304d9SJoshua M. Clulow mutex_exit(&veb_lock);
128330c304d9SJoshua M. Clulow
128430c304d9SJoshua M. Clulow return (path);
128530c304d9SJoshua M. Clulow }
128630c304d9SJoshua M. Clulow
1287*8b26092dSJoshua M. Clulow const char *
vdev_disk_preroot_force_path(void)1288*8b26092dSJoshua M. Clulow vdev_disk_preroot_force_path(void)
1289*8b26092dSJoshua M. Clulow {
1290*8b26092dSJoshua M. Clulow const char *force_path = NULL;
1291*8b26092dSJoshua M. Clulow
1292*8b26092dSJoshua M. Clulow mutex_enter(&veb_lock);
1293*8b26092dSJoshua M. Clulow if (veb != NULL) {
1294*8b26092dSJoshua M. Clulow force_path = veb->veb_force_path;
1295*8b26092dSJoshua M. Clulow }
1296*8b26092dSJoshua M. Clulow mutex_exit(&veb_lock);
1297*8b26092dSJoshua M. Clulow
1298*8b26092dSJoshua M. Clulow return (force_path);
1299*8b26092dSJoshua M. Clulow }
1300*8b26092dSJoshua M. Clulow
130130c304d9SJoshua M. Clulow void
vdev_disk_preroot_init(const char * force_path)1302*8b26092dSJoshua M. Clulow vdev_disk_preroot_init(const char *force_path)
130330c304d9SJoshua M. Clulow {
130430c304d9SJoshua M. Clulow mutex_init(&veb_lock, NULL, MUTEX_DEFAULT, NULL);
130530c304d9SJoshua M. Clulow
130630c304d9SJoshua M. Clulow VERIFY3P(veb, ==, NULL);
130730c304d9SJoshua M. Clulow veb = kmem_zalloc(sizeof (*veb), KM_SLEEP);
130830c304d9SJoshua M. Clulow list_create(&veb->veb_ents, sizeof (struct veb_ent),
130930c304d9SJoshua M. Clulow offsetof(struct veb_ent, vebe_link));
131030c304d9SJoshua M. Clulow veb->veb_scanned = B_FALSE;
1311*8b26092dSJoshua M. Clulow if (force_path != NULL) {
1312*8b26092dSJoshua M. Clulow veb->veb_force_path = spa_strdup(force_path);
1313*8b26092dSJoshua M. Clulow }
131430c304d9SJoshua M. Clulow }
131530c304d9SJoshua M. Clulow
131630c304d9SJoshua M. Clulow void
vdev_disk_preroot_fini(void)131730c304d9SJoshua M. Clulow vdev_disk_preroot_fini(void)
131830c304d9SJoshua M. Clulow {
131930c304d9SJoshua M. Clulow mutex_enter(&veb_lock);
132030c304d9SJoshua M. Clulow
132130c304d9SJoshua M. Clulow if (veb != NULL) {
132230c304d9SJoshua M. Clulow while (!list_is_empty(&veb->veb_ents)) {
132330c304d9SJoshua M. Clulow struct veb_ent *vebe = list_remove_head(&veb->veb_ents);
132430c304d9SJoshua M. Clulow
132530c304d9SJoshua M. Clulow spa_strfree(vebe->vebe_devpath);
132630c304d9SJoshua M. Clulow
132730c304d9SJoshua M. Clulow kmem_free(vebe, sizeof (*vebe));
132830c304d9SJoshua M. Clulow }
132930c304d9SJoshua M. Clulow
1330*8b26092dSJoshua M. Clulow if (veb->veb_force_path != NULL) {
1331*8b26092dSJoshua M. Clulow spa_strfree(veb->veb_force_path);
1332*8b26092dSJoshua M. Clulow }
1333*8b26092dSJoshua M. Clulow
133430c304d9SJoshua M. Clulow kmem_free(veb, sizeof (*veb));
133530c304d9SJoshua M. Clulow veb = NULL;
133630c304d9SJoshua M. Clulow }
133730c304d9SJoshua M. Clulow
133830c304d9SJoshua M. Clulow mutex_exit(&veb_lock);
133930c304d9SJoshua M. Clulow }
1340