xref: /illumos-gate/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c (revision 6528affb110ab8cf8b4464874b4a07f3f937475d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
/*
 * The ZFS retire agent is responsible for managing hot spares across all pools.
 * When we see a device fault, we try to open the associated pool and look for
 * any hot spares.  We iterate over the pool's hot spares, attempting a
 * 'zpool replace' with each one in turn and stopping at the first success.
 */
34 
35 #include <fm/fmd_api.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/fm/protocol.h>
38 #include <sys/fm/fs/zfs.h>
39 #include <libzfs.h>
40 
/*
 * Search state handed to find_pool() through zpool_iter(): the GUID of the
 * pool being looked for, and the handle of the pool that matched.
 */
typedef struct find_cbdata {
	uint64_t	cb_guid;	/* in: GUID of the pool being sought */
	zpool_handle_t	*cb_zhp;	/* out: set by find_pool() on a match */
} find_cbdata_t;
48 
49 static int
50 find_pool(zpool_handle_t *zhp, void *data)
51 {
52 	find_cbdata_t *cbp = data;
53 
54 	if (cbp->cb_guid == zpool_get_guid(zhp)) {
55 		cbp->cb_zhp = zhp;
56 		return (1);
57 	}
58 
59 	zpool_close(zhp);
60 	return (0);
61 }
62 
63 /*
64  * Find a vdev within a tree with a matching GUID.
65  */
66 static nvlist_t *
67 find_vdev(nvlist_t *nv, uint64_t search)
68 {
69 	uint64_t guid;
70 	nvlist_t **child;
71 	uint_t c, children;
72 	nvlist_t *ret;
73 
74 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
75 	    guid == search)
76 		return (nv);
77 
78 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
79 	    &child, &children) != 0)
80 		return (NULL);
81 
82 	for (c = 0; c < children; c++) {
83 		if ((ret = find_vdev(child[c], search)) != NULL)
84 			return (ret);
85 	}
86 
87 	return (NULL);
88 }
89 
90 /*ARGSUSED*/
91 static void
92 zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
93     const char *class)
94 {
95 	uint64_t pool_guid, vdev_guid;
96 	char *dev_name;
97 	zpool_handle_t *zhp;
98 	nvlist_t *resource, *config, *nvroot;
99 	nvlist_t *vdev;
100 	nvlist_t **spares, **faults;
101 	uint_t s, nspares, f, nfaults;
102 	nvlist_t *replacement;
103 	find_cbdata_t cb;
104 	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
105 
106 	/*
107 	 * Get information from the fault.
108 	 */
109 	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
110 	    &faults, &nfaults) != 0)
111 		return;
112 
113 	for (f = 0; f < nfaults; f++) {
114 		if (nvlist_lookup_nvlist(faults[f], FM_FAULT_RESOURCE,
115 		    &resource) != 0 ||
116 		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
117 		    &pool_guid) != 0 ||
118 		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
119 		    &vdev_guid) != 0)
120 			continue;
121 
122 		/*
123 		 * From the pool guid and vdev guid, get the pool name and
124 		 * device name.
125 		 */
126 		cb.cb_guid = pool_guid;
127 		if (zpool_iter(zhdl, find_pool, &cb) != 1)
128 			continue;
129 
130 		zhp = cb.cb_zhp;
131 		config = zpool_get_config(zhp, NULL);
132 		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
133 		    &nvroot) != 0) {
134 			zpool_close(zhp);
135 			continue;
136 		}
137 
138 		if ((vdev = find_vdev(nvroot, vdev_guid)) == NULL) {
139 			zpool_close(zhp);
140 			continue;
141 		}
142 
143 		/*
144 		 * Find out if there are any hot spares available in the pool.
145 		 */
146 		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
147 		    &spares, &nspares) != 0) {
148 			zpool_close(zhp);
149 			continue;
150 		}
151 
152 		if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) {
153 			zpool_close(zhp);
154 			continue;
155 		}
156 
157 		if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
158 		    VDEV_TYPE_ROOT) != 0) {
159 			nvlist_free(replacement);
160 			zpool_close(zhp);
161 			continue;
162 		}
163 
164 		dev_name = zpool_vdev_name(zhdl, zhp, vdev);
165 
166 		/*
167 		 * Try to replace each spare, ending when we successfully
168 		 * replace it.
169 		 */
170 		for (s = 0; s < nspares; s++) {
171 			char *spare_name;
172 
173 			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
174 			    &spare_name) != 0)
175 				continue;
176 
177 			if (nvlist_add_nvlist_array(replacement,
178 			    ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
179 				continue;
180 
181 			if (zpool_vdev_attach(zhp, dev_name, spare_name,
182 			    replacement, B_TRUE) == 0)
183 				break;
184 		}
185 
186 		free(dev_name);
187 		nvlist_free(replacement);
188 		zpool_close(zhp);
189 	}
190 }
191 
/*
 * Module entry points, referenced from fmd_info below.  Only the receive
 * callback is implemented; every other slot is left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	zfs_retire_recv,	/* fmdo_recv */
	NULL,			/* fmdo_timeout */
	NULL,			/* fmdo_close */
	NULL,			/* fmdo_stats */
	NULL,			/* fmdo_gc */
};
199 
/*
 * Property table referenced from fmd_info below.  It holds only an all-NULL
 * entry (presumably the list terminator), i.e. no properties are declared.
 */
static const fmd_prop_t fmd_props[] = {
	{ NULL, 0, NULL }
};
203 
/*
 * Module description passed to fmd_hdl_register() in _fmd_init(): two
 * strings (presumably the module's descriptive name and version), followed
 * by the entry-point table and the property table defined above.
 */
static const fmd_hdl_info_t fmd_info = {
	"ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
};
207 
208 void
209 _fmd_init(fmd_hdl_t *hdl)
210 {
211 	libzfs_handle_t *zhdl;
212 
213 	if ((zhdl = libzfs_init()) == NULL)
214 		return;
215 
216 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
217 		libzfs_fini(zhdl);
218 		return;
219 	}
220 
221 	fmd_hdl_setspecific(hdl, zhdl);
222 }
223 
224 void
225 _fmd_fini(fmd_hdl_t *hdl)
226 {
227 	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
228 
229 	if (zhdl != NULL)
230 		libzfs_fini(zhdl);
231 }
232