xref: /freebsd/sys/geom/vinum/geom_vinum_init.c (revision 7029da5c36f2d3cf6bb6c81bf551229f416399e8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * Copyright (c) 2007, 2009 Ulf Lilleengen
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/bio.h>
34 #include <sys/libkern.h>
35 #include <sys/malloc.h>
36 
37 #include <geom/geom.h>
38 #include <geom/geom_dbg.h>
39 #include <geom/vinum/geom_vinum_var.h>
40 #include <geom/vinum/geom_vinum.h>
41 
/*
 * Forward declarations for the file-local helpers, grouped by the kind of
 * object they operate on.
 */

/* Volume-level helpers. */
static struct gv_plex	*gv_find_good_plex(struct gv_volume *);
static int		 gv_sync(struct gv_volume *);

/* Plex-level operations. */
static int		 gv_sync_plex(struct gv_plex *, struct gv_plex *);
static int		 gv_init_plex(struct gv_plex *);
static int		 gv_rebuild_plex(struct gv_plex *);
static int		 gv_grow_plex(struct gv_plex *);
48 
49 void
50 gv_start_obj(struct g_geom *gp, struct gctl_req *req)
51 {
52 	struct gv_softc *sc;
53 	struct gv_volume *v;
54 	struct gv_plex *p;
55 	int *argc, *initsize;
56 	char *argv, buf[20];
57 	int i, type;
58 
59 	argc = gctl_get_paraml(req, "argc", sizeof(*argc));
60 	initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
61 
62 	if (argc == NULL || *argc == 0) {
63 		gctl_error(req, "no arguments given");
64 		return;
65 	}
66 
67 	sc = gp->softc;
68 
69 	for (i = 0; i < *argc; i++) {
70 		snprintf(buf, sizeof(buf), "argv%d", i);
71 		argv = gctl_get_param(req, buf, NULL);
72 		if (argv == NULL)
73 			continue;
74 		type = gv_object_type(sc, argv);
75 		switch (type) {
76 		case GV_TYPE_VOL:
77 			v = gv_find_vol(sc, argv);
78 			if (v != NULL)
79 				gv_post_event(sc, GV_EVENT_START_VOLUME, v,
80 				    NULL, *initsize, 0);
81 			break;
82 
83 		case GV_TYPE_PLEX:
84 			p = gv_find_plex(sc, argv);
85 			if (p != NULL)
86 				gv_post_event(sc, GV_EVENT_START_PLEX, p, NULL,
87 				    *initsize, 0);
88 			break;
89 
90 		case GV_TYPE_SD:
91 		case GV_TYPE_DRIVE:
92 			/* XXX Not implemented, but what is the use? */
93 			gctl_error(req, "unable to start '%s' - not yet supported",
94 			    argv);
95 			return;
96 		default:
97 			gctl_error(req, "unknown object '%s'", argv);
98 			return;
99 		}
100 	}
101 }
102 
103 int
104 gv_start_plex(struct gv_plex *p)
105 {
106 	struct gv_volume *v;
107 	struct gv_plex *up;
108 	struct gv_sd *s;
109 	int error;
110 
111 	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
112 
113 	error = 0;
114 	v = p->vol_sc;
115 
116 	/* RAID5 plexes can either be init, rebuilt or grown. */
117 	if (p->org == GV_PLEX_RAID5) {
118 		if (p->state > GV_PLEX_DEGRADED) {
119 			LIST_FOREACH(s, &p->subdisks, in_plex) {
120 				if (s->flags & GV_SD_GROW) {
121 					error = gv_grow_plex(p);
122 					return (error);
123 				}
124 			}
125 		} else if (p->state == GV_PLEX_DEGRADED) {
126 			error = gv_rebuild_plex(p);
127 		} else
128 			error = gv_init_plex(p);
129 	} else {
130 		/* We want to sync from the other plex if we're down. */
131 		if (p->state == GV_PLEX_DOWN && v->plexcount > 1) {
132 			up = gv_find_good_plex(v);
133 			if (up == NULL) {
134 				G_VINUM_DEBUG(1, "unable to find a good plex");
135 				return (ENXIO);
136 			}
137 			g_topology_lock();
138 			error = gv_access(v->provider, 1, 1, 0);
139 			if (error) {
140 				g_topology_unlock();
141 				G_VINUM_DEBUG(0, "sync from '%s' failed to "
142 				    "access volume: %d", up->name, error);
143 				return (error);
144 			}
145 			g_topology_unlock();
146 			error = gv_sync_plex(p, up);
147 			if (error)
148 				return (error);
149 		/*
150 		 * In case we have a stripe that is up, check whether it can be
151 		 * grown.
152 		 */
153 		} else if (p->org == GV_PLEX_STRIPED &&
154 		    p->state != GV_PLEX_DOWN) {
155 			LIST_FOREACH(s, &p->subdisks, in_plex) {
156 				if (s->flags & GV_SD_GROW) {
157 					error = gv_grow_plex(p);
158 					break;
159 				}
160 			}
161 		}
162 	}
163 	return (error);
164 }
165 
166 int
167 gv_start_vol(struct gv_volume *v)
168 {
169 	struct gv_plex *p;
170 	int error;
171 
172 	KASSERT(v != NULL, ("gv_start_vol: NULL v"));
173 
174 	error = 0;
175 
176 	if (v->plexcount == 0)
177 		return (ENXIO);
178 
179 	else if (v->plexcount == 1) {
180 		p = LIST_FIRST(&v->plexes);
181 		KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
182 		error = gv_start_plex(p);
183 	} else
184 		error = gv_sync(v);
185 
186 	return (error);
187 }
188 
189 /* Sync a plex p from the plex up.  */
190 static int
191 gv_sync_plex(struct gv_plex *p, struct gv_plex *up)
192 {
193 	int error;
194 
195 	KASSERT(p != NULL, ("%s: NULL p", __func__));
196 	KASSERT(up != NULL, ("%s: NULL up", __func__));
197 	if ((p == up) || (p->state == GV_PLEX_UP))
198 		return (0);
199 	if (p->flags & GV_PLEX_SYNCING ||
200 	    p->flags & GV_PLEX_REBUILDING ||
201 	    p->flags & GV_PLEX_GROWING) {
202 		return (EINPROGRESS);
203 	}
204 	p->synced = 0;
205 	p->flags |= GV_PLEX_SYNCING;
206 	G_VINUM_DEBUG(1, "starting sync of plex %s", p->name);
207 	error = gv_sync_request(up, p, p->synced,
208 	    MIN(GV_DFLT_SYNCSIZE, up->size - p->synced),
209 	    BIO_READ, NULL);
210 	if (error) {
211 		G_VINUM_DEBUG(0, "error syncing plex %s", p->name);
212 		return (error);
213 	}
214 	return (0);
215 }
216 
217 /* Return a good plex from volume v. */
218 static struct gv_plex *
219 gv_find_good_plex(struct gv_volume *v)
220 {
221 	struct gv_plex *up;
222 
223 	/* Find the plex that's up. */
224 	up = NULL;
225 	LIST_FOREACH(up, &v->plexes, in_volume) {
226 		if (up->state == GV_PLEX_UP)
227 			break;
228 	}
229 	/* Didn't find a good plex. */
230 	return (up);
231 }
232 
233 static int
234 gv_sync(struct gv_volume *v)
235 {
236 	struct gv_softc *sc;
237 	struct gv_plex *p, *up;
238 	int error;
239 
240 	KASSERT(v != NULL, ("gv_sync: NULL v"));
241 	sc = v->vinumconf;
242 	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
243 
244 
245 	up = gv_find_good_plex(v);
246 	if (up == NULL)
247 		return (ENXIO);
248 	g_topology_lock();
249 	error = gv_access(v->provider, 1, 1, 0);
250 	if (error) {
251 		g_topology_unlock();
252 		G_VINUM_DEBUG(0, "sync from '%s' failed to access volume: %d",
253 		    up->name, error);
254 		return (error);
255 	}
256 	g_topology_unlock();
257 
258 	/* Go through the good plex, and issue BIO's to all other plexes. */
259 	LIST_FOREACH(p, &v->plexes, in_volume) {
260 		error = gv_sync_plex(p, up);
261 		if (error)
262 			break;
263 	}
264 	return (0);
265 }
266 
267 static int
268 gv_rebuild_plex(struct gv_plex *p)
269 {
270 	struct gv_drive *d;
271 	struct gv_sd *s;
272 	int error;
273 
274 	if (p->flags & GV_PLEX_SYNCING ||
275 	    p->flags & GV_PLEX_REBUILDING ||
276 	    p->flags & GV_PLEX_GROWING)
277 		return (EINPROGRESS);
278 	/*
279 	 * Make sure that all subdisks have consumers. We won't allow a rebuild
280 	 * unless every subdisk have one.
281 	 */
282 	LIST_FOREACH(s, &p->subdisks, in_plex) {
283 		d = s->drive_sc;
284 		if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) {
285 			G_VINUM_DEBUG(0, "unable to rebuild %s, subdisk(s) have"
286 			    " no drives", p->name);
287 			return (ENXIO);
288 		}
289 	}
290 	p->flags |= GV_PLEX_REBUILDING;
291 	p->synced = 0;
292 
293 	g_topology_assert_not();
294 	g_topology_lock();
295 	error = gv_access(p->vol_sc->provider, 1, 1, 0);
296 	if (error) {
297 		G_VINUM_DEBUG(0, "unable to access provider");
298 		return (0);
299 	}
300 	g_topology_unlock();
301 
302 	gv_parity_request(p, GV_BIO_REBUILD, 0);
303 	return (0);
304 }
305 
306 static int
307 gv_grow_plex(struct gv_plex *p)
308 {
309 	struct gv_volume *v;
310 	struct gv_sd *s;
311 	off_t origsize, origlength;
312 	int error, sdcount;
313 
314 	KASSERT(p != NULL, ("gv_grow_plex: NULL p"));
315 	v = p->vol_sc;
316 	KASSERT(v != NULL, ("gv_grow_plex: NULL v"));
317 
318 	if (p->flags & GV_PLEX_GROWING ||
319 	    p->flags & GV_PLEX_SYNCING ||
320 	    p->flags & GV_PLEX_REBUILDING)
321 		return (EINPROGRESS);
322 	g_topology_lock();
323 	error = gv_access(v->provider, 1, 1, 0);
324 	g_topology_unlock();
325 	if (error) {
326 		G_VINUM_DEBUG(0, "unable to access provider");
327 		return (error);
328 	}
329 
330 	/* XXX: This routine with finding origsize is used two other places as
331 	 * well, so we should create a function for it. */
332 	sdcount = p->sdcount;
333 	LIST_FOREACH(s, &p->subdisks, in_plex) {
334 		if (s->flags & GV_SD_GROW)
335 			sdcount--;
336 	}
337 	s = LIST_FIRST(&p->subdisks);
338 	if (s == NULL) {
339 		G_VINUM_DEBUG(0, "error growing plex without subdisks");
340 		return (GV_ERR_NOTFOUND);
341 	}
342 	p->flags |= GV_PLEX_GROWING;
343 	origsize = (sdcount - 1) * s->size;
344 	origlength = (sdcount - 1) * p->stripesize;
345 	p->synced = 0;
346 	G_VINUM_DEBUG(1, "starting growing of plex %s", p->name);
347 	gv_grow_request(p, 0, MIN(origlength, origsize), BIO_READ, NULL);
348 
349 	return (0);
350 }
351 
352 static int
353 gv_init_plex(struct gv_plex *p)
354 {
355 	struct gv_drive *d;
356 	struct gv_sd *s;
357 	int error;
358 	off_t start;
359 	caddr_t data;
360 
361 	KASSERT(p != NULL, ("gv_init_plex: NULL p"));
362 
363 	LIST_FOREACH(s, &p->subdisks, in_plex) {
364 		if (s->state == GV_SD_INITIALIZING)
365 			return (EINPROGRESS);
366 		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
367 		s->init_size = GV_DFLT_SYNCSIZE;
368 		start = s->drive_offset + s->initialized;
369 		d = s->drive_sc;
370 		if (d == NULL) {
371 			G_VINUM_DEBUG(0, "subdisk %s has no drive yet", s->name);
372 			break;
373 		}
374 		/*
375 		 * Take the lock here since we need to avoid a race in
376 		 * gv_init_request if the BIO is completed before the lock is
377 		 * released.
378 		 */
379 		g_topology_lock();
380 		error = g_access(d->consumer, 0, 1, 0);
381 		g_topology_unlock();
382 		if (error) {
383 			G_VINUM_DEBUG(0, "error accessing consumer when "
384 			    "initializing %s", s->name);
385 			break;
386 		}
387 		data = g_malloc(s->init_size, M_WAITOK | M_ZERO);
388 		gv_init_request(s, start, data, s->init_size);
389 	}
390 	return (0);
391 }
392