xref: /freebsd/sys/geom/vinum/geom_vinum_init.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 2004 Lukas Ertl
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/kernel.h>
33 #include <sys/kthread.h>
34 #include <sys/libkern.h>
35 #include <sys/malloc.h>
36 #include <sys/queue.h>
37 
38 #include <geom/geom.h>
39 #include <geom/vinum/geom_vinum_var.h>
40 #include <geom/vinum/geom_vinum.h>
41 #include <geom/vinum/geom_vinum_share.h>
42 
/* Kernel thread entry points; each receives its work item as an opaque pointer. */
void	gv_init_td(void *);
void	gv_rebuild_td(void *);
void	gv_sync_td(void *);

/* File-local helpers that decide which of the threads above to start. */
static int	gv_init_plex(struct gv_plex *);
static int	gv_rebuild_plex(struct gv_plex *);
static int	gv_start_plex(struct gv_plex *);
static int	gv_start_vol(struct gv_volume *);
static int	gv_sync(struct gv_volume *);
51 
/*
 * Work description handed to the sync and rebuild threads.  It is allocated
 * by the function that starts the thread and released by the thread itself.
 */
struct gv_sync_args {
	struct gv_volume	*v;		/* Volume being synced (sync only). */
	struct gv_plex		*from;		/* Source plex (sync only). */
	struct gv_plex		*to;		/* Plex being written. */
	off_t			 syncsize;	/* Bytes transferred per request. */
};
58 
59 void
60 gv_parityop(struct g_geom *gp, struct gctl_req *req)
61 {
62 	struct gv_softc *sc;
63 	struct gv_plex *p;
64 	struct bio *bp;
65 	struct g_consumer *cp;
66 	int error, *flags, type, *rebuild, rv;
67 	char *plex;
68 
69 	rv = -1;
70 
71 	plex = gctl_get_param(req, "plex", NULL);
72 	if (plex == NULL) {
73 		gctl_error(req, "no plex given");
74 		goto out;
75 	}
76 
77 	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
78 	if (flags == NULL) {
79 		gctl_error(req, "no flags given");
80 		goto out;
81 	}
82 
83 	rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
84 	if (rebuild == NULL) {
85 		gctl_error(req, "no rebuild op given");
86 		goto out;
87 	}
88 
89 	sc = gp->softc;
90 	type = gv_object_type(sc, plex);
91 	switch (type) {
92 	case GV_TYPE_PLEX:
93 		break;
94 	case GV_TYPE_VOL:
95 	case GV_TYPE_SD:
96 	case GV_TYPE_DRIVE:
97 	default:
98 		gctl_error(req, "'%s' is not a plex", plex);
99 		goto out;
100 	}
101 
102 	p = gv_find_plex(sc, plex);
103 	if (p->state != GV_PLEX_UP) {
104 		gctl_error(req, "plex %s is not completely accessible",
105 		    p->name);
106 		goto out;
107 	}
108 	if (p->org != GV_PLEX_RAID5) {
109 		gctl_error(req, "plex %s is not a RAID5 plex", p->name);
110 		goto out;
111 	}
112 
113 	cp = p->consumer;
114 	error = g_access(cp, 1, 1, 0);
115 	if (error) {
116 		gctl_error(req, "cannot access consumer");
117 		goto out;
118 	}
119 	g_topology_unlock();
120 
121 	/* Reset the check pointer when using -f. */
122 	if (*flags & GV_FLAG_F)
123 		p->synced = 0;
124 
125 	bp = g_new_bio();
126 	if (bp == NULL) {
127 		gctl_error(req, "cannot create BIO - out of memory");
128 		g_topology_lock();
129 		error = g_access(cp, -1, -1, 0);
130 		goto out;
131 	}
132 	bp->bio_cmd = BIO_WRITE;
133 	bp->bio_done = NULL;
134 	bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
135 	bp->bio_cflags |= GV_BIO_CHECK;
136 	if (*rebuild)
137 		bp->bio_cflags |= GV_BIO_PARITY;
138 	bp->bio_offset = p->synced;
139 	bp->bio_length = p->stripesize;
140 
141 	/* Schedule it down ... */
142 	g_io_request(bp, cp);
143 
144 	/* ... and wait for the result. */
145 	error = biowait(bp, "gwrite");
146 	g_free(bp->bio_data);
147 	g_destroy_bio(bp);
148 
149 	if (error) {
150 		/* Incorrect parity. */
151 		if (error == EAGAIN)
152 			rv = 1;
153 
154 		/* Some other error happened. */
155 		else
156 			gctl_error(req, "Parity check failed at offset 0x%jx, "
157 			    "errno %d", (intmax_t)p->synced, error);
158 
159 	/* Correct parity. */
160 	} else
161 		rv = 0;
162 
163 	gctl_set_param(req, "offset", &p->synced, sizeof(p->synced));
164 
165 	/* Advance the checkpointer if there was no error. */
166 	if (rv == 0)
167 		p->synced += p->stripesize;
168 
169 	/* End of plex; reset the check pointer and signal it to the caller. */
170 	if (p->synced >= p->size) {
171 		p->synced = 0;
172 		rv = -2;
173 	}
174 
175 	g_topology_lock();
176 	error = g_access(cp, -1, -1, 0);
177 
178 out:
179 	gctl_set_param(req, "rv", &rv, sizeof(rv));
180 }
181 
182 void
183 gv_start_obj(struct g_geom *gp, struct gctl_req *req)
184 {
185 	struct gv_softc *sc;
186 	struct gv_volume *v;
187 	struct gv_plex *p;
188 	int *argc, *initsize;
189 	char *argv, buf[20];
190 	int err, i, type;
191 
192 	argc = gctl_get_paraml(req, "argc", sizeof(*argc));
193 	initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
194 
195 	if (argc == NULL || *argc == 0) {
196 		gctl_error(req, "no arguments given");
197 		return;
198 	}
199 
200 	sc = gp->softc;
201 
202 	for (i = 0; i < *argc; i++) {
203 		snprintf(buf, sizeof(buf), "argv%d", i);
204 		argv = gctl_get_param(req, buf, NULL);
205 		if (argv == NULL)
206 			continue;
207 		type = gv_object_type(sc, argv);
208 		switch (type) {
209 		case GV_TYPE_VOL:
210 			v = gv_find_vol(sc, argv);
211 			err = gv_start_vol(v);
212 			if (err) {
213 				if (err == EINPROGRESS) {
214 					gctl_error(req, "cannot start volume "
215 					    "'%s': already in progress", argv);
216 				} else {
217 					gctl_error(req, "cannot start volume "
218 					    "'%s'; errno: %d", argv, err);
219 				}
220 				return;
221 			}
222 			break;
223 
224 		case GV_TYPE_PLEX:
225 			p = gv_find_plex(sc, argv);
226 			err = gv_start_plex(p);
227 			if (err) {
228 				if (err == EINPROGRESS) {
229 					gctl_error(req, "cannot start plex "
230 					    "'%s': already in progress", argv);
231 				} else {
232 					gctl_error(req, "cannot start plex "
233 					    "'%s'; errno: %d", argv, err);
234 				}
235 				return;
236 			}
237 			break;
238 
239 		case GV_TYPE_SD:
240 		case GV_TYPE_DRIVE:
241 			/* XXX not yet */
242 			gctl_error(req, "cannot start '%s' - not yet supported",
243 			    argv);
244 			return;
245 		default:
246 			gctl_error(req, "unknown object '%s'", argv);
247 			return;
248 		}
249 	}
250 }
251 
252 static int
253 gv_start_plex(struct gv_plex *p)
254 {
255 	struct gv_volume *v;
256 	int error;
257 
258 	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
259 
260 	if (p->state == GV_PLEX_UP)
261 		return (0);
262 
263 	error = 0;
264 	v = p->vol_sc;
265 	if ((v != NULL) && (v->plexcount > 1))
266 		error = gv_sync(v);
267 	else if (p->org == GV_PLEX_RAID5) {
268 		if (p->state == GV_PLEX_DEGRADED)
269 			error = gv_rebuild_plex(p);
270 		else
271 			error = gv_init_plex(p);
272 	}
273 
274 	return (error);
275 }
276 
277 static int
278 gv_start_vol(struct gv_volume *v)
279 {
280 	struct gv_plex *p;
281 	struct gv_sd *s;
282 	int error;
283 
284 	KASSERT(v != NULL, ("gv_start_vol: NULL v"));
285 
286 	error = 0;
287 
288 	if (v->plexcount == 0)
289 		return (ENXIO);
290 
291 	else if (v->plexcount == 1) {
292 		p = LIST_FIRST(&v->plexes);
293 		KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
294 		if (p->org == GV_PLEX_RAID5) {
295 			switch (p->state) {
296 			case GV_PLEX_DOWN:
297 				error = gv_init_plex(p);
298 				break;
299 			case GV_PLEX_DEGRADED:
300 				error = gv_rebuild_plex(p);
301 				break;
302 			default:
303 				return (0);
304 			}
305 		} else {
306 			LIST_FOREACH(s, &p->subdisks, in_plex) {
307 				gv_set_sd_state(s, GV_SD_UP,
308 				    GV_SETSTATE_CONFIG);
309 			}
310 		}
311 	} else
312 		error = gv_sync(v);
313 
314 	return (error);
315 }
316 
317 static int
318 gv_sync(struct gv_volume *v)
319 {
320 	struct gv_softc *sc;
321 	struct gv_plex *p, *up;
322 	struct gv_sync_args *sync;
323 
324 	KASSERT(v != NULL, ("gv_sync: NULL v"));
325 	sc = v->vinumconf;
326 	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
327 
328 	/* Find the plex that's up. */
329 	up = NULL;
330 	LIST_FOREACH(up, &v->plexes, in_volume) {
331 		if (up->state == GV_PLEX_UP)
332 			break;
333 	}
334 
335 	/* Didn't find a good plex. */
336 	if (up == NULL)
337 		return (ENXIO);
338 
339 	LIST_FOREACH(p, &v->plexes, in_volume) {
340 		if ((p == up) || (p->state == GV_PLEX_UP))
341 			continue;
342 		if (p->flags & GV_PLEX_SYNCING) {
343 			return (EINPROGRESS);
344 		}
345 		p->flags |= GV_PLEX_SYNCING;
346 		sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
347 		sync->v = v;
348 		sync->from = up;
349 		sync->to = p;
350 		sync->syncsize = GV_DFLT_SYNCSIZE;
351 		kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'",
352 		    p->name);
353 	}
354 
355 	return (0);
356 }
357 
358 static int
359 gv_rebuild_plex(struct gv_plex *p)
360 {
361 	struct gv_sync_args *sync;
362 
363 	if (gv_is_open(p->geom))
364 		return (EBUSY);
365 
366 	if (p->flags & GV_PLEX_SYNCING)
367 		return (EINPROGRESS);
368 	p->flags |= GV_PLEX_SYNCING;
369 
370 	sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
371 	sync->to = p;
372 	sync->syncsize = GV_DFLT_SYNCSIZE;
373 
374 	kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
375 	    p->name);
376 
377 	return (0);
378 }
379 
380 static int
381 gv_init_plex(struct gv_plex *p)
382 {
383 	struct gv_sd *s;
384 
385 	KASSERT(p != NULL, ("gv_init_plex: NULL p"));
386 
387 	LIST_FOREACH(s, &p->subdisks, in_plex) {
388 		if (s->state == GV_SD_INITIALIZING)
389 			return (EINPROGRESS);
390 		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
391 		s->init_size = GV_DFLT_SYNCSIZE;
392 		kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s",
393 		    s->name);
394 	}
395 
396 	return (0);
397 }
398 
399 /* This thread is responsible for rebuilding a degraded RAID5 plex. */
400 void
401 gv_rebuild_td(void *arg)
402 {
403 	struct bio *bp;
404 	struct gv_plex *p;
405 	struct g_consumer *cp;
406 	struct gv_sync_args *sync;
407 	u_char *buf;
408 	off_t i;
409 	int error;
410 
411 	buf = NULL;
412 	bp = NULL;
413 
414 	sync = arg;
415 	p = sync->to;
416 	p->synced = 0;
417 	cp = p->consumer;
418 
419 	g_topology_lock();
420 	error = g_access(cp, 1, 1, 0);
421 	if (error) {
422 		g_topology_unlock();
423 		printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
424 		    "%d\n", p->name, error);
425 		kthread_exit(error);
426 	}
427 	g_topology_unlock();
428 
429 	buf = g_malloc(sync->syncsize, M_WAITOK);
430 
431 	printf("GEOM_VINUM: rebuild of %s started\n", p->name);
432 	i = 0;
433 	for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
434 /*
435 		if (i + sync->syncsize > p->size)
436 			sync->syncsize = p->size - i;
437 */
438 		bp = g_new_bio();
439 		if (bp == NULL) {
440 			printf("GEOM_VINUM: rebuild of %s failed creating bio: "
441 			    "out of memory\n", p->name);
442 			break;
443 		}
444 		bp->bio_cmd = BIO_WRITE;
445 		bp->bio_done = NULL;
446 		bp->bio_data = buf;
447 		bp->bio_cflags |= GV_BIO_REBUILD;
448 		bp->bio_offset = i;
449 		bp->bio_length = p->stripesize;
450 
451 		/* Schedule it down ... */
452 		g_io_request(bp, cp);
453 
454 		/* ... and wait for the result. */
455 		error = biowait(bp, "gwrite");
456 		if (error) {
457 			printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
458 			    "errno: %d\n", p->name, i, error);
459 			break;
460 		}
461 		g_destroy_bio(bp);
462 		bp = NULL;
463 	}
464 
465 	if (bp != NULL)
466 		g_destroy_bio(bp);
467 	if (buf != NULL)
468 		g_free(buf);
469 
470 	g_topology_lock();
471 	g_access(cp, -1, -1, 0);
472 	gv_save_config_all(p->vinumconf);
473 	g_topology_unlock();
474 
475 	p->flags &= ~GV_PLEX_SYNCING;
476 	p->synced = 0;
477 
478 	/* Successful initialization. */
479 	if (!error)
480 		printf("GEOM_VINUM: rebuild of %s finished\n", p->name);
481 
482 	g_free(sync);
483 	kthread_exit(error);
484 }
485 
486 void
487 gv_sync_td(void *arg)
488 {
489 	struct bio *bp;
490 	struct gv_plex *p;
491 	struct g_consumer *from, *to;
492 	struct gv_sync_args *sync;
493 	u_char *buf;
494 	off_t i;
495 	int error;
496 
497 	sync = arg;
498 
499 	from = sync->from->consumer;
500 	to = sync->to->consumer;
501 
502 	p = sync->to;
503 	p->synced = 0;
504 
505 	error = 0;
506 
507 	g_topology_lock();
508 	error = g_access(from, 1, 0, 0);
509 	if (error) {
510 		g_topology_unlock();
511 		printf("GEOM_VINUM: sync from '%s' failed to access "
512 		    "consumer: %d\n", sync->from->name, error);
513 		g_free(sync);
514 		kthread_exit(error);
515 	}
516 	error = g_access(to, 0, 1, 0);
517 	if (error) {
518 		g_access(from, -1, 0, 0);
519 		g_topology_unlock();
520 		printf("GEOM_VINUM: sync to '%s' failed to access "
521 		    "consumer: %d\n", p->name, error);
522 		g_free(sync);
523 		kthread_exit(error);
524 	}
525 	g_topology_unlock();
526 
527 	printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name,
528 	    sync->to->name);
529 	for (i = 0; i < p->size; i+= sync->syncsize) {
530 		/* Read some bits from the good plex. */
531 		buf = g_read_data(from, i, sync->syncsize, &error);
532 		if (buf == NULL) {
533 			printf("GEOM_VINUM: sync read from '%s' failed at "
534 			    "offset %jd; errno: %d\n", sync->from->name, i,
535 			    error);
536 			break;
537 		}
538 
539 		/*
540 		 * Create a bio and schedule it down on the 'bad' plex.  We
541 		 * cannot simply use g_write_data() because we have to let the
542 		 * lower parts know that we are an initialization process and
543 		 * not a 'normal' request.
544 		 */
545 		bp = g_new_bio();
546 		if (bp == NULL) {
547 			printf("GEOM_VINUM: sync write to '%s' failed at "
548 			    "offset %jd; out of memory\n", p->name, i);
549 			g_free(buf);
550 			break;
551 		}
552 		bp->bio_cmd = BIO_WRITE;
553 		bp->bio_offset = i;
554 		bp->bio_length = sync->syncsize;
555 		bp->bio_data = buf;
556 		bp->bio_done = NULL;
557 
558 		/*
559 		 * This hack declare this bio as part of an initialization
560 		 * process, so that the lower levels allow it to get through.
561 		 */
562 		bp->bio_cflags |= GV_BIO_SYNCREQ;
563 
564 		/* Schedule it down ... */
565 		g_io_request(bp, to);
566 
567 		/* ... and wait for the result. */
568 		error = biowait(bp, "gwrite");
569 		g_destroy_bio(bp);
570 		g_free(buf);
571 		if (error) {
572 			printf("GEOM_VINUM: sync write to '%s' failed at "
573 			    "offset %jd; errno: %d\n", p->name, i, error);
574 			break;
575 		}
576 
577 		/* Note that we have synced a little bit more. */
578 		p->synced += sync->syncsize;
579 	}
580 
581 	g_topology_lock();
582 	g_access(from, -1, 0, 0);
583 	g_access(to, 0, -1, 0);
584 	gv_save_config_all(p->vinumconf);
585 	g_topology_unlock();
586 
587 	/* Successful initialization. */
588 	if (!error)
589 		printf("GEOM_VINUM: plex sync %s -> %s finished\n",
590 		    sync->from->name, sync->to->name);
591 
592 	p->flags &= ~GV_PLEX_SYNCING;
593 	p->synced = 0;
594 
595 	g_free(sync);
596 	kthread_exit(error);
597 }
598 
599 void
600 gv_init_td(void *arg)
601 {
602 	struct gv_sd *s;
603 	struct gv_drive *d;
604 	struct g_geom *gp;
605 	struct g_consumer *cp;
606 	int error;
607 	off_t i, init_size, start, offset, length;
608 	u_char *buf;
609 
610 	s = arg;
611 	KASSERT(s != NULL, ("gv_init_td: NULL s"));
612 	d = s->drive_sc;
613 	KASSERT(d != NULL, ("gv_init_td: NULL d"));
614 	gp = d->geom;
615 	KASSERT(gp != NULL, ("gv_init_td: NULL gp"));
616 
617 	cp = LIST_FIRST(&gp->consumer);
618 	KASSERT(cp != NULL, ("gv_init_td: NULL cp"));
619 
620 	s->init_error = 0;
621 	init_size = s->init_size;
622 	start = s->drive_offset + s->initialized;
623 	offset = s->drive_offset;
624 	length = s->size;
625 
626 	buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);
627 
628 	g_topology_lock();
629 	error = g_access(cp, 0, 1, 0);
630 	if (error) {
631 		s->init_error = error;
632 		g_topology_unlock();
633 		printf("GEOM_VINUM: subdisk '%s' init: failed to access "
634 		    "consumer; error: %d\n", s->name, error);
635 		kthread_exit(error);
636 	}
637 	g_topology_unlock();
638 
639 	for (i = start; i < offset + length; i += init_size) {
640 		error = g_write_data(cp, i, buf, init_size);
641 		if (error) {
642 			printf("GEOM_VINUM: subdisk '%s' init: write failed"
643 			    " at offset %jd (drive offset %jd); error %d\n",
644 			    s->name, (intmax_t)s->initialized, (intmax_t)i,
645 			    error);
646 			break;
647 		}
648 		s->initialized += init_size;
649 	}
650 
651 	g_free(buf);
652 
653 	g_topology_lock();
654 	g_access(cp, 0, -1, 0);
655 	g_topology_unlock();
656 	if (error) {
657 		s->init_error = error;
658 		g_topology_lock();
659 		gv_set_sd_state(s, GV_SD_STALE,
660 		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
661 		g_topology_unlock();
662 	} else {
663 		g_topology_lock();
664 		gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
665 		g_topology_unlock();
666 		s->initialized = 0;
667 		printf("GEOM_VINUM: subdisk '%s' init: finished successfully\n",
668 		    s->name);
669 	}
670 	kthread_exit(error);
671 }
672