1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004, 2007 Lukas Ertl
5 * Copyright (c) 2007, 2009 Ulf Lilleengen
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/lock.h>
33 #include <sys/malloc.h>
34 #include <sys/systm.h>
35
36 #include <geom/geom.h>
37 #include <geom/geom_dbg.h>
38 #include <geom/vinum/geom_vinum_var.h>
39 #include <geom/vinum/geom_vinum_raid5.h>
40 #include <geom/vinum/geom_vinum.h>
41
42 static int gv_check_parity(struct gv_plex *, struct bio *,
43 struct gv_raid5_packet *);
44 static int gv_normal_parity(struct gv_plex *, struct bio *,
45 struct gv_raid5_packet *);
46 static void gv_plex_flush(struct gv_plex *);
47 static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
48 int *, int);
49 static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
50 off_t, caddr_t);
51 static void gv_post_bio(struct gv_softc *, struct bio *);
52
53 void
gv_plex_start(struct gv_plex * p,struct bio * bp)54 gv_plex_start(struct gv_plex *p, struct bio *bp)
55 {
56 struct bio *cbp;
57 struct gv_sd *s;
58 struct gv_raid5_packet *wp;
59 caddr_t addr;
60 off_t bcount, boff, len;
61
62 bcount = bp->bio_length;
63 addr = bp->bio_data;
64 boff = bp->bio_offset;
65
66 /* Walk over the whole length of the request, we might split it up. */
67 while (bcount > 0) {
68 wp = NULL;
69
70 /*
71 * RAID5 plexes need special treatment, as a single request
72 * might involve several read/write sub-requests.
73 */
74 if (p->org == GV_PLEX_RAID5) {
75 wp = gv_raid5_start(p, bp, addr, boff, bcount);
76 if (wp == NULL)
77 return;
78
79 len = wp->length;
80
81 if (TAILQ_EMPTY(&wp->bits))
82 g_free(wp);
83 else if (wp->lockbase != -1)
84 TAILQ_INSERT_TAIL(&p->packets, wp, list);
85
86 /*
87 * Requests to concatenated and striped plexes go straight
88 * through.
89 */
90 } else {
91 len = gv_plex_normal_request(p, bp, boff, bcount, addr);
92 }
93 if (len < 0)
94 return;
95
96 bcount -= len;
97 addr += len;
98 boff += len;
99 }
100
101 /*
102 * Fire off all sub-requests. We get the correct consumer (== drive)
103 * to send each request to via the subdisk that was stored in
104 * cbp->bio_caller1.
105 */
106 cbp = bioq_takefirst(p->bqueue);
107 while (cbp != NULL) {
108 /*
109 * RAID5 sub-requests need to come in correct order, otherwise
110 * we trip over the parity, as it might be overwritten by
111 * another sub-request. We abuse cbp->bio_caller2 to mark
112 * potential overlap situations.
113 */
114 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
115 /* Park the bio on the waiting queue. */
116 cbp->bio_pflags |= GV_BIO_ONHOLD;
117 bioq_disksort(p->wqueue, cbp);
118 } else {
119 s = cbp->bio_caller1;
120 g_io_request(cbp, s->drive_sc->consumer);
121 }
122 cbp = bioq_takefirst(p->bqueue);
123 }
124 }
125
126 static int
gv_plex_offset(struct gv_plex * p,off_t boff,off_t bcount,off_t * real_off,off_t * real_len,int * sdno,int growing)127 gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
128 off_t *real_len, int *sdno, int growing)
129 {
130 struct gv_sd *s;
131 int i, sdcount;
132 off_t len_left, stripeend, stripeno, stripestart;
133
134 switch (p->org) {
135 case GV_PLEX_CONCAT:
136 /*
137 * Find the subdisk where this request starts. The subdisks in
138 * this list must be ordered by plex_offset.
139 */
140 i = 0;
141 LIST_FOREACH(s, &p->subdisks, in_plex) {
142 if (s->plex_offset <= boff &&
143 s->plex_offset + s->size > boff) {
144 *sdno = i;
145 break;
146 }
147 i++;
148 }
149 if (s == NULL || s->drive_sc == NULL)
150 return (GV_ERR_NOTFOUND);
151
152 /* Calculate corresponding offsets on disk. */
153 *real_off = boff - s->plex_offset;
154 len_left = s->size - (*real_off);
155 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
156 *real_len = (bcount > len_left) ? len_left : bcount;
157 break;
158
159 case GV_PLEX_STRIPED:
160 /* The number of the stripe where the request starts. */
161 stripeno = boff / p->stripesize;
162 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
163
164 /* Take growing subdisks into account when calculating. */
165 sdcount = gv_sdcount(p, (boff >= p->synced));
166
167 if (!(boff + bcount <= p->synced) &&
168 (p->flags & GV_PLEX_GROWING) &&
169 !growing)
170 return (GV_ERR_ISBUSY);
171 *sdno = stripeno % sdcount;
172
173 KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
174 stripestart = (stripeno / sdcount) *
175 p->stripesize;
176 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
177 stripeend = stripestart + p->stripesize;
178 *real_off = boff - (stripeno * p->stripesize) +
179 stripestart;
180 len_left = stripeend - *real_off;
181 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
182
183 *real_len = (bcount <= len_left) ? bcount : len_left;
184 break;
185
186 default:
187 return (GV_ERR_PLEXORG);
188 }
189 return (0);
190 }
191
192 /*
193 * Prepare a normal plex request.
194 */
195 static int
gv_plex_normal_request(struct gv_plex * p,struct bio * bp,off_t boff,off_t bcount,caddr_t addr)196 gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
197 off_t bcount, caddr_t addr)
198 {
199 struct gv_sd *s;
200 struct bio *cbp;
201 off_t real_len, real_off;
202 int i, err, sdno;
203
204 s = NULL;
205 sdno = -1;
206 real_len = real_off = 0;
207
208 err = ENXIO;
209
210 if (p == NULL || LIST_EMPTY(&p->subdisks))
211 goto bad;
212
213 err = gv_plex_offset(p, boff, bcount, &real_off,
214 &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
215 /* If the request was blocked, put it into wait. */
216 if (err == GV_ERR_ISBUSY) {
217 bioq_disksort(p->rqueue, bp);
218 return (-1); /* "Fail", and delay request. */
219 }
220 if (err) {
221 err = ENXIO;
222 goto bad;
223 }
224 err = ENXIO;
225
226 /* Find the right subdisk. */
227 i = 0;
228 LIST_FOREACH(s, &p->subdisks, in_plex) {
229 if (i == sdno)
230 break;
231 i++;
232 }
233
234 /* Subdisk not found. */
235 if (s == NULL || s->drive_sc == NULL)
236 goto bad;
237
238 /* Now check if we can handle the request on this subdisk. */
239 switch (s->state) {
240 case GV_SD_UP:
241 /* If the subdisk is up, just continue. */
242 break;
243 case GV_SD_DOWN:
244 if (bp->bio_pflags & GV_BIO_INTERNAL)
245 G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
246 " order to perform administrative requests");
247 goto bad;
248 case GV_SD_STALE:
249 if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
250 G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
251 "regular requests");
252 goto bad;
253 }
254
255 G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
256 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
257 break;
258 case GV_SD_INITIALIZING:
259 if (bp->bio_cmd == BIO_READ)
260 goto bad;
261 break;
262 default:
263 /* All other subdisk states mean it's not accessible. */
264 goto bad;
265 }
266
267 /* Clone the bio and adjust the offsets and sizes. */
268 cbp = g_clone_bio(bp);
269 if (cbp == NULL) {
270 err = ENOMEM;
271 goto bad;
272 }
273 cbp->bio_offset = real_off + s->drive_offset;
274 cbp->bio_length = real_len;
275 cbp->bio_data = addr;
276 cbp->bio_done = gv_done;
277 cbp->bio_caller1 = s;
278 s->drive_sc->active++;
279
280 /* Store the sub-requests now and let others issue them. */
281 bioq_insert_tail(p->bqueue, cbp);
282 return (real_len);
283 bad:
284 G_VINUM_LOGREQ(0, bp, "plex request failed.");
285 /* Building the sub-request failed. If internal BIO, do not deliver. */
286 if (bp->bio_pflags & GV_BIO_INTERNAL) {
287 if (bp->bio_pflags & GV_BIO_MALLOC)
288 g_free(bp->bio_data);
289 g_destroy_bio(bp);
290 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
291 GV_PLEX_GROWING);
292 return (-1);
293 }
294 g_io_deliver(bp, err);
295 return (-1);
296 }
297
298 /*
299 * Handle a completed request to a striped or concatenated plex.
300 */
301 void
gv_plex_normal_done(struct gv_plex * p,struct bio * bp)302 gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
303 {
304 struct bio *pbp;
305
306 pbp = bp->bio_parent;
307 if (pbp->bio_error == 0)
308 pbp->bio_error = bp->bio_error;
309 g_destroy_bio(bp);
310 pbp->bio_inbed++;
311 if (pbp->bio_children == pbp->bio_inbed) {
312 /* Just set it to length since multiple plexes will
313 * screw things up. */
314 pbp->bio_completed = pbp->bio_length;
315 if (pbp->bio_pflags & GV_BIO_SYNCREQ)
316 gv_sync_complete(p, pbp);
317 else if (pbp->bio_pflags & GV_BIO_GROW)
318 gv_grow_complete(p, pbp);
319 else
320 g_io_deliver(pbp, pbp->bio_error);
321 }
322 }
323
324 /*
325 * Handle a completed request to a RAID-5 plex.
326 */
327 void
gv_plex_raid5_done(struct gv_plex * p,struct bio * bp)328 gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
329 {
330 struct gv_softc *sc;
331 struct bio *cbp, *pbp;
332 struct gv_bioq *bq, *bq2;
333 struct gv_raid5_packet *wp;
334 off_t completed;
335 int i;
336
337 completed = 0;
338 sc = p->vinumconf;
339 wp = bp->bio_caller2;
340
341 switch (bp->bio_parent->bio_cmd) {
342 case BIO_READ:
343 if (wp == NULL) {
344 completed = bp->bio_completed;
345 break;
346 }
347
348 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
349 if (bq->bp != bp)
350 continue;
351 TAILQ_REMOVE(&wp->bits, bq, queue);
352 g_free(bq);
353 for (i = 0; i < wp->length; i++)
354 wp->data[i] ^= bp->bio_data[i];
355 break;
356 }
357 if (TAILQ_EMPTY(&wp->bits)) {
358 completed = wp->length;
359 if (wp->lockbase != -1) {
360 TAILQ_REMOVE(&p->packets, wp, list);
361 /* Bring the waiting bios back into the game. */
362 pbp = bioq_takefirst(p->wqueue);
363 while (pbp != NULL) {
364 gv_post_bio(sc, pbp);
365 pbp = bioq_takefirst(p->wqueue);
366 }
367 }
368 g_free(wp);
369 }
370
371 break;
372
373 case BIO_WRITE:
374 /* XXX can this ever happen? */
375 if (wp == NULL) {
376 completed = bp->bio_completed;
377 break;
378 }
379
380 /* Check if we need to handle parity data. */
381 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
382 if (bq->bp != bp)
383 continue;
384 TAILQ_REMOVE(&wp->bits, bq, queue);
385 g_free(bq);
386 cbp = wp->parity;
387 if (cbp != NULL) {
388 for (i = 0; i < wp->length; i++)
389 cbp->bio_data[i] ^= bp->bio_data[i];
390 }
391 break;
392 }
393
394 /* Handle parity data. */
395 if (TAILQ_EMPTY(&wp->bits)) {
396 if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
397 i = gv_check_parity(p, bp, wp);
398 else
399 i = gv_normal_parity(p, bp, wp);
400
401 /* All of our sub-requests have finished. */
402 if (i) {
403 completed = wp->length;
404 TAILQ_REMOVE(&p->packets, wp, list);
405 /* Bring the waiting bios back into the game. */
406 pbp = bioq_takefirst(p->wqueue);
407 while (pbp != NULL) {
408 gv_post_bio(sc, pbp);
409 pbp = bioq_takefirst(p->wqueue);
410 }
411 g_free(wp);
412 }
413 }
414
415 break;
416 }
417
418 pbp = bp->bio_parent;
419 if (pbp->bio_error == 0)
420 pbp->bio_error = bp->bio_error;
421 pbp->bio_completed += completed;
422
423 /* When the original request is finished, we deliver it. */
424 pbp->bio_inbed++;
425 if (pbp->bio_inbed == pbp->bio_children) {
426 /* Hand it over for checking or delivery. */
427 if (pbp->bio_cmd == BIO_WRITE &&
428 (pbp->bio_pflags & GV_BIO_CHECK)) {
429 gv_parity_complete(p, pbp);
430 } else if (pbp->bio_cmd == BIO_WRITE &&
431 (pbp->bio_pflags & GV_BIO_REBUILD)) {
432 gv_rebuild_complete(p, pbp);
433 } else if (pbp->bio_pflags & GV_BIO_INIT) {
434 gv_init_complete(p, pbp);
435 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
436 gv_sync_complete(p, pbp);
437 } else if (pbp->bio_pflags & GV_BIO_GROW) {
438 gv_grow_complete(p, pbp);
439 } else {
440 g_io_deliver(pbp, pbp->bio_error);
441 }
442 }
443
444 /* Clean up what we allocated. */
445 if (bp->bio_cflags & GV_BIO_MALLOC)
446 g_free(bp->bio_data);
447 g_destroy_bio(bp);
448 }
449
450 static int
gv_check_parity(struct gv_plex * p,struct bio * bp,struct gv_raid5_packet * wp)451 gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
452 {
453 struct bio *pbp;
454 struct gv_sd *s;
455 int err, finished, i;
456
457 err = 0;
458 finished = 1;
459
460 if (wp->waiting != NULL) {
461 pbp = wp->waiting;
462 wp->waiting = NULL;
463 s = pbp->bio_caller1;
464 g_io_request(pbp, s->drive_sc->consumer);
465 finished = 0;
466
467 } else if (wp->parity != NULL) {
468 pbp = wp->parity;
469 wp->parity = NULL;
470
471 /* Check if the parity is correct. */
472 for (i = 0; i < wp->length; i++) {
473 if (bp->bio_data[i] != pbp->bio_data[i]) {
474 err = 1;
475 break;
476 }
477 }
478
479 /* The parity is not correct... */
480 if (err) {
481 bp->bio_parent->bio_error = EAGAIN;
482
483 /* ... but we rebuild it. */
484 if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
485 s = pbp->bio_caller1;
486 g_io_request(pbp, s->drive_sc->consumer);
487 finished = 0;
488 }
489 }
490
491 /*
492 * Clean up the BIO we would have used for rebuilding the
493 * parity.
494 */
495 if (finished) {
496 bp->bio_parent->bio_inbed++;
497 g_destroy_bio(pbp);
498 }
499 }
500
501 return (finished);
502 }
503
504 static int
gv_normal_parity(struct gv_plex * p,struct bio * bp,struct gv_raid5_packet * wp)505 gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
506 {
507 struct bio *cbp, *pbp;
508 struct gv_sd *s;
509 int finished, i;
510
511 finished = 1;
512
513 if (wp->waiting != NULL) {
514 pbp = wp->waiting;
515 wp->waiting = NULL;
516 cbp = wp->parity;
517 for (i = 0; i < wp->length; i++)
518 cbp->bio_data[i] ^= pbp->bio_data[i];
519 s = pbp->bio_caller1;
520 g_io_request(pbp, s->drive_sc->consumer);
521 finished = 0;
522
523 } else if (wp->parity != NULL) {
524 cbp = wp->parity;
525 wp->parity = NULL;
526 s = cbp->bio_caller1;
527 g_io_request(cbp, s->drive_sc->consumer);
528 finished = 0;
529 }
530
531 return (finished);
532 }
533
534 /* Flush the queue with delayed requests. */
535 static void
gv_plex_flush(struct gv_plex * p)536 gv_plex_flush(struct gv_plex *p)
537 {
538 struct bio *bp;
539
540 bp = bioq_takefirst(p->rqueue);
541 while (bp != NULL) {
542 gv_plex_start(p, bp);
543 bp = bioq_takefirst(p->rqueue);
544 }
545 }
546
547 static void
gv_post_bio(struct gv_softc * sc,struct bio * bp)548 gv_post_bio(struct gv_softc *sc, struct bio *bp)
549 {
550
551 KASSERT(sc != NULL, ("NULL sc"));
552 KASSERT(bp != NULL, ("NULL bp"));
553 mtx_lock(&sc->bqueue_mtx);
554 bioq_disksort(sc->bqueue_down, bp);
555 wakeup(sc);
556 mtx_unlock(&sc->bqueue_mtx);
557 }
558
559 int
gv_sync_request(struct gv_plex * from,struct gv_plex * to,off_t offset,off_t length,int type,caddr_t data)560 gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
561 off_t length, int type, caddr_t data)
562 {
563 struct gv_softc *sc;
564 struct bio *bp;
565
566 KASSERT(from != NULL, ("NULL from"));
567 KASSERT(to != NULL, ("NULL to"));
568 sc = from->vinumconf;
569 KASSERT(sc != NULL, ("NULL sc"));
570
571 bp = g_new_bio();
572 if (bp == NULL) {
573 G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
574 " %jd; out of memory", from->name, offset);
575 return (ENOMEM);
576 }
577 bp->bio_length = length;
578 bp->bio_done = NULL;
579 bp->bio_pflags |= GV_BIO_SYNCREQ;
580 bp->bio_offset = offset;
581 bp->bio_caller1 = from;
582 bp->bio_caller2 = to;
583 bp->bio_cmd = type;
584 if (data == NULL)
585 data = g_malloc(length, M_WAITOK);
586 bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
587 bp->bio_data = data;
588
589 /* Send down next. */
590 gv_post_bio(sc, bp);
591 //gv_plex_start(from, bp);
592 return (0);
593 }
594
595 /*
596 * Handle a finished plex sync bio.
597 */
598 int
gv_sync_complete(struct gv_plex * to,struct bio * bp)599 gv_sync_complete(struct gv_plex *to, struct bio *bp)
600 {
601 struct gv_plex *from, *p;
602 struct gv_sd *s;
603 struct gv_volume *v;
604 struct gv_softc *sc;
605 off_t offset;
606 int err;
607
608 g_topology_assert_not();
609
610 err = 0;
611 KASSERT(to != NULL, ("NULL to"));
612 KASSERT(bp != NULL, ("NULL bp"));
613 from = bp->bio_caller2;
614 KASSERT(from != NULL, ("NULL from"));
615 v = to->vol_sc;
616 KASSERT(v != NULL, ("NULL v"));
617 sc = v->vinumconf;
618 KASSERT(sc != NULL, ("NULL sc"));
619
620 /* If it was a read, write it. */
621 if (bp->bio_cmd == BIO_READ) {
622 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
623 BIO_WRITE, bp->bio_data);
624 /* If it was a write, read the next one. */
625 } else if (bp->bio_cmd == BIO_WRITE) {
626 if (bp->bio_pflags & GV_BIO_MALLOC)
627 g_free(bp->bio_data);
628 to->synced += bp->bio_length;
629 /* If we're finished, clean up. */
630 if (bp->bio_offset + bp->bio_length >= from->size) {
631 G_VINUM_DEBUG(1, "syncing of %s from %s completed",
632 to->name, from->name);
633 /* Update our state. */
634 LIST_FOREACH(s, &to->subdisks, in_plex)
635 gv_set_sd_state(s, GV_SD_UP, 0);
636 gv_update_plex_state(to);
637 to->flags &= ~GV_PLEX_SYNCING;
638 to->synced = 0;
639 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
640 } else {
641 offset = bp->bio_offset + bp->bio_length;
642 err = gv_sync_request(from, to, offset,
643 MIN(bp->bio_length, from->size - offset),
644 BIO_READ, NULL);
645 }
646 }
647 g_destroy_bio(bp);
648 /* Clean up if there was an error. */
649 if (err) {
650 to->flags &= ~GV_PLEX_SYNCING;
651 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
652 }
653
654 /* Check if all plexes are synced, and lower refcounts. */
655 g_topology_lock();
656 LIST_FOREACH(p, &v->plexes, in_volume) {
657 if (p->flags & GV_PLEX_SYNCING) {
658 g_topology_unlock();
659 return (-1);
660 }
661 }
662 /* If we came here, all plexes are synced, and we're free. */
663 gv_access(v->provider, -1, -1, 0);
664 g_topology_unlock();
665 G_VINUM_DEBUG(1, "plex sync completed");
666 gv_volume_flush(v);
667 return (0);
668 }
669
670 /*
671 * Create a new bio struct for the next grow request.
672 */
673 int
gv_grow_request(struct gv_plex * p,off_t offset,off_t length,int type,caddr_t data)674 gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
675 caddr_t data)
676 {
677 struct gv_softc *sc;
678 struct bio *bp;
679
680 KASSERT(p != NULL, ("gv_grow_request: NULL p"));
681 sc = p->vinumconf;
682 KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
683
684 bp = g_new_bio();
685 if (bp == NULL) {
686 G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
687 "out of memory", p->name);
688 return (ENOMEM);
689 }
690
691 bp->bio_cmd = type;
692 bp->bio_done = NULL;
693 bp->bio_error = 0;
694 bp->bio_caller1 = p;
695 bp->bio_offset = offset;
696 bp->bio_length = length;
697 bp->bio_pflags |= GV_BIO_GROW;
698 if (data == NULL)
699 data = g_malloc(length, M_WAITOK);
700 bp->bio_pflags |= GV_BIO_MALLOC;
701 bp->bio_data = data;
702
703 gv_post_bio(sc, bp);
704 //gv_plex_start(p, bp);
705 return (0);
706 }
707
708 /*
709 * Finish handling of a bio to a growing plex.
710 */
711 void
gv_grow_complete(struct gv_plex * p,struct bio * bp)712 gv_grow_complete(struct gv_plex *p, struct bio *bp)
713 {
714 struct gv_softc *sc;
715 struct gv_sd *s;
716 struct gv_volume *v;
717 off_t origsize, offset;
718 int sdcount, err;
719
720 v = p->vol_sc;
721 KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
722 sc = v->vinumconf;
723 KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
724 err = 0;
725
726 /* If it was a read, write it. */
727 if (bp->bio_cmd == BIO_READ) {
728 p->synced += bp->bio_length;
729 err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
730 BIO_WRITE, bp->bio_data);
731 /* If it was a write, read next. */
732 } else if (bp->bio_cmd == BIO_WRITE) {
733 if (bp->bio_pflags & GV_BIO_MALLOC)
734 g_free(bp->bio_data);
735
736 /* Find the real size of the plex. */
737 sdcount = gv_sdcount(p, 1);
738 s = LIST_FIRST(&p->subdisks);
739 KASSERT(s != NULL, ("NULL s"));
740 origsize = (s->size * (sdcount - 1));
741 if (bp->bio_offset + bp->bio_length >= origsize) {
742 G_VINUM_DEBUG(1, "growing of %s completed", p->name);
743 p->flags &= ~GV_PLEX_GROWING;
744 LIST_FOREACH(s, &p->subdisks, in_plex) {
745 s->flags &= ~GV_SD_GROW;
746 gv_set_sd_state(s, GV_SD_UP, 0);
747 }
748 p->size = gv_plex_size(p);
749 gv_update_vol_size(v, gv_vol_size(v));
750 gv_set_plex_state(p, GV_PLEX_UP, 0);
751 g_topology_lock();
752 gv_access(v->provider, -1, -1, 0);
753 g_topology_unlock();
754 p->synced = 0;
755 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
756 /* Issue delayed requests. */
757 gv_plex_flush(p);
758 } else {
759 offset = bp->bio_offset + bp->bio_length;
760 err = gv_grow_request(p, offset,
761 MIN(bp->bio_length, origsize - offset),
762 BIO_READ, NULL);
763 }
764 }
765 g_destroy_bio(bp);
766
767 if (err) {
768 p->flags &= ~GV_PLEX_GROWING;
769 G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
770 }
771 }
772
773 /*
774 * Create an initialization BIO and send it off to the consumer. Assume that
775 * we're given initialization data as parameter.
776 */
777 void
gv_init_request(struct gv_sd * s,off_t start,caddr_t data,off_t length)778 gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
779 {
780 struct gv_drive *d;
781 struct g_consumer *cp;
782 struct bio *bp, *cbp;
783
784 KASSERT(s != NULL, ("gv_init_request: NULL s"));
785 d = s->drive_sc;
786 KASSERT(d != NULL, ("gv_init_request: NULL d"));
787 cp = d->consumer;
788 KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
789
790 bp = g_new_bio();
791 if (bp == NULL) {
792 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
793 " (drive offset %jd); out of memory", s->name,
794 (intmax_t)s->initialized, (intmax_t)start);
795 return; /* XXX: Error codes. */
796 }
797 bp->bio_cmd = BIO_WRITE;
798 bp->bio_data = data;
799 bp->bio_done = NULL;
800 bp->bio_error = 0;
801 bp->bio_length = length;
802 bp->bio_pflags |= GV_BIO_INIT;
803 bp->bio_offset = start;
804 bp->bio_caller1 = s;
805
806 /* Then ofcourse, we have to clone it. */
807 cbp = g_clone_bio(bp);
808 if (cbp == NULL) {
809 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
810 " (drive offset %jd); out of memory", s->name,
811 (intmax_t)s->initialized, (intmax_t)start);
812 return; /* XXX: Error codes. */
813 }
814 cbp->bio_done = gv_done;
815 cbp->bio_caller1 = s;
816 d->active++;
817 /* Send it off to the consumer. */
818 g_io_request(cbp, cp);
819 }
820
821 /*
822 * Handle a finished initialization BIO.
823 */
824 void
gv_init_complete(struct gv_plex * p,struct bio * bp)825 gv_init_complete(struct gv_plex *p, struct bio *bp)
826 {
827 struct gv_softc *sc;
828 struct gv_drive *d;
829 struct g_consumer *cp;
830 struct gv_sd *s;
831 off_t start, length;
832 caddr_t data;
833 int error;
834
835 s = bp->bio_caller1;
836 start = bp->bio_offset;
837 length = bp->bio_length;
838 error = bp->bio_error;
839 data = bp->bio_data;
840
841 KASSERT(s != NULL, ("gv_init_complete: NULL s"));
842 d = s->drive_sc;
843 KASSERT(d != NULL, ("gv_init_complete: NULL d"));
844 cp = d->consumer;
845 KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
846 sc = p->vinumconf;
847 KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));
848
849 g_destroy_bio(bp);
850
851 /*
852 * First we need to find out if it was okay, and abort if it's not.
853 * Then we need to free previous buffers, find out the correct subdisk,
854 * as well as getting the correct starting point and length of the BIO.
855 */
856 if (start >= s->drive_offset + s->size) {
857 /* Free the data we initialized. */
858 g_free(data);
859 g_topology_assert_not();
860 g_topology_lock();
861 g_access(cp, 0, -1, 0);
862 g_topology_unlock();
863 if (error) {
864 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
865 GV_SETSTATE_CONFIG);
866 } else {
867 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
868 s->initialized = 0;
869 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
870 G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
871 "successfully", s->name);
872 }
873 return;
874 }
875 s->initialized += length;
876 start += length;
877 gv_init_request(s, start, data, length);
878 }
879
880 /*
881 * Create a new bio struct for the next parity rebuild. Used both by internal
882 * rebuild of degraded plexes as well as user initiated rebuilds/checks.
883 */
884 void
gv_parity_request(struct gv_plex * p,int flags,off_t offset)885 gv_parity_request(struct gv_plex *p, int flags, off_t offset)
886 {
887 struct gv_softc *sc;
888 struct bio *bp;
889
890 KASSERT(p != NULL, ("gv_parity_request: NULL p"));
891 sc = p->vinumconf;
892 KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
893
894 bp = g_new_bio();
895 if (bp == NULL) {
896 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
897 "out of memory", p->name);
898 return;
899 }
900
901 bp->bio_cmd = BIO_WRITE;
902 bp->bio_done = NULL;
903 bp->bio_error = 0;
904 bp->bio_length = p->stripesize;
905 bp->bio_caller1 = p;
906
907 /*
908 * Check if it's a rebuild of a degraded plex or a user request of
909 * parity rebuild.
910 */
911 if (flags & GV_BIO_REBUILD)
912 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
913 else if (flags & GV_BIO_CHECK)
914 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
915 else {
916 G_VINUM_DEBUG(0, "invalid flags given in rebuild");
917 return;
918 }
919
920 bp->bio_pflags = flags;
921 bp->bio_pflags |= GV_BIO_MALLOC;
922
923 /* We still have more parity to build. */
924 bp->bio_offset = offset;
925 gv_post_bio(sc, bp);
926 //gv_plex_start(p, bp); /* Send it down to the plex. */
927 }
928
929 /*
930 * Handle a finished parity write.
931 */
932 void
gv_parity_complete(struct gv_plex * p,struct bio * bp)933 gv_parity_complete(struct gv_plex *p, struct bio *bp)
934 {
935 struct gv_softc *sc;
936 int error, flags;
937
938 error = bp->bio_error;
939 flags = bp->bio_pflags;
940 flags &= ~GV_BIO_MALLOC;
941
942 sc = p->vinumconf;
943 KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
944
945 /* Clean up what we allocated. */
946 if (bp->bio_pflags & GV_BIO_MALLOC)
947 g_free(bp->bio_data);
948 g_destroy_bio(bp);
949
950 if (error == EAGAIN) {
951 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
952 (intmax_t)p->synced);
953 }
954
955 /* Any error is fatal, except EAGAIN when we're rebuilding. */
956 if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
957 /* Make sure we don't have the lock. */
958 g_topology_assert_not();
959 g_topology_lock();
960 gv_access(p->vol_sc->provider, -1, -1, 0);
961 g_topology_unlock();
962 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
963 "errno %d", p->name, (intmax_t)p->synced, error);
964 return;
965 } else {
966 p->synced += p->stripesize;
967 }
968
969 if (p->synced >= p->size) {
970 /* Make sure we don't have the lock. */
971 g_topology_assert_not();
972 g_topology_lock();
973 gv_access(p->vol_sc->provider, -1, -1, 0);
974 g_topology_unlock();
975 /* We're finished. */
976 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
977 p->synced = 0;
978 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
979 return;
980 }
981
982 /* Send down next. It will determine if we need to itself. */
983 gv_parity_request(p, flags, p->synced);
984 }
985
986 /*
987 * Handle a finished plex rebuild bio.
988 */
989 void
gv_rebuild_complete(struct gv_plex * p,struct bio * bp)990 gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
991 {
992 struct gv_softc *sc;
993 struct gv_sd *s;
994 int error, flags;
995 off_t offset;
996
997 error = bp->bio_error;
998 flags = bp->bio_pflags;
999 offset = bp->bio_offset;
1000 flags &= ~GV_BIO_MALLOC;
1001 sc = p->vinumconf;
1002 KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
1003
1004 /* Clean up what we allocated. */
1005 if (bp->bio_pflags & GV_BIO_MALLOC)
1006 g_free(bp->bio_data);
1007 g_destroy_bio(bp);
1008
1009 if (error) {
1010 g_topology_assert_not();
1011 g_topology_lock();
1012 gv_access(p->vol_sc->provider, -1, -1, 0);
1013 g_topology_unlock();
1014
1015 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
1016 p->name, (intmax_t)offset, error);
1017 p->flags &= ~GV_PLEX_REBUILDING;
1018 p->synced = 0;
1019 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1020 return;
1021 }
1022
1023 offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
1024 if (offset >= p->size) {
1025 /* We're finished. */
1026 g_topology_assert_not();
1027 g_topology_lock();
1028 gv_access(p->vol_sc->provider, -1, -1, 0);
1029 g_topology_unlock();
1030
1031 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
1032 gv_save_config(p->vinumconf);
1033 p->flags &= ~GV_PLEX_REBUILDING;
1034 p->synced = 0;
1035 /* Try to up all subdisks. */
1036 LIST_FOREACH(s, &p->subdisks, in_plex)
1037 gv_update_sd_state(s);
1038 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
1039 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
1040 return;
1041 }
1042
1043 /* Send down next. It will determine if we need to itself. */
1044 gv_parity_request(p, flags, offset);
1045 }
1046