1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/module.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/bio.h>
36 #include <sys/sbuf.h>
37 #include <sys/sysctl.h>
38 #include <sys/malloc.h>
39 #include <vm/uma.h>
40 #include <geom/geom.h>
41 #include <geom/geom_dbg.h>
42 #include <geom/stripe/g_stripe.h>
43
/* Register the "geom_stripe" kernel feature. */
FEATURE(geom_stripe, "GEOM striping support");

/* malloc(9) type used for the softc and its array of component disks. */
static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data");

/* UMA zone supplying the temporary buffers used by "fast" mode. */
static uma_zone_t g_stripe_zone;

/* Forward declarations for functions referenced before their definition. */
static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);

static g_taste_t g_stripe_taste;
static g_ctl_req_t g_stripe_config;
static g_dumpconf_t g_stripe_dumpconf;
static g_init_t g_stripe_init;
static g_fini_t g_stripe_fini;

/* GEOM class descriptor: ties the class-level methods together. */
struct g_class g_stripe_class = {
	.name = G_STRIPE_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_stripe_config,
	.taste = g_stripe_taste,
	.destroy_geom = g_stripe_destroy_geom,
	.init = g_stripe_init,
	.fini = g_stripe_fini
};

/* Knobs and statistics exported under kern.geom.stripe. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_STRIPE stuff");
/* Debug verbosity; presumably consulted by the G_STRIPE_DEBUG() macro. */
static u_int g_stripe_debug = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0,
    "Debug level");
/* Non-zero selects "fast" mode in g_stripe_start(). */
static int g_stripe_fast = 0;
SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast,
    CTLFLAG_RWTUN, &g_stripe_fast, 0,
    "Fast, but memory-consuming, mode");
/*
 * Memory limit for "fast" mode.  Declared CTLFLAG_NOFETCH: the value is
 * computed and the tunable fetched by hand in g_stripe_init().
 */
static u_long g_stripe_maxmem;
SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0,
    "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
/* Incremented by g_stripe_start() each time g_stripe_start_fast() fails. */
static u_int g_stripe_fast_failed = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD,
    &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed");
87
/*
 * Greatest Common Divisor (Euclid's algorithm).
 *
 * gcd(a, 0) == a, hence gcd(0, 0) == 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int r;

	while (b != 0) {
		r = a % b;
		a = b;
		b = r;
	}
	return (a);
}

/*
 * Least Common Multiple.
 *
 * Returns 0 if either argument is 0 (this also avoids dividing by
 * gcd(0, 0) == 0).  The division is done before the multiplication so the
 * intermediate value cannot overflow when the result itself fits in u_int.
 */
static u_int
lcm(u_int a, u_int b)
{

	if (a == 0 || b == 0)
		return (0);
	return ((a / gcd(a, b)) * b);
}
113
114 static void
g_stripe_init(struct g_class * mp __unused)115 g_stripe_init(struct g_class *mp __unused)
116 {
117
118 g_stripe_maxmem = maxphys * 100;
119 TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem,", &g_stripe_maxmem);
120 g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL,
121 NULL, NULL, 0, 0);
122 g_stripe_maxmem -= g_stripe_maxmem % maxphys;
123 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys);
124 }
125
126 static void
g_stripe_fini(struct g_class * mp __unused)127 g_stripe_fini(struct g_class *mp __unused)
128 {
129
130 uma_zdestroy(g_stripe_zone);
131 }
132
133 /*
134 * Return the number of valid disks.
135 */
136 static u_int
g_stripe_nvalid(struct g_stripe_softc * sc)137 g_stripe_nvalid(struct g_stripe_softc *sc)
138 {
139 u_int i, no;
140
141 no = 0;
142 for (i = 0; i < sc->sc_ndisks; i++) {
143 if (sc->sc_disks[i] != NULL)
144 no++;
145 }
146
147 return (no);
148 }
149
/*
 * Remove one component disk from its stripe device.
 *
 * The provider exported by the device (if any) is withered first.  The
 * consumer itself is detached and destroyed only when it holds no access
 * counts; otherwise it is left attached but marked through cp->private,
 * and g_stripe_access() calls back here once the counts drop to zero.
 * When the geom's consumer list becomes empty the device is destroyed.
 *
 * Must be called with the topology lock held.
 */
static void
g_stripe_remove_disk(struct g_consumer *cp)
{
	struct g_stripe_softc *sc;

	g_topology_assert();
	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
	sc = (struct g_stripe_softc *)cp->geom->softc;
	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));

	/* First removal attempt for this consumer: log it and mark it. */
	if (cp->private == NULL) {
		G_STRIPE_DEBUG(0, "Disk %s removed from %s.",
		    cp->provider->name, sc->sc_name);
		cp->private = (void *)(uintptr_t)-1;
	}

	/* A stripe missing a component cannot stay online. */
	if (sc->sc_provider != NULL) {
		G_STRIPE_DEBUG(0, "Device %s deactivated.",
		    sc->sc_provider->name);
		g_wither_provider(sc->sc_provider, ENXIO);
		sc->sc_provider = NULL;
	}

	/* Still open: leave the consumer in place for now. */
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		return;
	sc->sc_disks[cp->index] = NULL;
	cp->index = 0;
	g_detach(cp);
	g_destroy_consumer(cp);
	/* If there are no valid disks anymore, remove device. */
	if (LIST_EMPTY(&sc->sc_geom->consumer))
		g_stripe_destroy(sc, 1);
}
183
184 static void
g_stripe_orphan(struct g_consumer * cp)185 g_stripe_orphan(struct g_consumer *cp)
186 {
187 struct g_stripe_softc *sc;
188 struct g_geom *gp;
189
190 g_topology_assert();
191 gp = cp->geom;
192 sc = gp->softc;
193 if (sc == NULL)
194 return;
195
196 g_stripe_remove_disk(cp);
197 }
198
/*
 * Access method of the stripe provider.
 *
 * Applies the requested access-count deltas (dr, dw, de) to every consumer
 * of the geom.  An extra exclusive count is taken on first open and
 * released on last close.  If any consumer refuses the change, the
 * consumers already updated are rolled back and the error is returned.
 *
 * Consumers that were marked for removal (cp->private != NULL) are removed
 * as soon as their access counts reach zero.
 *
 * Returns 0 on success or the error reported by g_access().
 */
static int
g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *cp1, *cp2, *tmp;
	struct g_stripe_softc *sc __diagused;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	gp = pp->geom;
	sc = gp->softc;
	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));

	/* On first open, grab an extra "exclusive" bit */
	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
		de++;
	/* ... and let go of it on last close */
	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
		de--;

	/* _SAFE variant: g_stripe_remove_disk() may unlink cp1. */
	LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
		error = g_access(cp1, dr, dw, de);
		if (error != 0)
			goto fail;
		if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 &&
		    cp1->private != NULL) {
			g_stripe_remove_disk(cp1); /* May destroy geom. */
		}
	}
	return (0);

fail:
	/* Undo the change on every consumer that precedes the failing one. */
	LIST_FOREACH(cp2, &gp->consumer, consumer) {
		if (cp1 == cp2)
			break;
		g_access(cp2, -dr, -dw, -de);
	}
	return (error);
}
238
/*
 * Copy `length' bytes between a contiguous buffer and a buffer in which
 * consecutive chunks are separated by the stripes of the other disks.
 *
 * mode != 0: src is contiguous; after each chunk dst skips an additional
 *            stripesize * (ndisks - 1) bytes.
 * mode == 0: dst is contiguous; after each chunk src skips an additional
 *            stripesize * (ndisks - 1) bytes.
 *
 * The first chunk runs from `offset' to the end of the stripe containing
 * it; every following chunk is at most one stripe long.
 */
static void
g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
    off_t length, int mode)
{
	off_t stripesize;
	size_t len;

	stripesize = sc->sc_stripesize;
	/* Bytes remaining in the stripe that contains `offset'. */
	len = (size_t)(stripesize - (offset & (stripesize - 1)));
	do {
		bcopy(src, dst, len);
		if (mode) {
			dst += len + stripesize * (sc->sc_ndisks - 1);
			src += len;
		} else {
			dst += len;
			src += len + stripesize * (sc->sc_ndisks - 1);
		}
		length -= len;
		KASSERT(length >= 0,
		    ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).",
		    (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length));
		/* Next chunk: a full stripe, or whatever is left. */
		if (length > stripesize)
			len = stripesize;
		else
			len = length;
	} while (length > 0);
}
267
/*
 * Completion handler for child requests issued by this class.
 *
 * For a read that went through a temporary buffer (bio_caller1 holds the
 * caller's original data pointer) the data is first copied back into the
 * caller's buffer.  The parent's error, completed byte count and in-bed
 * counter are then updated under sc_lock; when the last child arrives the
 * temporary buffer (bio_driver1), if any, is freed and the parent is
 * delivered.
 */
static void
g_stripe_done(struct bio *bp)
{
	struct g_stripe_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
		g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
		    bp->bio_length, 1);
		bp->bio_data = bp->bio_caller1;
		bp->bio_caller1 = NULL;
	}
	mtx_lock(&sc->sc_lock);
	/* Keep the first error reported by any child. */
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_lock);
		if (pbp->bio_driver1 != NULL)
			uma_zfree(g_stripe_zone, pbp->bio_driver1);
		/* For BIO_SPEEDUP report the whole request as completed. */
		if (bp->bio_cmd == BIO_SPEEDUP)
			pbp->bio_completed = pbp->bio_length;
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_lock);
	g_destroy_bio(bp);
}
298
/*
 * "Fast" request splitting: issue at most one child request per disk.
 *
 * bp     - parent request.
 * no     - index of the disk holding the first byte of the request.
 * offset - offset of that first byte on disk `no'.
 * length - number of bytes of the request that fall into the first stripe.
 *
 * The first sc_ndisks pieces each get their own clone.  Further pieces are
 * merged into the existing clones in round-robin order.  A clone that has
 * absorbed more than one piece no longer maps to a contiguous part of the
 * parent buffer, so it is redirected to a temporary buffer from
 * g_stripe_zone; its original data pointer is kept in bio_caller1 and
 * g_stripe_copy() moves the data between the two layouts.
 *
 * Returns 0 once all children have been sent, or ENOMEM if a clone or the
 * temporary buffer could not be allocated (all clones are destroyed then).
 */
static int
g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct g_stripe_softc *sc;
	char *addr, *data = NULL;
	struct bio *cbp;
	off_t stripesize;
	u_int nparts = 0;
	int error;

	sc = bp->bio_to->geom->softc;

	addr = bp->bio_data;
	stripesize = sc->sc_stripesize;

	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		error = ENOMEM;
		goto failure;
	}
	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
	nparts++;
	/*
	 * Fill in the component buf structure.
	 */
	cbp->bio_done = g_stripe_done;
	cbp->bio_offset = offset;
	cbp->bio_data = addr;
	cbp->bio_caller1 = NULL;
	cbp->bio_length = length;
	/* bio_caller2 temporarily carries the target consumer. */
	cbp->bio_caller2 = sc->sc_disks[no];

	/* offset -= offset % stripesize; */
	offset -= offset & (stripesize - 1);
	addr += length;
	length = bp->bio_length - length;
	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
		/* Wrapped past the last disk: move on to the next row. */
		if (no > sc->sc_ndisks - 1) {
			no = 0;
			offset += stripesize;
		}
		if (nparts >= sc->sc_ndisks) {
			/* Every disk already has a clone; extend the next one. */
			cbp = TAILQ_NEXT(cbp, bio_queue);
			if (cbp == NULL)
				cbp = TAILQ_FIRST(&queue);
			nparts++;
			/*
			 * Update bio structure.
			 */
			/*
			 * MIN() is in case when
			 * (bp->bio_length % sc->sc_stripesize) != 0.
			 */
			cbp->bio_length += MIN(stripesize, length);
			if (cbp->bio_caller1 == NULL) {
				/*
				 * First merge into this clone: remember the
				 * original data pointer; the real buffer is
				 * assigned when the requests are fired off.
				 */
				cbp->bio_caller1 = cbp->bio_data;
				cbp->bio_data = NULL;
				if (data == NULL) {
					data = uma_zalloc(g_stripe_zone,
					    M_NOWAIT);
					if (data == NULL) {
						error = ENOMEM;
						goto failure;
					}
				}
			}
		} else {
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				error = ENOMEM;
				goto failure;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			nparts++;
			/*
			 * Fill in the component buf structure.
			 */
			cbp->bio_done = g_stripe_done;
			cbp->bio_offset = offset;
			cbp->bio_data = addr;
			cbp->bio_caller1 = NULL;
			/*
			 * MIN() is in case when
			 * (bp->bio_length % sc->sc_stripesize) != 0.
			 */
			cbp->bio_length = MIN(stripesize, length);
			cbp->bio_caller2 = sc->sc_disks[no];
		}
	}
	/* Let g_stripe_done() free the temporary buffer with the parent. */
	if (data != NULL)
		bp->bio_driver1 = data;
	/*
	 * Fire off all allocated requests!
	 */
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		struct g_consumer *cp;

		TAILQ_REMOVE(&queue, cbp, bio_queue);
		cp = cbp->bio_caller2;
		cbp->bio_caller2 = NULL;
		cbp->bio_to = cp->provider;
		if (cbp->bio_caller1 != NULL) {
			/* Carve this clone's slice out of the shared buffer. */
			cbp->bio_data = data;
			if (bp->bio_cmd == BIO_WRITE) {
				g_stripe_copy(sc, cbp->bio_caller1, data,
				    cbp->bio_offset, cbp->bio_length, 0);
			}
			data += cbp->bio_length;
		}
		G_STRIPE_LOGREQ(cbp, "Sending request.");
		g_io_request(cbp, cp);
	}
	return (0);
failure:
	if (data != NULL)
		uma_zfree(g_stripe_zone, data);
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		if (cbp->bio_caller1 != NULL) {
			cbp->bio_data = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
		}
		/* Undo the child accounting done for this clone. */
		bp->bio_children--;
		g_destroy_bio(cbp);
	}
	return (error);
}
427
/*
 * "Economic" request splitting: one child request per stripe-sized piece,
 * each pointing directly into the parent's buffer (or page array for
 * unmapped requests), so no temporary memory is needed.
 *
 * bp     - parent request.
 * no     - index of the disk holding the first byte of the request.
 * offset - offset of that first byte on disk `no'.
 * length - number of bytes of the request that fall into the first stripe.
 *
 * Returns 0 once all children have been sent, or ENOMEM if a clone could
 * not be allocated (all clones are destroyed in that case).
 */
static int
g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct g_stripe_softc *sc;
	off_t stripesize;
	struct bio *cbp;
	char *addr;
	int error;

	sc = bp->bio_to->geom->softc;

	stripesize = sc->sc_stripesize;

	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		error = ENOMEM;
		goto failure;
	}
	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
	/*
	 * Fill in the component buf structure.
	 */
	if (bp->bio_length == length)
		cbp->bio_done = g_std_done;	/* Optimized lockless case. */
	else
		cbp->bio_done = g_stripe_done;
	cbp->bio_offset = offset;
	cbp->bio_length = length;
	/*
	 * For unmapped requests `addr' is not a pointer into a buffer; it
	 * is used below as a running byte offset added to bio_ma_offset.
	 */
	if ((bp->bio_flags & BIO_UNMAPPED) != 0)
		addr = NULL;
	else
		addr = bp->bio_data;
	/* bio_caller2 temporarily carries the target consumer. */
	cbp->bio_caller2 = sc->sc_disks[no];

	/* offset -= offset % stripesize; */
	offset -= offset & (stripesize - 1);
	if (bp->bio_cmd != BIO_DELETE)
		addr += length;
	length = bp->bio_length - length;
	for (no++; length > 0; no++, length -= stripesize) {
		/* Wrapped past the last disk: move on to the next row. */
		if (no > sc->sc_ndisks - 1) {
			no = 0;
			offset += stripesize;
		}
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			error = ENOMEM;
			goto failure;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);

		/*
		 * Fill in the component buf structure.
		 */
		cbp->bio_done = g_stripe_done;
		cbp->bio_offset = offset;
		/*
		 * MIN() is in case when
		 * (bp->bio_length % sc->sc_stripesize) != 0.
		 */
		cbp->bio_length = MIN(stripesize, length);
		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
			/* Advance the page array/offset to this piece. */
			cbp->bio_ma_offset += (uintptr_t)addr;
			cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
			cbp->bio_ma_offset %= PAGE_SIZE;
			cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
			    cbp->bio_length) / PAGE_SIZE;
		} else
			cbp->bio_data = addr;

		cbp->bio_caller2 = sc->sc_disks[no];

		if (bp->bio_cmd != BIO_DELETE)
			addr += stripesize;
	}
	/*
	 * Fire off all allocated requests!
	 */
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		struct g_consumer *cp;

		TAILQ_REMOVE(&queue, cbp, bio_queue);
		cp = cbp->bio_caller2;
		cbp->bio_caller2 = NULL;
		cbp->bio_to = cp->provider;
		G_STRIPE_LOGREQ(cbp, "Sending request.");
		g_io_request(cbp, cp);
	}
	return (0);
failure:
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		/* Undo the child accounting done for this clone. */
		bp->bio_children--;
		g_destroy_bio(cbp);
	}
	return (error);
}
526
527 static void
g_stripe_pushdown(struct g_stripe_softc * sc,struct bio * bp)528 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp)
529 {
530 struct bio_queue_head queue;
531 struct g_consumer *cp;
532 struct bio *cbp;
533 u_int no;
534
535 bioq_init(&queue);
536 for (no = 0; no < sc->sc_ndisks; no++) {
537 cbp = g_clone_bio(bp);
538 if (cbp == NULL) {
539 for (cbp = bioq_first(&queue); cbp != NULL;
540 cbp = bioq_first(&queue)) {
541 bioq_remove(&queue, cbp);
542 g_destroy_bio(cbp);
543 }
544 if (bp->bio_error == 0)
545 bp->bio_error = ENOMEM;
546 g_io_deliver(bp, bp->bio_error);
547 return;
548 }
549 bioq_insert_tail(&queue, cbp);
550 cbp->bio_done = g_stripe_done;
551 cbp->bio_caller2 = sc->sc_disks[no];
552 cbp->bio_to = sc->sc_disks[no]->provider;
553 }
554 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
555 bioq_remove(&queue, cbp);
556 G_STRIPE_LOGREQ(cbp, "Sending request.");
557 cp = cbp->bio_caller2;
558 cbp->bio_caller2 = NULL;
559 g_io_request(cbp, cp);
560 }
561 }
562
/*
 * Start method of the stripe provider: entry point for every I/O request.
 *
 * READ/WRITE/DELETE requests are mapped onto the component disks and split
 * by g_stripe_start_fast() or g_stripe_start_economic().  SPEEDUP and
 * FLUSH are broadcast to all disks.  GETATTR is answered only for
 * "GEOM::candelete"; everything else is rejected with EOPNOTSUPP.
 */
static void
g_stripe_start(struct bio *bp)
{
	off_t offset, start, length, nstripe, stripesize;
	struct g_stripe_softc *sc;
	u_int no;
	int error, fast = 0;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL, provider's error should be set and g_stripe_start()
	 * should not be called at all.
	 */
	KASSERT(sc != NULL,
	    ("Provider's error should be set (error=%d)(device=%s).",
	    bp->bio_to->error, bp->bio_to->name));

	G_STRIPE_LOGREQ(bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_SPEEDUP:
	case BIO_FLUSH:
		g_stripe_pushdown(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			int val = (sc->sc_flags & G_STRIPE_FLAG_CANDELETE) != 0;
			g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
			return;
		}
		/* otherwise: To which provider it should be delivered? */
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	stripesize = sc->sc_stripesize;

	/*
	 * Calculations are quite messy, but fast I hope.
	 */

	/* Stripe number. */
	/* nstripe = bp->bio_offset / stripesize; */
	nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
	/* Disk number. */
	no = nstripe % sc->sc_ndisks;
	/* Start position in stripe. */
	/* start = bp->bio_offset % stripesize; */
	start = bp->bio_offset & (stripesize - 1);
	/* Start position in disk. */
	/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
	offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
	/* Length of data to operate. */
	length = MIN(bp->bio_length, stripesize - start);

	/*
	 * Do use "fast" mode when:
	 * 1. "Fast" mode is ON.
	 * and
	 * 2. Request size is less than or equal to maxphys,
	 *    which should always be true.
	 * and
	 * 3. Request size is bigger than or equal to stripesize * ndisks.
	 *    If it isn't, there will be no need to send more than one I/O
	 *    request to a provider, so there is nothing to optimize.
	 * and
	 * 4. Request is not unmapped.
	 * and
	 * 5. It is not a BIO_DELETE.
	 */
	if (g_stripe_fast && bp->bio_length <= maxphys &&
	    bp->bio_length >= stripesize * sc->sc_ndisks &&
	    (bp->bio_flags & BIO_UNMAPPED) == 0 &&
	    bp->bio_cmd != BIO_DELETE) {
		fast = 1;
	}
	error = 0;
	if (fast) {
		error = g_stripe_start_fast(bp, no, offset, length);
		if (error != 0)
			g_stripe_fast_failed++;
	}
	/*
	 * Do use "economic" when:
	 * 1. "Economic" mode is ON.
	 * or
	 * 2. "Fast" mode failed. It can only fail if there is no memory.
	 */
	if (!fast || error != 0)
		error = g_stripe_start_economic(bp, no, offset, length);
	/* Both strategies failed: complete the request with the error. */
	if (error != 0) {
		if (bp->bio_error == 0)
			bp->bio_error = error;
		g_io_deliver(bp, bp->bio_error);
	}
}
664
665 static void
g_stripe_check_and_run(struct g_stripe_softc * sc)666 g_stripe_check_and_run(struct g_stripe_softc *sc)
667 {
668 struct g_provider *dp;
669 off_t mediasize, ms;
670 u_int no, sectorsize = 0;
671
672 g_topology_assert();
673 if (g_stripe_nvalid(sc) != sc->sc_ndisks)
674 return;
675
676 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
677 sc->sc_name);
678 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
679 if (g_stripe_fast == 0)
680 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
681 /*
682 * Find the smallest disk.
683 */
684 mediasize = sc->sc_disks[0]->provider->mediasize;
685 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
686 mediasize -= sc->sc_disks[0]->provider->sectorsize;
687 mediasize -= mediasize % sc->sc_stripesize;
688 sectorsize = sc->sc_disks[0]->provider->sectorsize;
689 for (no = 1; no < sc->sc_ndisks; no++) {
690 dp = sc->sc_disks[no]->provider;
691 ms = dp->mediasize;
692 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
693 ms -= dp->sectorsize;
694 ms -= ms % sc->sc_stripesize;
695 if (ms < mediasize)
696 mediasize = ms;
697 sectorsize = lcm(sectorsize, dp->sectorsize);
698
699 /* A provider underneath us doesn't support unmapped */
700 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
701 G_STRIPE_DEBUG(1, "Cancelling unmapped "
702 "because of %s.", dp->name);
703 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
704 }
705 }
706 sc->sc_provider->sectorsize = sectorsize;
707 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
708 sc->sc_provider->stripesize = sc->sc_stripesize;
709 sc->sc_provider->stripeoffset = 0;
710 g_error_provider(sc->sc_provider, 0);
711
712 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name);
713 }
714
715 static int
g_stripe_read_metadata(struct g_consumer * cp,struct g_stripe_metadata * md)716 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
717 {
718 struct g_provider *pp;
719 u_char *buf;
720 int error;
721
722 g_topology_assert();
723
724 error = g_access(cp, 1, 0, 0);
725 if (error != 0)
726 return (error);
727 pp = cp->provider;
728 g_topology_unlock();
729 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
730 &error);
731 g_topology_lock();
732 g_access(cp, -1, 0, 0);
733 if (buf == NULL)
734 return (error);
735
736 /* Decode metadata. */
737 stripe_metadata_decode(buf, md);
738 g_free(buf);
739
740 return (0);
741 }
742
743 /*
744 * Add disk to given device.
745 */
746 static int
g_stripe_add_disk(struct g_stripe_softc * sc,struct g_provider * pp,u_int no)747 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
748 {
749 struct g_consumer *cp, *fcp;
750 struct g_geom *gp;
751 int error;
752
753 g_topology_assert();
754 /* Metadata corrupted? */
755 if (no >= sc->sc_ndisks)
756 return (EINVAL);
757
758 /* Check if disk is not already attached. */
759 if (sc->sc_disks[no] != NULL)
760 return (EEXIST);
761
762 gp = sc->sc_geom;
763 fcp = LIST_FIRST(&gp->consumer);
764
765 cp = g_new_consumer(gp);
766 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
767 cp->private = NULL;
768 cp->index = no;
769 error = g_attach(cp, pp);
770 if (error != 0) {
771 g_destroy_consumer(cp);
772 return (error);
773 }
774
775 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
776 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
777 if (error != 0) {
778 g_detach(cp);
779 g_destroy_consumer(cp);
780 return (error);
781 }
782 }
783 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
784 struct g_stripe_metadata md;
785
786 /* Reread metadata. */
787 error = g_stripe_read_metadata(cp, &md);
788 if (error != 0)
789 goto fail;
790
791 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
792 strcmp(md.md_name, sc->sc_name) != 0 ||
793 md.md_id != sc->sc_id) {
794 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
795 goto fail;
796 }
797 }
798
799 sc->sc_disks[no] = cp;
800
801 /* cascade candelete */
802 error = g_access(cp, 1, 0, 0);
803 if (error == 0) {
804 int can_delete;
805
806 error = g_getattr("GEOM::candelete", cp, &can_delete);
807 if (error == 0 && can_delete != 0)
808 sc->sc_flags |= G_STRIPE_FLAG_CANDELETE;
809 G_STRIPE_DEBUG(1, "Provider %s candelete %i.", pp->name,
810 can_delete);
811 g_access(cp, -1, 0, 0);
812 }
813
814 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
815 g_stripe_check_and_run(sc);
816
817 return (0);
818 fail:
819 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
820 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
821 g_detach(cp);
822 g_destroy_consumer(cp);
823 return (error);
824 }
825
826 static struct g_geom *
g_stripe_create(struct g_class * mp,const struct g_stripe_metadata * md,u_int type)827 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
828 u_int type)
829 {
830 struct g_stripe_softc *sc;
831 struct g_geom *gp;
832 u_int no;
833
834 g_topology_assert();
835 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
836 md->md_id);
837
838 /* Two disks is minimum. */
839 if (md->md_all < 2) {
840 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
841 return (NULL);
842 }
843 #if 0
844 /* Stripe size have to be grater than or equal to sector size. */
845 if (md->md_stripesize < sectorsize) {
846 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
847 return (NULL);
848 }
849 #endif
850 /* Stripe size have to be power of 2. */
851 if (!powerof2(md->md_stripesize)) {
852 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
853 return (NULL);
854 }
855
856 /* Check for duplicate unit */
857 LIST_FOREACH(gp, &mp->geom, geom) {
858 sc = gp->softc;
859 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
860 G_STRIPE_DEBUG(0, "Device %s already configured.",
861 sc->sc_name);
862 return (NULL);
863 }
864 }
865 gp = g_new_geom(mp, md->md_name);
866 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
867 gp->start = g_stripe_start;
868 gp->spoiled = g_stripe_orphan;
869 gp->orphan = g_stripe_orphan;
870 gp->access = g_stripe_access;
871 gp->dumpconf = g_stripe_dumpconf;
872
873 sc->sc_id = md->md_id;
874 sc->sc_stripesize = md->md_stripesize;
875 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1);
876 sc->sc_ndisks = md->md_all;
877 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
878 M_STRIPE, M_WAITOK | M_ZERO);
879 for (no = 0; no < sc->sc_ndisks; no++)
880 sc->sc_disks[no] = NULL;
881 sc->sc_type = type;
882 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
883
884 gp->softc = sc;
885 sc->sc_geom = gp;
886 sc->sc_provider = NULL;
887
888 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
889
890 return (gp);
891 }
892
/*
 * Destroy a stripe device.
 *
 * sc    - device to destroy (NULL yields ENXIO).
 * force - when zero, refuse with EBUSY if the provider is still open;
 *         when non-zero, proceed anyway.
 *
 * Every consumer is handed to g_stripe_remove_disk().  That function calls
 * back into g_stripe_destroy() once the consumer list becomes empty, so
 * the actual teardown may already have happened by the time the loop here
 * finishes; this is detected and reported as success.  If consumers remain
 * (they are still open) EINPROGRESS is returned.
 *
 * Must be called with the topology lock held.
 */
static int
g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
{
	struct g_provider *pp;
	struct g_consumer *cp, *cp1;
	struct g_geom *gp;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);

	pp = sc->sc_provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (force) {
			G_STRIPE_DEBUG(0, "Device %s is still open, so it "
			    "can't be definitely removed.", pp->name);
		} else {
			G_STRIPE_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
	}

	gp = sc->sc_geom;
	LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
		g_stripe_remove_disk(cp);
		/*
		 * This was the last list entry; return without touching
		 * sc again, as the nested call may already have freed it.
		 */
		if (cp1 == NULL)
			return (0);	/* Recursion happened. */
	}
	if (!LIST_EMPTY(&gp->consumer))
		return (EINPROGRESS);

	gp->softc = NULL;
	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
	    gp->name));
	free(sc->sc_disks, M_STRIPE);
	mtx_destroy(&sc->sc_lock);
	free(sc, M_STRIPE);
	G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	return (0);
}
937
938 static int
g_stripe_destroy_geom(struct gctl_req * req __unused,struct g_class * mp __unused,struct g_geom * gp)939 g_stripe_destroy_geom(struct gctl_req *req __unused,
940 struct g_class *mp __unused, struct g_geom *gp)
941 {
942 struct g_stripe_softc *sc;
943
944 sc = gp->softc;
945 return (g_stripe_destroy(sc, 0));
946 }
947
948 static struct g_geom *
g_stripe_taste(struct g_class * mp,struct g_provider * pp,int flags __unused)949 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
950 {
951 struct g_stripe_metadata md;
952 struct g_stripe_softc *sc;
953 struct g_consumer *cp;
954 struct g_geom *gp;
955 int error;
956
957 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
958 g_topology_assert();
959
960 /* Skip providers that are already open for writing. */
961 if (pp->acw > 0)
962 return (NULL);
963
964 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
965
966 gp = g_new_geom(mp, "stripe:taste");
967 gp->start = g_stripe_start;
968 gp->access = g_stripe_access;
969 gp->orphan = g_stripe_orphan;
970 cp = g_new_consumer(gp);
971 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
972 error = g_attach(cp, pp);
973 if (error == 0) {
974 error = g_stripe_read_metadata(cp, &md);
975 g_detach(cp);
976 }
977 g_destroy_consumer(cp);
978 g_destroy_geom(gp);
979 if (error != 0)
980 return (NULL);
981 gp = NULL;
982
983 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
984 return (NULL);
985 if (md.md_version > G_STRIPE_VERSION) {
986 printf("geom_stripe.ko module is too old to handle %s.\n",
987 pp->name);
988 return (NULL);
989 }
990 /*
991 * Backward compatibility:
992 */
993 /* There was no md_provider field in earlier versions of metadata. */
994 if (md.md_version < 2)
995 bzero(md.md_provider, sizeof(md.md_provider));
996 /* There was no md_provsize field in earlier versions of metadata. */
997 if (md.md_version < 3)
998 md.md_provsize = pp->mediasize;
999
1000 if (md.md_provider[0] != '\0' &&
1001 !g_compare_names(md.md_provider, pp->name))
1002 return (NULL);
1003 if (md.md_provsize != pp->mediasize)
1004 return (NULL);
1005
1006 /*
1007 * Let's check if device already exists.
1008 */
1009 sc = NULL;
1010 LIST_FOREACH(gp, &mp->geom, geom) {
1011 sc = gp->softc;
1012 if (sc == NULL)
1013 continue;
1014 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
1015 continue;
1016 if (strcmp(md.md_name, sc->sc_name) != 0)
1017 continue;
1018 if (md.md_id != sc->sc_id)
1019 continue;
1020 break;
1021 }
1022 if (gp != NULL) {
1023 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
1024 error = g_stripe_add_disk(sc, pp, md.md_no);
1025 if (error != 0) {
1026 G_STRIPE_DEBUG(0,
1027 "Cannot add disk %s to %s (error=%d).", pp->name,
1028 gp->name, error);
1029 return (NULL);
1030 }
1031 } else {
1032 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
1033 if (gp == NULL) {
1034 G_STRIPE_DEBUG(0, "Cannot create device %s.",
1035 md.md_name);
1036 return (NULL);
1037 }
1038 sc = gp->softc;
1039 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
1040 error = g_stripe_add_disk(sc, pp, md.md_no);
1041 if (error != 0) {
1042 G_STRIPE_DEBUG(0,
1043 "Cannot add disk %s to %s (error=%d).", pp->name,
1044 gp->name, error);
1045 g_stripe_destroy(sc, 1);
1046 return (NULL);
1047 }
1048 }
1049
1050 return (gp);
1051 }
1052
/*
 * Control request handler for "create": build a manual stripe device.
 *
 * Request parameters read here:
 *   nargs      - argument count; must be greater than 2 (name + 2 disks).
 *   arg0       - name of the new device.
 *   arg1..argN - providers to use as components, in stripe order.
 *   stripesize - stripe size (truncated to 32 bits).
 *
 * All providers are validated before anything is created.  If any disk
 * fails to attach afterwards, the device is destroyed again and an error
 * listing the offending providers is set on the request.
 */
static void
g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
{
	u_int attached, no;
	struct g_stripe_metadata md;
	struct g_provider *pp;
	struct g_stripe_softc *sc;
	struct g_geom *gp;
	struct sbuf *sb;
	off_t *stripesize;
	const char *name;
	char param[16];
	int *nargs;

	g_topology_assert();
	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs <= 2) {
		gctl_error(req, "Too few arguments.");
		return;
	}

	/* Build in-memory metadata describing the requested device. */
	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
	md.md_version = G_STRIPE_VERSION;
	name = gctl_get_asciiparam(req, "arg0");
	if (name == NULL) {
		gctl_error(req, "No 'arg%u' argument.", 0);
		return;
	}
	strlcpy(md.md_name, name, sizeof(md.md_name));
	md.md_id = arc4random();
	md.md_no = 0;
	/* arg0 is the device name; the rest are disks. */
	md.md_all = *nargs - 1;
	stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
	if (stripesize == NULL) {
		gctl_error(req, "No '%s' argument.", "stripesize");
		return;
	}
	md.md_stripesize = (uint32_t)*stripesize;
	bzero(md.md_provider, sizeof(md.md_provider));
	/* This field is not important here. */
	md.md_provsize = 0;

	/* Check all providers are valid */
	for (no = 1; no < *nargs; no++) {
		snprintf(param, sizeof(param), "arg%u", no);
		pp = gctl_get_provider(req, param);
		if (pp == NULL)
			return;
	}

	gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
	if (gp == NULL) {
		gctl_error(req, "Can't configure %s.", md.md_name);
		return;
	}

	sc = gp->softc;
	/* Collect the names of disks that fail to attach. */
	sb = sbuf_new_auto();
	sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
	for (attached = 0, no = 1; no < *nargs; no++) {
		snprintf(param, sizeof(param), "arg%u", no);
		pp = gctl_get_provider(req, param);
		if (pp == NULL) {
			name = gctl_get_asciiparam(req, param);
			MPASS(name != NULL);
			sbuf_printf(sb, " %s", name);
			continue;
		}
		/* argN occupies disk slot N - 1. */
		if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
			G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
			    no, pp->name, gp->name);
			sbuf_printf(sb, " %s", pp->name);
			continue;
		}
		attached++;
	}
	sbuf_finish(sb);
	/* All or nothing: an incomplete stripe is torn down again. */
	if (md.md_all != attached) {
		g_stripe_destroy(gp->softc, 1);
		gctl_error(req, "%s", sbuf_data(sb));
	}
	sbuf_delete(sb);
}
1140
1141 static struct g_stripe_softc *
g_stripe_find_device(struct g_class * mp,const char * name)1142 g_stripe_find_device(struct g_class *mp, const char *name)
1143 {
1144 struct g_stripe_softc *sc;
1145 struct g_geom *gp;
1146
1147 LIST_FOREACH(gp, &mp->geom, geom) {
1148 sc = gp->softc;
1149 if (sc == NULL)
1150 continue;
1151 if (strcmp(sc->sc_name, name) == 0)
1152 return (sc);
1153 }
1154 return (NULL);
1155 }
1156
1157 static void
g_stripe_ctl_destroy(struct gctl_req * req,struct g_class * mp)1158 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
1159 {
1160 struct g_stripe_softc *sc;
1161 int *force, *nargs, error;
1162 const char *name;
1163 char param[16];
1164 u_int i;
1165
1166 g_topology_assert();
1167
1168 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1169 if (nargs == NULL) {
1170 gctl_error(req, "No '%s' argument.", "nargs");
1171 return;
1172 }
1173 if (*nargs <= 0) {
1174 gctl_error(req, "Missing device(s).");
1175 return;
1176 }
1177 force = gctl_get_paraml(req, "force", sizeof(*force));
1178 if (force == NULL) {
1179 gctl_error(req, "No '%s' argument.", "force");
1180 return;
1181 }
1182
1183 for (i = 0; i < (u_int)*nargs; i++) {
1184 snprintf(param, sizeof(param), "arg%u", i);
1185 name = gctl_get_asciiparam(req, param);
1186 if (name == NULL) {
1187 gctl_error(req, "No 'arg%u' argument.", i);
1188 return;
1189 }
1190 sc = g_stripe_find_device(mp, name);
1191 if (sc == NULL) {
1192 gctl_error(req, "No such device: %s.", name);
1193 return;
1194 }
1195 error = g_stripe_destroy(sc, *force);
1196 if (error != 0) {
1197 gctl_error(req, "Cannot destroy device %s (error=%d).",
1198 sc->sc_name, error);
1199 return;
1200 }
1201 }
1202 }
1203
1204 static void
g_stripe_config(struct gctl_req * req,struct g_class * mp,const char * verb)1205 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
1206 {
1207 uint32_t *version;
1208
1209 g_topology_assert();
1210
1211 version = gctl_get_paraml(req, "version", sizeof(*version));
1212 if (version == NULL) {
1213 gctl_error(req, "No '%s' argument.", "version");
1214 return;
1215 }
1216 if (*version != G_STRIPE_VERSION) {
1217 gctl_error(req, "Userland and kernel parts are out of sync.");
1218 return;
1219 }
1220
1221 if (strcmp(verb, "create") == 0) {
1222 g_stripe_ctl_create(req, mp);
1223 return;
1224 } else if (strcmp(verb, "destroy") == 0 ||
1225 strcmp(verb, "stop") == 0) {
1226 g_stripe_ctl_destroy(req, mp);
1227 return;
1228 }
1229
1230 gctl_error(req, "Unknown verb.");
1231 }
1232
1233 static void
g_stripe_dumpconf(struct sbuf * sb,const char * indent,struct g_geom * gp,struct g_consumer * cp,struct g_provider * pp)1234 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1235 struct g_consumer *cp, struct g_provider *pp)
1236 {
1237 struct g_stripe_softc *sc;
1238
1239 sc = gp->softc;
1240 if (sc == NULL)
1241 return;
1242 if (pp != NULL) {
1243 /* Nothing here. */
1244 } else if (cp != NULL) {
1245 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
1246 (u_int)cp->index);
1247 } else {
1248 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
1249 sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent,
1250 (uintmax_t)sc->sc_stripesize);
1251 sbuf_printf(sb, "%s<Type>", indent);
1252 switch (sc->sc_type) {
1253 case G_STRIPE_TYPE_AUTOMATIC:
1254 sbuf_cat(sb, "AUTOMATIC");
1255 break;
1256 case G_STRIPE_TYPE_MANUAL:
1257 sbuf_cat(sb, "MANUAL");
1258 break;
1259 default:
1260 sbuf_cat(sb, "UNKNOWN");
1261 break;
1262 }
1263 sbuf_cat(sb, "</Type>\n");
1264 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
1265 indent, sc->sc_ndisks, g_stripe_nvalid(sc));
1266 sbuf_printf(sb, "%s<State>", indent);
1267 if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
1268 sbuf_cat(sb, "UP");
1269 else
1270 sbuf_cat(sb, "DOWN");
1271 sbuf_cat(sb, "</State>\n");
1272 }
1273 }
1274
/*
 * Module glue: declare the GEOM class described by g_stripe_class under the
 * name g_stripe, and publish version 0 of the geom_stripe module.
 */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
MODULE_VERSION(geom_stripe, 0);
1277