1 /*-
2 * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
3 *
4 * Copyright (c) 2003 Poul-Henning Kamp.
5 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
33 */
34
35 /*-
36 * Copyright (c) 1988 University of Utah.
37 * Copyright (c) 1990, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * the Systems Programming Group of the University of Utah Computer
42 * Science Department.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * from: Utah $Hdr: cd.c 1.6 90/11/28$
69 */
70
71 /*
72 * Dynamic configuration and disklabel support by:
73 * Jason R. Thorpe <thorpej@nas.nasa.gov>
74 * Numerical Aerodynamic Simulation Facility
75 * Mail Stop 258-6
76 * NASA Ames Research Center
77 * Moffett Field, CA 94035
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
83 #include <sys/module.h>
84 #include <sys/bio.h>
85 #include <sys/malloc.h>
86 #include <sys/sbuf.h>
87 #include <geom/geom.h>
88
/*
 * Number of blocks to leave untouched at the front of a component
 * partition.  This avoids overwriting the component's disklabel area
 * when the component starts at the beginning of the slice.
 */
#ifndef CCD_OFFSET
#define	CCD_OFFSET	16	/* default; may be overridden at build time */
#endif
97
/*
 * Bits stored in the sc_flags member of the softc.
 */
#define	CCDF_UNIFORM	0x02	/* size every component like the smallest */
#define	CCDF_MIRROR	0x04	/* use mirroring */
#define	CCDF_NO_OFFSET	0x08	/* do not leave space in front */
#define	CCDF_LINUX	0x10	/* use Linux compatibility mode */

/* Mask of the user-settable ccd flags. */
#define	CCDF_USERMASK	(CCDF_UNIFORM | CCDF_MIRROR)
106
107 /*
108 * Interleave description table.
 * Computed at configuration time to speed irregular-interleave lookups.
110 * The idea is that we interleave in "groups". First we interleave
111 * evenly over all component disks up to the size of the smallest
112 * component (the first group), then we interleave evenly over all
113 * remaining disks up to the size of the next-smallest (second group),
114 * and so on.
115 *
116 * Each table entry describes the interleave characteristics of one
117 * of these groups. For example if a concatenated disk consisted of
118 * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
119 * DEV_BSIZE (1), the table would have three entries:
120 *
121 * ndisk startblk startoff dev
122 * 3 0 0 0, 1, 2
123 * 2 9 3 0, 2
124 * 1 13 5 2
125 * 0 - - -
126 *
127 * which says that the first nine blocks (0-8) are interleaved over
128 * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
129 * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
130 * at component block 3, and the remaining blocks (13-14) are on disk
131 * 2 starting at offset 5.
132 */
133 struct ccdiinfo {
134 int ii_ndisk; /* # of disks range is interleaved over */
135 daddr_t ii_startblk; /* starting scaled block # for range */
136 daddr_t ii_startoff; /* starting component offset (block #) */
137 int *ii_index; /* ordered list of components in range */
138 };
139
140 /*
141 * Component info table.
142 * Describes a single component of a concatenated disk.
143 */
144 struct ccdcinfo {
145 daddr_t ci_size; /* size */
146 struct g_provider *ci_provider; /* provider */
147 struct g_consumer *ci_consumer; /* consumer */
148 };
149
150 /*
151 * A concatenated disk is described by this structure.
152 */
153
154 struct ccd_s {
155 LIST_ENTRY(ccd_s) list;
156
157 int sc_unit; /* logical unit number */
158 int sc_flags; /* flags */
159 daddr_t sc_size; /* size of ccd */
160 int sc_ileave; /* interleave */
161 u_int sc_ndisks; /* number of components */
162 struct ccdcinfo *sc_cinfo; /* component info */
163 struct ccdiinfo *sc_itable; /* interleave table */
164 uint32_t sc_secsize; /* # bytes per sector */
165 int sc_pick; /* side of mirror picked */
166 daddr_t sc_blk[2]; /* mirror localization */
167 uint32_t sc_offset; /* actual offset used */
168 };
169
170 static g_start_t g_ccd_start;
171 static void ccdiodone(struct bio *bp);
172 static void ccdinterleave(struct ccd_s *);
173 static int ccdinit(struct gctl_req *req, struct ccd_s *);
174 static int ccdbuffer(struct bio **ret, struct ccd_s *,
175 struct bio *, daddr_t, caddr_t, long);
176
/*
 * Orphan method of the class: intentionally a no-op.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{

	/*
	 * XXX: Nothing is done when a component goes away.  It is not
	 * XXX: obvious what the right reaction would be, so, like the
	 * XXX: code this replaced, we ignore it and let the user cope.
	 */
	(void)cp;
}
186
187 static int
g_ccd_access(struct g_provider * pp,int dr,int dw,int de)188 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
189 {
190 struct g_geom *gp;
191 struct g_consumer *cp1, *cp2;
192 int error;
193
194 de += dr;
195 de += dw;
196
197 gp = pp->geom;
198 error = ENXIO;
199 LIST_FOREACH(cp1, &gp->consumer, consumer) {
200 error = g_access(cp1, dr, dw, de);
201 if (error) {
202 LIST_FOREACH(cp2, &gp->consumer, consumer) {
203 if (cp1 == cp2)
204 break;
205 g_access(cp2, -dr, -dw, -de);
206 }
207 break;
208 }
209 }
210 return (error);
211 }
212
213 /*
214 * Free the softc and its substructures.
215 */
216 static void
g_ccd_freesc(struct ccd_s * sc)217 g_ccd_freesc(struct ccd_s *sc)
218 {
219 struct ccdiinfo *ii;
220
221 g_free(sc->sc_cinfo);
222 if (sc->sc_itable != NULL) {
223 for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
224 g_free(ii->ii_index);
225 g_free(sc->sc_itable);
226 }
227 g_free(sc);
228 }
229
230 static int
ccdinit(struct gctl_req * req,struct ccd_s * cs)231 ccdinit(struct gctl_req *req, struct ccd_s *cs)
232 {
233 struct ccdcinfo *ci;
234 daddr_t size;
235 int ix;
236 daddr_t minsize;
237 int maxsecsize;
238 off_t mediasize;
239 u_int sectorsize;
240
241 cs->sc_size = 0;
242
243 maxsecsize = 0;
244 minsize = 0;
245
246 if (cs->sc_flags & CCDF_LINUX) {
247 cs->sc_offset = 0;
248 cs->sc_ileave *= 2;
249 if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
250 gctl_error(req, "Mirror mode for Linux raids is "
251 "only supported with 2 devices");
252 } else {
253 if (cs->sc_flags & CCDF_NO_OFFSET)
254 cs->sc_offset = 0;
255 else
256 cs->sc_offset = CCD_OFFSET;
257 }
258 for (ix = 0; ix < cs->sc_ndisks; ix++) {
259 ci = &cs->sc_cinfo[ix];
260
261 mediasize = ci->ci_provider->mediasize;
262 sectorsize = ci->ci_provider->sectorsize;
263 if (sectorsize > maxsecsize)
264 maxsecsize = sectorsize;
265 size = mediasize / DEV_BSIZE - cs->sc_offset;
266
267 /* Truncate to interleave boundary */
268
269 if (cs->sc_ileave > 1)
270 size -= size % cs->sc_ileave;
271
272 if (size == 0) {
273 gctl_error(req, "Component %s has effective size zero",
274 ci->ci_provider->name);
275 return(ENODEV);
276 }
277
278 if (minsize == 0 || size < minsize)
279 minsize = size;
280 ci->ci_size = size;
281 cs->sc_size += size;
282 }
283
284 /*
285 * Don't allow the interleave to be smaller than
286 * the biggest component sector.
287 */
288 if ((cs->sc_ileave > 0) &&
289 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
290 gctl_error(req, "Interleave to small for sector size");
291 return(EINVAL);
292 }
293
294 /*
295 * If uniform interleave is desired set all sizes to that of
296 * the smallest component. This will guarantee that a single
297 * interleave table is generated.
298 *
299 * Lost space must be taken into account when calculating the
300 * overall size. Half the space is lost when CCDF_MIRROR is
301 * specified.
302 */
303 if (cs->sc_flags & CCDF_UNIFORM) {
304 for (ix = 0; ix < cs->sc_ndisks; ix++) {
305 ci = &cs->sc_cinfo[ix];
306 ci->ci_size = minsize;
307 }
308 cs->sc_size = cs->sc_ndisks * minsize;
309 }
310
311 if (cs->sc_flags & CCDF_MIRROR) {
312 /*
313 * Check to see if an even number of components
314 * have been specified. The interleave must also
315 * be non-zero in order for us to be able to
316 * guarantee the topology.
317 */
318 if (cs->sc_ndisks % 2) {
319 gctl_error(req,
320 "Mirroring requires an even number of disks");
321 return(EINVAL);
322 }
323 if (cs->sc_ileave == 0) {
324 gctl_error(req,
325 "An interleave must be specified when mirroring");
326 return(EINVAL);
327 }
328 cs->sc_size = (cs->sc_ndisks/2) * minsize;
329 }
330
331 /*
332 * Construct the interleave table.
333 */
334 ccdinterleave(cs);
335
336 /*
337 * Create pseudo-geometry based on 1MB cylinders. It's
338 * pretty close.
339 */
340 cs->sc_secsize = maxsecsize;
341
342 return (0);
343 }
344
345 static void
ccdinterleave(struct ccd_s * cs)346 ccdinterleave(struct ccd_s *cs)
347 {
348 struct ccdcinfo *ci, *smallci;
349 struct ccdiinfo *ii;
350 daddr_t bn, lbn;
351 int ix;
352 daddr_t size;
353
354 /*
355 * Allocate an interleave table. The worst case occurs when each
356 * of N disks is of a different size, resulting in N interleave
357 * tables.
358 *
359 * Chances are this is too big, but we don't care.
360 */
361 size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
362 cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
363
364 /*
365 * Trivial case: no interleave (actually interleave of disk size).
366 * Each table entry represents a single component in its entirety.
367 *
368 * An interleave of 0 may not be used with a mirror setup.
369 */
370 if (cs->sc_ileave == 0) {
371 bn = 0;
372 ii = cs->sc_itable;
373
374 for (ix = 0; ix < cs->sc_ndisks; ix++) {
375 /* Allocate space for ii_index. */
376 ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
377 ii->ii_ndisk = 1;
378 ii->ii_startblk = bn;
379 ii->ii_startoff = 0;
380 ii->ii_index[0] = ix;
381 bn += cs->sc_cinfo[ix].ci_size;
382 ii++;
383 }
384 ii->ii_ndisk = 0;
385 return;
386 }
387
388 /*
389 * The following isn't fast or pretty; it doesn't have to be.
390 */
391 size = 0;
392 bn = lbn = 0;
393 for (ii = cs->sc_itable; ; ii++) {
394 /*
395 * Allocate space for ii_index. We might allocate more then
396 * we use.
397 */
398 ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
399 M_WAITOK);
400
401 /*
402 * Locate the smallest of the remaining components
403 */
404 smallci = NULL;
405 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
406 ci++) {
407 if (ci->ci_size > size &&
408 (smallci == NULL ||
409 ci->ci_size < smallci->ci_size)) {
410 smallci = ci;
411 }
412 }
413
414 /*
415 * Nobody left, all done
416 */
417 if (smallci == NULL) {
418 ii->ii_ndisk = 0;
419 g_free(ii->ii_index);
420 ii->ii_index = NULL;
421 break;
422 }
423
424 /*
425 * Record starting logical block using an sc_ileave blocksize.
426 */
427 ii->ii_startblk = bn / cs->sc_ileave;
428
429 /*
430 * Record starting component block using an sc_ileave
431 * blocksize. This value is relative to the beginning of
432 * a component disk.
433 */
434 ii->ii_startoff = lbn;
435
436 /*
437 * Determine how many disks take part in this interleave
438 * and record their indices.
439 */
440 ix = 0;
441 for (ci = cs->sc_cinfo;
442 ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
443 if (ci->ci_size >= smallci->ci_size) {
444 ii->ii_index[ix++] = ci - cs->sc_cinfo;
445 }
446 }
447 ii->ii_ndisk = ix;
448 bn += ix * (smallci->ci_size - size);
449 lbn = smallci->ci_size / cs->sc_ileave;
450 size = smallci->ci_size;
451 }
452 }
453
454 static void
g_ccd_start(struct bio * bp)455 g_ccd_start(struct bio *bp)
456 {
457 long bcount, rcount;
458 struct bio *cbp[2];
459 caddr_t addr;
460 daddr_t bn;
461 int err;
462 struct ccd_s *cs;
463
464 cs = bp->bio_to->geom->softc;
465
466 /*
467 * Block all GETATTR requests, we wouldn't know which of our
468 * subdevices we should ship it off to.
469 * XXX: this may not be the right policy.
470 */
471 if(bp->bio_cmd == BIO_GETATTR) {
472 g_io_deliver(bp, EINVAL);
473 return;
474 }
475
476 /*
477 * Translate the partition-relative block number to an absolute.
478 */
479 bn = bp->bio_offset / cs->sc_secsize;
480
481 /*
482 * Allocate component buffers and fire off the requests
483 */
484 addr = bp->bio_data;
485 for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
486 err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
487 if (err) {
488 bp->bio_completed += bcount;
489 if (bp->bio_error == 0)
490 bp->bio_error = err;
491 if (bp->bio_completed == bp->bio_length)
492 g_io_deliver(bp, bp->bio_error);
493 return;
494 }
495 rcount = cbp[0]->bio_length;
496
497 if (cs->sc_flags & CCDF_MIRROR) {
498 /*
499 * Mirroring. Writes go to both disks, reads are
500 * taken from whichever disk seems most appropriate.
501 *
502 * We attempt to localize reads to the disk whos arm
503 * is nearest the read request. We ignore seeks due
504 * to writes when making this determination and we
505 * also try to avoid hogging.
506 */
507 if (cbp[0]->bio_cmd != BIO_READ) {
508 g_io_request(cbp[0], cbp[0]->bio_from);
509 g_io_request(cbp[1], cbp[1]->bio_from);
510 } else {
511 int pick = cs->sc_pick;
512 daddr_t range = cs->sc_size / 16;
513
514 if (bn < cs->sc_blk[pick] - range ||
515 bn > cs->sc_blk[pick] + range
516 ) {
517 cs->sc_pick = pick = 1 - pick;
518 }
519 cs->sc_blk[pick] = bn + btodb(rcount);
520 g_io_request(cbp[pick], cbp[pick]->bio_from);
521 }
522 } else {
523 /*
524 * Not mirroring
525 */
526 g_io_request(cbp[0], cbp[0]->bio_from);
527 }
528 bn += btodb(rcount);
529 addr += rcount;
530 }
531 }
532
533 /*
534 * Build a component buffer header.
535 */
536 static int
ccdbuffer(struct bio ** cb,struct ccd_s * cs,struct bio * bp,daddr_t bn,caddr_t addr,long bcount)537 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
538 {
539 struct ccdcinfo *ci, *ci2 = NULL;
540 struct bio *cbp;
541 daddr_t cbn, cboff;
542 off_t cbc;
543
544 /*
545 * Determine which component bn falls in.
546 */
547 cbn = bn;
548 cboff = 0;
549
550 if (cs->sc_ileave == 0) {
551 /*
552 * Serially concatenated and neither a mirror nor a parity
553 * config. This is a special case.
554 */
555 daddr_t sblk;
556
557 sblk = 0;
558 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
559 sblk += ci->ci_size;
560 cbn -= sblk;
561 } else {
562 struct ccdiinfo *ii;
563 int ccdisk, off;
564
565 /*
566 * Calculate cbn, the logical superblock (sc_ileave chunks),
567 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
568 * to cbn.
569 */
570 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
571 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
572
573 /*
574 * Figure out which interleave table to use.
575 */
576 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
577 if (ii->ii_startblk > cbn)
578 break;
579 }
580 ii--;
581
582 /*
583 * off is the logical superblock relative to the beginning
584 * of this interleave block.
585 */
586 off = cbn - ii->ii_startblk;
587
588 /*
589 * We must calculate which disk component to use (ccdisk),
590 * and recalculate cbn to be the superblock relative to
591 * the beginning of the component. This is typically done by
592 * adding 'off' and ii->ii_startoff together. However, 'off'
593 * must typically be divided by the number of components in
594 * this interleave array to be properly convert it from a
595 * CCD-relative logical superblock number to a
596 * component-relative superblock number.
597 */
598 if (ii->ii_ndisk == 1) {
599 /*
600 * When we have just one disk, it can't be a mirror
601 * or a parity config.
602 */
603 ccdisk = ii->ii_index[0];
604 cbn = ii->ii_startoff + off;
605 } else {
606 if (cs->sc_flags & CCDF_MIRROR) {
607 /*
608 * We have forced a uniform mapping, resulting
609 * in a single interleave array. We double
610 * up on the first half of the available
611 * components and our mirror is in the second
612 * half. This only works with a single
613 * interleave array because doubling up
614 * doubles the number of sectors, so there
615 * cannot be another interleave array because
616 * the next interleave array's calculations
617 * would be off.
618 */
619 int ndisk2 = ii->ii_ndisk / 2;
620 ccdisk = ii->ii_index[off % ndisk2];
621 cbn = ii->ii_startoff + off / ndisk2;
622 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
623 } else {
624 ccdisk = ii->ii_index[off % ii->ii_ndisk];
625 cbn = ii->ii_startoff + off / ii->ii_ndisk;
626 }
627 }
628
629 ci = &cs->sc_cinfo[ccdisk];
630
631 /*
632 * Convert cbn from a superblock to a normal block so it
633 * can be used to calculate (along with cboff) the normal
634 * block index into this particular disk.
635 */
636 cbn *= cs->sc_ileave;
637 }
638
639 /*
640 * Fill in the component buf structure.
641 */
642 cbp = g_clone_bio(bp);
643 if (cbp == NULL)
644 return (ENOMEM);
645 cbp->bio_done = g_std_done;
646 cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
647 cbp->bio_data = addr;
648 if (cs->sc_ileave == 0)
649 cbc = dbtob((off_t)(ci->ci_size - cbn));
650 else
651 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
652 cbp->bio_length = (cbc < bcount) ? cbc : bcount;
653
654 cbp->bio_from = ci->ci_consumer;
655 cb[0] = cbp;
656
657 if (cs->sc_flags & CCDF_MIRROR) {
658 cbp = g_clone_bio(bp);
659 if (cbp == NULL)
660 return (ENOMEM);
661 cbp->bio_done = cb[0]->bio_done = ccdiodone;
662 cbp->bio_offset = cb[0]->bio_offset;
663 cbp->bio_data = cb[0]->bio_data;
664 cbp->bio_length = cb[0]->bio_length;
665 cbp->bio_from = ci2->ci_consumer;
666 cbp->bio_caller1 = cb[0];
667 cb[0]->bio_caller1 = cbp;
668 cb[1] = cbp;
669 }
670 return (0);
671 }
672
673 /*
674 * Called only for mirrored operations.
675 */
676 static void
ccdiodone(struct bio * cbp)677 ccdiodone(struct bio *cbp)
678 {
679 struct bio *mbp, *pbp;
680
681 mbp = cbp->bio_caller1;
682 pbp = cbp->bio_parent;
683
684 if (pbp->bio_cmd == BIO_READ) {
685 if (cbp->bio_error == 0) {
686 /* We will not be needing the partner bio */
687 if (mbp != NULL) {
688 pbp->bio_inbed++;
689 g_destroy_bio(mbp);
690 }
691 g_std_done(cbp);
692 return;
693 }
694 if (mbp != NULL) {
695 /* Try partner the bio instead */
696 mbp->bio_caller1 = NULL;
697 pbp->bio_inbed++;
698 g_destroy_bio(cbp);
699 g_io_request(mbp, mbp->bio_from);
700 /*
701 * XXX: If this comes back OK, we should actually
702 * try to write the good data on the failed mirror
703 */
704 return;
705 }
706 g_std_done(cbp);
707 return;
708 }
709 if (mbp != NULL) {
710 mbp->bio_caller1 = NULL;
711 pbp->bio_inbed++;
712 if (cbp->bio_error != 0 && pbp->bio_error == 0)
713 pbp->bio_error = cbp->bio_error;
714 g_destroy_bio(cbp);
715 return;
716 }
717 g_std_done(cbp);
718 }
719
720 static void
g_ccd_create(struct gctl_req * req,struct g_class * mp)721 g_ccd_create(struct gctl_req *req, struct g_class *mp)
722 {
723 int *unit, *ileave, *nprovider;
724 struct g_geom *gp;
725 struct g_consumer *cp;
726 struct g_provider *pp;
727 struct ccd_s *sc;
728 struct sbuf *sb;
729 char buf[20];
730 int i, error;
731
732 g_topology_assert();
733 unit = gctl_get_paraml(req, "unit", sizeof (*unit));
734 if (unit == NULL) {
735 gctl_error(req, "unit parameter not given");
736 return;
737 }
738 ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
739 if (ileave == NULL) {
740 gctl_error(req, "ileave parameter not given");
741 return;
742 }
743 nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
744 if (nprovider == NULL) {
745 gctl_error(req, "nprovider parameter not given");
746 return;
747 }
748
749 /* Check for duplicate unit */
750 LIST_FOREACH(gp, &mp->geom, geom) {
751 sc = gp->softc;
752 if (sc != NULL && sc->sc_unit == *unit) {
753 gctl_error(req, "Unit %d already configured", *unit);
754 return;
755 }
756 }
757
758 if (*nprovider <= 0) {
759 gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
760 return;
761 }
762
763 /* Check all providers are valid */
764 for (i = 0; i < *nprovider; i++) {
765 snprintf(buf, sizeof(buf), "provider%d", i);
766 pp = gctl_get_provider(req, buf);
767 if (pp == NULL)
768 return;
769 }
770
771 gp = g_new_geomf(mp, "ccd%d", *unit);
772 sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
773 gp->softc = sc;
774 sc->sc_ndisks = *nprovider;
775
776 /* Allocate space for the component info. */
777 sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
778 M_WAITOK | M_ZERO);
779
780 /* Create consumers and attach to all providers */
781 for (i = 0; i < *nprovider; i++) {
782 snprintf(buf, sizeof(buf), "provider%d", i);
783 pp = gctl_get_provider(req, buf);
784 cp = g_new_consumer(gp);
785 error = g_attach(cp, pp);
786 KASSERT(error == 0, ("attach to %s failed", pp->name));
787 sc->sc_cinfo[i].ci_consumer = cp;
788 sc->sc_cinfo[i].ci_provider = pp;
789 }
790
791 sc->sc_unit = *unit;
792 sc->sc_ileave = *ileave;
793
794 if (gctl_get_param(req, "no_offset", NULL))
795 sc->sc_flags |= CCDF_NO_OFFSET;
796 if (gctl_get_param(req, "linux", NULL))
797 sc->sc_flags |= CCDF_LINUX;
798
799 if (gctl_get_param(req, "uniform", NULL))
800 sc->sc_flags |= CCDF_UNIFORM;
801 if (gctl_get_param(req, "mirror", NULL))
802 sc->sc_flags |= CCDF_MIRROR;
803
804 if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
805 printf("%s: disabling mirror, interleave is 0\n", gp->name);
806 sc->sc_flags &= ~(CCDF_MIRROR);
807 }
808
809 if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
810 printf("%s: mirror/parity forces uniform flag\n", gp->name);
811 sc->sc_flags |= CCDF_UNIFORM;
812 }
813
814 error = ccdinit(req, sc);
815 if (error != 0) {
816 g_ccd_freesc(sc);
817 gp->softc = NULL;
818 g_wither_geom(gp, ENXIO);
819 return;
820 }
821
822 pp = g_new_providerf(gp, "%s", gp->name);
823 pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
824 pp->sectorsize = sc->sc_secsize;
825 g_error_provider(pp, 0);
826
827 sb = sbuf_new_auto();
828 sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
829 for (i = 0; i < *nprovider; i++) {
830 sbuf_printf(sb, "%s%s",
831 i == 0 ? "(" : ", ",
832 sc->sc_cinfo[i].ci_provider->name);
833 }
834 sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
835 if (sc->sc_ileave != 0)
836 sbuf_printf(sb, "interleaved at %d blocks\n",
837 sc->sc_ileave);
838 else
839 sbuf_printf(sb, "concatenated\n");
840 sbuf_finish(sb);
841 gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
842 sbuf_delete(sb);
843 }
844
845 static int
g_ccd_destroy_geom(struct gctl_req * req,struct g_class * mp,struct g_geom * gp)846 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
847 {
848 struct g_provider *pp;
849 struct ccd_s *sc;
850
851 g_topology_assert();
852 sc = gp->softc;
853 pp = LIST_FIRST(&gp->provider);
854 if (sc == NULL || pp == NULL)
855 return (EBUSY);
856 if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
857 gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
858 pp->acr, pp->acw, pp->ace);
859 return (EBUSY);
860 }
861 g_ccd_freesc(sc);
862 gp->softc = NULL;
863 g_wither_geom(gp, ENXIO);
864 return (0);
865 }
866
867 static void
g_ccd_list(struct gctl_req * req,struct g_class * mp)868 g_ccd_list(struct gctl_req *req, struct g_class *mp)
869 {
870 struct sbuf *sb;
871 struct ccd_s *cs;
872 struct g_geom *gp;
873 int i, unit, *up;
874
875 up = gctl_get_paraml(req, "unit", sizeof (*up));
876 if (up == NULL) {
877 gctl_error(req, "unit parameter not given");
878 return;
879 }
880 unit = *up;
881 sb = sbuf_new_auto();
882 LIST_FOREACH(gp, &mp->geom, geom) {
883 cs = gp->softc;
884 if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
885 continue;
886 sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
887 cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
888
889 for (i = 0; i < cs->sc_ndisks; ++i) {
890 sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
891 cs->sc_cinfo[i].ci_provider->name);
892 }
893 sbuf_printf(sb, "\n");
894 }
895 sbuf_finish(sb);
896 gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
897 sbuf_delete(sb);
898 }
899
900 static void
g_ccd_config(struct gctl_req * req,struct g_class * mp,char const * verb)901 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
902 {
903 struct g_geom *gp;
904
905 g_topology_assert();
906 if (!strcmp(verb, "create geom")) {
907 g_ccd_create(req, mp);
908 } else if (!strcmp(verb, "destroy geom")) {
909 gp = gctl_get_geom(req, mp, "geom");
910 if (gp != NULL)
911 g_ccd_destroy_geom(req, mp, gp);
912 } else if (!strcmp(verb, "list")) {
913 g_ccd_list(req, mp);
914 } else {
915 gctl_error(req, "unknown verb");
916 }
917 }
918
919 static struct g_class g_ccd_class = {
920 .name = "CCD",
921 .version = G_VERSION,
922 .ctlreq = g_ccd_config,
923 .destroy_geom = g_ccd_destroy_geom,
924 .start = g_ccd_start,
925 .orphan = g_ccd_orphan,
926 .access = g_ccd_access,
927 };
928
929 DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
930 MODULE_VERSION(geom_ccd, 0);
931