1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/param.h>
29 #include <sys/bio.h>
30 #include <sys/buf.h>
31 #include <sys/ctype.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/reboot.h>
37 #include <sys/rwlock.h>
38 #include <sys/sbuf.h>
39 #include <sys/sysctl.h>
40
41 #include <geom/geom.h>
42 #include <geom/geom_dbg.h>
43 #include <geom/union/g_union.h>
44
/* Attach the gunion sysctl subtree under kern.geom and expose a debug knob. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_UNION stuff");
/* Verbosity for G_UNION_DEBUG()/G_UNION_LOGREQ(); 0 disables debug output. */
static u_int g_union_debug = 0;
SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
    "Debug level");
51
/* GEOM class method implementations; wired into g_union_class below. */
static void g_union_config(struct gctl_req *req, struct g_class *mp,
    const char *verb);
static g_access_t g_union_access;
static g_start_t g_union_start;
static g_dumpconf_t g_union_dumpconf;
static g_orphan_t g_union_orphan;
static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_provgone_t g_union_providergone;
static g_resize_t g_union_resize;
62
/* Class descriptor registered with the GEOM framework. */
struct g_class g_union_class = {
	.name = G_UNION_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_union_config,
	.access = g_union_access,
	.start = g_union_start,
	.dumpconf = g_union_dumpconf,
	.orphan = g_union_orphan,
	.destroy_geom = g_union_destroy_geom,
	.providergone = g_union_providergone,
	.resize = g_union_resize,
};
75
/* Helpers local to this file. */
static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
static bool g_union_verify_nprefix(const char *name);
static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_revert(struct g_union_softc *sc);
static void g_union_doio(struct g_union_wip *wip);
static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
    off_t *len2read);
static void g_union_done(struct bio *bp);
static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
static int g_union_dumper(void *, void *, off_t, size_t);
static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
93
94 /*
95 * Operate on union-specific configuration commands.
96 */
97 static void
g_union_config(struct gctl_req * req,struct g_class * mp,const char * verb)98 g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
99 {
100 uint32_t *version, *verbose;
101
102 g_topology_assert();
103
104 version = gctl_get_paraml(req, "version", sizeof(*version));
105 if (version == NULL) {
106 gctl_error(req, "No '%s' argument.", "version");
107 return;
108 }
109 if (*version != G_UNION_VERSION) {
110 gctl_error(req, "Userland and kernel parts are out of sync.");
111 return;
112 }
113 verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose));
114 if (verbose == NULL) {
115 gctl_error(req, "No '%s' argument.", "verbose");
116 return;
117 }
118 if (strcmp(verb, "create") == 0) {
119 g_union_ctl_create(req, mp, *verbose);
120 return;
121 } else if (strcmp(verb, "destroy") == 0) {
122 g_union_ctl_destroy(req, mp, *verbose);
123 return;
124 } else if (strcmp(verb, "reset") == 0) {
125 g_union_ctl_reset(req, mp, *verbose);
126 return;
127 } else if (strcmp(verb, "revert") == 0) {
128 g_union_ctl_revert(req, mp, *verbose);
129 return;
130 } else if (strcmp(verb, "commit") == 0) {
131 g_union_ctl_commit(req, mp, *verbose);
132 return;
133 }
134
135 gctl_error(req, "Unknown verb.");
136 }
137
138 /*
139 * Create a union device.
140 */
static void
g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
{
	struct g_provider *upperpp, *lowerpp, *newpp;
	struct g_consumer *uppercp, *lowercp;
	struct g_union_softc *sc;
	struct g_geom_alias *gap;
	struct g_geom *gp;
	intmax_t offset, secsize, size, needed;
	const char *gunionname;
	int *nargs, error, i, n;
	char name[64];

	g_topology_assert();

	/* Exactly two providers are required: arg0 (upper) and arg1 (lower). */
	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs < 2) {
		gctl_error(req, "Missing device(s).");
		return;
	}
	if (*nargs > 2) {
		gctl_error(req, "Extra device(s).");
		return;
	}

	/* Optional parameters; g_union_fetcharg() yields 0 when absent. */
	offset = g_union_fetcharg(req, "offset");
	size = g_union_fetcharg(req, "size");
	secsize = g_union_fetcharg(req, "secsize");
	gunionname = gctl_get_asciiparam(req, "gunionname");

	upperpp = gctl_get_provider(req, "arg0");
	lowerpp = gctl_get_provider(req, "arg1");
	if (upperpp == NULL || lowerpp == NULL)
		/* error message provided by gctl_get_provider() */
		return;
	/* Create the union */
	if (secsize == 0)
		secsize = lowerpp->sectorsize;
	else if ((secsize % lowerpp->sectorsize) != 0) {
		gctl_error(req, "Sector size %jd is not a multiple of lower "
		    "provider %s's %jd sector size.", (intmax_t)secsize,
		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
		return;
	}
	if (secsize > maxphys) {
		gctl_error(req, "Too big secsize %jd for lower provider %s.",
		    (intmax_t)secsize, lowerpp->name);
		return;
	}
	/* Union sector size must also be compatible with the upper layer. */
	if (secsize % upperpp->sectorsize != 0) {
		gctl_error(req, "Sector size %jd is not a multiple of upper "
		    "provider %s's %jd sector size.", (intmax_t)secsize,
		    upperpp->name, (intmax_t)upperpp->sectorsize);
		return;
	}
	if ((offset % secsize) != 0) {
		gctl_error(req, "Offset %jd is not a multiple of lower "
		    "provider %s's %jd sector size.", (intmax_t)offset,
		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
		return;
	}
	/* Default to the full lower provider past the requested offset. */
	if (size == 0)
		size = lowerpp->mediasize - offset;
	else
		size -= offset;
	if ((size % secsize) != 0) {
		gctl_error(req, "Size %jd is not a multiple of sector size "
		    "%jd.", (intmax_t)size, (intmax_t)secsize);
		return;
	}
	if (offset + size < lowerpp->mediasize) {
		gctl_error(req, "Size %jd is too small for lower provider %s, "
		    "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
		    lowerpp->mediasize);
		return;
	}
	if (size > upperpp->mediasize) {
		gctl_error(req, "Upper provider %s size (%jd) is too small, "
		    "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
		    (intmax_t)size);
		return;
	}
	if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
		gctl_error(req, "Gunion name %s must be alphanumeric.",
		    gunionname);
		return;
	}
	/*
	 * Build the provider name: either the user-supplied prefix or
	 * "<upper>-<lower>", each with the class suffix appended.
	 */
	if (gunionname != NULL) {
		n = snprintf(name, sizeof(name), "%s%s", gunionname,
		    G_UNION_SUFFIX);
	} else {
		n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
		    lowerpp->name, G_UNION_SUFFIX);
	}
	if (n <= 0 || n >= sizeof(name)) {
		gctl_error(req, "Invalid provider name.");
		return;
	}
	LIST_FOREACH(gp, &mp->geom, geom) {
		if (strcmp(gp->name, name) == 0) {
			gctl_error(req, "Provider %s already exists.", name);
			return;
		}
	}
	gp = g_new_geom(mp, name);
	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
	rw_init(&sc->sc_rwlock, "gunion");
	TAILQ_INIT(&sc->sc_wiplist);
	sc->sc_offset = offset;
	sc->sc_size = size;
	sc->sc_sectorsize = secsize;
	/* Start all statistics counters at zero. */
	sc->sc_reads = 0;
	sc->sc_writes = 0;
	sc->sc_deletes = 0;
	sc->sc_getattrs = 0;
	sc->sc_flushes = 0;
	sc->sc_speedups = 0;
	sc->sc_cmd0s = 0;
	sc->sc_cmd1s = 0;
	sc->sc_cmd2s = 0;
	sc->sc_readbytes = 0;
	sc->sc_wrotebytes = 0;
	sc->sc_writemap_memory = 0;
	gp->softc = sc;

	newpp = g_new_providerf(gp, "%s", gp->name);
	newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
	newpp->mediasize = size;
	newpp->sectorsize = secsize;
	/* Propagate aliases of both underlying providers, suffixed. */
	LIST_FOREACH(gap, &upperpp->aliases, ga_next)
		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
		    G_UNION_SUFFIX);
	LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
		    G_UNION_SUFFIX);
	lowercp = g_new_consumer(gp);
	lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	if ((error = g_attach(lowercp, lowerpp)) != 0) {
		gctl_error(req, "Error %d: cannot attach to provider %s.",
		    error, lowerpp->name);
		goto fail1;
	}
	/* request read and exclusive access for lower */
	if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
		gctl_error(req, "Error %d: cannot obtain exclusive access to "
		    "%s.\n\tMust be unmounted or mounted read-only.", error,
		    lowerpp->name);
		goto fail2;
	}
	uppercp = g_new_consumer(gp);
	uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	if ((error = g_attach(uppercp, upperpp)) != 0) {
		gctl_error(req, "Error %d: cannot attach to provider %s.",
		    error, upperpp->name);
		goto fail3;
	}
	/* request read, write, and exclusive access for upper */
	if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
		gctl_error(req, "Error %d: cannot obtain write access to %s.",
		    error, upperpp->name);
		goto fail4;
	}
	sc->sc_uppercp = uppercp;
	sc->sc_lowercp = lowercp;

	/* Unmapped I/O is accepted only if both layers accept it. */
	newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
	    (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
	g_error_provider(newpp, 0);
	/*
	 * Allocate the map that tracks the sectors that have been written
	 * to the top layer. We use a 2-level hierarchy as that lets us
	 * map up to 1 petabyte using allocations of less than 33 Mb
	 * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
	 *
	 * We totally populate the leaf nodes rather than allocating them
	 * as they are first used because their usage occurs in the
	 * g_union_start() routine that may be running in the g_down
	 * thread which cannot sleep.
	 */
	sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
	needed = sc->sc_map_size / BITS_PER_ENTRY;
	/* Smallest root dimension whose square covers all leaf entries. */
	for (sc->sc_root_size = 1;
	     sc->sc_root_size * sc->sc_root_size < needed;
	     sc->sc_root_size++)
		continue;
	sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
	    M_WAITOK | M_ZERO);
	sc->sc_leaf_size = sc->sc_root_size;
	sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
	sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
	    M_WAITOK | M_ZERO);
	for (i = 0; i < sc->sc_root_size; i++)
		sc->sc_writemap_root[i] =
		    g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
		    M_WAITOK | M_ZERO);
	sc->sc_writemap_memory =
	    (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
	    sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
	if (verbose)
		gctl_msg(req, 0, "Device %s created with memory map size %jd.",
		    gp->name, (intmax_t)sc->sc_writemap_memory);
	gctl_post_messages(req);
	G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
	    gp->name, (intmax_t)sc->sc_writemap_memory);
	return;

	/* Unwind in the reverse order of the setup steps above. */
fail4:
	g_detach(uppercp);
fail3:
	g_destroy_consumer(uppercp);
	g_access(lowercp, -1, 0, -1);
fail2:
	g_detach(lowercp);
fail1:
	g_destroy_consumer(lowercp);
	g_destroy_provider(newpp);
	g_free(sc);
	g_destroy_geom(gp);
}
364
365 /*
366 * Fetch named option and verify that it is positive.
367 */
368 static intmax_t
g_union_fetcharg(struct gctl_req * req,const char * name)369 g_union_fetcharg(struct gctl_req *req, const char *name)
370 {
371 intmax_t *val;
372
373 val = gctl_get_paraml_opt(req, name, sizeof(*val));
374 if (val == NULL)
375 return (0);
376 if (*val >= 0)
377 return (*val);
378 gctl_msg(req, EINVAL, "Invalid '%s' (%jd): negative value, "
379 "using default.", name, *val);
380 return (0);
381 }
382
383 /*
384 * Verify that a name is alphanumeric.
385 */
/*
 * Verify that a name is alphanumeric.
 *
 * Returns true when every character of "name" is a letter or a digit
 * (the empty string trivially qualifies), false otherwise.
 *
 * Scan directly for the NUL terminator instead of calling strlen() in
 * the loop condition, which re-walked the whole string on every
 * iteration (accidental O(n^2)).
 */
static bool
g_union_verify_nprefix(const char *name)
{
	size_t i;

	for (i = 0; name[i] != '\0'; i++) {
		if (isalpha(name[i]) == 0 && isdigit(name[i]) == 0)
			return (false);
	}
	return (true);
}
398
399 /*
400 * Destroy a union device.
401 */
static void
g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose)
{
	int *nargs, *force, error, i;
	struct g_geom *gp;
	const char *name;
	char param[16];

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}
	force = gctl_get_paraml(req, "force", sizeof(*force));
	if (force == NULL) {
		gctl_error(req, "No 'force' argument.");
		return;
	}

	/* Destroy each named device; errors are accumulated per device. */
	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		name = gctl_get_asciiparam(req, param);
		if (name == NULL) {
			gctl_msg(req, EINVAL, "No '%s' argument.", param);
			continue;
		}
		/* Accept names given with a leading /dev/ prefix. */
		if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
			name += strlen(_PATH_DEV);
		gp = g_union_find_geom(mp, name);
		if (gp == NULL) {
			gctl_msg(req, EINVAL, "Device %s is invalid.", name);
			continue;
		}
		error = g_union_destroy(verbose ? req : NULL, gp, *force);
		if (error != 0)
			gctl_msg(req, error, "Error %d: "
			    "cannot destroy device %s.", error, gp->name);
	}
	gctl_post_messages(req);
}
448
449 /*
450 * Find a union geom.
451 */
452 static struct g_geom *
g_union_find_geom(struct g_class * mp,const char * name)453 g_union_find_geom(struct g_class *mp, const char *name)
454 {
455 struct g_geom *gp;
456
457 LIST_FOREACH(gp, &mp->geom, geom) {
458 if (strcmp(gp->name, name) == 0)
459 return (gp);
460 }
461 return (NULL);
462 }
463
464 /*
465 * Zero out all the statistics associated with a union device.
466 */
static void
g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose)
{
	struct g_union_softc *sc;
	struct g_provider *pp;
	struct g_geom *gp;
	char param[16];
	int i, *nargs;

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}

	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		pp = gctl_get_provider(req, param);
		if (pp == NULL) {
			gctl_msg(req, EINVAL, "No '%s' argument.", param);
			continue;
		}
		gp = pp->geom;
		/* The provider must belong to this (union) class. */
		if (gp->class != mp) {
			gctl_msg(req, EINVAL, "Provider %s is invalid.",
			    pp->name);
			continue;
		}
		/* Zero all the per-device statistics counters. */
		sc = gp->softc;
		sc->sc_reads = 0;
		sc->sc_writes = 0;
		sc->sc_deletes = 0;
		sc->sc_getattrs = 0;
		sc->sc_flushes = 0;
		sc->sc_speedups = 0;
		sc->sc_cmd0s = 0;
		sc->sc_cmd1s = 0;
		sc->sc_cmd2s = 0;
		sc->sc_readbytes = 0;
		sc->sc_wrotebytes = 0;
		if (verbose)
			gctl_msg(req, 0, "Device %s has been reset.", pp->name);
		G_UNION_DEBUG(1, "Device %s has been reset.", pp->name);
	}
	gctl_post_messages(req);
}
519
520 /*
521 * Revert all write requests made to the top layer of the union.
522 */
static void
g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose)
{
	struct g_union_softc *sc;
	struct g_provider *pp;
	struct g_geom *gp;
	char param[16];
	int i, *nargs;

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}

	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		pp = gctl_get_provider(req, param);
		if (pp == NULL) {
			gctl_msg(req, EINVAL, "No '%s' argument.", param);
			continue;
		}
		gp = pp->geom;
		/* The provider must belong to this (union) class. */
		if (gp->class != mp) {
			gctl_msg(req, EINVAL, "Provider %s is invalid.",
			    pp->name);
			continue;
		}
		sc = gp->softc;
		/* Serialize against a concurrent revert or commit. */
		if (g_union_get_writelock(sc) != 0) {
			gctl_msg(req, EINVAL, "Revert already in progress for "
			    "provider %s.", pp->name);
			continue;
		}
		/*
		 * No mount or other use of union is allowed.
		 */
		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
			gctl_msg(req, EPERM, "Unable to get exclusive access "
			    "for reverting of %s;\n\t%s cannot be mounted or "
			    "otherwise open during a revert.",
			     pp->name, pp->name);
			g_union_rel_writelock(sc);
			continue;
		}
		g_union_revert(sc);
		g_union_rel_writelock(sc);
		if (verbose)
			gctl_msg(req, 0, "Device %s has been reverted.",
			    pp->name);
		G_UNION_DEBUG(1, "Device %s has been reverted.", pp->name);
	}
	gctl_post_messages(req);
}
583
584 /*
585 * Revert union writes by zero'ing out the writemap.
586 */
587 static void
g_union_revert(struct g_union_softc * sc)588 g_union_revert(struct g_union_softc *sc)
589 {
590 int i;
591
592 G_WLOCK(sc);
593 for (i = 0; i < sc->sc_root_size; i++)
594 memset(sc->sc_writemap_root[i], 0,
595 sc->sc_leaf_size * sizeof(uint64_t));
596 memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY));
597 G_WUNLOCK(sc);
598 }
599
600 /*
601 * Commit all the writes made in the top layer to the lower layer.
602 */
static void
g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose)
{
	struct g_union_softc *sc;
	struct g_provider *pp, *lowerpp;
	struct g_consumer *lowercp;
	struct g_geom *gp;
	struct bio *bp;
	char param[16];
	off_t len2rd, len2wt, savelen;
	int i, error, error1, *nargs, *force, *reboot;

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}
	force = gctl_get_paraml(req, "force", sizeof(*force));
	if (force == NULL) {
		gctl_error(req, "No 'force' argument.");
		return;
	}
	reboot = gctl_get_paraml(req, "reboot", sizeof(*reboot));
	if (reboot == NULL) {
		gctl_error(req, "No 'reboot' argument.");
		return;
	}

	/* Get a bio buffer to do our I/O */
	bp = g_alloc_bio();
	bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK);
	bp->bio_done = biodone;
	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		pp = gctl_get_provider(req, param);
		if (pp == NULL) {
			gctl_msg(req, EINVAL, "No '%s' argument.", param);
			continue;
		}
		gp = pp->geom;
		/* The provider must belong to this (union) class. */
		if (gp->class != mp) {
			gctl_msg(req, EINVAL, "Provider %s is invalid.",
			    pp->name);
			continue;
		}
		sc = gp->softc;
		/* Serialize against a concurrent revert or commit. */
		if (g_union_get_writelock(sc) != 0) {
			gctl_msg(req, EINVAL, "Commit already in progress for "
			    "provider %s.", pp->name);
			continue;
		}

		/* upgrade to write access for lower */
		lowercp = sc->sc_lowercp;
		lowerpp = lowercp->provider;
		/*
		 * No mount or other use of union is allowed, unless the
		 * -f flag is given which allows read-only mount or usage.
		 */
		if ((*force == false && pp->acr > 0) || pp->acw > 0 ||
		     pp->ace > 0) {
			gctl_msg(req, EPERM, "Unable to get exclusive access "
			    "for writing of %s.\n\tNote that %s cannot be "
			    "mounted or otherwise\n\topen during a commit "
			    "unless the -f flag is used.", pp->name, pp->name);
			g_union_rel_writelock(sc);
			continue;
		}
		/*
		 * No mount or other use of lower media is allowed, unless the
		 * -f flag is given which allows read-only mount or usage.
		 */
		if ((*force == false && lowerpp->acr > lowercp->acr) ||
		     lowerpp->acw > lowercp->acw ||
		     lowerpp->ace > lowercp->ace) {
			gctl_msg(req, EPERM, "provider %s is unable to get "
			    "exclusive access to %s\n\tfor writing. Note that "
			    "%s cannot be mounted or otherwise open\n\tduring "
			    "a commit unless the -f flag is used.", pp->name,
			    lowerpp->name, lowerpp->name);
			g_union_rel_writelock(sc);
			continue;
		}
		if ((error = g_access(lowercp, 0, 1, 0)) != 0) {
			gctl_msg(req, error, "Error %d: provider %s is unable "
			    "to access %s for writing.", error, pp->name,
			    lowerpp->name);
			g_union_rel_writelock(sc);
			continue;
		}
		/* Drop the topology lock while doing the copy I/O. */
		g_topology_unlock();
		/* Loop over write map copying across written blocks */
		bp->bio_offset = 0;
		bp->bio_length = sc->sc_map_size * sc->sc_sectorsize;
		G_RLOCK(sc);
		error = 0;
		while (bp->bio_length > 0) {
			if (!g_union_getmap(bp, sc, &len2rd)) {
				/* not written, so skip */
				bp->bio_offset += len2rd;
				bp->bio_length -= len2rd;
				continue;
			}
			/* Release the map lock across the blocking I/O. */
			G_RUNLOCK(sc);
			/* need to read then write len2rd sectors */
			for ( ; len2rd > 0; len2rd -= len2wt) {
				/* limit ourselves to MAXBSIZE size I/Os */
				len2wt = len2rd;
				if (len2wt > MAXBSIZE)
					len2wt = MAXBSIZE;
				savelen = bp->bio_length;
				bp->bio_length = len2wt;
				/* Read the written data from the top layer. */
				bp->bio_cmd = BIO_READ;
				g_io_request(bp, sc->sc_uppercp);
				if ((error = biowait(bp, "rdunion")) != 0) {
					gctl_msg(req, error, "Commit read "
					    "error %d in provider %s, commit "
					    "aborted.", error, pp->name);
					goto cleanup;
				}
				/* Reuse the same bio for the write phase. */
				bp->bio_flags &= ~BIO_DONE;
				bp->bio_cmd = BIO_WRITE;
				g_io_request(bp, lowercp);
				if ((error = biowait(bp, "wtunion")) != 0) {
					gctl_msg(req, error, "Commit write "
					    "error %d in provider %s, commit "
					    "aborted.", error, pp->name);
					goto cleanup;
				}
				bp->bio_flags &= ~BIO_DONE;
				bp->bio_offset += len2wt;
				bp->bio_length = savelen - len2wt;
			}
			G_RLOCK(sc);
		}
		G_RUNLOCK(sc);
		/* clear the write map */
		g_union_revert(sc);
cleanup:
		g_topology_lock();
		/* return lower to previous access */
		if ((error1 = g_access(lowercp, 0, -1, 0)) != 0) {
			G_UNION_DEBUG(2, "Error %d: device %s could not reset "
			    "access to %s (r=0 w=-1 e=0).", error1, pp->name,
			    lowerpp->name);
		}
		g_union_rel_writelock(sc);
		if (error == 0 && verbose)
			gctl_msg(req, 0, "Device %s has been committed.",
			    pp->name);
		G_UNION_DEBUG(1, "Device %s has been committed.", pp->name);
	}
	gctl_post_messages(req);
	g_free(bp->bio_data);
	g_destroy_bio(bp);
	/* Optionally reboot once all requested commits are done. */
	if (*reboot)
		kern_reboot(RB_AUTOBOOT);
}
767
768 /*
769 * Generally allow access unless a commit is in progress.
770 */
771 static int
g_union_access(struct g_provider * pp,int r,int w,int e)772 g_union_access(struct g_provider *pp, int r, int w, int e)
773 {
774 struct g_union_softc *sc;
775
776 sc = pp->geom->softc;
777 if (sc == NULL) {
778 if (r <= 0 && w <= 0 && e <= 0)
779 return (0);
780 return (ENXIO);
781 }
782 r += pp->acr;
783 w += pp->acw;
784 e += pp->ace;
785 if (g_union_get_writelock(sc) != 0) {
786 if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0)
787 return (0);
788 return (EBUSY);
789 }
790 g_union_rel_writelock(sc);
791 return (0);
792 }
793
794 /*
795 * Initiate an I/O operation on the union device.
796 */
static void
g_union_start(struct bio *bp)
{
	struct g_union_softc *sc;
	struct g_union_wip *wip;
	struct bio *cbp;

	sc = bp->bio_to->geom->softc;
	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		/*
		 * May run in the g_down thread, so the allocation must
		 * not sleep (M_NOWAIT).
		 */
		wip = g_malloc(sizeof(*wip), M_NOWAIT);
		if (wip == NULL) {
			g_io_deliver(bp, ENOMEM);
			return;
		}
		TAILQ_INIT(&wip->wip_waiting);
		wip->wip_bp = bp;
		wip->wip_sc = sc;
		/* Byte range [wip_start, wip_end] covered by this request. */
		wip->wip_start = bp->bio_offset + sc->sc_offset;
		wip->wip_end = wip->wip_start + bp->bio_length - 1;
		wip->wip_numios = 1;
		wip->wip_error = 0;
		g_union_doio(wip);
		return;
	}

	/*
	 * All commands other than read and write are passed through to
	 * the upper-level device since it is writable and thus able to
	 * respond to delete, flush, and speedup requests.
	 */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_offset = bp->bio_offset + sc->sc_offset;
	cbp->bio_done = g_std_done;

	/* Count the request by type before forwarding it. */
	switch (cbp->bio_cmd) {
	case BIO_DELETE:
		G_UNION_LOGREQ(cbp, "Delete request received.");
		atomic_add_long(&sc->sc_deletes, 1);
		break;
	case BIO_GETATTR:
		G_UNION_LOGREQ(cbp, "Getattr request received.");
		atomic_add_long(&sc->sc_getattrs, 1);
		if (strcmp(cbp->bio_attribute, "GEOM::kerneldump") != 0)
			/* forward the GETATTR to the lower-level device */
			break;
		/* Kernel-dump setup is handled by this layer itself. */
		g_union_kerneldump(bp, sc);
		return;
	case BIO_FLUSH:
		G_UNION_LOGREQ(cbp, "Flush request received.");
		atomic_add_long(&sc->sc_flushes, 1);
		break;
	case BIO_SPEEDUP:
		G_UNION_LOGREQ(cbp, "Speedup request received.");
		atomic_add_long(&sc->sc_speedups, 1);
		break;
	case BIO_CMD0:
		G_UNION_LOGREQ(cbp, "Cmd0 request received.");
		atomic_add_long(&sc->sc_cmd0s, 1);
		break;
	case BIO_CMD1:
		G_UNION_LOGREQ(cbp, "Cmd1 request received.");
		atomic_add_long(&sc->sc_cmd1s, 1);
		break;
	case BIO_CMD2:
		G_UNION_LOGREQ(cbp, "Cmd2 request received.");
		atomic_add_long(&sc->sc_cmd2s, 1);
		break;
	default:
		G_UNION_LOGREQ(cbp, "Unknown (%d) request received.",
		    cbp->bio_cmd);
		break;
	}
	g_io_request(cbp, sc->sc_uppercp);
}
875
876 /*
877 * Initiate a read or write operation on the union device.
878 */
static void
g_union_doio(struct g_union_wip *wip)
{
	struct g_union_softc *sc;
	struct g_consumer *cp, *firstcp;
	struct g_union_wip *activewip;
	struct bio *cbp, *firstbp;
	off_t rdlen, len2rd, offset;
	int iocnt, needstoblock;
	char *level;

	/*
	 * To maintain consistency, we cannot allow concurrent reads
	 * or writes to the same block.
	 *
	 * A work-in-progress (wip) structure is allocated for each
	 * read or write request. All active requests are kept on the
	 * softc sc_wiplist. As each request arrives, it is checked to
	 * see if it overlaps any of the active entries. If it does not
	 * overlap, then it is added to the active list and initiated.
	 * If it does overlap an active entry, it is added to the
	 * wip_waiting list for the active entry that it overlaps.
	 * When an active entry completes, it restarts all the requests
	 * on its wip_waiting list.
	 */
	sc = wip->wip_sc;
	G_WLOCK(sc);
	TAILQ_FOREACH(activewip, &sc->sc_wiplist, wip_next) {
		/* Skip active entries whose range does not overlap ours. */
		if (wip->wip_end < activewip->wip_start ||
		    wip->wip_start > activewip->wip_end)
			continue;
		/* Only a read overlapping a read may proceed concurrently. */
		needstoblock = 1;
		if (wip->wip_bp->bio_cmd == BIO_WRITE)
			if (activewip->wip_bp->bio_cmd == BIO_WRITE)
				sc->sc_writeblockwrite += 1;
			else
				sc->sc_readblockwrite += 1;
		else
			if (activewip->wip_bp->bio_cmd == BIO_WRITE)
				sc->sc_writeblockread += 1;
			else {
				sc->sc_readcurrentread += 1;
				needstoblock = 0;
			}
		/* Put request on a waiting list if necessary */
		if (needstoblock) {
			TAILQ_INSERT_TAIL(&activewip->wip_waiting, wip,
			    wip_next);
			G_WUNLOCK(sc);
			return;
		}
	}
	/* Put request on the active list */
	TAILQ_INSERT_TAIL(&sc->sc_wiplist, wip, wip_next);

	/*
	 * Process I/O requests that have been cleared to go.
	 */
	cbp = g_clone_bio(wip->wip_bp);
	if (cbp == NULL) {
		TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
		G_WUNLOCK(sc);
		KASSERT(TAILQ_FIRST(&wip->wip_waiting) == NULL,
		    ("g_union_doio: non-empty work-in-progress waiting queue"));
		g_io_deliver(wip->wip_bp, ENOMEM);
		g_free(wip);
		return;
	}
	G_WUNLOCK(sc);
	cbp->bio_caller1 = wip;
	cbp->bio_done = g_union_done;
	cbp->bio_offset = wip->wip_start;

	/*
	 * Writes are always done to the top level. The blocks that
	 * are written are recorded in the bitmap when the I/O completes.
	 */
	if (cbp->bio_cmd == BIO_WRITE) {
		G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper "
		    "level.", cbp->bio_length);
		atomic_add_long(&sc->sc_writes, 1);
		atomic_add_long(&sc->sc_wrotebytes, cbp->bio_length);
		g_io_request(cbp, sc->sc_uppercp);
		return;
	}
	/*
	 * The usual read case is that we either read the top layer
	 * if the block has been previously written or the bottom layer
	 * if it has not been written. However, it is possible that
	 * only part of the block has been written, For example we may
	 * have written a UFS/FFS file fragment comprising several
	 * sectors out of an 8-sector block. Here, if the entire
	 * 8-sector block is read for example by a snapshot needing
	 * to copy the full block, then we need to read the written
	 * sectors from the upper level and the unwritten sectors from
	 * the lower level. We do this by alternately reading from the
	 * top and bottom layers until we complete the read. We
	 * simplify for the common case to just do the I/O and return.
	 */
	atomic_add_long(&sc->sc_reads, 1);
	atomic_add_long(&sc->sc_readbytes, cbp->bio_length);
	rdlen = cbp->bio_length;
	offset = 0;
	for (iocnt = 0; ; iocnt++) {
		/* The map decides which layer serves the next span. */
		if (g_union_getmap(cbp, sc, &len2rd)) {
			/* read top */
			cp = sc->sc_uppercp;
			level = "upper";
		} else {
			/* read bottom */
			cp = sc->sc_lowercp;
			level = "lower";
		}
		/* Check if only a single read is required */
		if (iocnt == 0 && rdlen == len2rd) {
			G_UNION_LOGREQLVL((cp == sc->sc_uppercp) ?
			    3 : 4, cbp, "Sending %jd byte read "
			    "request to %s level.", len2rd, level);
			g_io_request(cbp, cp);
			return;
		}
		/* Trim this clone to the current span and advance. */
		cbp->bio_length = len2rd;
		if ((cbp->bio_flags & BIO_UNMAPPED) != 0)
			cbp->bio_ma_offset += offset;
		else
			cbp->bio_data += offset;
		offset += len2rd;
		rdlen -= len2rd;
		G_UNION_LOGREQLVL(3, cbp, "Sending %jd byte read "
		    "request to %s level.", len2rd, level);
		/*
		 * To avoid prematurely notifying our consumer
		 * that their I/O has completed, we have to delay
		 * issuing our first I/O request until we have
		 * issued all the additional I/O requests.
		 */
		if (iocnt > 0) {
			atomic_add_long(&wip->wip_numios, 1);
			g_io_request(cbp, cp);
		} else {
			firstbp = cbp;
			firstcp = cp;
		}
		if (rdlen == 0)
			break;
		/* set up for next read */
		cbp = g_clone_bio(wip->wip_bp);
		if (cbp == NULL) {
			/* Record the failure; in-flight I/O still finishes. */
			wip->wip_error = ENOMEM;
			atomic_add_long(&wip->wip_numios, -1);
			break;
		}
		cbp->bio_caller1 = wip;
		cbp->bio_done = g_union_done;
		cbp->bio_offset += offset;
		cbp->bio_length = rdlen;
		atomic_add_long(&sc->sc_reads, 1);
	}
	/* We have issued all our I/O, so start the first one */
	g_io_request(firstbp, firstcp);
	return;
}
1041
/*
 * Used when completing a union I/O operation.
 *
 * Propagate any error recorded in the work-in-progress (wip) structure
 * into the bio, and when the last outstanding I/O for this request
 * completes (wip_numios drops to zero): record written blocks in the
 * write map, unlink the request from the softc's in-progress list,
 * restart any requests that were queued waiting on this one, and free
 * the wip. Completion is always passed up via g_std_done().
 */
static void
g_union_done(struct bio *bp)
{
	struct g_union_wip *wip, *waitingwip;
	struct g_union_softc *sc;

	wip = bp->bio_caller1;
	/* Prefer an error saved in the wip when the bio has none of its own. */
	if (wip->wip_error != 0 && bp->bio_error == 0)
		bp->bio_error = wip->wip_error;
	wip->wip_error = 0;
	/* atomic_fetchadd returns the prior value: 1 means we were last. */
	if (atomic_fetchadd_long(&wip->wip_numios, -1) == 1) {
		sc = wip->wip_sc;
		G_WLOCK(sc);
		/* Record the written range while holding the write lock. */
		if (bp->bio_cmd == BIO_WRITE)
			g_union_setmap(bp, sc);
		TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
		G_WUNLOCK(sc);
		/* Restart requests that were blocked behind this one. */
		while ((waitingwip = TAILQ_FIRST(&wip->wip_waiting)) != NULL) {
			TAILQ_REMOVE(&wip->wip_waiting, waitingwip, wip_next);
			g_union_doio(waitingwip);
		}
		g_free(wip);
	}
	g_std_done(bp);
}
1070
1071 /*
1072 * Record blocks that have been written in the map.
1073 */
1074 static void
g_union_setmap(struct bio * bp,struct g_union_softc * sc)1075 g_union_setmap(struct bio *bp, struct g_union_softc *sc)
1076 {
1077 size_t root_idx;
1078 uint64_t **leaf;
1079 uint64_t *wordp;
1080 off_t start, numsec;
1081
1082 G_WLOCKOWNED(sc);
1083 KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1084 ("g_union_setmap: offset not on sector boundry"));
1085 KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1086 ("g_union_setmap: length not a multiple of sectors"));
1087 start = bp->bio_offset / sc->sc_sectorsize;
1088 numsec = bp->bio_length / sc->sc_sectorsize;
1089 KASSERT(start + numsec <= sc->sc_map_size,
1090 ("g_union_setmap: block %jd is out of range", start + numsec));
1091 for ( ; numsec > 0; numsec--, start++) {
1092 root_idx = start / sc->sc_bits_per_leaf;
1093 leaf = &sc->sc_writemap_root[root_idx];
1094 wordp = &(*leaf)
1095 [(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1096 *wordp |= 1ULL << (start % BITS_PER_ENTRY);
1097 sc->sc_leafused[root_idx / BITS_PER_ENTRY] |=
1098 1ULL << (root_idx % BITS_PER_ENTRY);
1099 }
1100 }
1101
/*
 * Check map to determine whether blocks have been written.
 *
 * Return true if they have been written so should be read from the top
 * layer. Return false if they have not been written so should be read
 * from the bottom layer. Return in len2read the bytes to be read. See
 * the comment above the BIO_READ implementation in g_union_start() for
 * an explanation of why len2read may be shorter than the buffer length.
 *
 * The scan proceeds at three granularities for speed: whole leaves
 * (via the sc_leafused summary bitmap), whole 64-bit words, and finally
 * individual bits. "maptype" latches the state (written or not) of the
 * first sector examined; scanning stops as soon as a sector of the
 * opposite state is found.
 */
static bool
g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read)
{
	off_t start, numsec, leafresid, bitloc;
	bool first, maptype, retval;
	uint64_t *leaf, word;
	size_t root_idx;

	KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
	    ("g_union_getmap: offset not on sector boundry"));
	KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
	    ("g_union_getmap: length not a multiple of sectors"));
	start = bp->bio_offset / sc->sc_sectorsize;
	numsec = bp->bio_length / sc->sc_sectorsize;
	G_UNION_DEBUG(4, "g_union_getmap: check %jd sectors starting at %jd\n",
	    numsec, start);
	KASSERT(start + numsec <= sc->sc_map_size,
	    ("g_union_getmap: block %jd is out of range", start + numsec));
	root_idx = start / sc->sc_bits_per_leaf;
	first = true;
	maptype = false;
	while (numsec > 0) {
		/* Check first if the leaf records any written sectors */
		root_idx = start / sc->sc_bits_per_leaf;
		leafresid = sc->sc_bits_per_leaf -
		    (start % sc->sc_bits_per_leaf);
		if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) &
		    (1ULL << (root_idx % BITS_PER_ENTRY))) == 0) {
			/*
			 * Entire leaf is unwritten: the rest of this leaf
			 * can be skipped in one step, unless we have been
			 * accumulating written sectors, in which case the
			 * run ends here.
			 */
			if (first) {
				maptype = false;
				first = false;
			}
			if (maptype)
				break;
			numsec -= leafresid;
			start += leafresid;
			continue;
		}
		/* Check up to a word boundry, then check word by word */
		leaf = sc->sc_writemap_root[root_idx];
		word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
		bitloc = start % BITS_PER_ENTRY;
		if (bitloc == 0 && (word == 0 || word == ~0)) {
			/*
			 * Word-aligned and the word is uniform (all written
			 * or all unwritten): consume it in one step if it
			 * matches the accumulated run, otherwise stop.
			 */
			if (first) {
				if (word == 0)
					maptype = false;
				else
					maptype = true;
				first = false;
			}
			if ((word == 0 && maptype) ||
			    (word == ~0 && !maptype))
				break;
			numsec -= BITS_PER_ENTRY;
			start += BITS_PER_ENTRY;
			continue;
		}
		/* Mixed word or unaligned start: walk bit by bit. */
		for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) {
			retval = (word & (1ULL << bitloc)) != 0;
			if (first) {
				maptype = retval;
				first = false;
			}
			if (maptype == retval) {
				numsec--;
				start++;
				continue;
			}
			goto out;
		}
	}
out:
	/*
	 * Whole-leaf and whole-word skips can overshoot the end of the
	 * request, driving numsec negative; clamp back to the request end.
	 */
	if (numsec < 0) {
		start += numsec;
		numsec = 0;
	}
	*len2read = bp->bio_length - (numsec * sc->sc_sectorsize);
	G_UNION_DEBUG(maptype ? 3 : 4,
	    "g_union_getmap: return maptype %swritten for %jd "
	    "sectors ending at %jd\n", maptype ? "" : "NOT ",
	    *len2read / sc->sc_sectorsize, start - 1);
	return (maptype);
}
1194
1195 /*
1196 * Fill in details for a BIO_GETATTR request.
1197 */
1198 static void
g_union_kerneldump(struct bio * bp,struct g_union_softc * sc)1199 g_union_kerneldump(struct bio *bp, struct g_union_softc *sc)
1200 {
1201 struct g_kerneldump *gkd;
1202 struct g_geom *gp;
1203 struct g_provider *pp;
1204
1205 gkd = (struct g_kerneldump *)bp->bio_data;
1206 gp = bp->bio_to->geom;
1207 g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
1208 (intmax_t)gkd->offset, (intmax_t)gkd->length);
1209
1210 pp = LIST_FIRST(&gp->provider);
1211
1212 gkd->di.dumper = g_union_dumper;
1213 gkd->di.priv = sc;
1214 gkd->di.blocksize = pp->sectorsize;
1215 gkd->di.maxiosize = DFLTPHYS;
1216 gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
1217 if (gkd->offset > sc->sc_size) {
1218 g_io_deliver(bp, ENODEV);
1219 return;
1220 }
1221 if (gkd->offset + gkd->length > sc->sc_size)
1222 gkd->length = sc->sc_size - gkd->offset;
1223 gkd->di.mediasize = gkd->length;
1224 g_io_deliver(bp, 0);
1225 }
1226
/*
 * Handler for g_union_kerneldump().
 *
 * NOTE(review): this is a no-op — the dump data is discarded and
 * success is returned unconditionally, so kernel dumps directed at a
 * union device are silently dropped. Presumably intentional (dumping
 * through the union layers is not supported) — confirm.
 */
static int
g_union_dumper(void *priv, void *virtual, off_t offset, size_t length)
{

	return (0);
}
1236
/*
 * List union statistics.
 *
 * Emits the per-device counters as XML elements into the sbuf for the
 * kern.geom.confxml sysctl. Output is produced only for the geom-level
 * pass: provider- or consumer-level calls (pp or cp non-NULL), or a
 * geom with no softc, produce nothing.
 */
static void
g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_union_softc *sc;

	if (pp != NULL || cp != NULL || gp->softc == NULL)
		return;
	sc = gp->softc;
	sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent,
	    (uintmax_t)sc->sc_reads);
	sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent,
	    (uintmax_t)sc->sc_writes);
	sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent,
	    (uintmax_t)sc->sc_deletes);
	sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent,
	    (uintmax_t)sc->sc_getattrs);
	sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent,
	    (uintmax_t)sc->sc_flushes);
	sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent,
	    (uintmax_t)sc->sc_speedups);
	sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent,
	    (uintmax_t)sc->sc_cmd0s);
	sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent,
	    (uintmax_t)sc->sc_cmd1s);
	sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent,
	    (uintmax_t)sc->sc_cmd2s);
	sbuf_printf(sb, "%s<ReadCurrentRead>%ju</ReadCurrentRead>\n", indent,
	    (uintmax_t)sc->sc_readcurrentread);
	sbuf_printf(sb, "%s<ReadBlockWrite>%ju</ReadBlockWrite>\n", indent,
	    (uintmax_t)sc->sc_readblockwrite);
	sbuf_printf(sb, "%s<WriteBlockRead>%ju</WriteBlockRead>\n", indent,
	    (uintmax_t)sc->sc_writeblockread);
	sbuf_printf(sb, "%s<WriteBlockWrite>%ju</WriteBlockWrite>\n", indent,
	    (uintmax_t)sc->sc_writeblockwrite);
	sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
	    (uintmax_t)sc->sc_readbytes);
	sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
	    (uintmax_t)sc->sc_wrotebytes);
	sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
	    (intmax_t)sc->sc_offset);
}
1282
/*
 * Clean up an orphaned geom.
 *
 * Called when an underlying provider disappears; forcibly destroy the
 * union device since one of its layers is no longer usable.
 */
static void
g_union_orphan(struct g_consumer *cp)
{

	g_topology_assert();
	g_union_destroy(NULL, cp->geom, true);
}
1293
/*
 * Clean up a union geom.
 *
 * Class destroy_geom method; non-forced, so an open device returns
 * EBUSY. NOTE(review): the gctl request is not forwarded — the call
 * passes req == NULL, so gctl status messages are suppressed on this
 * path. Looks deliberate, but confirm.
 */
static int
g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{

	return (g_union_destroy(NULL, gp, false));
}
1304
/*
 * Clean up a union device.
 *
 * If a commit is in progress or the provider is still open, refuse to
 * destroy unless "force" is set. Status is reported via gctl_msg()
 * when a request is supplied, and always via the debug log. On
 * (possibly forced) removal, drop this geom's access to the lower and
 * upper consumers and wither the geom.
 *
 * Returns 0 on success, ENXIO if the softc is already gone, or EBUSY
 * when the device is in use and force was not specified.
 */
static int
g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force)
{
	struct g_union_softc *sc;
	struct g_provider *pp;
	int error;

	g_topology_assert();
	sc = gp->softc;
	if (sc == NULL)
		return (ENXIO);
	pp = LIST_FIRST(&gp->provider);
	/* Busy if committing, or if any read/write/exclusive opens remain. */
	if ((sc->sc_flags & DOING_COMMIT) != 0 ||
	    (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) {
		if (force) {
			if (req != NULL)
				gctl_msg(req, 0, "Device %s is still in use, "
				    "so is being forcibly removed.", gp->name);
			G_UNION_DEBUG(1, "Device %s is still in use, so "
			    "is being forcibly removed.", gp->name);
		} else {
			if (req != NULL)
				gctl_msg(req, EBUSY, "Device %s is still open "
				    "(r=%d w=%d e=%d).", gp->name, pp->acr,
				    pp->acw, pp->ace);
			G_UNION_DEBUG(1, "Device %s is still open "
			    "(r=%d w=%d e=%d).", gp->name, pp->acr,
			    pp->acw, pp->ace);
			return (EBUSY);
		}
	} else {
		if (req != NULL)
			gctl_msg(req, 0, "Device %s removed.", gp->name);
		G_UNION_DEBUG(1, "Device %s removed.", gp->name);
	}
	/* Close consumers */
	if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0)
		G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
		    "to %s.", error, gp->name, sc->sc_lowercp->provider->name);
	if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0)
		G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
		    "to %s.", error, gp->name, sc->sc_uppercp->provider->name);

	g_wither_geom(gp, ENXIO);

	return (0);
}
1355
1356 /*
1357 * Clean up a union provider.
1358 */
1359 static void
g_union_providergone(struct g_provider * pp)1360 g_union_providergone(struct g_provider *pp)
1361 {
1362 struct g_geom *gp;
1363 struct g_union_softc *sc;
1364 size_t i;
1365
1366 gp = pp->geom;
1367 sc = gp->softc;
1368 gp->softc = NULL;
1369 for (i = 0; i < sc->sc_root_size; i++)
1370 g_free(sc->sc_writemap_root[i]);
1371 g_free(sc->sc_writemap_root);
1372 g_free(sc->sc_leafused);
1373 rw_destroy(&sc->sc_rwlock);
1374 g_free(sc);
1375 }
1376
1377 /*
1378 * Respond to a resized provider.
1379 */
1380 static void
g_union_resize(struct g_consumer * cp)1381 g_union_resize(struct g_consumer *cp)
1382 {
1383 struct g_union_softc *sc;
1384 struct g_geom *gp;
1385
1386 g_topology_assert();
1387
1388 gp = cp->geom;
1389 sc = gp->softc;
1390
1391 /*
1392 * If size has gotten bigger, ignore it and just keep using
1393 * the space we already had. Otherwise we are done.
1394 */
1395 if (sc->sc_size < cp->provider->mediasize - sc->sc_offset)
1396 return;
1397 g_union_destroy(NULL, gp, true);
1398 }
1399
1400 DECLARE_GEOM_CLASS(g_union_class, g_union);
1401 MODULE_VERSION(geom_union, 0);
1402