xref: /freebsd/sys/geom/geom_subr.c (revision dc60165b73e4c4d829a2cb9fed5cce585e93d9a9)
1 /*-
2  * Copyright (c) 2002 Poul-Henning Kamp
3  * Copyright (c) 2002 Networks Associates Technology, Inc.
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9  * DARPA CHATS research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The names of the authors may not be used to endorse or promote
20  *    products derived from this software without specific prior written
21  *    permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include "opt_ddb.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/devicestat.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/bio.h>
47 #include <sys/sysctl.h>
48 #include <sys/proc.h>
49 #include <sys/kthread.h>
50 #include <sys/lock.h>
51 #include <sys/mutex.h>
52 #include <sys/errno.h>
53 #include <sys/sbuf.h>
54 #include <geom/geom.h>
55 #include <geom/geom_int.h>
56 #include <machine/stdarg.h>
57 
58 #ifdef DDB
59 #include <ddb/ddb.h>
60 #endif
61 
62 struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
63 static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
64 char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
65 
/*
 * Request block handed to the class load/unload/retaste event handlers.
 */
struct g_hh00 {
	struct g_class	*mp;	/* class the event operates on */
	int		error;	/* result written back by the event handler */
	int		post;	/* nonzero: nobody waits; handler frees this */
};
71 
72 /*
73  * This event offers a new class a chance to taste all preexisting providers.
74  */
75 static void
76 g_load_class(void *arg, int flag)
77 {
78 	struct g_hh00 *hh;
79 	struct g_class *mp2, *mp;
80 	struct g_geom *gp;
81 	struct g_provider *pp;
82 
83 	g_topology_assert();
84 	if (flag == EV_CANCEL)	/* XXX: can't happen ? */
85 		return;
86 	if (g_shutdown)
87 		return;
88 
89 	hh = arg;
90 	mp = hh->mp;
91 	hh->error = 0;
92 	if (hh->post) {
93 		g_free(hh);
94 		hh = NULL;
95 	}
96 	g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
97 	KASSERT(mp->name != NULL && *mp->name != '\0',
98 	    ("GEOM class has no name"));
99 	LIST_FOREACH(mp2, &g_classes, class) {
100 		if (mp2 == mp) {
101 			printf("The GEOM class %s is already loaded.\n",
102 			    mp2->name);
103 			if (hh != NULL)
104 				hh->error = EEXIST;
105 			return;
106 		} else if (strcmp(mp2->name, mp->name) == 0) {
107 			printf("A GEOM class %s is already loaded.\n",
108 			    mp2->name);
109 			if (hh != NULL)
110 				hh->error = EEXIST;
111 			return;
112 		}
113 	}
114 
115 	LIST_INIT(&mp->geom);
116 	LIST_INSERT_HEAD(&g_classes, mp, class);
117 	if (mp->init != NULL)
118 		mp->init(mp);
119 	if (mp->taste == NULL)
120 		return;
121 	LIST_FOREACH(mp2, &g_classes, class) {
122 		if (mp == mp2)
123 			continue;
124 		LIST_FOREACH(gp, &mp2->geom, geom) {
125 			LIST_FOREACH(pp, &gp->provider, provider) {
126 				mp->taste(mp, pp, 0);
127 				g_topology_assert();
128 			}
129 		}
130 	}
131 }
132 
133 static void
134 g_unload_class(void *arg, int flag)
135 {
136 	struct g_hh00 *hh;
137 	struct g_class *mp;
138 	struct g_geom *gp;
139 	struct g_provider *pp;
140 	struct g_consumer *cp;
141 	int error;
142 
143 	g_topology_assert();
144 	hh = arg;
145 	mp = hh->mp;
146 	G_VALID_CLASS(mp);
147 	g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
148 
149 	/*
150 	 * We allow unloading if we have no geoms, or a class
151 	 * method we can use to get rid of them.
152 	 */
153 	if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) {
154 		hh->error = EOPNOTSUPP;
155 		return;
156 	}
157 
158 	/* We refuse to unload if anything is open */
159 	LIST_FOREACH(gp, &mp->geom, geom) {
160 		LIST_FOREACH(pp, &gp->provider, provider)
161 			if (pp->acr || pp->acw || pp->ace) {
162 				hh->error = EBUSY;
163 				return;
164 			}
165 		LIST_FOREACH(cp, &gp->consumer, consumer)
166 			if (cp->acr || cp->acw || cp->ace) {
167 				hh->error = EBUSY;
168 				return;
169 			}
170 	}
171 
172 	/* Bar new entries */
173 	mp->taste = NULL;
174 	mp->config = NULL;
175 
176 	error = 0;
177 	for (;;) {
178 		gp = LIST_FIRST(&mp->geom);
179 		if (gp == NULL)
180 			break;
181 		error = mp->destroy_geom(NULL, mp, gp);
182 		if (error != 0)
183 			break;
184 	}
185 	if (error == 0) {
186 		if (mp->fini != NULL)
187 			mp->fini(mp);
188 		LIST_REMOVE(mp, class);
189 	}
190 	hh->error = error;
191 	return;
192 }
193 
194 int
195 g_modevent(module_t mod, int type, void *data)
196 {
197 	struct g_hh00 *hh;
198 	int error;
199 	static int g_ignition;
200 	struct g_class *mp;
201 
202 	mp = data;
203 	if (mp->version != G_VERSION) {
204 		printf("GEOM class %s has Wrong version %x\n",
205 		    mp->name, mp->version);
206 		return (EINVAL);
207 	}
208 	if (!g_ignition) {
209 		g_ignition++;
210 		g_init();
211 	}
212 	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
213 	hh->mp = data;
214 	error = EOPNOTSUPP;
215 	switch (type) {
216 	case MOD_LOAD:
217 		g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", hh->mp->name);
218 		/*
219 		 * Once the system is not cold, MOD_LOAD calls will be
220 		 * from the userland and the g_event thread will be able
221 		 * to acknowledge their completion.
222 		 */
223 		if (cold) {
224 			hh->post = 1;
225 			error = g_post_event(g_load_class, hh, M_WAITOK, NULL);
226 		} else {
227 			error = g_waitfor_event(g_load_class, hh, M_WAITOK,
228 			    NULL);
229 			if (error == 0)
230 				error = hh->error;
231 			g_free(hh);
232 		}
233 		break;
234 	case MOD_UNLOAD:
235 		g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", hh->mp->name);
236 		error = g_waitfor_event(g_unload_class, hh, M_WAITOK, NULL);
237 		if (error == 0)
238 			error = hh->error;
239 		if (error == 0) {
240 			KASSERT(LIST_EMPTY(&hh->mp->geom),
241 			    ("Unloaded class (%s) still has geom", hh->mp->name));
242 		}
243 		g_free(hh);
244 		break;
245 	default:
246 		g_free(hh);
247 		break;
248 	}
249 	return (error);
250 }
251 
252 static void
253 g_retaste_event(void *arg, int flag)
254 {
255 	struct g_class *cp, *mp;
256 	struct g_geom *gp, *gp2;
257 	struct g_hh00 *hh;
258 	struct g_provider *pp;
259 
260 	g_topology_assert();
261 	if (flag == EV_CANCEL)  /* XXX: can't happen ? */
262 		return;
263 	if (g_shutdown)
264 		return;
265 
266 	hh = arg;
267 	mp = hh->mp;
268 	hh->error = 0;
269 	if (hh->post) {
270 		g_free(hh);
271 		hh = NULL;
272 	}
273 	g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name);
274 
275 	LIST_FOREACH(cp, &g_classes, class) {
276 		LIST_FOREACH(gp, &cp->geom, geom) {
277 			LIST_FOREACH(pp, &gp->provider, provider) {
278 				if (pp->acr || pp->acw || pp->ace)
279 					continue;
280 				LIST_FOREACH(gp2, &mp->geom, geom) {
281 					if (!strcmp(pp->name, gp2->name))
282 						break;
283 				}
284 				if (gp2 != NULL)
285 					g_wither_geom(gp2, ENXIO);
286 				mp->taste(mp, pp, 0);
287 				g_topology_assert();
288 			}
289 		}
290 	}
291 }
292 
293 int
294 g_retaste(struct g_class *mp)
295 {
296 	struct g_hh00 *hh;
297 	int error;
298 
299 	if (mp->taste == NULL)
300 		return (EINVAL);
301 
302 	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
303 	hh->mp = mp;
304 
305 	if (cold) {
306 		hh->post = 1;
307 		error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL);
308 	} else {
309 		error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL);
310 		if (error == 0)
311 			error = hh->error;
312 		g_free(hh);
313 	}
314 
315 	return (error);
316 }
317 
318 struct g_geom *
319 g_new_geomf(struct g_class *mp, const char *fmt, ...)
320 {
321 	struct g_geom *gp;
322 	va_list ap;
323 	struct sbuf *sb;
324 
325 	g_topology_assert();
326 	G_VALID_CLASS(mp);
327 	sb = sbuf_new_auto();
328 	va_start(ap, fmt);
329 	sbuf_vprintf(sb, fmt, ap);
330 	va_end(ap);
331 	sbuf_finish(sb);
332 	gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
333 	gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
334 	gp->class = mp;
335 	gp->rank = 1;
336 	LIST_INIT(&gp->consumer);
337 	LIST_INIT(&gp->provider);
338 	LIST_INSERT_HEAD(&mp->geom, gp, geom);
339 	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
340 	strcpy(gp->name, sbuf_data(sb));
341 	sbuf_delete(sb);
342 	/* Fill in defaults from class */
343 	gp->start = mp->start;
344 	gp->spoiled = mp->spoiled;
345 	gp->dumpconf = mp->dumpconf;
346 	gp->access = mp->access;
347 	gp->orphan = mp->orphan;
348 	gp->ioctl = mp->ioctl;
349 	return (gp);
350 }
351 
352 void
353 g_destroy_geom(struct g_geom *gp)
354 {
355 
356 	g_topology_assert();
357 	G_VALID_GEOM(gp);
358 	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
359 	KASSERT(LIST_EMPTY(&gp->consumer),
360 	    ("g_destroy_geom(%s) with consumer(s) [%p]",
361 	    gp->name, LIST_FIRST(&gp->consumer)));
362 	KASSERT(LIST_EMPTY(&gp->provider),
363 	    ("g_destroy_geom(%s) with provider(s) [%p]",
364 	    gp->name, LIST_FIRST(&gp->provider)));
365 	g_cancel_event(gp);
366 	LIST_REMOVE(gp, geom);
367 	TAILQ_REMOVE(&geoms, gp, geoms);
368 	g_free(gp->name);
369 	g_free(gp);
370 }
371 
372 /*
373  * This function is called (repeatedly) until the geom has withered away.
374  */
375 void
376 g_wither_geom(struct g_geom *gp, int error)
377 {
378 	struct g_provider *pp;
379 
380 	g_topology_assert();
381 	G_VALID_GEOM(gp);
382 	g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
383 	if (!(gp->flags & G_GEOM_WITHER)) {
384 		gp->flags |= G_GEOM_WITHER;
385 		LIST_FOREACH(pp, &gp->provider, provider)
386 			if (!(pp->flags & G_PF_ORPHAN))
387 				g_orphan_provider(pp, error);
388 	}
389 	g_do_wither();
390 }
391 
392 /*
393  * Convenience function to destroy a particular provider.
394  */
395 void
396 g_wither_provider(struct g_provider *pp, int error)
397 {
398 
399 	pp->flags |= G_PF_WITHER;
400 	if (!(pp->flags & G_PF_ORPHAN))
401 		g_orphan_provider(pp, error);
402 }
403 
404 /*
405  * This function is called (repeatedly) until the has withered away.
406  */
407 void
408 g_wither_geom_close(struct g_geom *gp, int error)
409 {
410 	struct g_consumer *cp;
411 
412 	g_topology_assert();
413 	G_VALID_GEOM(gp);
414 	g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name);
415 	LIST_FOREACH(cp, &gp->consumer, consumer)
416 		if (cp->acr || cp->acw || cp->ace)
417 			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
418 	g_wither_geom(gp, error);
419 }
420 
421 /*
422  * This function is called (repeatedly) until we cant wash away more
423  * withered bits at present.  Return value contains two bits.  Bit 0
424  * set means "withering stuff we can't wash now", bit 1 means "call
425  * me again, there may be stuff I didn't get the first time around.
426  */
427 int
428 g_wither_washer()
429 {
430 	struct g_class *mp;
431 	struct g_geom *gp, *gp2;
432 	struct g_provider *pp, *pp2;
433 	struct g_consumer *cp, *cp2;
434 	int result;
435 
436 	result = 0;
437 	g_topology_assert();
438 	LIST_FOREACH(mp, &g_classes, class) {
439 		LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
440 			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
441 				if (!(pp->flags & G_PF_WITHER))
442 					continue;
443 				if (LIST_EMPTY(&pp->consumers))
444 					g_destroy_provider(pp);
445 				else
446 					result |= 1;
447 			}
448 			if (!(gp->flags & G_GEOM_WITHER))
449 				continue;
450 			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
451 				if (LIST_EMPTY(&pp->consumers))
452 					g_destroy_provider(pp);
453 				else
454 					result |= 1;
455 			}
456 			LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) {
457 				if (cp->acr || cp->acw || cp->ace) {
458 					result |= 1;
459 					continue;
460 				}
461 				if (cp->provider != NULL)
462 					g_detach(cp);
463 				g_destroy_consumer(cp);
464 				result |= 2;
465 			}
466 			if (LIST_EMPTY(&gp->provider) &&
467 			    LIST_EMPTY(&gp->consumer))
468 				g_destroy_geom(gp);
469 			else
470 				result |= 1;
471 		}
472 	}
473 	return (result);
474 }
475 
476 struct g_consumer *
477 g_new_consumer(struct g_geom *gp)
478 {
479 	struct g_consumer *cp;
480 
481 	g_topology_assert();
482 	G_VALID_GEOM(gp);
483 	KASSERT(!(gp->flags & G_GEOM_WITHER),
484 	    ("g_new_consumer on WITHERing geom(%s) (class %s)",
485 	    gp->name, gp->class->name));
486 	KASSERT(gp->orphan != NULL,
487 	    ("g_new_consumer on geom(%s) (class %s) without orphan",
488 	    gp->name, gp->class->name));
489 
490 	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
491 	cp->geom = gp;
492 	cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED,
493 	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
494 	LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
495 	return(cp);
496 }
497 
498 void
499 g_destroy_consumer(struct g_consumer *cp)
500 {
501 	struct g_geom *gp;
502 
503 	g_topology_assert();
504 	G_VALID_CONSUMER(cp);
505 	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
506 	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
507 	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
508 	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
509 	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
510 	g_cancel_event(cp);
511 	gp = cp->geom;
512 	LIST_REMOVE(cp, consumer);
513 	devstat_remove_entry(cp->stat);
514 	g_free(cp);
515 	if (gp->flags & G_GEOM_WITHER)
516 		g_do_wither();
517 }
518 
519 static void
520 g_new_provider_event(void *arg, int flag)
521 {
522 	struct g_class *mp;
523 	struct g_provider *pp;
524 	struct g_consumer *cp;
525 	int i;
526 
527 	g_topology_assert();
528 	if (flag == EV_CANCEL)
529 		return;
530 	if (g_shutdown)
531 		return;
532 	pp = arg;
533 	G_VALID_PROVIDER(pp);
534 	KASSERT(!(pp->flags & G_PF_WITHER),
535 	    ("g_new_provider_event but withered"));
536 	LIST_FOREACH(mp, &g_classes, class) {
537 		if (mp->taste == NULL)
538 			continue;
539 		i = 1;
540 		LIST_FOREACH(cp, &pp->consumers, consumers)
541 			if (cp->geom->class == mp)
542 				i = 0;
543 		if (!i)
544 			continue;
545 		mp->taste(mp, pp, 0);
546 		g_topology_assert();
547 	}
548 	if (pp->roothold != NULL) {
549 		root_mount_rel(pp->roothold);
550 		pp->roothold = NULL;
551 	}
552 }
553 
554 
555 struct g_provider *
556 g_new_providerf(struct g_geom *gp, const char *fmt, ...)
557 {
558 	struct g_provider *pp;
559 	struct sbuf *sb;
560 	va_list ap;
561 
562 	g_topology_assert();
563 	G_VALID_GEOM(gp);
564 	KASSERT(gp->access != NULL,
565 	    ("new provider on geom(%s) without ->access (class %s)",
566 	    gp->name, gp->class->name));
567 	KASSERT(gp->start != NULL,
568 	    ("new provider on geom(%s) without ->start (class %s)",
569 	    gp->name, gp->class->name));
570 	KASSERT(!(gp->flags & G_GEOM_WITHER),
571 	    ("new provider on WITHERing geom(%s) (class %s)",
572 	    gp->name, gp->class->name));
573 	sb = sbuf_new_auto();
574 	va_start(ap, fmt);
575 	sbuf_vprintf(sb, fmt, ap);
576 	va_end(ap);
577 	sbuf_finish(sb);
578 	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
579 	pp->name = (char *)(pp + 1);
580 	strcpy(pp->name, sbuf_data(sb));
581 	sbuf_delete(sb);
582 	LIST_INIT(&pp->consumers);
583 	pp->error = ENXIO;
584 	pp->geom = gp;
585 	pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED,
586 	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
587 	LIST_INSERT_HEAD(&gp->provider, pp, provider);
588 	pp->roothold = root_mount_hold(pp->name, M_WAITOK);
589 	g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL);
590 	return (pp);
591 }
592 
593 void
594 g_error_provider(struct g_provider *pp, int error)
595 {
596 
597 	/* G_VALID_PROVIDER(pp);  We may not have g_topology */
598 	pp->error = error;
599 }
600 
601 struct g_provider *
602 g_provider_by_name(char const *arg)
603 {
604 	struct g_class *cp;
605 	struct g_geom *gp;
606 	struct g_provider *pp;
607 
608 	LIST_FOREACH(cp, &g_classes, class) {
609 		LIST_FOREACH(gp, &cp->geom, geom) {
610 			LIST_FOREACH(pp, &gp->provider, provider) {
611 				if (!strcmp(arg, pp->name))
612 					return (pp);
613 			}
614 		}
615 	}
616 	return (NULL);
617 }
618 
619 void
620 g_destroy_provider(struct g_provider *pp)
621 {
622 	struct g_geom *gp;
623 
624 	g_topology_assert();
625 	G_VALID_PROVIDER(pp);
626 	KASSERT(LIST_EMPTY(&pp->consumers),
627 	    ("g_destroy_provider but attached"));
628 	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
629 	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
630 	KASSERT (pp->ace == 0, ("g_destroy_provider with ace"));
631 	g_cancel_event(pp);
632 	LIST_REMOVE(pp, provider);
633 	gp = pp->geom;
634 	devstat_remove_entry(pp->stat);
635 	g_free(pp);
636 	if ((gp->flags & G_GEOM_WITHER))
637 		g_do_wither();
638 }
639 
/*
 * We keep the "geoms" list sorted by topological order (== increasing
 * numerical rank) at all times.
 * When an attach is done, the attaching geom's rank is invalidated
 * and it is moved to the tail of the list.
 * All geoms later in the sequence have their ranks reevaluated in
 * sequence.  If we cannot assign rank to a geom because its
 * prerequisites do not have rank, we move that element to the tail
 * of the sequence with invalid rank as well.
 * At some point we encounter our original geom and if we still fail
 * to assign it a rank, there must be a loop and we fail back to
 * g_attach() which detaches again and calls redo_rank again
 * to fix up the damage.
 * It would be much simpler code wise to do it recursively, but we
 * can't risk that on the kernel stack.
 *
 * Returns 0 on success and ELOOP when "gp" cannot be given a rank.
 */

static int
redo_rank(struct g_geom *gp)
{
	struct g_consumer *cp;
	struct g_geom *gp1, *gp2;
	int n, m;

	g_topology_assert();
	G_VALID_GEOM(gp);

	/* Invalidate this geoms rank and move it to the tail */
	gp1 = TAILQ_NEXT(gp, geoms);
	if (gp1 != NULL) {
		gp->rank = 0;
		TAILQ_REMOVE(&geoms, gp, geoms);
		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
	} else {
		/* gp already is the tail: start re-ranking at gp itself. */
		gp1 = gp;
	}

	/* re-rank the rest of the sequence */
	for (; gp1 != NULL; gp1 = gp2) {
		gp1->rank = 0;
		/*
		 * m becomes one more than the highest rank among the geoms
		 * we consume from, or 0 if one of them is still unranked.
		 */
		m = 1;
		LIST_FOREACH(cp, &gp1->consumer, consumer) {
			if (cp->provider == NULL)
				continue;
			n = cp->provider->geom->rank;
			if (n == 0) {
				m = 0;
				break;
			} else if (n >= m)
				m = n + 1;
		}
		gp1->rank = m;
		/* Fetch the successor before gp1 is possibly moved below. */
		gp2 = TAILQ_NEXT(gp1, geoms);

		/* got a rank, moving on */
		if (m != 0)
			continue;

		/* no rank to original geom means loop */
		if (gp == gp1)
			return (ELOOP);

		/* no rank, put it at the end move on */
		TAILQ_REMOVE(&geoms, gp1, geoms);
		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
	}
	return (0);
}
708 
709 int
710 g_attach(struct g_consumer *cp, struct g_provider *pp)
711 {
712 	int error;
713 
714 	g_topology_assert();
715 	G_VALID_CONSUMER(cp);
716 	G_VALID_PROVIDER(pp);
717 	KASSERT(cp->provider == NULL, ("attach but attached"));
718 	cp->provider = pp;
719 	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
720 	error = redo_rank(cp->geom);
721 	if (error) {
722 		LIST_REMOVE(cp, consumers);
723 		cp->provider = NULL;
724 		redo_rank(cp->geom);
725 	}
726 	return (error);
727 }
728 
729 void
730 g_detach(struct g_consumer *cp)
731 {
732 	struct g_provider *pp;
733 
734 	g_topology_assert();
735 	G_VALID_CONSUMER(cp);
736 	g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp);
737 	KASSERT(cp->provider != NULL, ("detach but not attached"));
738 	KASSERT(cp->acr == 0, ("detach but nonzero acr"));
739 	KASSERT(cp->acw == 0, ("detach but nonzero acw"));
740 	KASSERT(cp->ace == 0, ("detach but nonzero ace"));
741 	KASSERT(cp->nstart == cp->nend,
742 	    ("detach with active requests"));
743 	pp = cp->provider;
744 	LIST_REMOVE(cp, consumers);
745 	cp->provider = NULL;
746 	if (pp->geom->flags & G_GEOM_WITHER)
747 		g_do_wither();
748 	else if (pp->flags & G_PF_WITHER)
749 		g_do_wither();
750 	redo_rank(cp->geom);
751 }
752 
/*
 * g_access()
 *
 * Access-check with delta values.  The question asked is "can consumer
 * "cp" change the access counters by the relative amounts dc[rwe] ?"
 *
 * Returns 0 when the change was applied to both the consumer and its
 * provider, otherwise an errno value and the counters are unchanged.
 */

int
g_access(struct g_consumer *cp, int dcr, int dcw, int dce)
{
	struct g_provider *pp;
	int pr,pw,pe;
	int error;

	g_topology_assert();
	G_VALID_CONSUMER(cp);
	pp = cp->provider;
	KASSERT(pp != NULL, ("access but not attached"));
	G_VALID_PROVIDER(pp);

	g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)",
	    cp, pp->name, dcr, dcw, dce);

	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
	KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request"));
	KASSERT(pp->geom->access != NULL, ("NULL geom->access"));

	/*
	 * If our class cares about being spoiled, and we have been, we
	 * are probably just ahead of the event telling us that.  Fail
	 * now rather than having to unravel this later.
	 */
	if (cp->geom->spoiled != NULL && cp->spoiled &&
	    (dcr > 0 || dcw > 0 || dce > 0))
		return (ENXIO);

	/*
	 * Figure out what counts the provider would have had, if this
	 * consumer had (r0w0e0) at this time.
	 */
	pr = pp->acr - cp->acr;
	pw = pp->acw - cp->acw;
	pe = pp->ace - cp->ace;

	g_trace(G_T_ACCESS,
    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
	    dcr, dcw, dce,
	    cp->acr, cp->acw, cp->ace,
	    pp->acr, pp->acw, pp->ace,
	    pp, pp->name);

	/* If foot-shooting is enabled, any open on rank#1 is OK */
	if ((g_debugflags & 16) && pp->geom->rank == 1)
		;
	/* If we try exclusive but already write: fail */
	else if (dce > 0 && pw > 0)
		return (EPERM);
	/* If we try write but already exclusive: fail */
	else if (dcw > 0 && pe > 0)
		return (EPERM);
	/* If we try to open more but provider is error'ed: fail */
	else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
		return (pp->error);

	/* Ok then... */

	/* Ask the providing geom; only a request that opens may fail. */
	error = pp->geom->access(pp, dcr, dcw, dce);
	KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0,
	    ("Geom provider %s::%s failed closing ->access()",
	    pp->geom->class->name, pp->name));
	if (!error) {
		/*
		 * If we open first write, spoil any partner consumers.
		 * If we close last write and provider is not errored,
		 * trigger re-taste.
		 */
		if (pp->acw == 0 && dcw != 0)
			g_spoil(pp, cp);
		else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 &&
		    !(pp->geom->flags & G_GEOM_WITHER))
			g_post_event(g_new_provider_event, pp, M_WAITOK,
			    pp, NULL);

		/* Commit the deltas on both ends of the attachment. */
		pp->acr += dcr;
		pp->acw += dcw;
		pp->ace += dce;
		cp->acr += dcr;
		cp->acw += dcw;
		cp->ace += dce;
		/* An open provider must have its sectorsize set. */
		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)
			KASSERT(pp->sectorsize > 0,
			    ("Provider %s lacks sectorsize", pp->name));
	}
	return (error);
}
850 
/*
 * g_handleattr() wrapper for an attribute whose value is an int.
 */
int
g_handleattr_int(struct bio *bp, const char *attribute, int val)
{
	int handled;

	handled = g_handleattr(bp, attribute, &val, sizeof(val));
	return (handled);
}
857 
858 int
859 g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
860 {
861 
862 	return (g_handleattr(bp, attribute, &val, sizeof val));
863 }
864 
/*
 * g_handleattr() wrapper for a string attribute; a length of zero
 * tells g_handleattr() to treat the value as a string.
 */
int
g_handleattr_str(struct bio *bp, const char *attribute, const char *str)
{
	int handled;

	handled = g_handleattr(bp, attribute, str, 0);
	return (handled);
}
871 
872 int
873 g_handleattr(struct bio *bp, const char *attribute, const void *val, int len)
874 {
875 	int error = 0;
876 
877 	if (strcmp(bp->bio_attribute, attribute))
878 		return (0);
879 	if (len == 0) {
880 		bzero(bp->bio_data, bp->bio_length);
881 		if (strlcpy(bp->bio_data, val, bp->bio_length) >=
882 		    bp->bio_length) {
883 			printf("%s: %s bio_length %jd len %zu -> EFAULT\n",
884 			    __func__, bp->bio_to->name,
885 			    (intmax_t)bp->bio_length, strlen(val));
886 			error = EFAULT;
887 		}
888 	} else if (bp->bio_length == len) {
889 		bcopy(val, bp->bio_data, len);
890 	} else {
891 		printf("%s: %s bio_length %jd len %d -> EFAULT\n", __func__,
892 		    bp->bio_to->name, (intmax_t)bp->bio_length, len);
893 		error = EFAULT;
894 	}
895 	if (error == 0)
896 		bp->bio_completed = bp->bio_length;
897 	g_io_deliver(bp, error);
898 	return (1);
899 }
900 
901 int
902 g_std_access(struct g_provider *pp,
903 	int dr __unused, int dw __unused, int de __unused)
904 {
905 
906 	g_topology_assert();
907 	G_VALID_PROVIDER(pp);
908         return (0);
909 }
910 
911 void
912 g_std_done(struct bio *bp)
913 {
914 	struct bio *bp2;
915 
916 	bp2 = bp->bio_parent;
917 	if (bp2->bio_error == 0)
918 		bp2->bio_error = bp->bio_error;
919 	bp2->bio_completed += bp->bio_completed;
920 	g_destroy_bio(bp);
921 	bp2->bio_inbed++;
922 	if (bp2->bio_children == bp2->bio_inbed)
923 		g_io_deliver(bp2, bp2->bio_error);
924 }
925 
926 /* XXX: maybe this is only g_slice_spoiled */
927 
928 void
929 g_std_spoiled(struct g_consumer *cp)
930 {
931 	struct g_geom *gp;
932 	struct g_provider *pp;
933 
934 	g_topology_assert();
935 	G_VALID_CONSUMER(cp);
936 	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
937 	g_detach(cp);
938 	gp = cp->geom;
939 	LIST_FOREACH(pp, &gp->provider, provider)
940 		g_orphan_provider(pp, ENXIO);
941 	g_destroy_consumer(cp);
942 	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
943 		g_destroy_geom(gp);
944 	else
945 		gp->flags |= G_GEOM_WITHER;
946 }
947 
948 /*
949  * Spoiling happens when a provider is opened for writing, but consumers
950  * which are configured by in-band data are attached (slicers for instance).
951  * Since the write might potentially change the in-band data, such consumers
952  * need to re-evaluate their existence after the writing session closes.
953  * We do this by (offering to) tear them down when the open for write happens
954  * in return for a re-taste when it closes again.
955  * Together with the fact that such consumers grab an 'e' bit whenever they
956  * are open, regardless of mode, this ends up DTRT.
957  */
958 
959 static void
960 g_spoil_event(void *arg, int flag)
961 {
962 	struct g_provider *pp;
963 	struct g_consumer *cp, *cp2;
964 
965 	g_topology_assert();
966 	if (flag == EV_CANCEL)
967 		return;
968 	pp = arg;
969 	G_VALID_PROVIDER(pp);
970 	for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) {
971 		cp2 = LIST_NEXT(cp, consumers);
972 		if (!cp->spoiled)
973 			continue;
974 		cp->spoiled = 0;
975 		if (cp->geom->spoiled == NULL)
976 			continue;
977 		cp->geom->spoiled(cp);
978 		g_topology_assert();
979 	}
980 }
981 
982 void
983 g_spoil(struct g_provider *pp, struct g_consumer *cp)
984 {
985 	struct g_consumer *cp2;
986 
987 	g_topology_assert();
988 	G_VALID_PROVIDER(pp);
989 	G_VALID_CONSUMER(cp);
990 
991 	LIST_FOREACH(cp2, &pp->consumers, consumers) {
992 		if (cp2 == cp)
993 			continue;
994 /*
995 		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
996 		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
997 */
998 		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
999 		cp2->spoiled++;
1000 	}
1001 	g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL);
1002 }
1003 
1004 int
1005 g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
1006 {
1007 	int error, i;
1008 
1009 	i = len;
1010 	error = g_io_getattr(attr, cp, &i, var);
1011 	if (error)
1012 		return (error);
1013 	if (i != len)
1014 		return (EINVAL);
1015 	return (0);
1016 }
1017 
1018 #if defined(DIAGNOSTIC) || defined(DDB)
1019 /*
1020  * This function walks (topologically unsafely) the mesh and return a
1021  * non-zero integer if it finds the argument pointer is an object.
1022  * The return value indicates which type of object it is belived to be.
1023  * If topology is not locked, this function is potentially dangerous,
1024  * but since it is for debugging purposes and can be useful for instance
1025  * from DDB, we do not assert topology lock is held.
1026  */
1027 int
1028 g_valid_obj(void const *ptr)
1029 {
1030 	struct g_class *mp;
1031 	struct g_geom *gp;
1032 	struct g_consumer *cp;
1033 	struct g_provider *pp;
1034 
1035 	LIST_FOREACH(mp, &g_classes, class) {
1036 		if (ptr == mp)
1037 			return (1);
1038 		LIST_FOREACH(gp, &mp->geom, geom) {
1039 			if (ptr == gp)
1040 				return (2);
1041 			LIST_FOREACH(cp, &gp->consumer, consumer)
1042 				if (ptr == cp)
1043 					return (3);
1044 			LIST_FOREACH(pp, &gp->provider, provider)
1045 				if (ptr == pp)
1046 					return (4);
1047 		}
1048 	}
1049 	return(0);
1050 }
1051 #endif
1052 
1053 #ifdef DDB
1054 
/*
 * printf() preceded by "indent" spaces.  The caller must have a
 * variable named "indent" in scope.
 */
#define	gprintf(...)	do {						\
	printf("%*s", indent, "");					\
	printf(__VA_ARGS__);						\
} while (0)
/* Same as gprintf(), followed by a newline. */
#define	gprintln(...)	do {						\
	gprintf(__VA_ARGS__);						\
	printf("\n");							\
} while (0)

/*
 * Append the name "sflag" to the string being built if "flag" is set in
 * (obj)->flags, comma-separating it from earlier entries.  The caller
 * must have "str", "size" and "comma" in scope.
 */
#define	ADDFLAG(obj, flag, sflag)	do {				\
	if ((obj)->flags & (flag)) {					\
		if (comma)						\
			strlcat(str, ",", size);			\
		strlcat(str, (sflag), size);				\
		comma = 1;						\
	}								\
} while (0)
1072 
1073 static char *
1074 provider_flags_to_string(struct g_provider *pp, char *str, size_t size)
1075 {
1076 	int comma = 0;
1077 
1078 	bzero(str, size);
1079 	if (pp->flags == 0) {
1080 		strlcpy(str, "NONE", size);
1081 		return (str);
1082 	}
1083 	ADDFLAG(pp, G_PF_CANDELETE, "G_PF_CANDELETE");
1084 	ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER");
1085 	ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN");
1086 	return (str);
1087 }
1088 
1089 static char *
1090 geom_flags_to_string(struct g_geom *gp, char *str, size_t size)
1091 {
1092 	int comma = 0;
1093 
1094 	bzero(str, size);
1095 	if (gp->flags == 0) {
1096 		strlcpy(str, "NONE", size);
1097 		return (str);
1098 	}
1099 	ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER");
1100 	return (str);
1101 }
1102 static void
1103 db_show_geom_consumer(int indent, struct g_consumer *cp)
1104 {
1105 
1106 	if (indent == 0) {
1107 		gprintln("consumer: %p", cp);
1108 		gprintln("  class:    %s (%p)", cp->geom->class->name,
1109 		    cp->geom->class);
1110 		gprintln("  geom:     %s (%p)", cp->geom->name, cp->geom);
1111 		if (cp->provider == NULL)
1112 			gprintln("  provider: none");
1113 		else {
1114 			gprintln("  provider: %s (%p)", cp->provider->name,
1115 			    cp->provider);
1116 		}
1117 		gprintln("  access:   r%dw%de%d", cp->acr, cp->acw, cp->ace);
1118 		gprintln("  spoiled:  %d", cp->spoiled);
1119 		gprintln("  nstart:   %u", cp->nstart);
1120 		gprintln("  nend:     %u", cp->nend);
1121 	} else {
1122 		gprintf("consumer: %p (%s), access=r%dw%de%d", cp,
1123 		    cp->provider != NULL ? cp->provider->name : "none",
1124 		    cp->acr, cp->acw, cp->ace);
1125 		if (cp->spoiled)
1126 			printf(", spoiled=%d", cp->spoiled);
1127 		printf("\n");
1128 	}
1129 }
1130 
1131 static void
1132 db_show_geom_provider(int indent, struct g_provider *pp)
1133 {
1134 	struct g_consumer *cp;
1135 	char flags[64];
1136 
1137 	if (indent == 0) {
1138 		gprintln("provider: %s (%p)", pp->name, pp);
1139 		gprintln("  class:        %s (%p)", pp->geom->class->name,
1140 		    pp->geom->class);
1141 		gprintln("  geom:         %s (%p)", pp->geom->name, pp->geom);
1142 		gprintln("  mediasize:    %jd", (intmax_t)pp->mediasize);
1143 		gprintln("  sectorsize:   %u", pp->sectorsize);
1144 		gprintln("  stripesize:   %u", pp->stripesize);
1145 		gprintln("  stripeoffset: %u", pp->stripeoffset);
1146 		gprintln("  access:       r%dw%de%d", pp->acr, pp->acw,
1147 		    pp->ace);
1148 		gprintln("  flags:        %s (0x%04x)",
1149 		    provider_flags_to_string(pp, flags, sizeof(flags)),
1150 		    pp->flags);
1151 		gprintln("  error:        %d", pp->error);
1152 		gprintln("  nstart:       %u", pp->nstart);
1153 		gprintln("  nend:         %u", pp->nend);
1154 		if (LIST_EMPTY(&pp->consumers))
1155 			gprintln("  consumers:    none");
1156 	} else {
1157 		gprintf("provider: %s (%p), access=r%dw%de%d",
1158 		    pp->name, pp, pp->acr, pp->acw, pp->ace);
1159 		if (pp->flags != 0) {
1160 			printf(", flags=%s (0x%04x)",
1161 			    provider_flags_to_string(pp, flags, sizeof(flags)),
1162 			    pp->flags);
1163 		}
1164 		printf("\n");
1165 	}
1166 	if (!LIST_EMPTY(&pp->consumers)) {
1167 		LIST_FOREACH(cp, &pp->consumers, consumers) {
1168 			db_show_geom_consumer(indent + 2, cp);
1169 			if (db_pager_quit)
1170 				break;
1171 		}
1172 	}
1173 }
1174 
1175 static void
1176 db_show_geom_geom(int indent, struct g_geom *gp)
1177 {
1178 	struct g_provider *pp;
1179 	struct g_consumer *cp;
1180 	char flags[64];
1181 
1182 	if (indent == 0) {
1183 		gprintln("geom: %s (%p)", gp->name, gp);
1184 		gprintln("  class:     %s (%p)", gp->class->name, gp->class);
1185 		gprintln("  flags:     %s (0x%04x)",
1186 		    geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags);
1187 		gprintln("  rank:      %d", gp->rank);
1188 		if (LIST_EMPTY(&gp->provider))
1189 			gprintln("  providers: none");
1190 		if (LIST_EMPTY(&gp->consumer))
1191 			gprintln("  consumers: none");
1192 	} else {
1193 		gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank);
1194 		if (gp->flags != 0) {
1195 			printf(", flags=%s (0x%04x)",
1196 			    geom_flags_to_string(gp, flags, sizeof(flags)),
1197 			    gp->flags);
1198 		}
1199 		printf("\n");
1200 	}
1201 	if (!LIST_EMPTY(&gp->provider)) {
1202 		LIST_FOREACH(pp, &gp->provider, provider) {
1203 			db_show_geom_provider(indent + 2, pp);
1204 			if (db_pager_quit)
1205 				break;
1206 		}
1207 	}
1208 	if (!LIST_EMPTY(&gp->consumer)) {
1209 		LIST_FOREACH(cp, &gp->consumer, consumer) {
1210 			db_show_geom_consumer(indent + 2, cp);
1211 			if (db_pager_quit)
1212 				break;
1213 		}
1214 	}
1215 }
1216 
1217 static void
1218 db_show_geom_class(struct g_class *mp)
1219 {
1220 	struct g_geom *gp;
1221 
1222 	printf("class: %s (%p)\n", mp->name, mp);
1223 	LIST_FOREACH(gp, &mp->geom, geom) {
1224 		db_show_geom_geom(2, gp);
1225 		if (db_pager_quit)
1226 			break;
1227 	}
1228 }
1229 
1230 /*
1231  * Print the GEOM topology or the given object.
1232  */
1233 DB_SHOW_COMMAND(geom, db_show_geom)
1234 {
1235 	struct g_class *mp;
1236 
1237 	if (!have_addr) {
1238 		/* No address given, print the entire topology. */
1239 		LIST_FOREACH(mp, &g_classes, class) {
1240 			db_show_geom_class(mp);
1241 			printf("\n");
1242 			if (db_pager_quit)
1243 				break;
1244 		}
1245 	} else {
1246 		switch (g_valid_obj((void *)addr)) {
1247 		case 1:
1248 			db_show_geom_class((struct g_class *)addr);
1249 			break;
1250 		case 2:
1251 			db_show_geom_geom(0, (struct g_geom *)addr);
1252 			break;
1253 		case 3:
1254 			db_show_geom_consumer(0, (struct g_consumer *)addr);
1255 			break;
1256 		case 4:
1257 			db_show_geom_provider(0, (struct g_provider *)addr);
1258 			break;
1259 		default:
1260 			printf("Not a GEOM object.\n");
1261 			break;
1262 		}
1263 	}
1264 }
1265 
/* The DDB output helper macros are private to the code above. */
#undef	ADDFLAG
#undef	gprintln
#undef	gprintf
1269 
1270 #endif	/* DDB */
1271