xref: /freebsd/sys/geom/geom_io.c (revision a35d88931c87cfe6bd38f01d7bad22140b3b38f3)
1 /*-
2  * Copyright (c) 2002 Poul-Henning Kamp
3  * Copyright (c) 2002 Networks Associates Technology, Inc.
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9  * DARPA CHATS research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The names of the authors may not be used to endorse or promote
20  *    products derived from this software without specific prior written
21  *    permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/ktr.h>
#include <sys/errno.h>
#include <sys/devicestat.h>

#include <machine/atomic.h>

#include <vm/uma.h>

#include <geom/geom.h>
#include <geom/geom_int.h>
52 
53 static struct g_bioq g_bio_run_down;
54 static struct g_bioq g_bio_run_up;
55 static struct g_bioq g_bio_run_task;
56 
57 static u_int pace;
58 static uma_zone_t	biozone;
59 
60 #include <machine/atomic.h>
61 
/*
 * Acquire the mutex protecting a bio queue.
 */
static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}
68 
/*
 * Release the mutex protecting a bio queue.
 */
static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}
75 
#if 0
/*
 * Tear down a bio queue's mutex.  Compiled out: the three queues in
 * this file are static and never destroyed.
 */
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif
84 
/*
 * Initialize a bio queue: an empty tail queue plus its protecting mutex.
 */
static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}
92 
93 static struct bio *
94 g_bioq_first(struct g_bioq *bq)
95 {
96 	struct bio *bp;
97 
98 	bp = TAILQ_FIRST(&bq->bio_queue);
99 	if (bp != NULL) {
100 		KASSERT((bp->bio_flags & BIO_ONQUEUE),
101 		    ("Bio not on queue bp=%p target %p", bp, bq));
102 		bp->bio_flags &= ~BIO_ONQUEUE;
103 		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
104 		bq->bio_queue_length--;
105 	}
106 	return (bp);
107 }
108 
109 struct bio *
110 g_new_bio(void)
111 {
112 	struct bio *bp;
113 
114 	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
115 	return (bp);
116 }
117 
118 struct bio *
119 g_alloc_bio(void)
120 {
121 	struct bio *bp;
122 
123 	bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
124 	return (bp);
125 }
126 
/*
 * Return a bio to the UMA zone it was allocated from.
 */
void
g_destroy_bio(struct bio *bp)
{

	uma_zfree(biozone, bp);
}
133 
134 struct bio *
135 g_clone_bio(struct bio *bp)
136 {
137 	struct bio *bp2;
138 
139 	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
140 	if (bp2 != NULL) {
141 		bp2->bio_parent = bp;
142 		bp2->bio_cmd = bp->bio_cmd;
143 		bp2->bio_length = bp->bio_length;
144 		bp2->bio_offset = bp->bio_offset;
145 		bp2->bio_data = bp->bio_data;
146 		bp2->bio_attribute = bp->bio_attribute;
147 		bp->bio_children++;
148 	}
149 	return(bp2);
150 }
151 
152 void
153 g_io_init()
154 {
155 
156 	g_bioq_init(&g_bio_run_down);
157 	g_bioq_init(&g_bio_run_up);
158 	g_bioq_init(&g_bio_run_task);
159 	biozone = uma_zcreate("g_bio", sizeof (struct bio),
160 	    NULL, NULL,
161 	    NULL, NULL,
162 	    0, 0);
163 }
164 
165 int
166 g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
167 {
168 	struct bio *bp;
169 	int error;
170 
171 	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
172 	bp = g_alloc_bio();
173 	bp->bio_cmd = BIO_GETATTR;
174 	bp->bio_done = NULL;
175 	bp->bio_attribute = attr;
176 	bp->bio_length = *len;
177 	bp->bio_data = ptr;
178 	g_io_request(bp, cp);
179 	error = biowait(bp, "ggetattr");
180 	*len = bp->bio_completed;
181 	g_destroy_bio(bp);
182 	return (error);
183 }
184 
185 static int
186 g_io_check(struct bio *bp)
187 {
188 	struct g_consumer *cp;
189 	struct g_provider *pp;
190 
191 	cp = bp->bio_from;
192 	pp = bp->bio_to;
193 
194 	/* Fail if access counters dont allow the operation */
195 	switch(bp->bio_cmd) {
196 	case BIO_READ:
197 	case BIO_GETATTR:
198 		if (cp->acr == 0)
199 			return (EPERM);
200 		break;
201 	case BIO_WRITE:
202 	case BIO_DELETE:
203 		if (cp->acw == 0)
204 			return (EPERM);
205 		break;
206 	default:
207 		return (EPERM);
208 	}
209 	/* if provider is marked for error, don't disturb. */
210 	if (pp->error)
211 		return (pp->error);
212 
213 	switch(bp->bio_cmd) {
214 	case BIO_READ:
215 	case BIO_WRITE:
216 	case BIO_DELETE:
217 		/* Zero sectorsize is a probably lack of media */
218 		if (pp->sectorsize == 0)
219 			return (ENXIO);
220 		/* Reject I/O not on sector boundary */
221 		if (bp->bio_offset % pp->sectorsize)
222 			return (EINVAL);
223 		/* Reject I/O not integral sector long */
224 		if (bp->bio_length % pp->sectorsize)
225 			return (EINVAL);
226 		/* Reject requests before or past the end of media. */
227 		if (bp->bio_offset < 0)
228 			return (EIO);
229 		if (bp->bio_offset > pp->mediasize)
230 			return (EIO);
231 		break;
232 	default:
233 		break;
234 	}
235 	return (0);
236 }
237 
/*
 * Submit a bio through consumer cp to its attached provider and queue
 * it for the g_down thread.  The request is asynchronous; completion
 * arrives later via g_io_deliver().
 */
void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));

	/* Data transfers must be aligned to the provider's sector size. */
	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) {
		KASSERT(bp->bio_offset % cp->provider->sectorsize == 0,
		    ("wrong offset %jd for sectorsize %u",
		    bp->bio_offset, cp->provider->sectorsize));
		KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
		    ("wrong length %jd for sectorsize %u",
		    bp->bio_length, cp->provider->sectorsize));
	}

	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));
	bp->bio_flags |= BIO_ONQUEUE;

	/* Timestamp the request for devstat latency accounting. */
	binuptime(&bp->bio_t0);
	/*
	 * g_collectstats bits as used below: 1 = provider devstat,
	 * 2 = consumer devstat, 4 = take the down-queue lock before
	 * (rather than after) the devstat updates.
	 */
	if (g_collectstats & 4)
		g_bioq_lock(&g_bio_run_down);
	if (g_collectstats & 1)
		devstat_start_transaction(pp->stat, &bp->bio_t0);
	if (g_collectstats & 2)
		devstat_start_transaction(cp->stat, &bp->bio_t0);

	if (!(g_collectstats & 4))
		g_bioq_lock(&g_bio_run_down);
	/* nstart counters and the queue itself are protected by the lock. */
	pp->nstart++;
	cp->nstart++;
	TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
	g_bio_run_down.bio_queue_length++;
	g_bioq_unlock(&g_bio_run_down);

	/* Pass it on down. */
	wakeup(&g_wait_down);
}
289 
/*
 * Report completion of a bio back towards its originator: queue it on
 * the "up" queue for the g_up thread to biodone().  An ENOMEM error is
 * special-cased: the request is reissued and the g_down thread is asked
 * to pace itself.
 */
void
g_io_deliver(struct bio *bp, int error)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	pp = bp->bio_to;
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
	cp = bp->bio_from;
	if (cp == NULL) {
		/* No consumer: complete directly via the done callback. */
		bp->bio_error = error;
		bp->bio_done(bp);
		return;
	}
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
	KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0"));
	KASSERT(bp->bio_completed <= bp->bio_length,
	    ("bio_completed can't be greater than bio_length"));

	g_trace(G_T_BIO,
"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));

	/*
	 * XXX: the next two lines don't belong here.
	 */
	bp->bio_bcount = bp->bio_length;
	bp->bio_resid = bp->bio_bcount - bp->bio_completed;

	/*
	 * g_collectstats bits as used below: 1 = provider devstat,
	 * 2 = consumer devstat, 4 = take the up-queue lock before
	 * (rather than after) the devstat updates.
	 */
	if (g_collectstats & 4)
		g_bioq_lock(&g_bio_run_up);
	if (g_collectstats & 1)
		devstat_end_transaction_bio(pp->stat, bp);
	if (g_collectstats & 2)
		devstat_end_transaction_bio(cp->stat, bp);
	if (!(g_collectstats & 4))
		g_bioq_lock(&g_bio_run_up);
	cp->nend++;
	pp->nend++;
	if (error != ENOMEM) {
		/* Normal path: queue for the g_up thread and wake it. */
		bp->bio_error = error;
		TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
		bp->bio_flags |= BIO_ONQUEUE;
		g_bio_run_up.bio_queue_length++;
		g_bioq_unlock(&g_bio_run_up);
		wakeup(&g_wait_up);
		return;
	}
	g_bioq_unlock(&g_bio_run_up);

	/*
	 * ENOMEM: reset the bio's completion state, resubmit it, and
	 * bump "pace" so g_io_schedule_down() throttles itself.
	 */
	if (bootverbose)
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
	bp->bio_children = 0;
	bp->bio_inbed = 0;
	g_io_request(bp, cp);
	pace++;
	return;
}
354 
/*
 * Main loop of the g_down thread: drain the down queue, validate each
 * request with g_io_check(), truncate transfers that run past the end
 * of the media, and hand the bio to the provider geom's start routine.
 * Never returns.
 */
void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;
#ifdef WITNESS
	/*
	 * Dummy mutex held across start() calls, apparently so WITNESS
	 * can check lock ordering against the start routines —
	 * NOTE(review): confirm intent.
	 */
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF);
#endif

	for(;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			CTR0(KTR_GEOM, "g_down going to sleep");
			/* PDROP releases the queue lock while sleeping. */
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "-", hz/10);
			continue;
		}
		CTR0(KTR_GEOM, "g_down has work to do");
		g_bioq_unlock(&g_bio_run_down);
		if (pace > 0) {
			/*
			 * A recent ENOMEM deliver asked us to slow down;
			 * &error is just a unique channel nobody wakes,
			 * so this is a plain timeout sleep.
			 */
			CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace);
			msleep(&error, NULL, PRIBIO, "g_down", hz/10);
			pace--;
		}
		error = g_io_check(bp);
		if (error) {
			CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
			    "%s returned %d", bp, bp->bio_to->name, error);
			g_io_deliver(bp, error);
			continue;
		}
		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
		    bp->bio_to->name);
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of providers media. */
			/*
			 * XXX: What if we truncate because of offset being
			 * bad, not length?
			 */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
				/*
				 * NOTE(review): excess is always > 0 here,
				 * and it is off_t yet logged with %d —
				 * possible truncation in the trace record.
				 */
				if (excess > 0)
					CTR3(KTR_GEOM, "g_down truncated bio "
					    "%p provider %s by %d", bp,
					    bp->bio_to->name, excess);
			}
			/* Deliver zero length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				CTR2(KTR_GEOM, "g_down terminated 0-length "
				    "bp %p provider %s", bp, bp->bio_to->name);
				continue;
			}
			break;
		default:
			break;
		}
#ifdef WITNESS
		mtx_lock(&mymutex);
#endif
		CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
		    "len %ld", bp, bp->bio_to->name, bp->bio_offset,
		    bp->bio_length);
		bp->bio_to->geom->start(bp);
#ifdef WITNESS
		mtx_unlock(&mymutex);
#endif
	}
}
434 
/*
 * Schedule func(arg) to be run by the g_up thread on behalf of bp.
 * The task queue is drained before the regular up queue in
 * g_io_schedule_up(), so tasks take priority over completions.
 */
void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p target taskq", bp));
	bp->bio_flags |= BIO_ONQUEUE;
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}
453 
454 
/*
 * Main loop of the g_up thread: run queued tasks first, then complete
 * finished bios via biodone(), sleeping when both queues are empty.
 * Never returns.
 */
void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;
#ifdef WITNESS
	/*
	 * Dummy mutex held across task/biodone callbacks, apparently so
	 * WITNESS can check lock ordering — NOTE(review): confirm intent.
	 */
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xup", NULL, MTX_DEF);
#endif
	for(;;) {
		/* Both queues share the up-queue lock. */
		g_bioq_lock(&g_bio_run_up);
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
#ifdef WITNESS
			mtx_lock(&mymutex);
#endif
			CTR1(KTR_GEOM, "g_up processing task bp %p", bp);
			bp->bio_task(bp->bio_task_arg);
#ifdef WITNESS
			mtx_unlock(&mymutex);
#endif
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
#ifdef WITNESS
			mtx_lock(&mymutex);
#endif
			CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off "
			    "%ld len %ld", bp, bp->bio_to->name,
			    bp->bio_offset, bp->bio_length);
			biodone(bp);
#ifdef WITNESS
			mtx_unlock(&mymutex);
#endif
			continue;
		}
		CTR0(KTR_GEOM, "g_up going to sleep");
		/* PDROP releases the queue lock while sleeping. */
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "-", hz/10);
	}
}
500 
501 void *
502 g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
503 {
504 	struct bio *bp;
505 	void *ptr;
506 	int errorc;
507 
508 	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
509 	    length <= MAXPHYS, ("g_read_data(): invalid length %jd",
510 	    (intmax_t)length));
511 
512 	bp = g_alloc_bio();
513 	bp->bio_cmd = BIO_READ;
514 	bp->bio_done = NULL;
515 	bp->bio_offset = offset;
516 	bp->bio_length = length;
517 	ptr = g_malloc(length, M_WAITOK);
518 	bp->bio_data = ptr;
519 	g_io_request(bp, cp);
520 	errorc = biowait(bp, "gread");
521 	if (error != NULL)
522 		*error = errorc;
523 	g_destroy_bio(bp);
524 	if (errorc) {
525 		g_free(ptr);
526 		ptr = NULL;
527 	}
528 	return (ptr);
529 }
530 
531 int
532 g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
533 {
534 	struct bio *bp;
535 	int error;
536 
537 	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
538 	    length <= MAXPHYS, ("g_write_data(): invalid length %jd",
539 	    (intmax_t)length));
540 
541 	bp = g_alloc_bio();
542 	bp->bio_cmd = BIO_WRITE;
543 	bp->bio_done = NULL;
544 	bp->bio_offset = offset;
545 	bp->bio_length = length;
546 	bp->bio_data = ptr;
547 	g_io_request(bp, cp);
548 	error = biowait(bp, "gwrite");
549 	g_destroy_bio(bp);
550 	return (error);
551 }
552 
553 void
554 g_print_bio(struct bio *bp)
555 {
556 	const char *pname, *cmd = NULL;
557 
558 	if (bp->bio_to != NULL)
559 		pname = bp->bio_to->name;
560 	else
561 		pname = "[unknown]";
562 
563 	switch (bp->bio_cmd) {
564 	case BIO_GETATTR:
565 		cmd = "GETATTR";
566 		printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute);
567 		return;
568 	case BIO_READ:
569 		cmd = "READ";
570 	case BIO_WRITE:
571 		if (cmd == NULL)
572 			cmd = "WRITE";
573 	case BIO_DELETE:
574 		if (cmd == NULL)
575 			cmd = "DELETE";
576 		printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd,
577 		    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
578 		return;
579 	default:
580 		cmd = "UNKNOWN";
581 		printf("%s[%s()]", pname, cmd);
582 		return;
583 	}
584 	/* NOTREACHED */
585 }
586