xref: /freebsd/sys/geom/geom_io.c (revision f9218d3d4fd34f082473b3a021c6d4d109fb47cf)
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */


#include <sys/param.h>
#include <sys/stdint.h>
#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <err.h>
#include <sched.h>
#else
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#endif

#include <sys/errno.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <geom/geom_stats.h>
#include <machine/atomic.h>

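/*
 * The "down" queue carries requests from consumers towards providers,
 * the "up" queue carries completed requests back up, the "task" queue
 * holds bios whose bio_task function the g_up thread will run, and the
 * "idle" queue recycles freed struct bio's.  The "pace" counter briefly
 * throttles the down path after an ENOMEM completion.
 */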
static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;
static struct g_bioq g_bio_idle;

static u_int pace;

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

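/*
 * Remove and return the first bio on a queue, or NULL if the queue is
 * empty.  The caller must hold the queue lock.
 */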
static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

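/*
 * Append a bio to the tail of a queue, taking and dropping the queue
 * lock around the insertion.
 */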
static void
g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
{

	g_bioq_lock(rq);
	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
	rq->bio_queue_length++;
	g_bioq_unlock(rq);
}

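/*
 * Bio allocation.  Freed bios are zeroed and parked on the idle queue
 * rather than returned to malloc(9); g_new_bio() reuses one of those
 * before falling back to g_malloc() with M_NOWAIT, so it can return
 * NULL under memory pressure and callers must be prepared for that.
 */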
struct bio *
g_new_bio(void)
{
	struct bio *bp;

	g_bioq_lock(&g_bio_idle);
	bp = g_bioq_first(&g_bio_idle);
	g_bioq_unlock(&g_bio_idle);
	if (bp == NULL)
		bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO);
	/* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{

	/* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */
	bzero(bp, sizeof *bp);
	g_bioq_enqueue_tail(bp, &g_bio_idle);
}

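/*
 * Clone a bio for passing a request on to the next class down the
 * stack.  The clone points back at its parent and inherits the command,
 * offset, length, data pointer and attribute; the parent's bio_children
 * count is bumped so completions can be matched up.  Returns NULL if no
 * bio could be allocated.
 */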
struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = g_new_bio();
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;
		bp2->bio_attribute = bp->bio_attribute;
		bp->bio_children++;
	}
	/* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */
	return (bp2);
}

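/*
 * Initialize the four bio queues; called once while GEOM is being set
 * up.
 */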
void
g_io_init(void)
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	g_bioq_init(&g_bio_idle);
}

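/*
 * Synchronous attribute transport: g_io_setattr() pushes "len" bytes at
 * "ptr" to the provider under the name "attr", g_io_getattr() fills the
 * caller's buffer and returns the completed length in "*len".  Both
 * build a bio, send it with g_io_request() and sleep in biowait() until
 * it comes back up.
 *
 * Illustrative sketch (hypothetical caller, not from this file): a
 * class asking a disk provider for its firmware geometry might do
 *
 *	int len = sizeof(u_int);
 *	u_int fwsectors;
 *
 *	error = g_io_getattr("GEOM::fwsectors", cp, &len, &fwsectors);
 *
 * where the attribute name is whatever the provider's class answers to.
 */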
int
g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_setattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_SETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gsetattr");
	g_destroy_bio(bp);
	return (error);
}


int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}

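/*
 * Sanity checks applied by the g_down thread before a request is handed
 * to the provider's geom: the consumer must hold the right kind of
 * access (acr for reads and getattr, acw for writes, deletes and
 * setattr), the provider must not be in an error state, and
 * read/write/delete requests must be sector aligned, a whole number of
 * sectors long and start within the media.
 */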
static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if access counters don't allow the operation */
	switch(bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SETATTR:
		if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If the provider is marked for error, don't disturb it. */
	if (pp->error)
		return (pp->error);

	switch(bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Reject I/O not on a sector boundary */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not a whole number of sectors long */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests past the end of the media. */
		if (bp->bio_offset > pp->mediasize)
			return (EIO);
		break;
	default:
		break;
	}
	return (0);
}

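/*
 * Entry point for the down path.  The bio is stamped with its consumer
 * and provider, statistics collection is started, and the request is
 * queued on g_bio_run_down; the g_down thread is then woken up to push
 * it into the provider's geom.
 */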
void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;
	struct bintime bt;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	if (g_collectstats) {
		binuptime(&bt);
		bp->bio_t0 = bt;
		if (cp->stat->nop == cp->stat->nend)
			cp->stat->wentbusy = bt; /* Consumer is idle */
		if (pp->stat->nop == pp->stat->nend)
			pp->stat->wentbusy = bt; /* Provider is idle */
	}
	cp->stat->nop++;
	pp->stat->nop++;

	/* Pass it on down. */
	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
	g_bioq_enqueue_tail(bp, &g_bio_run_down);
	wakeup(&g_wait_down);
}

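/*
 * Completion path.  Called by the provider's geom (or by g_down when a
 * request fails its sanity checks) to report the outcome of a bio.
 * Per-operation statistics and busy time are accounted, an ENOMEM
 * result causes the request to be resubmitted and the down path to be
 * paced, and everything else is queued on g_bio_run_up for the g_up
 * thread to biodone().
 */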
void
g_io_deliver(struct bio *bp, int error)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	struct bintime t1, dt;
	int idx;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	cp = bp->bio_from;
	pp = bp->bio_to;
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));

	g_trace(G_T_BIO,
"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	if (g_collectstats) {
		switch (bp->bio_cmd) {
		case BIO_READ:    idx =  G_STAT_IDX_READ;    break;
		case BIO_WRITE:   idx =  G_STAT_IDX_WRITE;   break;
		case BIO_DELETE:  idx =  G_STAT_IDX_DELETE;  break;
		case BIO_GETATTR: idx =  -1; break;
		case BIO_SETATTR: idx =  -1; break;
		default:
			panic("unknown bio_cmd in g_io_deliver");
			break;
		}
		binuptime(&t1);
		/* Raise the "inconsistent" flag for userland */
		atomic_add_acq_int(&cp->stat->seq0, 1);
		atomic_add_acq_int(&pp->stat->seq0, 1);
		if (idx >= 0) {
			/* Account for the service time */
			dt = t1;
			bintime_sub(&dt, &bp->bio_t0);
			bintime_add(&cp->stat->ops[idx].dt, &dt);
			bintime_add(&pp->stat->ops[idx].dt, &dt);
			/* ... and the metrics */
			pp->stat->ops[idx].nbyte += bp->bio_completed;
			cp->stat->ops[idx].nbyte += bp->bio_completed;
			pp->stat->ops[idx].nop++;
			cp->stat->ops[idx].nop++;
			/* ... and any errors */
			if (error == ENOMEM) {
				cp->stat->ops[idx].nmem++;
				pp->stat->ops[idx].nmem++;
			} else if (error != 0) {
				cp->stat->ops[idx].nerr++;
				pp->stat->ops[idx].nerr++;
			}
		}
		/* Account for busy time on the consumer */
		dt = t1;
		bintime_sub(&dt, &cp->stat->wentbusy);
		bintime_add(&cp->stat->bt, &dt);
		cp->stat->wentbusy = t1;
		/* Account for busy time on the provider */
		dt = t1;
		bintime_sub(&dt, &pp->stat->wentbusy);
		bintime_add(&pp->stat->bt, &dt);
		pp->stat->wentbusy = t1;
		/* Mark the structures as consistent again */
		atomic_add_acq_int(&cp->stat->seq1, 1);
		atomic_add_acq_int(&pp->stat->seq1, 1);
	}
	cp->stat->nend++;
	pp->stat->nend++;

	if (error == ENOMEM) {
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
		g_io_request(bp, cp);
		pace++;
		return;
	}
	bp->bio_error = error;
	g_bioq_enqueue_tail(bp, &g_bio_run_up);
	wakeup(&g_wait_up);
}

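/*
 * Main loop of the g_down thread.  Requests are pulled off
 * g_bio_run_down, checked with g_io_check(), clipped to the end of the
 * provider's media and then handed to the provider's geom via its start
 * method.  When "pace" has been raised by an ENOMEM completion the loop
 * returns to its caller so the down path is slowed for a while.
 */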
void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF);

	for(;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "g_down", hz/10);
			continue;
		}
		g_bioq_unlock(&g_bio_run_down);
		error = g_io_check(bp);
		if (error) {
			g_io_deliver(bp, error);
			continue;
		}
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Clip requests at the end of the provider's media. */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
			}
			/* Deliver zero length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				continue;
			}
			break;
		default:
			break;
		}
		mtx_lock(&mymutex);
		bp->bio_to->geom->start(bp);
		mtx_unlock(&mymutex);
		if (pace) {
			pace--;
			break;
		}
	}
}

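/*
 * Hand a bio to the g_up thread so that "func(bp, arg)" is run in that
 * thread's context instead of the normal biodone() completion.
 */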
void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}


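/*
 * Main loop of the g_up thread.  Deferred bio tasks are run first;
 * otherwise completed requests are taken off g_bio_run_up and finished
 * with biodone().
 */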
void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xup", NULL, MTX_DEF);
	for(;;) {
		g_bioq_lock(&g_bio_run_up);
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			mtx_lock(&mymutex);
			bp->bio_task(bp, bp->bio_task_arg);
			mtx_unlock(&mymutex);
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			mtx_lock(&mymutex);
			biodone(bp);
			mtx_unlock(&mymutex);
			continue;
		}
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "g_up", hz/10);
	}
}

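/*
 * Synchronous read convenience function.  A buffer of "length" bytes is
 * allocated with g_malloc(M_WAITOK), filled from the provider and
 * returned to the caller, who becomes responsible for g_free()ing it.
 * On failure NULL is returned and, if "error" is non-NULL, the error
 * code is stored there.
 *
 * Illustrative sketch (hypothetical caller, not from this file):
 * tasting a provider by reading its last sector might look like
 *
 *	pp = cp->provider;
 *	buf = g_read_data(cp, pp->mediasize - pp->sectorsize,
 *	    pp->sectorsize, &error);
 *	if (buf == NULL)
 *		return (error);
 *
 * assuming the consumer already has read access opened.
 */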
void *
g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
{
	struct bio *bp;
	void *ptr;
	int errorc;

	bp = g_new_bio();
	if (bp == NULL) {
		if (error != NULL)
			*error = ENOMEM;
		return (NULL);
	}
	bp->bio_cmd = BIO_READ;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	ptr = g_malloc(length, M_WAITOK);
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	errorc = biowait(bp, "gread");
	if (error != NULL)
		*error = errorc;
	g_destroy_bio(bp);
	if (errorc) {
		g_free(ptr);
		ptr = NULL;
	}
	return (ptr);
}

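/*
 * Synchronous write convenience function: write "length" bytes from
 * "ptr" at "offset" and wait for the request to complete, returning the
 * error code from the provider.
 */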
int
g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
{
	struct bio *bp;
	int error;

	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");
	g_destroy_bio(bp);
	return (error);
}
527