xref: /freebsd/sys/geom/geom_io.c (revision 729362425c09cf6b362366aabc6fb547eee8035a)
1 /*-
2  * Copyright (c) 2002 Poul-Henning Kamp
3  * Copyright (c) 2002 Networks Associates Technology, Inc.
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9  * DARPA CHATS research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The names of the authors may not be used to endorse or promote
20  *    products derived from this software without specific prior written
21  *    permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $FreeBSD$
36  */
37 
38 
39 #include <sys/param.h>
40 #ifndef _KERNEL
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <signal.h>
45 #include <err.h>
46 #include <sched.h>
47 #else
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/malloc.h>
51 #include <sys/bio.h>
52 #endif
53 
54 #include <sys/errno.h>
55 #include <geom/geom.h>
56 #include <geom/geom_int.h>
57 #include <sys/devicestat.h>
58 
/*
 * The queues driven by the g_down/g_up kernel threads, plus a free list
 * of recycled bios served by g_new_bio()/g_destroy_bio().
 */
static struct g_bioq g_bio_run_down;	/* requests headed down to providers */
static struct g_bioq g_bio_run_up;	/* completions headed up to consumers */
static struct g_bioq g_bio_run_task;	/* bio_taskqueue() work items */
static struct g_bioq g_bio_idle;	/* recycled bios available for reuse */

/*
 * Bumped by g_io_deliver() on ENOMEM; makes g_io_schedule_down() back off
 * so memory pressure can ease before more I/O is started.
 */
static u_int pace;
65 
66 #include <machine/atomic.h>
67 
/* Acquire the mutex protecting bio queue "bq". */
static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}
74 
/* Release the mutex protecting bio queue "bq". */
static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}
81 
#if 0
/* Tear down a bio queue's mutex; unused — the queues live forever. */
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif
90 
91 static void
92 g_bioq_init(struct g_bioq *bq)
93 {
94 
95 	TAILQ_INIT(&bq->bio_queue);
96 	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
97 }
98 
99 static struct bio *
100 g_bioq_first(struct g_bioq *bq)
101 {
102 	struct bio *bp;
103 
104 	bp = TAILQ_FIRST(&bq->bio_queue);
105 	if (bp != NULL) {
106 		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
107 		bq->bio_queue_length--;
108 	}
109 	return (bp);
110 }
111 
112 static void
113 g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
114 {
115 
116 	g_bioq_lock(rq);
117 	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
118 	rq->bio_queue_length++;
119 	g_bioq_unlock(rq);
120 }
121 
122 struct bio *
123 g_new_bio(void)
124 {
125 	struct bio *bp;
126 
127 	g_bioq_lock(&g_bio_idle);
128 	bp = g_bioq_first(&g_bio_idle);
129 	g_bioq_unlock(&g_bio_idle);
130 	if (bp == NULL)
131 		bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO);
132 	/* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */
133 	return (bp);
134 }
135 
/*
 * Release a bio: scrub its contents and park it on the idle queue so a
 * later g_new_bio() can reuse it without hitting the allocator.
 */
void
g_destroy_bio(struct bio *bp)
{

	/* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */
	bzero(bp, sizeof *bp);
	g_bioq_enqueue_tail(bp, &g_bio_idle);
}
144 
145 struct bio *
146 g_clone_bio(struct bio *bp)
147 {
148 	struct bio *bp2;
149 
150 	bp2 = g_new_bio();
151 	if (bp2 != NULL) {
152 		bp2->bio_parent = bp;
153 		bp2->bio_cmd = bp->bio_cmd;
154 		bp2->bio_length = bp->bio_length;
155 		bp2->bio_offset = bp->bio_offset;
156 		bp2->bio_data = bp->bio_data;
157 		bp2->bio_attribute = bp->bio_attribute;
158 		bp->bio_children++;
159 	}
160 	/* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */
161 	return(bp2);
162 }
163 
164 void
165 g_io_init()
166 {
167 
168 	g_bioq_init(&g_bio_run_down);
169 	g_bioq_init(&g_bio_run_up);
170 	g_bioq_init(&g_bio_run_task);
171 	g_bioq_init(&g_bio_idle);
172 }
173 
174 int
175 g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr)
176 {
177 	struct bio *bp;
178 	int error;
179 
180 	g_trace(G_T_BIO, "bio_setattr(%s)", attr);
181 	bp = g_new_bio();
182 	bp->bio_cmd = BIO_SETATTR;
183 	bp->bio_done = NULL;
184 	bp->bio_attribute = attr;
185 	bp->bio_length = len;
186 	bp->bio_data = ptr;
187 	g_io_request(bp, cp);
188 	error = biowait(bp, "gsetattr");
189 	g_destroy_bio(bp);
190 	return (error);
191 }
192 
193 
194 int
195 g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
196 {
197 	struct bio *bp;
198 	int error;
199 
200 	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
201 	bp = g_new_bio();
202 	bp->bio_cmd = BIO_GETATTR;
203 	bp->bio_done = NULL;
204 	bp->bio_attribute = attr;
205 	bp->bio_length = *len;
206 	bp->bio_data = ptr;
207 	g_io_request(bp, cp);
208 	error = biowait(bp, "ggetattr");
209 	*len = bp->bio_completed;
210 	g_destroy_bio(bp);
211 	return (error);
212 }
213 
214 static int
215 g_io_check(struct bio *bp)
216 {
217 	struct g_consumer *cp;
218 	struct g_provider *pp;
219 
220 	cp = bp->bio_from;
221 	pp = bp->bio_to;
222 
223 	/* Fail if access counters dont allow the operation */
224 	switch(bp->bio_cmd) {
225 	case BIO_READ:
226 	case BIO_GETATTR:
227 		if (cp->acr == 0)
228 			return (EPERM);
229 		break;
230 	case BIO_WRITE:
231 	case BIO_DELETE:
232 	case BIO_SETATTR:
233 		if (cp->acw == 0)
234 			return (EPERM);
235 		break;
236 	default:
237 		return (EPERM);
238 	}
239 	/* if provider is marked for error, don't disturb. */
240 	if (pp->error)
241 		return (pp->error);
242 
243 	switch(bp->bio_cmd) {
244 	case BIO_READ:
245 	case BIO_WRITE:
246 	case BIO_DELETE:
247 		/* Reject I/O not on sector boundary */
248 		if (bp->bio_offset % pp->sectorsize)
249 			return (EINVAL);
250 		/* Reject I/O not integral sector long */
251 		if (bp->bio_length % pp->sectorsize)
252 			return (EINVAL);
253 		/* Reject requests past the end of media. */
254 		if (bp->bio_offset > pp->mediasize)
255 			return (EIO);
256 		break;
257 	default:
258 		break;
259 	}
260 	return (0);
261 }
262 
263 void
264 g_io_request(struct bio *bp, struct g_consumer *cp)
265 {
266 	struct g_provider *pp;
267 
268 	pp = cp->provider;
269 	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
270 	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
271 	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
272 	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
273 
274 	bp->bio_from = cp;
275 	bp->bio_to = pp;
276 	bp->bio_error = 0;
277 	bp->bio_completed = 0;
278 
279 	if (g_collectstats) {
280 		devstat_start_transaction_bio(cp->stat, bp);
281 		devstat_start_transaction_bio(pp->stat, bp);
282 	}
283 	cp->nstart++;
284 	pp->nstart++;
285 
286 	/* Pass it on down. */
287 	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
288 	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
289 	g_bioq_enqueue_tail(bp, &g_bio_run_down);
290 	wakeup(&g_wait_down);
291 }
292 
293 void
294 g_io_deliver(struct bio *bp, int error)
295 {
296 	struct g_consumer *cp;
297 	struct g_provider *pp;
298 
299 	cp = bp->bio_from;
300 	pp = bp->bio_to;
301 	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
302 	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
303 	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
304 	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
305 
306 	g_trace(G_T_BIO,
307 "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
308 	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
309 	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
310 
311 	bp->bio_bcount = bp->bio_length;
312 	if (g_collectstats) {
313 		bp->bio_resid = bp->bio_bcount - bp->bio_completed;
314 		devstat_end_transaction_bio(cp->stat, bp);
315 		devstat_end_transaction_bio(pp->stat, bp);
316 	}
317 	cp->nend++;
318 	pp->nend++;
319 
320 	if (error == ENOMEM) {
321 		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
322 		g_io_request(bp, cp);
323 		pace++;
324 		return;
325 	}
326 	bp->bio_error = error;
327 	g_bioq_enqueue_tail(bp, &g_bio_run_up);
328 	wakeup(&g_wait_up);
329 }
330 
/*
 * Main loop of the "g_down" thread: pull bios off the down queue, vet
 * them with g_io_check(), clip data transfers to the media size, and
 * dispatch each to the destination geom's start routine.  Returns (via
 * break) when "pace" is set so the caller can back off after ENOMEM;
 * presumably the caller re-enters this loop — confirm against the
 * kernel thread that invokes it.
 */
void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;
	struct mtx mymutex;

	/* On-stack mutex held around each start() call. */
	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xdown", MTX_DEF, 0);

	for(;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			/* Queue empty: PDROP makes msleep release the
			 * queue lock; wake on g_wait_down or timeout. */
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "g_down", hz/10);
			continue;
		}
		g_bioq_unlock(&g_bio_run_down);
		error = g_io_check(bp);
		if (error) {
			/* Invalid or unauthorized request: fail it now. */
			g_io_deliver(bp, error);
			continue;
		}
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of providers media. */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
			}
			/* Deliver zero length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				continue;
			}
			break;
		default:
			break;
		}
		mtx_lock(&mymutex);
		bp->bio_to->geom->start(bp);
		mtx_unlock(&mymutex);
		/* ENOMEM was seen upstream: leave the loop to ease memory
		 * pressure before dispatching more I/O. */
		if (pace) {
			pace--;
			break;
		}
	}
}
384 
/*
 * Arrange for func(bp, arg) to be called in the context of the g_up
 * thread by placing "bp" on the task queue and waking the thread.
 */
void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}
400 
401 
402 void
403 g_io_schedule_up(struct thread *tp __unused)
404 {
405 	struct bio *bp;
406 	struct mtx mymutex;
407 
408 	bzero(&mymutex, sizeof mymutex);
409 	mtx_init(&mymutex, "g_xup", MTX_DEF, 0);
410 	for(;;) {
411 		g_bioq_lock(&g_bio_run_up);
412 		bp = g_bioq_first(&g_bio_run_task);
413 		if (bp != NULL) {
414 			g_bioq_unlock(&g_bio_run_up);
415 			mtx_lock(&mymutex);
416 			bp->bio_task(bp, bp->bio_task_arg);
417 			mtx_unlock(&mymutex);
418 			continue;
419 		}
420 		bp = g_bioq_first(&g_bio_run_up);
421 		if (bp != NULL) {
422 			g_bioq_unlock(&g_bio_run_up);
423 			mtx_lock(&mymutex);
424 			biodone(bp);
425 			mtx_unlock(&mymutex);
426 			continue;
427 		}
428 		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
429 		    PRIBIO | PDROP, "g_up", hz/10);
430 	}
431 }
432 
433 void *
434 g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
435 {
436 	struct bio *bp;
437 	void *ptr;
438 	int errorc;
439 
440 	bp = g_new_bio();
441 	bp->bio_cmd = BIO_READ;
442 	bp->bio_done = NULL;
443 	bp->bio_offset = offset;
444 	bp->bio_length = length;
445 	ptr = g_malloc(length, M_WAITOK);
446 	bp->bio_data = ptr;
447 	g_io_request(bp, cp);
448 	errorc = biowait(bp, "gread");
449 	if (error != NULL)
450 		*error = errorc;
451 	g_destroy_bio(bp);
452 	if (errorc) {
453 		g_free(ptr);
454 		ptr = NULL;
455 	}
456 	return (ptr);
457 }
458 
459 int
460 g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
461 {
462 	struct bio *bp;
463 	int error;
464 
465 	bp = g_new_bio();
466 	bp->bio_cmd = BIO_WRITE;
467 	bp->bio_done = NULL;
468 	bp->bio_offset = offset;
469 	bp->bio_length = length;
470 	bp->bio_data = ptr;
471 	g_io_request(bp, cp);
472 	error = biowait(bp, "gwrite");
473 	g_destroy_bio(bp);
474 	return (error);
475 }
476