xref: /illumos-gate/usr/src/uts/common/fs/fdbuffer.c (revision 2e837a72011f54762249b6612c2a64f171efcd43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1998,2001 by Sun Microsystems, Inc.
24  * All rights reserved.
25  *
26  */
27 
28 #include <sys/types.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #include <sys/systm.h>
32 #include <sys/debug.h>
33 #include <sys/ddi.h>
34 
35 #include <sys/fdbuffer.h>
36 
/*
 * Debug tracing.
 *
 * On DEBUG builds DEBUGF(lvl, args) passes the parenthesized argument list
 * "args" to cmn_err() when any bit of "lvl" is set in fdb_debug.  Nothing in
 * this file assigns fdb_debug, so tracing stays off unless the variable is
 * changed externally.  On non-DEBUG builds DEBUGF() is a no-op.
 *
 * Both forms expand to a single statement that takes a trailing semicolon,
 * so DEBUGF() can safely be the unbraced body of an if/else.
 */
#ifdef DEBUG
static int fdb_debug;
#define	FDB_D_CREATE	001
#define	FDB_D_ALLOC	002
#define	FDB_D_IO	004
#define	FDB_D_ASYNC	010
#define	DEBUGF(lvl, args)	\
	do { if ((lvl) & fdb_debug) cmn_err args; } while (0)
#else
#define	DEBUGF(level, args)	((void)0)
#endif
47 static struct kmem_cache *fdb_cache;
48 static void fdb_zero_holes(fdbuffer_t *fdb);
49 
50 /* ARGSUSED */
51 static int
52 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags)
53 {
54 	fdbuffer_t *fdb = buf;
55 
56 	mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL);
57 
58 	return (0);
59 }
60 
61 /* ARGSUSED */
62 static void
63 fdb_cache_destructor(void *buf, void *cdrarg)
64 {
65 	fdbuffer_t *fdb = buf;
66 
67 	mutex_destroy(&fdb->fd_mutex);
68 }
69 
70 void
71 fdb_init()
72 {
73 	fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t),
74 	    0, fdb_cache_constructor, fdb_cache_destructor,
75 	    NULL, NULL, NULL, 0);
76 }
77 
78 static void
79 fdb_prepare(fdbuffer_t *fdb)
80 {
81 	fdb->fd_holes = NULL;
82 	fdb->fd_iofunc = NULL;
83 	fdb->fd_iargp = NULL;
84 	fdb->fd_parentbp = NULL;
85 	fdb->fd_resid = 0;
86 	fdb->fd_iocount = 0;
87 	fdb->fd_iodispatch = 0;
88 	fdb->fd_err = 0;
89 }
90 
91 fdbuffer_t *
92 fdb_page_create(page_t *pp, size_t len, int flags)
93 {
94 	fdbuffer_t *fdb;
95 
96 	DEBUGF(FDB_D_CREATE, (CE_NOTE,
97 	    "?fdb_page_create: pp: %p len: %lux flags: %x",
98 	    (void *)pp, len, flags));
99 
100 	ASSERT(flags & (FDB_READ|FDB_WRITE));
101 
102 	fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
103 
104 	fdb_prepare(fdb);
105 
106 	fdb->fd_type = FDB_PAGEIO;
107 	fdb->fd_len = len;
108 	fdb->fd_state = flags;
109 	fdb->fd_pages = pp;
110 
111 	return (fdb);
112 }
113 
114 fdbuffer_t *
115 fdb_addr_create(
116 	caddr_t addr,
117 	size_t len,
118 	int flags,
119 	page_t **pplist,
120 	struct proc *procp)
121 {
122 	fdbuffer_t *fdb;
123 
124 	DEBUGF(FDB_D_CREATE, (CE_NOTE,
125 	    "?fdb_addr_create: addr: %p len: %lux flags: %x",
126 	    (void *)addr, len, flags));
127 
128 	ASSERT(flags & (FDB_READ|FDB_WRITE));
129 
130 	fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
131 
132 	fdb_prepare(fdb);
133 
134 	fdb->fd_type = FDB_VADDR;
135 	fdb->fd_len = len;
136 	fdb->fd_state = flags;
137 	fdb->fd_addr = addr;
138 	fdb->fd_shadow = pplist;
139 	fdb->fd_procp = procp;
140 
141 	return (fdb);
142 }
143 
144 void
145 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag)
146 {
147 	ASSERT(fdb);
148 	ASSERT(iofunc);
149 	ASSERT((flag & ~FDB_ICALLBACK) == 0);
150 
151 	fdb->fd_iofunc = iofunc;
152 	fdb->fd_iargp = ioargp;
153 
154 	mutex_enter(&fdb->fd_mutex);
155 
156 	if (flag & FDB_ICALLBACK)
157 		fdb->fd_state |= FDB_ICALLBACK;
158 
159 	fdb->fd_state |= FDB_ASYNC;
160 
161 	mutex_exit(&fdb->fd_mutex);
162 }
163 
164 int
165 fdb_get_error(fdbuffer_t *fdb)
166 {
167 	return (fdb->fd_err);
168 }
169 
170 void
171 fdb_free(fdbuffer_t *fdb)
172 {
173 	fdb_holes_t *fdh, *fdhp;
174 
175 	DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x",
176 	    (void *)fdb, fdb->fd_state));
177 
178 	ASSERT(fdb);
179 	ASSERT(fdb->fd_iodispatch == 0);
180 
181 	if (fdb->fd_state & FDB_ZEROHOLE) {
182 		fdb_zero_holes(fdb);
183 	}
184 
185 	for (fdh = fdb->fd_holes; fdh; ) {
186 		fdhp = fdh;
187 		fdh = fdh->next_hole;
188 		kmem_free(fdhp, sizeof (fdb_holes_t));
189 	}
190 
191 	if (fdb->fd_parentbp != NULL) {
192 		switch (fdb->fd_type) {
193 		case FDB_PAGEIO:
194 			pageio_done(fdb->fd_parentbp);
195 			break;
196 		case FDB_VADDR:
197 			kmem_free(fdb->fd_parentbp, sizeof (struct buf));
198 			break;
199 		default:
200 			cmn_err(CE_CONT, "?fdb_free: Unknown fdb type.");
201 			break;
202 		}
203 	}
204 
205 	kmem_cache_free(fdb_cache, fdb);
206 
207 }
208 
209 /*
210  * The offset should be from the begining of the buffer
211  * it has nothing to do with file offset. This fact should be
212  * reflected in the caller of this routine.
213  */
214 
215 void
216 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len)
217 {
218 	fdb_holes_t *this_hole;
219 
220 	ASSERT(fdb);
221 	ASSERT(off < fdb->fd_len);
222 
223 	DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx",
224 	    off, len));
225 
226 	this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP);
227 	this_hole->off = off;
228 	this_hole->len = len;
229 
230 	if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) {
231 		this_hole->next_hole = fdb->fd_holes;
232 		fdb->fd_holes = this_hole;
233 	} else {
234 		fdb_holes_t *fdhp = fdb->fd_holes;
235 
236 		while (fdhp->next_hole && off > fdhp->next_hole->off)
237 			fdhp = fdhp->next_hole;
238 
239 		this_hole->next_hole = fdhp->next_hole;
240 		fdhp->next_hole = this_hole;
241 	}
242 
243 	mutex_enter(&fdb->fd_mutex);
244 
245 	fdb->fd_iocount += len;
246 
247 	mutex_exit(&fdb->fd_mutex);
248 }
249 
250 fdb_holes_t *
251 fdb_get_holes(fdbuffer_t *fdb)
252 {
253 	ASSERT(fdb);
254 
255 	if (fdb->fd_state & FDB_ZEROHOLE) {
256 		fdb_zero_holes(fdb);
257 	}
258 
259 	return (fdb->fd_holes);
260 }
261 
262 /*
263  * Note that offsets refer to offsets from the begining of the buffer
264  * and as such the memory should be cleared accordingly.
265  */
266 
267 static void
268 fdb_zero_holes(fdbuffer_t *fdb)
269 {
270 	fdb_holes_t *fdh = fdb->fd_holes;
271 	page_t *pp;
272 
273 	ASSERT(fdb);
274 
275 	if (!fdh)
276 		return;
277 
278 	switch (fdb->fd_type) {
279 	case FDB_PAGEIO:
280 		pp = fdb->fd_pages;
281 		while (fdh) {
282 			fdb_holes_t *pfdh = fdh;
283 			size_t l = fdh->len;
284 			u_offset_t o = fdh->off;
285 			ASSERT(pp);
286 
287 			do {
288 				int  zerolen;
289 				ASSERT(o >= pp->p_offset);
290 
291 				/*
292 				 * This offset is wrong since
293 				 * the offset passed from the pages
294 				 * perspective starts at some virtual
295 				 * address but the hole is relative
296 				 * to the beginning of the fdbuffer.
297 				 */
298 				if (o >= pp->p_offset + PAGESIZE)
299 					continue;
300 
301 				zerolen = min(PAGESIZE, l);
302 
303 				ASSERT(zerolen > 0);
304 				ASSERT(zerolen <= PAGESIZE);
305 
306 				pagezero(pp, ((uintptr_t)o & PAGEOFFSET),
307 				    zerolen);
308 
309 				l -= zerolen;
310 				o += zerolen;
311 
312 				if (l == 0)
313 					break;
314 
315 			} while (pp = page_list_next(pp));
316 
317 			if (!pp)
318 				break;
319 
320 			fdh = fdh->next_hole;
321 			kmem_free(pfdh, sizeof (fdb_holes_t));
322 		}
323 		break;
324 	case FDB_VADDR:
325 		while (fdh) {
326 			fdb_holes_t *pfdh = fdh;
327 
328 			bzero(fdb->fd_addr + fdh->off, fdh->len);
329 
330 			fdh = fdh->next_hole;
331 			kmem_free(pfdh, sizeof (fdb_holes_t));
332 		}
333 		break;
334 	default:
335 		panic("fdb_zero_holes: Unknown fdb type.");
336 		break;
337 	}
338 }
339 
340 
341 buf_t *
342 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp,
343     int b_flags)
344 {
345 	buf_t *bp;
346 
347 	DEBUGF(FDB_D_IO, (CE_NOTE,
348 	    "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x",
349 	    off, len, fdb->fd_len, fdb->fd_state));
350 
351 	ASSERT(fdb);
352 
353 	mutex_enter(&fdb->fd_mutex);
354 
355 	ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) ||
356 	    ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE)));
357 	/*
358 	 * The fdb can be used either in sync or async mode, if the
359 	 * buffer has not been used it may be used in either mode, but
360 	 * once you have started to use the buf in either mode all
361 	 * subsequent i/o requests must take place the same way.
362 	 */
363 
364 	ASSERT(((b_flags & B_ASYNC) &&
365 	    ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) ||
366 	    (!(b_flags & B_ASYNC) &&
367 	    ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC))));
368 
369 
370 	fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC;
371 
372 	fdb->fd_iodispatch++;
373 
374 	ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) ||
375 	    fdb->fd_state & FDB_SYNC);
376 
377 	mutex_exit(&fdb->fd_mutex);
378 
379 	ASSERT((len & (DEV_BSIZE - 1)) == 0);
380 	ASSERT(off+len <= fdb->fd_len);
381 
382 	switch (fdb->fd_type) {
383 	case FDB_PAGEIO:
384 		if (fdb->fd_parentbp == NULL) {
385 			bp = pageio_setup(fdb->fd_pages, len, vp, b_flags);
386 			fdb->fd_parentbp = bp;
387 		}
388 		break;
389 	case FDB_VADDR:
390 		if (fdb->fd_parentbp == NULL) {
391 
392 			bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
393 			bioinit(bp);
394 			bp->b_error = 0;
395 			bp->b_proc = fdb->fd_procp;
396 			bp->b_flags = b_flags | B_BUSY | B_PHYS;
397 			bp->b_bcount = len;
398 			bp->b_un.b_addr = fdb->fd_addr;
399 			bp->b_shadow = fdb->fd_shadow;
400 			if (fdb->fd_shadow != NULL)
401 				bp->b_flags |= B_SHADOW;
402 			fdb->fd_parentbp = bp;
403 		}
404 		break;
405 	default:
406 		panic("fdb_iosetup: Unsupported fdb type.");
407 		break;
408 	};
409 
410 	bp = bioclone(fdb->fd_parentbp, off, len, 0, 0,
411 	    (b_flags & B_ASYNC) ? fdb_iodone : NULL,
412 	    NULL, KM_SLEEP);
413 
414 	bp->b_forw = (struct buf *)fdb;
415 
416 	if (b_flags & B_ASYNC)
417 		bp->b_flags |= B_ASYNC;
418 
419 	return (bp);
420 }
421 
422 size_t
423 fdb_get_iolen(fdbuffer_t *fdb)
424 {
425 	ASSERT(fdb);
426 	ASSERT(fdb->fd_iodispatch == 0);
427 
428 	return (fdb->fd_iocount - fdb->fd_resid);
429 }
430 
431 void
432 fdb_ioerrdone(fdbuffer_t *fdb, int error)
433 {
434 	ASSERT(fdb);
435 	ASSERT(fdb->fd_state & FDB_ASYNC);
436 
437 	DEBUGF(FDB_D_IO, (CE_NOTE,
438 	    "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d",
439 	    fdb->fd_len, fdb->fd_state, error));
440 
441 	mutex_enter(&fdb->fd_mutex);
442 
443 	fdb->fd_err = error;
444 
445 	if (error)
446 		fdb->fd_state |= FDB_ERROR;
447 	else
448 		fdb->fd_state |= FDB_DONE;
449 
450 	/*
451 	 * If there is outstanding i/o return wainting for i/o's to complete.
452 	 */
453 	if (fdb->fd_iodispatch > 0) {
454 		mutex_exit(&fdb->fd_mutex);
455 		return;
456 	}
457 
458 	mutex_exit(&fdb->fd_mutex);
459 	fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL);
460 }
461 
462 int
463 fdb_iodone(buf_t *bp)
464 {
465 	fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw;
466 	int	error, isasync;
467 	int	icallback;
468 
469 	ASSERT(fdb);
470 
471 	DEBUGF(FDB_D_IO, (CE_NOTE,
472 	    "?fdb_iodone: fdb: len: %lux flags: %x error: %d",
473 	    fdb->fd_len, fdb->fd_state, geterror(bp)));
474 
475 	if (bp->b_flags & B_REMAPPED)
476 		bp_mapout(bp);
477 
478 	mutex_enter(&fdb->fd_mutex);
479 
480 	icallback = fdb->fd_state & FDB_ICALLBACK;
481 	isasync = fdb->fd_state & FDB_ASYNC;
482 
483 	ASSERT(fdb->fd_iodispatch > 0);
484 	fdb->fd_iodispatch--;
485 
486 	if (error = geterror(bp)) {
487 		fdb->fd_err = error;
488 		if (bp->b_resid)
489 			fdb->fd_resid += bp->b_resid;
490 		else
491 			fdb->fd_resid += bp->b_bcount;
492 	}
493 
494 	fdb->fd_iocount += bp->b_bcount;
495 
496 	/*
497 	 * ioack collects the total amount of i/o accounted for
498 	 * this includes:
499 	 *
500 	 *	- i/o completed
501 	 *	- i/o attempted but not completed,
502 	 *	- i/o not done due to holes.
503 	 *
504 	 * Once the entire i/o ranges has been accounted for we'll
505 	 * call the async function associated with the fdb.
506 	 *
507 	 */
508 
509 	if ((fdb->fd_iodispatch == 0) &&
510 	    (fdb->fd_state & (FDB_ERROR|FDB_DONE))) {
511 
512 		mutex_exit(&fdb->fd_mutex);
513 
514 		if (isasync || icallback) {
515 			fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
516 		}
517 
518 	} else {
519 
520 		mutex_exit(&fdb->fd_mutex);
521 
522 		if (icallback) {
523 			fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
524 		}
525 	}
526 
527 	freerbuf(bp);
528 	return (0);
529 }
530