1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 1998,2001 by Sun Microsystems, Inc.
24 * All rights reserved.
25 *
26 */
27
28 #pragma ident "%Z%%M% %I% %E% SMI"
29
#include <sys/types.h>
#include <sys/buf.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/ddi.h>

#include <sys/fdbuffer.h>
38
/*
 * Debug tracing.  In DEBUG builds, DEBUGF(lvl, args) calls cmn_err with the
 * parenthesised argument list "args" when any bit of "lvl" is set in
 * fdb_debug.  In non-DEBUG builds it expands to a no-op expression.
 *
 * Both forms expand to a single statement once the caller's trailing ';'
 * is added, so DEBUGF() can be used as the body of an if/else without
 * braces.
 */
#ifdef DEBUG
static int fdb_debug;
#define	FDB_D_CREATE	001
#define	FDB_D_ALLOC	002
#define	FDB_D_IO	004
#define	FDB_D_ASYNC	010
#define	DEBUGF(lvl, args) \
	do { if ((lvl) & fdb_debug) cmn_err args; } while (0)
#else
#define	DEBUGF(level, args)	((void)0)
#endif
49 static struct kmem_cache *fdb_cache;
50 static void fdb_zero_holes(fdbuffer_t *fdb);
51
52 /* ARGSUSED */
53 static int
fdb_cache_constructor(void * buf,void * cdrarg,int kmflags)54 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags)
55 {
56 fdbuffer_t *fdb = buf;
57
58 mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL);
59
60 return (0);
61 }
62
63 /* ARGSUSED */
64 static void
fdb_cache_destructor(void * buf,void * cdrarg)65 fdb_cache_destructor(void *buf, void *cdrarg)
66 {
67 fdbuffer_t *fdb = buf;
68
69 mutex_destroy(&fdb->fd_mutex);
70 }
71
72 void
fdb_init()73 fdb_init()
74 {
75 fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t),
76 0, fdb_cache_constructor, fdb_cache_destructor,
77 NULL, NULL, NULL, 0);
78 }
79
80 static void
fdb_prepare(fdbuffer_t * fdb)81 fdb_prepare(fdbuffer_t *fdb)
82 {
83 fdb->fd_holes = NULL;
84 fdb->fd_iofunc = NULL;
85 fdb->fd_iargp = NULL;
86 fdb->fd_parentbp = NULL;
87 fdb->fd_resid = 0;
88 fdb->fd_iocount = 0;
89 fdb->fd_iodispatch = 0;
90 fdb->fd_err = 0;
91 }
92
93 fdbuffer_t *
fdb_page_create(page_t * pp,size_t len,int flags)94 fdb_page_create(page_t *pp, size_t len, int flags)
95 {
96 fdbuffer_t *fdb;
97
98 DEBUGF(FDB_D_CREATE, (CE_NOTE,
99 "?fdb_page_create: pp: %p len: %lux flags: %x",
100 (void *)pp, len, flags));
101
102 ASSERT(flags & (FDB_READ|FDB_WRITE));
103
104 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
105
106 fdb_prepare(fdb);
107
108 fdb->fd_type = FDB_PAGEIO;
109 fdb->fd_len = len;
110 fdb->fd_state = flags;
111 fdb->fd_pages = pp;
112
113 return (fdb);
114 }
115
116 fdbuffer_t *
fdb_addr_create(caddr_t addr,size_t len,int flags,page_t ** pplist,struct proc * procp)117 fdb_addr_create(
118 caddr_t addr,
119 size_t len,
120 int flags,
121 page_t **pplist,
122 struct proc *procp)
123 {
124 fdbuffer_t *fdb;
125
126 DEBUGF(FDB_D_CREATE, (CE_NOTE,
127 "?fdb_addr_create: addr: %p len: %lux flags: %x",
128 (void *)addr, len, flags));
129
130 ASSERT(flags & (FDB_READ|FDB_WRITE));
131
132 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
133
134 fdb_prepare(fdb);
135
136 fdb->fd_type = FDB_VADDR;
137 fdb->fd_len = len;
138 fdb->fd_state = flags;
139 fdb->fd_addr = addr;
140 fdb->fd_shadow = pplist;
141 fdb->fd_procp = procp;
142
143 return (fdb);
144 }
145
146 void
fdb_set_iofunc(fdbuffer_t * fdb,fdb_iodone_t iofunc,void * ioargp,int flag)147 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag)
148 {
149 ASSERT(fdb);
150 ASSERT(iofunc);
151 ASSERT((flag & ~FDB_ICALLBACK) == 0);
152
153 fdb->fd_iofunc = iofunc;
154 fdb->fd_iargp = ioargp;
155
156 mutex_enter(&fdb->fd_mutex);
157
158 if (flag & FDB_ICALLBACK)
159 fdb->fd_state |= FDB_ICALLBACK;
160
161 fdb->fd_state |= FDB_ASYNC;
162
163 mutex_exit(&fdb->fd_mutex);
164 }
165
166 int
fdb_get_error(fdbuffer_t * fdb)167 fdb_get_error(fdbuffer_t *fdb)
168 {
169 return (fdb->fd_err);
170 }
171
172 void
fdb_free(fdbuffer_t * fdb)173 fdb_free(fdbuffer_t *fdb)
174 {
175 fdb_holes_t *fdh, *fdhp;
176
177 DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x",
178 (void *)fdb, fdb->fd_state));
179
180 ASSERT(fdb);
181 ASSERT(fdb->fd_iodispatch == 0);
182
183 if (fdb->fd_state & FDB_ZEROHOLE) {
184 fdb_zero_holes(fdb);
185 }
186
187 for (fdh = fdb->fd_holes; fdh; ) {
188 fdhp = fdh;
189 fdh = fdh->next_hole;
190 kmem_free(fdhp, sizeof (fdb_holes_t));
191 }
192
193 if (fdb->fd_parentbp != NULL) {
194 switch (fdb->fd_type) {
195 case FDB_PAGEIO:
196 pageio_done(fdb->fd_parentbp);
197 break;
198 case FDB_VADDR:
199 kmem_free(fdb->fd_parentbp, sizeof (struct buf));
200 break;
201 default:
202 cmn_err(CE_CONT, "?fdb_free: Unknown fdb type.");
203 break;
204 }
205 }
206
207 kmem_cache_free(fdb_cache, fdb);
208
209 }
210
/*
 * The offset should be from the beginning of the buffer;
 * it has nothing to do with the file offset. This fact should be
 * reflected in the caller of this routine.
 */
216
217 void
fdb_add_hole(fdbuffer_t * fdb,u_offset_t off,size_t len)218 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len)
219 {
220 fdb_holes_t *this_hole;
221
222 ASSERT(fdb);
223 ASSERT(off < fdb->fd_len);
224
225 DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx",
226 off, len));
227
228 this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP);
229 this_hole->off = off;
230 this_hole->len = len;
231
232 if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) {
233 this_hole->next_hole = fdb->fd_holes;
234 fdb->fd_holes = this_hole;
235 } else {
236 fdb_holes_t *fdhp = fdb->fd_holes;
237
238 while (fdhp->next_hole && off > fdhp->next_hole->off)
239 fdhp = fdhp->next_hole;
240
241 this_hole->next_hole = fdhp->next_hole;
242 fdhp->next_hole = this_hole;
243 }
244
245 mutex_enter(&fdb->fd_mutex);
246
247 fdb->fd_iocount += len;
248
249 mutex_exit(&fdb->fd_mutex);
250 }
251
252 fdb_holes_t *
fdb_get_holes(fdbuffer_t * fdb)253 fdb_get_holes(fdbuffer_t *fdb)
254 {
255 ASSERT(fdb);
256
257 if (fdb->fd_state & FDB_ZEROHOLE) {
258 fdb_zero_holes(fdb);
259 }
260
261 return (fdb->fd_holes);
262 }
263
/*
 * Note that offsets refer to offsets from the beginning of the buffer,
 * and as such the memory should be cleared accordingly.
 */
268
269 static void
fdb_zero_holes(fdbuffer_t * fdb)270 fdb_zero_holes(fdbuffer_t *fdb)
271 {
272 fdb_holes_t *fdh = fdb->fd_holes;
273 page_t *pp;
274
275 ASSERT(fdb);
276
277 if (!fdh)
278 return;
279
280 switch (fdb->fd_type) {
281 case FDB_PAGEIO:
282 pp = fdb->fd_pages;
283 while (fdh) {
284 fdb_holes_t *pfdh = fdh;
285 size_t l = fdh->len;
286 u_offset_t o = fdh->off;
287 ASSERT(pp);
288
289 do {
290 int zerolen;
291 ASSERT(o >= pp->p_offset);
292
293 /*
294 * This offset is wrong since
295 * the offset passed from the pages
296 * perspective starts at some virtual
297 * address but the hole is relative
298 * to the beginning of the fdbuffer.
299 */
300 if (o >= pp->p_offset + PAGESIZE)
301 continue;
302
303 zerolen = min(PAGESIZE, l);
304
305 ASSERT(zerolen > 0);
306 ASSERT(zerolen <= PAGESIZE);
307
308 pagezero(pp, ((uintptr_t)o & PAGEOFFSET),
309 zerolen);
310
311 l -= zerolen;
312 o += zerolen;
313
314 if (l == 0)
315 break;
316
317 } while (pp = page_list_next(pp));
318
319 if (!pp)
320 break;
321
322 fdh = fdh->next_hole;
323 kmem_free(pfdh, sizeof (fdb_holes_t));
324 }
325 break;
326 case FDB_VADDR:
327 while (fdh) {
328 fdb_holes_t *pfdh = fdh;
329
330 bzero(fdb->fd_addr + fdh->off, fdh->len);
331
332 fdh = fdh->next_hole;
333 kmem_free(pfdh, sizeof (fdb_holes_t));
334 }
335 default:
336 panic("fdb_zero_holes: Unknown fdb type.");
337 break;
338 }
339 }
340
341
342 buf_t *
fdb_iosetup(fdbuffer_t * fdb,u_offset_t off,size_t len,struct vnode * vp,int b_flags)343 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp,
344 int b_flags)
345 {
346 buf_t *bp;
347
348 DEBUGF(FDB_D_IO, (CE_NOTE,
349 "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x",
350 off, len, fdb->fd_len, fdb->fd_state));
351
352 ASSERT(fdb);
353
354 mutex_enter(&fdb->fd_mutex);
355
356 ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) ||
357 ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE)));
358 /*
359 * The fdb can be used either in sync or async mode, if the
360 * buffer has not been used it may be used in either mode, but
361 * once you have started to use the buf in either mode all
362 * subsequent i/o requests must take place the same way.
363 */
364
365 ASSERT(((b_flags & B_ASYNC) &&
366 ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) ||
367 (!(b_flags & B_ASYNC) &&
368 ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC))));
369
370
371 fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC;
372
373 fdb->fd_iodispatch++;
374
375 ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) ||
376 fdb->fd_state & FDB_SYNC);
377
378 mutex_exit(&fdb->fd_mutex);
379
380 ASSERT((len & (DEV_BSIZE - 1)) == 0);
381 ASSERT(off+len <= fdb->fd_len);
382
383 switch (fdb->fd_type) {
384 case FDB_PAGEIO:
385 if (fdb->fd_parentbp == NULL) {
386 bp = pageio_setup(fdb->fd_pages, len, vp, b_flags);
387 fdb->fd_parentbp = bp;
388 }
389 break;
390 case FDB_VADDR:
391 if (fdb->fd_parentbp == NULL) {
392
393 bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
394 bioinit(bp);
395 bp->b_error = 0;
396 bp->b_proc = fdb->fd_procp;
397 bp->b_flags = b_flags | B_BUSY | B_PHYS;
398 bp->b_bcount = len;
399 bp->b_un.b_addr = fdb->fd_addr;
400 bp->b_shadow = fdb->fd_shadow;
401 if (fdb->fd_shadow != NULL)
402 bp->b_flags |= B_SHADOW;
403 fdb->fd_parentbp = bp;
404 }
405 break;
406 default:
407 panic("fdb_iosetup: Unsupported fdb type.");
408 break;
409 };
410
411 bp = bioclone(fdb->fd_parentbp, off, len, 0, 0,
412 (b_flags & B_ASYNC) ? (int (*)())fdb_iodone : NULL,
413 NULL, KM_SLEEP);
414
415 bp->b_forw = (struct buf *)fdb;
416
417 if (b_flags & B_ASYNC)
418 bp->b_flags |= B_ASYNC;
419
420 return (bp);
421 }
422
423 size_t
fdb_get_iolen(fdbuffer_t * fdb)424 fdb_get_iolen(fdbuffer_t *fdb)
425 {
426 ASSERT(fdb);
427 ASSERT(fdb->fd_iodispatch == 0);
428
429 return (fdb->fd_iocount - fdb->fd_resid);
430 }
431
432 void
fdb_ioerrdone(fdbuffer_t * fdb,int error)433 fdb_ioerrdone(fdbuffer_t *fdb, int error)
434 {
435 ASSERT(fdb);
436 ASSERT(fdb->fd_state & FDB_ASYNC);
437
438 DEBUGF(FDB_D_IO, (CE_NOTE,
439 "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d",
440 fdb->fd_len, fdb->fd_state, error));
441
442 mutex_enter(&fdb->fd_mutex);
443
444 fdb->fd_err = error;
445
446 if (error)
447 fdb->fd_state |= FDB_ERROR;
448 else
449 fdb->fd_state |= FDB_DONE;
450
451 /*
452 * If there is outstanding i/o return wainting for i/o's to complete.
453 */
454 if (fdb->fd_iodispatch > 0) {
455 mutex_exit(&fdb->fd_mutex);
456 return;
457 }
458
459 mutex_exit(&fdb->fd_mutex);
460 fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL);
461 }
462
463 void
fdb_iodone(buf_t * bp)464 fdb_iodone(buf_t *bp)
465 {
466 fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw;
467 int error, isasync;
468 int icallback;
469
470 ASSERT(fdb);
471
472 DEBUGF(FDB_D_IO, (CE_NOTE,
473 "?fdb_iodone: fdb: len: %lux flags: %x error: %d",
474 fdb->fd_len, fdb->fd_state, geterror(bp)));
475
476 if (bp->b_flags & B_REMAPPED)
477 bp_mapout(bp);
478
479 mutex_enter(&fdb->fd_mutex);
480
481 icallback = fdb->fd_state & FDB_ICALLBACK;
482 isasync = fdb->fd_state & FDB_ASYNC;
483
484 ASSERT(fdb->fd_iodispatch > 0);
485 fdb->fd_iodispatch--;
486
487 if (error = geterror(bp)) {
488 fdb->fd_err = error;
489 if (bp->b_resid)
490 fdb->fd_resid += bp->b_resid;
491 else
492 fdb->fd_resid += bp->b_bcount;
493 }
494
495 fdb->fd_iocount += bp->b_bcount;
496
497 /*
498 * ioack collects the total amount of i/o accounted for
499 * this includes:
500 *
501 * - i/o completed
502 * - i/o attempted but not completed,
503 * - i/o not done due to holes.
504 *
505 * Once the entire i/o ranges has been accounted for we'll
506 * call the async function associated with the fdb.
507 *
508 */
509
510 if ((fdb->fd_iodispatch == 0) &&
511 (fdb->fd_state & (FDB_ERROR|FDB_DONE))) {
512
513 mutex_exit(&fdb->fd_mutex);
514
515 if (isasync || icallback) {
516 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
517 }
518
519 } else {
520
521 mutex_exit(&fdb->fd_mutex);
522
523 if (icallback) {
524 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
525 }
526 }
527
528 freerbuf(bp);
529 }
530