/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2017 RackTop Systems.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#ifndef	_SYS_BUF_H
#define	_SYS_BUF_H

#include <sys/types32.h>
#include <sys/t_lock.h>
#include <sys/kstat.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Each buffer in the pool is usually doubly linked into 2 lists:
 * the device with which it is currently associated (always)
 * and also on a list of blocks available for allocation
 * for other use (usually).
 * The latter list is kept in last-used order, and the two
 * lists are doubly linked to make it easy to remove
 * a buffer from one list when it was found by
 * looking through the other.
 * A buffer is on the available list, and is liable
 * to be reassigned to another disk block, if and only
 * if it is not marked BUSY.  When a buffer is busy, the
 * available-list pointers can be used for other purposes.
 * Most drivers use the forward ptr as a link in their I/O active queue.
 * A buffer header contains all the information required to perform I/O.
 * Most of the routines which manipulate these things are in bio.c.
 *
 * There are a number of locks associated with the buffer management
 * system.
 * hbuf.b_lock:		protects hash chains, buffer hdr freelists
 *			and delayed write freelist
 * bfree_lock:		protects the bfreelist structure
 * bhdr_lock:		protects the free header list
 * blist_lock:		protects b_list fields
 * buf.b_sem:		protects all remaining members in the buf struct
 * buf.b_io:		I/O synchronization variable
 *
 * A buffer header is never "locked" (b_sem) when it is on
 * a "freelist" (bhdrlist or bfreelist avail lists).
 */
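
/*
 * Illustrative sketch (not part of the interface): the comment above notes
 * that the av_forw/av_back pointers of a B_BUSY buffer are free for driver
 * use, and that most drivers link such buffers onto a per-unit active queue
 * through the b_actf/b_actl aliases defined in the structure below.  A
 * hypothetical singly linked per-unit queue (the un_actf/un_actl fields are
 * invented for this sketch) might be appended to roughly like this, with
 * the unit's own lock held:
 *
 *	bp->b_actf = NULL;
 *	if (un->un_actf == NULL)
 *		un->un_actf = bp;
 *	else
 *		un->un_actl->b_actf = bp;
 *	un->un_actl = bp;
 */
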
typedef struct buf {
	int	b_flags;		/* see defines below */
	struct buf *b_forw;		/* headed by d_tab of conf.c */
	struct buf *b_back;		/*  "  */
	struct buf *av_forw;		/* position on free list, */
	struct buf *av_back;		/* if not BUSY */
	o_dev_t	b_dev;			/* OLD major+minor device name */
	size_t	b_bcount;		/* transfer count */
	union {
		caddr_t b_addr;		/* low order core address */
		struct fs *b_fs;	/* superblocks */
		struct cg *b_cg;	/* UFS cylinder group block */
		struct dinode *b_dino;	/* UFS ilist */
		daddr32_t *b_daddr;	/* disk blocks */
	} b_un;

	lldaddr_t	_b_blkno;	/* block # on device (union) */
#define	b_lblkno	_b_blkno._f
#ifdef _LP64
#define	b_blkno		_b_blkno._f
#else
#define	b_blkno		_b_blkno._p._l
#endif /* _LP64 */

	char	b_obs1;			/* obsolete */
	size_t	b_resid;		/* words not transferred after error */
	clock_t	b_start;		/* request start time */
	struct proc *b_proc;		/* process doing physical or swap I/O */
	struct page *b_pages;		/* page list for PAGEIO */
	clock_t	b_obs2;			/* obsolete */
	/* Begin new stuff */
#define	b_actf	av_forw
#define	b_actl	av_back
#define	b_active b_bcount
#define	b_errcnt b_resid
	size_t	b_bufsize;		/* size of allocated buffer */
	int	(*b_iodone)(struct buf *);	/* function called by iodone */
	struct vnode *b_vp;		/* vnode associated with block */
	struct buf *b_chain;		/* chain together all buffers here */
	int	b_obs3;			/* obsolete */
	int	b_error;		/* expanded error field */
	void	*b_private;		/* "opaque" driver private area */
	dev_t	b_edev;			/* expanded dev field */
	ksema_t	b_sem;			/* Exclusive access to buf */
	ksema_t	b_io;			/* I/O Synchronization */
	struct buf *b_list;		/* List of potential B_DELWRI bufs */
	struct page **b_shadow;		/* shadow page list */
	void	*b_dip;			/* device info pointer */
	struct vnode *b_file;		/* file associated with this buffer */
	offset_t b_offset;		/* offset in file assoc. with buffer */
} buf_t;

/*
 * Bufhd structures used at the head of the hashed buffer queues.
 * We only need seven words for this, so this abbreviated
 * definition saves some space.
 */
struct diskhd {
	int	b_flags;		/* not used, needed for consistency */
	struct buf *b_forw, *b_back;	/* queue of unit queues */
	struct buf *av_forw, *av_back;	/* queue of bufs for this unit */
	o_dev_t	b_dev;			/* OLD major+minor device name */
	size_t	b_bcount;		/* transfer count */
};


/*
 * Statistics on the buffer cache
 */
struct biostats {
	kstat_named_t	bio_lookup;	/* requests to assign buffer */
	kstat_named_t	bio_hit;	/* buffer already associated with blk */
	kstat_named_t	bio_bufwant;	/* kmem_allocs NOSLEEP failed new buf */
	kstat_named_t	bio_bufwait;	/* kmem_allocs with KM_SLEEP for buf */
	kstat_named_t	bio_bufbusy;	/* buffer locked by someone else */
	kstat_named_t	bio_bufdup;	/* duplicate buffer found for block */
};
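
/*
 * Illustrative sketch (not a complete recipe): a kernel caller can drive a
 * simple synchronous transfer through a caller-owned buf using the routines
 * declared at the end of this file.  bdev_strategy() is assumed to be
 * available from <sys/conf.h>; dev, blkno, len and kaddr are assumed to be
 * supplied by the caller, the B_* flags are defined below, and error
 * handling is elided:
 *
 *	struct buf bp;
 *
 *	bioinit(&bp);
 *	bp.b_flags = B_READ | B_BUSY;
 *	bp.b_edev = dev;
 *	bp.b_lblkno = blkno;
 *	bp.b_bcount = len;
 *	bp.b_un.b_addr = kaddr;
 *	(void) bdev_strategy(&bp);
 *	(void) biowait(&bp);
 *	biofini(&bp);
 */
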
/*
 * These flags are kept in b_flags.
 * The first group is part of the DDI.
 */
#define	B_BUSY		0x0001	/* not on av_forw/back list */
#define	B_DONE		0x0002	/* transaction finished */
#define	B_ERROR		0x0004	/* transaction aborted */
#define	B_PAGEIO	0x0010	/* do I/O to pages on bp->p_pages */
#define	B_PHYS		0x0020	/* Physical IO potentially using UNIBUS map */
#define	B_READ		0x0040	/* read when I/O occurs */
#define	B_WRITE		0x0100	/* non-read pseudo-flag */

/* Not part of the DDI */
#define	B_WANTED	0x0080		/* issue wakeup when BUSY goes off */
#define	B_AGE		0x000200	/* delayed write for correct aging */
#define	B_ASYNC		0x000400	/* don't wait for I/O completion */
#define	B_DELWRI	0x000800	/* delayed write-wait til buf needed */
#define	B_STALE		0x001000	/* on av_* list; invalid contents */
#define	B_DONTNEED	0x002000	/* after write, need not be cached */
#define	B_REMAPPED	0x004000	/* buffer is kernel addressable */
#define	B_FREE		0x008000	/* free page when done */
#define	B_INVAL		0x010000	/* destroy page when done */
#define	B_FORCE		0x020000	/* semi-permanent removal from cache */
#define	B_NOCACHE	0x080000	/* don't cache block when released */
#define	B_TRUNC		0x100000	/* truncate page without I/O */
#define	B_SHADOW	0x200000	/* is b_shadow field valid? */
#define	B_RETRYWRI	0x400000	/* retry write til works or bfinval */
#define	B_FAILFAST	0x1000000	/* Fail promptly if device goes away */
#define	B_STARTED	0x2000000	/* io:::start probe called for buf */
#define	B_ABRWRITE	0x4000000	/* Application based recovery active */
#define	B_PAGE_NOWAIT	0x8000000	/* Skip the page if it is locked */

/*
 * There is some confusion over the meaning of B_FREE and B_INVAL and what
 * the use of one over the other implies.
 *
 * In both cases, when we are done with the page (buffer) we want to free
 * up the page.  In the case of B_FREE, the page will go to the cachelist.
 * In the case of B_INVAL, the page will be destroyed (hashed out of its
 * vnode) and placed on the freelist.  Beyond this, there is no difference
 * between the sole use of these two flags.  In both cases, IO will be done
 * if the page is not yet committed to storage.
 *
 * In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
 * should be used.
 *
 * Use (B_INVAL | B_FORCE) to force the page to be destroyed even if we
 * could not successfully write out the page.
 */

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	bremhash(bp)	{ \
	ASSERT((bp)->b_forw != NULL); \
	ASSERT((bp)->b_back != NULL); \
	(bp)->b_back->b_forw = (bp)->b_forw; \
	(bp)->b_forw->b_back = (bp)->b_back; \
	(bp)->b_forw = (bp)->b_back = NULL; \
}
#define	binshash(bp, dp)	{ \
	ASSERT((bp)->b_forw == NULL); \
	ASSERT((bp)->b_back == NULL); \
	ASSERT((dp)->b_forw != NULL); \
	ASSERT((dp)->b_back != NULL); \
	(bp)->b_forw = (dp)->b_forw; \
	(bp)->b_back = (dp); \
	(dp)->b_forw->b_back = (bp); \
	(dp)->b_forw = (bp); \
}
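
/*
 * Illustrative sketch: bremhash()/binshash() are used as a pair when a
 * buffer header is given a new identity, e.g. when it is reassigned to a
 * different device/block.  "hp", dev and blkno are assumptions for this
 * sketch: hp points at the destination hbuf (defined below), whose leading
 * fields deliberately overlay struct buf, and the b_lock of each hash
 * bucket touched is assumed to be held:
 *
 *	bremhash(bp);
 *	bp->b_edev = dev;
 *	bp->b_blkno = blkno;
 *	binshash(bp, (struct buf *)hp);
 */
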
/*
 * The hash structure maintains two lists:
 *
 *	1) The hash list of buffers (b_forw & b_back)
 *	2) The LRU free list of buffers on this hash bucket (av_forw & av_back)
 *
 * The dwbuf structure keeps a list of delayed write buffers per hash bucket,
 * hence there are exactly as many dwbuf structures as there are hash
 * buckets (hbuf structures) in the system.
 *
 * The number of buffers on the freelist may not be equal to the number of
 * buffers on the hash list.  That is because when buffers are busy they are
 * taken off the freelist but not off the hash list.  The "b_length" field
 * keeps track of the number of free buffers (including delayed write ones)
 * on the hash bucket.  The "b_lock" mutex protects the free list as well as
 * the hash list.  It also protects the counter "b_length".
 *
 * Entries b_forw, b_back, av_forw & av_back must be at the same offset
 * as the ones in the buf structure.
 */
struct hbuf {
	int	b_flags;

	struct buf *b_forw;		/* hash list forw pointer */
	struct buf *b_back;		/* hash list back pointer */

	struct buf *av_forw;		/* free list forw pointer */
	struct buf *av_back;		/* free list back pointer */

	int	b_length;		/* # of entries on free list */
	kmutex_t b_lock;		/* lock to protect this structure */
};


/*
 * The delayed list pointer entries should match with the buf structure.
 */
struct dwbuf {
	int	b_flags;		/* not used */

	struct buf *b_forw;		/* not used */
	struct buf *b_back;		/* not used */

	struct buf *av_forw;		/* delayed write forw pointer */
	struct buf *av_back;		/* delayed write back pointer */
};


/*
 * Unlink a buffer from the available (free or delayed write) list and mark
 * it busy (internal interface).
 */
#define	notavail(bp) \
{\
	ASSERT(SEMA_HELD(&bp->b_sem)); \
	ASSERT((bp)->av_forw != NULL); \
	ASSERT((bp)->av_back != NULL); \
	ASSERT((bp)->av_forw != (bp)); \
	ASSERT((bp)->av_back != (bp)); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	(bp)->av_forw = (bp)->av_back = NULL; \
}

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
/*
 * Macros to avoid the extra function call needed for binary compat.
 *
 * B_RETRYWRI is not included in clear_flags for BWRITE(), BWRITE2(),
 * or brwrite() so that the retry operation is persistent until the
 * write either succeeds or the buffer is bfinval()'d.
 */
#define	BREAD(dev, blkno, bsize) \
	bread_common(/* ufsvfsp */ NULL, dev, blkno, bsize)

#define	BWRITE(bp) \
	bwrite_common(/* ufsvfsp */ NULL, bp, /* force_wait */ 0, \
	    /* do_relse */ 1, \
	    /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI))

#define	BWRITE2(bp) \
	bwrite_common(/* ufsvfsp */ NULL, bp, /* force_wait */ 1, \
	    /* do_relse */ 0, \
	    /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI))

#define	GETBLK(dev, blkno, bsize) \
	getblk_common(/* ufsvfsp */ NULL, dev, blkno, bsize, /* errflg */ 0)
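
/*
 * Illustrative sketch: a typical metadata read through the compatibility
 * macros above.  The returned buffer is released with brelse() whether or
 * not the read succeeded; B_ERROR and geterror() (declared below) report
 * the outcome.  bcopy() is assumed to come from <sys/systm.h>, and dev,
 * blkno, bsize and dst are assumed to be supplied by the caller:
 *
 *	struct buf *bp = BREAD(dev, blkno, bsize);
 *
 *	if (bp->b_flags & B_ERROR) {
 *		int err = geterror(bp);
 *		brelse(bp);
 *		return (err);
 *	}
 *	bcopy(bp->b_un.b_addr, dst, bsize);
 *	brelse(bp);
 */
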

/*
 * Macros for new retry write interfaces.
 */

/*
 * Same as bdwrite() except write failures are retried.
 */
#define	bdrwrite(bp) { \
	(bp)->b_flags |= B_RETRYWRI; \
	bdwrite((bp)); \
}

/*
 * Same as bwrite() except write failures are retried.
 */
#define	brwrite(bp) { \
	(bp)->b_flags |= B_RETRYWRI; \
	bwrite_common(/* ufsvfsp */ NULL, (bp), /* force_wait */ 0, \
	    /* do_relse */ 1, \
	    /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI)); \
}

extern struct hbuf	*hbuf;		/* Hash table */
extern struct dwbuf	*dwbuf;		/* delayed write hash table */
extern struct buf	*buf;		/* The buffer pool itself */
extern struct buf	bfreelist;	/* head of available list */

extern void (*bio_lufs_strategy)(void *, buf_t *);	/* UFS Logging */
extern void (*bio_snapshot_strategy)(void *, buf_t *);	/* UFS snapshots */

int	bcheck(dev_t, struct buf *);
int	iowait(struct buf *);
int	hash2ints(int x, int y);
int	bio_busy(int);
int	biowait(struct buf *);
int	biomodified(struct buf *);
int	geterror(struct buf *);
void	minphys(struct buf *);
/*
 * ufsvfsp is declared as a void * to avoid having everyone that uses
 * this header file include sys/fs/ufs_inode.h.
 */
void	bwrite_common(void *ufsvfsp, struct buf *, int force_wait,
    int do_relse, int clear_flags);
void	bwrite(struct buf *);
void	bwrite2(struct buf *);
void	bdwrite(struct buf *);
void	bawrite(struct buf *);
void	brelse(struct buf *);
void	iodone(struct buf *);
void	clrbuf(struct buf *);
void	bflush(dev_t);
void	blkflush(dev_t, daddr_t);
void	binval(dev_t);
int	bfinval(dev_t, int);
void	binit(void);
void	biodone(struct buf *);
void	bioinit(struct buf *);
void	biofini(struct buf *);
void	bp_mapin(struct buf *);
void	*bp_mapin_common(struct buf *, int);
void	bp_mapout(struct buf *);
int	bp_copyin(struct buf *, void *, offset_t, size_t);
int	bp_copyout(void *, struct buf *, offset_t, size_t);
void	bp_init(size_t, uint_t);
int	bp_color(struct buf *);
void	pageio_done(struct buf *);
struct buf *bread(dev_t, daddr_t, long);
struct buf *bread_common(void *, dev_t, daddr_t, long);
struct buf *breada(dev_t, daddr_t, daddr_t, long);
struct buf *getblk(dev_t, daddr_t, long);
struct buf *getblk_common(void *, dev_t, daddr_t, long, int);
struct buf *ngeteblk(long);
struct buf *geteblk(void);
struct buf *pageio_setup(struct page *, size_t, struct vnode *, int);
void	bioerror(struct buf *bp, int error);
void	bioreset(struct buf *bp);
struct buf *bioclone(struct buf *, off_t, size_t, dev_t, daddr_t,
    int (*)(struct buf *), struct buf *, int);
size_t	biosize(void);
#endif	/* defined(_KERNEL) || defined(_FAKE_KERNEL) */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_BUF_H */