vfs_bio.c (becbad1f6e18fec7c3bf286778a766ffca4457be) | vfs_bio.c (21fae96123f71665f4325f1f69b5b99a24af6c4b) |
---|---|
1/*- 2 * Copyright (c) 2004 Poul-Henning Kamp 3 * Copyright (c) 1994,1997 John S. Dyson 4 * Copyright (c) 2013 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by Konstantin Belousov 8 * under sponsorship from the FreeBSD Foundation. --- 49 unchanged lines hidden (view full) --- 58#include <sys/malloc.h> 59#include <sys/mount.h> 60#include <sys/mutex.h> 61#include <sys/kernel.h> 62#include <sys/kthread.h> 63#include <sys/proc.h> 64#include <sys/resourcevar.h> 65#include <sys/rwlock.h> | 1/*- 2 * Copyright (c) 2004 Poul-Henning Kamp 3 * Copyright (c) 1994,1997 John S. Dyson 4 * Copyright (c) 2013 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by Konstantin Belousov 8 * under sponsorship from the FreeBSD Foundation. --- 49 unchanged lines hidden (view full) --- 58#include <sys/malloc.h> 59#include <sys/mount.h> 60#include <sys/mutex.h> 61#include <sys/kernel.h> 62#include <sys/kthread.h> 63#include <sys/proc.h> 64#include <sys/resourcevar.h> 65#include <sys/rwlock.h> |
66#include <sys/smp.h> |
|
66#include <sys/sysctl.h> 67#include <sys/sysproto.h> 68#include <sys/vmem.h> 69#include <sys/vmmeter.h> 70#include <sys/vnode.h> 71#include <sys/watchdog.h> 72#include <geom/geom.h> 73#include <vm/vm.h> --- 21 unchanged lines hidden (view full) --- 95}; 96 97static struct buf *buf; /* buffer header pool */ 98extern struct buf *swbuf; /* Swap buffer header pool. */ 99caddr_t unmapped_buf; 100 101/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ 102struct proc *bufdaemonproc; | 67#include <sys/sysctl.h> 68#include <sys/sysproto.h> 69#include <sys/vmem.h> 70#include <sys/vmmeter.h> 71#include <sys/vnode.h> 72#include <sys/watchdog.h> 73#include <geom/geom.h> 74#include <vm/vm.h> --- 21 unchanged lines hidden (view full) --- 96}; 97 98static struct buf *buf; /* buffer header pool */ 99extern struct buf *swbuf; /* Swap buffer header pool. */ 100caddr_t unmapped_buf; 101 102/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ 103struct proc *bufdaemonproc; |
104struct proc *bufspacedaemonproc; |
|
103 104static int inmem(struct vnode *vp, daddr_t blkno); 105static void vm_hold_free_pages(struct buf *bp, int newbsize); 106static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, 107 vm_offset_t to); 108static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m); 109static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, 110 vm_page_t m); 111static void vfs_clean_pages_dirty_buf(struct buf *bp); 112static void vfs_setdirty_locked_object(struct buf *bp); 113static void vfs_vmio_invalidate(struct buf *bp); 114static void vfs_vmio_truncate(struct buf *bp, int npages); 115static void vfs_vmio_extend(struct buf *bp, int npages, int size); 116static int vfs_bio_clcheck(struct vnode *vp, int size, 117 daddr_t lblkno, daddr_t blkno); 118static int buf_flush(struct vnode *vp, int); | 105 106static int inmem(struct vnode *vp, daddr_t blkno); 107static void vm_hold_free_pages(struct buf *bp, int newbsize); 108static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, 109 vm_offset_t to); 110static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m); 111static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, 112 vm_page_t m); 113static void vfs_clean_pages_dirty_buf(struct buf *bp); 114static void vfs_setdirty_locked_object(struct buf *bp); 115static void vfs_vmio_invalidate(struct buf *bp); 116static void vfs_vmio_truncate(struct buf *bp, int npages); 117static void vfs_vmio_extend(struct buf *bp, int npages, int size); 118static int vfs_bio_clcheck(struct vnode *vp, int size, 119 daddr_t lblkno, daddr_t blkno); 120static int buf_flush(struct vnode *vp, int); |
121static int buf_recycle(bool); 122static int buf_scan(bool); |
|
119static int flushbufqueues(struct vnode *, int, int); 120static void buf_daemon(void); 121static void bremfreel(struct buf *bp); 122static __inline void bd_wakeup(void); 123static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); | 123static int flushbufqueues(struct vnode *, int, int); 124static void buf_daemon(void); 125static void bremfreel(struct buf *bp); 126static __inline void bd_wakeup(void); 127static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); |
128static void bufkva_reclaim(vmem_t *, int); 129static void bufkva_free(struct buf *); 130static int buf_import(void *, void **, int, int); 131static void buf_release(void *, void **, int); 132 |
|
124#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 125 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 126static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); 127#endif 128 129int vmiodirenable = TRUE; 130SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, 131 "Use the VM system for directory writes"); --- 8 unchanged lines hidden (view full) --- 140#else 141SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, 142 "Physical memory used for buffers"); 143#endif 144static long bufkvaspace; 145SYSCTL_LONG(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, 0, 146 "Kernel virtual memory used for buffers"); 147static long maxbufspace; | 133#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 134 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 135static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); 136#endif 137 138int vmiodirenable = TRUE; 139SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, 140 "Use the VM system for directory writes"); --- 8 unchanged lines hidden (view full) --- 149#else 150SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, 151 "Physical memory used for buffers"); 152#endif 153static long bufkvaspace; 154SYSCTL_LONG(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, 0, 155 "Kernel virtual memory used for buffers"); 156static long maxbufspace; |
148SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, 149 "Maximum allowed value of bufspace (including buf_daemon)"); | 157SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0, 158 "Maximum allowed value of bufspace (including metadata)"); |
150static long bufmallocspace; 151SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, 152 "Amount of malloced memory for buffers"); 153static long maxbufmallocspace; | 159static long bufmallocspace; 160SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, 161 "Amount of malloced memory for buffers"); 162static long maxbufmallocspace; |
154SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, 155 "Maximum amount of malloced memory for buffers"); | 163SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 164 0, "Maximum amount of malloced memory for buffers"); |
156static long lobufspace; | 165static long lobufspace; |
157SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, | 166SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RW, &lobufspace, 0, |
158 "Minimum amount of buffers we want to have"); 159long hibufspace; | 167 "Minimum amount of buffers we want to have"); 168long hibufspace; |
160SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, 161 "Maximum allowed value of bufspace (excluding buf_daemon)"); 162static int bufreusecnt; 163SYSCTL_INT(_vfs, OID_AUTO, bufreusecnt, CTLFLAG_RW, &bufreusecnt, 0, 164 "Number of times we have reused a buffer"); | 169SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RW, &hibufspace, 0, 170 "Maximum allowed value of bufspace (excluding metadata)"); 171long bufspacethresh; 172SYSCTL_LONG(_vfs, OID_AUTO, bufspacethresh, CTLFLAG_RW, &bufspacethresh, 173 0, "Bufspace consumed before waking the daemon to free some"); |
165static int buffreekvacnt; 166SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, 0, 167 "Number of times we have freed the KVA space from some buffer"); 168static int bufdefragcnt; 169SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, 0, 170 "Number of times we have had to repeat buffer allocation to defragment"); 171static long lorunningspace; 172SYSCTL_PROC(_vfs, OID_AUTO, lorunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | --- 27 unchanged lines hidden (view full) --- 200int dirtybufthresh; 201SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh, 202 0, "Number of bdwrite to bawrite conversions to clear dirty buffers"); 203static int numfreebuffers; 204SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, 205 "Number of free buffers"); 206static int lofreebuffers; 207SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, | 174static int buffreekvacnt; 175SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, 0, 176 "Number of times we have freed the KVA space from some buffer"); 177static int bufdefragcnt; 178SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, 0, 179 "Number of times we have had to repeat buffer allocation to defragment"); 180static long lorunningspace; 181SYSCTL_PROC(_vfs, OID_AUTO, lorunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | --- 27 unchanged lines hidden (view full) --- 209int dirtybufthresh; 210SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh, 211 0, "Number of bdwrite to bawrite conversions to clear dirty buffers"); 212static int numfreebuffers; 213SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, 214 "Number of free buffers"); 215static int lofreebuffers; 216SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, |
208 "XXX Unused"); | 217 "Target number of free buffers"); |
209static int hifreebuffers; 210SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, | 218static int hifreebuffers; 219SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, |
211 "XXX Complicatedly unused"); | 220 "Threshold for clean buffer recycling"); |
212static int getnewbufcalls; 213SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, 214 "Number of calls to getnewbuf"); 215static int getnewbufrestarts; 216SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, 217 "Number of times getnewbuf has had to restart a buffer aquisition"); 218static int mappingrestarts; 219SYSCTL_INT(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RW, &mappingrestarts, 0, 220 "Number of times getblk has had to restart a buffer mapping for " 221 "unmapped buffer"); | 221static int getnewbufcalls; 222SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, 223 "Number of calls to getnewbuf"); 224static int getnewbufrestarts; 225SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, 226 "Number of times getnewbuf has had to restart a buffer aquisition"); 227static int mappingrestarts; 228SYSCTL_INT(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RW, &mappingrestarts, 0, 229 "Number of times getblk has had to restart a buffer mapping for " 230 "unmapped buffer"); |
231static int numbufallocfails; 232SYSCTL_INT(_vfs, OID_AUTO, numbufallocfails, CTLFLAG_RW, &numbufallocfails, 0, 233 "Number of times buffer allocations failed"); |
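The bufspacethresh tunable added above drives the new accounting: the bufspace daemon is woken only when an allocation crosses the threshold, not on every allocation. Below is a minimal userspace sketch of that wake-on-transition pattern; it is not part of the diff, and the counter, threshold value, and daemon_wakeup() are stand-ins for bufspace, bufspacethresh, and bufspace_daemonwakeup().

```c
#include <stdatomic.h>
#include <stdio.h>

/* Stand-ins for bufspace, bufspacethresh, and bufspace_daemonwakeup(). */
static _Atomic long space;
static long thresh = 1024;

static void
daemon_wakeup(void)
{
	printf("daemon woken\n");
}

/* Account for 'size' bytes; wake the daemon only on the upward crossing. */
static void
space_add(long size)
{
	long old;

	old = atomic_fetch_add(&space, size);
	if (old < thresh && old + size >= thresh)
		daemon_wakeup();	/* fires once per crossing */
}

int
main(void)
{
	space_add(512);		/* 0 -> 512: below threshold, no wakeup */
	space_add(600);		/* 512 -> 1112: crosses 1024, wakes the daemon */
	space_add(600);		/* already above the threshold: no further wakeup */
	return (0);
}
```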
|
222static int flushbufqtarget = 100; 223SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, 224 "Amount of work to do in flushbufqueues when helping bufdaemon"); 225static long notbufdflushes; 226SYSCTL_LONG(_vfs, OID_AUTO, notbufdflushes, CTLFLAG_RD, ¬bufdflushes, 0, 227 "Number of dirty buffer flushes done by the bufdaemon helpers"); 228static long barrierwrites; 229SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, 230 "Number of barrier writes"); 231SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, 232 &unmapped_buf_allowed, 0, 233 "Permit the use of the unmapped i/o"); 234 235/* | 234static int flushbufqtarget = 100; 235SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, 236 "Amount of work to do in flushbufqueues when helping bufdaemon"); 237static long notbufdflushes; 238SYSCTL_LONG(_vfs, OID_AUTO, notbufdflushes, CTLFLAG_RD, ¬bufdflushes, 0, 239 "Number of dirty buffer flushes done by the bufdaemon helpers"); 240static long barrierwrites; 241SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, 242 "Number of barrier writes"); 243SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, 244 &unmapped_buf_allowed, 0, 245 "Permit the use of the unmapped i/o"); 246 247/* |
236 * Lock for the non-dirty bufqueues 237 */ 238static struct mtx_padalign bqclean; 239 240/* 241 * Lock for the dirty queue. 242 */ 243static struct mtx_padalign bqdirty; 244 245/* | |
246 * This lock synchronizes access to bd_request. 247 */ 248static struct mtx_padalign bdlock; 249 250/* 251 * This lock protects the runningbufreq and synchronizes runningbufwakeup and 252 * waitrunningbufspace(). 253 */ --- 12 unchanged lines hidden (view full) --- 266/* 267 * Wakeup point for bufdaemon, as well as indicator of whether it is already 268 * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it 269 * is idling. 270 */ 271static int bd_request; 272 273/* | 248 * This lock synchronizes access to bd_request. 249 */ 250static struct mtx_padalign bdlock; 251 252/* 253 * This lock protects the runningbufreq and synchronizes runningbufwakeup and 254 * waitrunningbufspace(). 255 */ --- 12 unchanged lines hidden (view full) --- 268/* 269 * Wakeup point for bufdaemon, as well as indicator of whether it is already 270 * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it 271 * is idling. 272 */ 273static int bd_request; 274 275/* |
276 * Request/wakeup point for the bufspace daemon. 277 */ 278static int bufspace_request; 279 280/* |
|
274 * Request for the buf daemon to write more buffers than is indicated by 275 * lodirtybuf. This may be necessary to push out excess dependencies or 276 * defragment the address space where a simple count of the number of dirty 277 * buffers is insufficient to characterize the demand for flushing them. 278 */ 279static int bd_speedupreq; 280 281/* --- 11 unchanged lines hidden (view full) --- 293 * Used in runningbufwakeup() and waitrunningbufspace(). 294 */ 295static int runningbufreq; 296 297/* 298 * Synchronization (sleep/wakeup) variable for buffer requests. 299 * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done 300 * by and/or. | 281 * Request for the buf daemon to write more buffers than is indicated by 282 * lodirtybuf. This may be necessary to push out excess dependencies or 283 * defragment the address space where a simple count of the number of dirty 284 * buffers is insufficient to characterize the demand for flushing them. 285 */ 286static int bd_speedupreq; 287 288/* --- 11 unchanged lines hidden (view full) --- 300 * Used in runningbufwakeup() and waitrunningbufspace(). 301 */ 302static int runningbufreq; 303 304/* 305 * Synchronization (sleep/wakeup) variable for buffer requests. 306 * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done 307 * by and/or. |
301 * Used in numdirtywakeup(), bufspacewakeup(), bufcountadd(), bwillwrite(), | 308 * Used in numdirtywakeup(), bufspace_wakeup(), bwillwrite(), |
302 * getnewbuf(), and getblk(). 303 */ 304static volatile int needsbuffer; 305 306/* 307 * Synchronization for bwillwrite() waiters. 308 */ 309static int bdirtywait; 310 311/* 312 * Definitions for the buffer free lists. 313 */ | 309 * getnewbuf(), and getblk(). 310 */ 311static volatile int needsbuffer; 312 313/* 314 * Synchronization for bwillwrite() waiters. 315 */ 316static int bdirtywait; 317 318/* 319 * Definitions for the buffer free lists. 320 */ |
314#define BUFFER_QUEUES 4 /* number of free buffer queues */ 315 | |
316#define QUEUE_NONE 0 /* on no queue */ | 321#define QUEUE_NONE 0 /* on no queue */ |
317#define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */ | 322#define QUEUE_EMPTY 1 /* empty buffer headers */ |
318#define QUEUE_DIRTY 2 /* B_DELWRI buffers */ | 323#define QUEUE_DIRTY 2 /* B_DELWRI buffers */ |
319#define QUEUE_EMPTY 3 /* empty buffer headers */ | 324#define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */ |
320#define QUEUE_SENTINEL 1024 /* not a queue index, but mark for sentinel */ 321 | 325#define QUEUE_SENTINEL 1024 /* not a queue index, but mark for sentinel */ 326 |
327/* Maximum number of clean buffer queues. */ 328#define CLEAN_QUEUES 16 329 330/* Configured number of clean queues. */ 331static int clean_queues; 332 333/* Maximum number of buffer queues. */ 334#define BUFFER_QUEUES (QUEUE_CLEAN + CLEAN_QUEUES) 335 |
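To make the reordered index layout concrete, here is a small standalone check (illustrative only, mirroring the defines above) of how the fixed queues and the clean-queue range fit together:

```c
#include <assert.h>

#define	QUEUE_NONE	0	/* on no queue */
#define	QUEUE_EMPTY	1	/* empty buffer headers */
#define	QUEUE_DIRTY	2	/* B_DELWRI buffers */
#define	QUEUE_CLEAN	3	/* first clean queue */
#define	CLEAN_QUEUES	16	/* maximum number of clean queues */
#define	BUFFER_QUEUES	(QUEUE_CLEAN + CLEAN_QUEUES)

int
main(void)
{
	/* 19 queue slots total: indices 0..2 fixed, 3..18 available for clean bufs. */
	assert(BUFFER_QUEUES == 19);
	assert(QUEUE_CLEAN + CLEAN_QUEUES - 1 == 18);
	return (0);
}
```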
|
322/* Queues for free buffers with various properties */ 323static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; 324#ifdef INVARIANTS 325static int bq_len[BUFFER_QUEUES]; 326#endif 327 328/* | 336/* Queues for free buffers with various properties */ 337static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; 338#ifdef INVARIANTS 339static int bq_len[BUFFER_QUEUES]; 340#endif 341 342/* |
343 * Lock for each bufqueue 344 */ 345static struct mtx_padalign bqlocks[BUFFER_QUEUES]; 346 347/* 348 * per-cpu empty buffer cache. 349 */ 350uma_zone_t buf_zone; 351 352/* |
|
329 * Single global constant for BUF_WMESG, to avoid getting multiple references. 330 * buf_wmesg is referred from macros. 331 */ 332const char *buf_wmesg = BUF_WMESG; 333 | 353 * Single global constant for BUF_WMESG, to avoid getting multiple references. 354 * buf_wmesg is referred from macros. 355 */ 356const char *buf_wmesg = BUF_WMESG; 357 |
334#define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */ 335#define VFS_BIO_NEED_FREE 0x04 /* wait for free bufs, hi hysteresis */ 336#define VFS_BIO_NEED_BUFSPACE 0x08 /* wait for buf space, lo hysteresis */ 337 | |
338static int 339sysctl_runningspace(SYSCTL_HANDLER_ARGS) 340{ 341 long value; 342 int error; 343 344 value = *(long *)arg1; 345 error = sysctl_handle_long(oidp, &value, 0, req); --- 31 unchanged lines hidden (view full) --- 377 if (lvalue > INT_MAX) 378 /* On overflow, still write out a long to trigger ENOMEM. */ 379 return (sysctl_handle_long(oidp, &lvalue, 0, req)); 380 ivalue = lvalue; 381 return (sysctl_handle_int(oidp, &ivalue, 0, req)); 382} 383#endif 384 | 358static int 359sysctl_runningspace(SYSCTL_HANDLER_ARGS) 360{ 361 long value; 362 int error; 363 364 value = *(long *)arg1; 365 error = sysctl_handle_long(oidp, &value, 0, req); --- 31 unchanged lines hidden (view full) --- 397 if (lvalue > INT_MAX) 398 /* On overflow, still write out a long to trigger ENOMEM. */ 399 return (sysctl_handle_long(oidp, &lvalue, 0, req)); 400 ivalue = lvalue; 401 return (sysctl_handle_int(oidp, &ivalue, 0, req)); 402} 403#endif 404 |
405static int 406bqcleanq(void) 407{ 408 static int nextq; 409 410 return ((atomic_fetchadd_int(&nextq, 1) % clean_queues) + QUEUE_CLEAN); 411} 412 413static int 414bqisclean(int qindex) 415{ 416 417 return (qindex >= QUEUE_CLEAN && qindex < QUEUE_CLEAN + CLEAN_QUEUES); 418} 419 |
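bqcleanq() above hands out clean-queue indices round-robin from an atomic counter, so concurrent callers spread across the per-queue locks, and bqisclean() tests whether an index falls in the clean range. The following self-contained sketch shows the same selection logic outside the kernel; the hard-coded clean_queues value is purely illustrative (the kernel tunes it at boot).

```c
#include <stdatomic.h>
#include <stdio.h>

#define	QUEUE_CLEAN	3
static int clean_queues = 4;		/* stand-in for the boot-time tuned value */

/* Round-robin pick of a clean queue, as in bqcleanq(). */
static int
pick_clean_queue(void)
{
	static _Atomic unsigned int nextq;

	return ((atomic_fetch_add(&nextq, 1) % clean_queues) + QUEUE_CLEAN);
}

/* A queue index is "clean" iff it falls in the clean range, as in bqisclean(). */
static int
is_clean_queue(int qindex)
{

	return (qindex >= QUEUE_CLEAN && qindex < QUEUE_CLEAN + clean_queues);
}

int
main(void)
{
	int i, q;

	for (i = 0; i < 8; i++) {
		q = pick_clean_queue();
		printf("buf %d -> queue %d (clean=%d)\n", i, q, is_clean_queue(q));
	}
	return (0);
}
```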
|
385/* 386 * bqlock: 387 * 388 * Return the appropriate queue lock based on the index. 389 */ 390static inline struct mtx * 391bqlock(int qindex) 392{ 393 | 420/* 421 * bqlock: 422 * 423 * Return the appropriate queue lock based on the index. 424 */ 425static inline struct mtx * 426bqlock(int qindex) 427{ 428 |
394 if (qindex == QUEUE_DIRTY) 395 return (struct mtx *)(&bqdirty); 396 return (struct mtx *)(&bqclean); | 429 return (struct mtx *)&bqlocks[qindex]; |
397} 398 399/* 400 * bdirtywakeup: 401 * 402 * Wakeup any bwillwrite() waiters. 403 */ 404static void --- 37 unchanged lines hidden (view full) --- 442 * buf daemon will keep running until the condition clears. 443 */ 444 if (atomic_fetchadd_int(&numdirtybuffers, 1) == 445 (lodirtybuffers + hidirtybuffers) / 2) 446 bd_wakeup(); 447} 448 449/* | 430} 431 432/* 433 * bdirtywakeup: 434 * 435 * Wakeup any bwillwrite() waiters. 436 */ 437static void --- 37 unchanged lines hidden (view full) --- 475 * buf daemon will keep running until the condition clears. 476 */ 477 if (atomic_fetchadd_int(&numdirtybuffers, 1) == 478 (lodirtybuffers + hidirtybuffers) / 2) 479 bd_wakeup(); 480} 481 482/* |
450 * bufspacewakeup: | 483 * bufspace_wakeup: |
451 * 452 * Called when buffer space is potentially available for recovery. 453 * getnewbuf() will block on this flag when it is unable to free 454 * sufficient buffer space. Buffer space becomes recoverable when 455 * bp's get placed back in the queues. 456 */ | 484 * 485 * Called when buffer space is potentially available for recovery. 486 * getnewbuf() will block on this flag when it is unable to free 487 * sufficient buffer space. Buffer space becomes recoverable when 488 * bp's get placed back in the queues. 489 */ |
457static __inline void 458bufspacewakeup(void) | 490static void 491bufspace_wakeup(void) |
459{ | 492{ |
460 int need_wakeup, on; | |
461 462 /* | 493 494 /* |
463 * If someone is waiting for bufspace, wake them up. Even 464 * though we may not have freed the kva space yet, the waiting 465 * process will be able to now. | 495 * If someone is waiting for bufspace, wake them up. 496 * 497 * Since needsbuffer is set prior to doing an additional queue 498 * scan it is safe to check for the flag prior to acquiring the 499 * lock. The thread that is preparing to scan again before 500 * blocking would discover the buf we released. |
466 */ | 501 */ |
502 if (needsbuffer) { 503 rw_rlock(&nblock); 504 if (atomic_cmpset_int(&needsbuffer, 1, 0) == 1) 505 wakeup(__DEVOLATILE(void *, &needsbuffer)); 506 rw_runlock(&nblock); 507 } 508} 509 510/* 511 * bufspace_daemonwakeup: 512 * 513 * Wakeup the daemon responsible for freeing clean bufs. 514 */ 515static void 516bufspace_daemonwakeup(void) 517{ |
|
467 rw_rlock(&nblock); | 518 rw_rlock(&nblock); |
468 for (;;) { 469 need_wakeup = 0; 470 on = needsbuffer; 471 if ((on & VFS_BIO_NEED_BUFSPACE) == 0) 472 break; 473 need_wakeup = 1; 474 if (atomic_cmpset_rel_int(&needsbuffer, on, 475 on & ~VFS_BIO_NEED_BUFSPACE)) 476 break; | 519 if (bufspace_request == 0) { 520 bufspace_request = 1; 521 wakeup(&bufspace_request); |
477 } | 522 } |
478 if (need_wakeup) 479 wakeup(__DEVOLATILE(void *, &needsbuffer)); | |
480 rw_runlock(&nblock); 481} 482 483/* | 523 rw_runlock(&nblock); 524} 525 526/* |
484 * bufspaceadjust: | 527 * bufspace_adjust: |
485 * 486 * Adjust the reported bufspace for a KVA managed buffer, possibly 487 * waking any waiters. 488 */ 489static void | 528 * 529 * Adjust the reported bufspace for a KVA managed buffer, possibly 530 * waking any waiters. 531 */ 532static void |
490bufspaceadjust(struct buf *bp, int bufsize) | 533bufspace_adjust(struct buf *bp, int bufsize) |
491{ | 534{ |
535 long space; |
|
492 int diff; 493 494 KASSERT((bp->b_flags & B_MALLOC) == 0, | 536 int diff; 537 538 KASSERT((bp->b_flags & B_MALLOC) == 0, |
495 ("bufspaceadjust: malloc buf %p", bp)); | 539 ("bufspace_adjust: malloc buf %p", bp)); |
496 diff = bufsize - bp->b_bufsize; 497 if (diff < 0) { 498 atomic_subtract_long(&bufspace, -diff); | 540 diff = bufsize - bp->b_bufsize; 541 if (diff < 0) { 542 atomic_subtract_long(&bufspace, -diff); |
499 bufspacewakeup(); 500 } else 501 atomic_add_long(&bufspace, diff); | 543 bufspace_wakeup(); 544 } else { 545 space = atomic_fetchadd_long(&bufspace, diff); 546 /* Wake up the daemon on the transition. */ 547 if (space < bufspacethresh && space + diff >= bufspacethresh) 548 bufspace_daemonwakeup(); 549 } |
502 bp->b_bufsize = bufsize; 503} 504 505/* | 550 bp->b_bufsize = bufsize; 551} 552 553/* |
554 * bufspace_reserve: 555 * 556 * Reserve bufspace before calling allocbuf(). metadata has a 557 * different space limit than data. 558 */ 559static int 560bufspace_reserve(int size, bool metadata) 561{ 562 long limit; 563 long space; 564 565 if (metadata) 566 limit = maxbufspace; 567 else 568 limit = hibufspace; 569 do { 570 space = bufspace; 571 if (space + size > limit) 572 return (ENOSPC); 573 } while (atomic_cmpset_long(&bufspace, space, space + size) == 0); 574 575 /* Wake up the daemon on the transition. */ 576 if (space < bufspacethresh && space + size >= bufspacethresh) 577 bufspace_daemonwakeup(); 578 579 return (0); 580} 581 582/* 583 * bufspace_release: 584 * 585 * Release reserved bufspace after bufspace_adjust() has consumed it. 586 */ 587static void 588bufspace_release(int size) 589{ 590 atomic_subtract_long(&bufspace, size); 591 bufspace_wakeup(); 592} 593 594/* 595 * bufspace_wait: 596 * 597 * Wait for bufspace, acting as the buf daemon if a locked vnode is 598 * supplied. needsbuffer must be set in a safe fashion prior to 599 * polling for space. The operation must be re-tried on return. 600 */ 601static void 602bufspace_wait(struct vnode *vp, int gbflags, int slpflag, int slptimeo) 603{ 604 struct thread *td; 605 int error, fl, norunbuf; 606 607 if ((gbflags & GB_NOWAIT_BD) != 0) 608 return; 609 610 td = curthread; 611 rw_wlock(&nblock); 612 while (needsbuffer != 0) { 613 if (vp != NULL && vp->v_type != VCHR && 614 (td->td_pflags & TDP_BUFNEED) == 0) { 615 rw_wunlock(&nblock); 616 /* 617 * getblk() is called with a vnode locked, and 618 * some majority of the dirty buffers may as 619 * well belong to the vnode. Flushing the 620 * buffers there would make a progress that 621 * cannot be achieved by the buf_daemon, that 622 * cannot lock the vnode. 623 */ 624 norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | 625 (td->td_pflags & TDP_NORUNNINGBUF); 626 627 /* 628 * Play bufdaemon. The getnewbuf() function 629 * may be called while the thread owns lock 630 * for another dirty buffer for the same 631 * vnode, which makes it impossible to use 632 * VOP_FSYNC() there, due to the buffer lock 633 * recursion. 634 */ 635 td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; 636 fl = buf_flush(vp, flushbufqtarget); 637 td->td_pflags &= norunbuf; 638 rw_wlock(&nblock); 639 if (fl != 0) 640 continue; 641 if (needsbuffer == 0) 642 break; 643 } 644 error = rw_sleep(__DEVOLATILE(void *, &needsbuffer), &nblock, 645 (PRIBIO + 4) | slpflag, "newbuf", slptimeo); 646 if (error != 0) 647 break; 648 } 649 rw_wunlock(&nblock); 650} 651 652 653/* 654 * bufspace_daemon: 655 * 656 * buffer space management daemon. Tries to maintain some marginal 657 * amount of free buffer space so that requesting processes neither 658 * block nor work to reclaim buffers. 659 */ 660static void 661bufspace_daemon(void) 662{ 663 for (;;) { 664 kproc_suspend_check(bufspacedaemonproc); 665 666 /* 667 * Free buffers from the clean queue until we meet our 668 * targets. 669 * 670 * Theory of operation: The buffer cache is most efficient 671 * when some free buffer headers and space are always 672 * available to getnewbuf(). This daemon attempts to prevent 673 * the excessive blocking and synchronization associated 674 * with shortfall. It goes through three phases according 675 * demand: 676 * 677 * 1) The daemon wakes up voluntarily once per-second 678 * during idle periods when the counters are below 679 * the wakeup thresholds (bufspacethresh, lofreebuffers). 
680 * 681 * 2) The daemon wakes up as we cross the thresholds 682 * ahead of any potential blocking. This may bounce 683 * slightly according to the rate of consumption and 684 * release. 685 * 686 * 3) The daemon and consumers are starved for working 687 * clean buffers. This is the 'bufspace' sleep below 688 * which will inefficiently trade bufs with bqrelse 689 * until we return to condition 2. 690 */ 691 while (bufspace > lobufspace || 692 numfreebuffers < hifreebuffers) { 693 if (buf_recycle(false) != 0) { 694 atomic_set_int(&needsbuffer, 1); 695 if (buf_recycle(false) != 0) { 696 rw_wlock(&nblock); 697 if (needsbuffer) 698 rw_sleep(__DEVOLATILE(void *, 699 &needsbuffer), &nblock, 700 PRIBIO|PDROP, "bufspace", 701 hz/10); 702 else 703 rw_wunlock(&nblock); 704 } 705 } 706 maybe_yield(); 707 } 708 709 /* 710 * Re-check our limits under the exclusive nblock. 711 */ 712 rw_wlock(&nblock); 713 if (bufspace < bufspacethresh && 714 numfreebuffers > lofreebuffers) { 715 bufspace_request = 0; 716 rw_sleep(&bufspace_request, &nblock, PRIBIO|PDROP, 717 "-", hz); 718 } else 719 rw_wunlock(&nblock); 720 } 721} 722 723static struct kproc_desc bufspace_kp = { 724 "bufspacedaemon", 725 bufspace_daemon, 726 &bufspacedaemonproc 727}; 728SYSINIT(bufspacedaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, 729 &bufspace_kp); 730 731/* |
|
506 * bufmallocadjust: 507 * 508 * Adjust the reported bufspace for a malloc managed buffer, possibly 509 * waking any waiters. 510 */ 511static void 512bufmallocadjust(struct buf *bp, int bufsize) 513{ 514 int diff; 515 516 KASSERT((bp->b_flags & B_MALLOC) != 0, 517 ("bufmallocadjust: non-malloc buf %p", bp)); 518 diff = bufsize - bp->b_bufsize; | 732 * bufmallocadjust: 733 * 734 * Adjust the reported bufspace for a malloc managed buffer, possibly 735 * waking any waiters. 736 */ 737static void 738bufmallocadjust(struct buf *bp, int bufsize) 739{ 740 int diff; 741 742 KASSERT((bp->b_flags & B_MALLOC) != 0, 743 ("bufmallocadjust: non-malloc buf %p", bp)); 744 diff = bufsize - bp->b_bufsize; |
519 if (diff < 0) { | 745 if (diff < 0) |
520 atomic_subtract_long(&bufmallocspace, -diff); | 746 atomic_subtract_long(&bufmallocspace, -diff); |
521 bufspacewakeup(); 522 } else | 747 else |
523 atomic_add_long(&bufmallocspace, diff); 524 bp->b_bufsize = bufsize; 525} 526 527/* 528 * runningwakeup: 529 * 530 * Wake up processes that are waiting on asynchronous writes to fall --- 35 unchanged lines hidden (view full) --- 566 if (space < lorunningspace) 567 return; 568 if (space - bspace > lorunningspace) 569 return; 570 runningwakeup(); 571} 572 573/* | 748 atomic_add_long(&bufmallocspace, diff); 749 bp->b_bufsize = bufsize; 750} 751 752/* 753 * runningwakeup: 754 * 755 * Wake up processes that are waiting on asynchronous writes to fall --- 35 unchanged lines hidden (view full) --- 791 if (space < lorunningspace) 792 return; 793 if (space - bspace > lorunningspace) 794 return; 795 runningwakeup(); 796} 797 798/* |
574 * bufcountadd: 575 * 576 * Called when a buffer has been added to one of the free queues to 577 * account for the buffer and to wakeup anyone waiting for free buffers. 578 * This typically occurs when large amounts of metadata are being handled 579 * by the buffer cache ( else buffer space runs out first, usually ). 580 */ 581static __inline void 582bufcountadd(struct buf *bp) 583{ 584 int mask, need_wakeup, old, on; 585 586 KASSERT((bp->b_flags & B_INFREECNT) == 0, 587 ("buf %p already counted as free", bp)); 588 bp->b_flags |= B_INFREECNT; 589 old = atomic_fetchadd_int(&numfreebuffers, 1); 590 KASSERT(old >= 0 && old < nbuf, 591 ("numfreebuffers climbed to %d", old + 1)); 592 mask = VFS_BIO_NEED_ANY; 593 if (numfreebuffers >= hifreebuffers) 594 mask |= VFS_BIO_NEED_FREE; 595 rw_rlock(&nblock); 596 for (;;) { 597 need_wakeup = 0; 598 on = needsbuffer; 599 if (on == 0) 600 break; 601 need_wakeup = 1; 602 if (atomic_cmpset_rel_int(&needsbuffer, on, on & ~mask)) 603 break; 604 } 605 if (need_wakeup) 606 wakeup(__DEVOLATILE(void *, &needsbuffer)); 607 rw_runlock(&nblock); 608} 609 610/* 611 * bufcountsub: 612 * 613 * Decrement the numfreebuffers count as needed. 614 */ 615static void 616bufcountsub(struct buf *bp) 617{ 618 int old; 619 620 /* 621 * Fixup numfreebuffers count. If the buffer is invalid or not 622 * delayed-write, the buffer was free and we must decrement 623 * numfreebuffers. 624 */ 625 if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) { 626 KASSERT((bp->b_flags & B_INFREECNT) != 0, 627 ("buf %p not counted in numfreebuffers", bp)); 628 bp->b_flags &= ~B_INFREECNT; 629 old = atomic_fetchadd_int(&numfreebuffers, -1); 630 KASSERT(old > 0, ("numfreebuffers dropped to %d", old - 1)); 631 } 632} 633 634/* | |
635 * waitrunningbufspace() 636 * 637 * runningbufspace is a measure of the amount of I/O currently 638 * running. This routine is used in async-write situations to 639 * prevent creating huge backups of pending writes to a device. 640 * Only asynchronous writes are governed by this function. 641 * 642 * This does NOT turn an async write into a sync write. It waits --- 199 unchanged lines hidden (view full) --- 842/* Initialize the buffer subsystem. Called before use of any buffers. */ 843void 844bufinit(void) 845{ 846 struct buf *bp; 847 int i; 848 849 CTASSERT(MAXBCACHEBUF >= MAXBSIZE); | 799 * waitrunningbufspace() 800 * 801 * runningbufspace is a measure of the amount of I/O currently 802 * running. This routine is used in async-write situations to 803 * prevent creating huge backups of pending writes to a device. 804 * Only asynchronous writes are governed by this function. 805 * 806 * This does NOT turn an async write into a sync write. It waits --- 199 unchanged lines hidden (view full) --- 1006/* Initialize the buffer subsystem. Called before use of any buffers. */ 1007void 1008bufinit(void) 1009{ 1010 struct buf *bp; 1011 int i; 1012 1013 CTASSERT(MAXBCACHEBUF >= MAXBSIZE); |
850 mtx_init(&bqclean, "bufq clean lock", NULL, MTX_DEF); 851 mtx_init(&bqdirty, "bufq dirty lock", NULL, MTX_DEF); | 1014 mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF); 1015 mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF); 1016 for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++) 1017 mtx_init(&bqlocks[i], "bufq clean lock", NULL, MTX_DEF); |
852 mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); 853 rw_init(&nblock, "needsbuffer lock"); 854 mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); 855 mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); 856 857 /* next, make a null set of free lists */ 858 for (i = 0; i < BUFFER_QUEUES; i++) 859 TAILQ_INIT(&bufqueues[i]); 860 861 unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); 862 863 /* finally, initialize each buffer header and stick on empty q */ 864 for (i = 0; i < nbuf; i++) { 865 bp = &buf[i]; 866 bzero(bp, sizeof *bp); | 1018 mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); 1019 rw_init(&nblock, "needsbuffer lock"); 1020 mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); 1021 mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); 1022 1023 /* next, make a null set of free lists */ 1024 for (i = 0; i < BUFFER_QUEUES; i++) 1025 TAILQ_INIT(&bufqueues[i]); 1026 1027 unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); 1028 1029 /* finally, initialize each buffer header and stick on empty q */ 1030 for (i = 0; i < nbuf; i++) { 1031 bp = &buf[i]; 1032 bzero(bp, sizeof *bp); |
867 bp->b_flags = B_INVAL | B_INFREECNT; | 1033 bp->b_flags = B_INVAL; |
868 bp->b_rcred = NOCRED; 869 bp->b_wcred = NOCRED; 870 bp->b_qindex = QUEUE_EMPTY; 871 bp->b_xflags = 0; 872 bp->b_data = bp->b_kvabase = unmapped_buf; 873 LIST_INIT(&bp->b_dep); 874 BUF_LOCKINIT(bp); 875 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 876#ifdef INVARIANTS 877 bq_len[QUEUE_EMPTY]++; 878#endif 879 } 880 881 /* 882 * maxbufspace is the absolute maximum amount of buffer space we are 883 * allowed to reserve in KVM and in real terms. The absolute maximum | 1034 bp->b_rcred = NOCRED; 1035 bp->b_wcred = NOCRED; 1036 bp->b_qindex = QUEUE_EMPTY; 1037 bp->b_xflags = 0; 1038 bp->b_data = bp->b_kvabase = unmapped_buf; 1039 LIST_INIT(&bp->b_dep); 1040 BUF_LOCKINIT(bp); 1041 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 1042#ifdef INVARIANTS 1043 bq_len[QUEUE_EMPTY]++; 1044#endif 1045 } 1046 1047 /* 1048 * maxbufspace is the absolute maximum amount of buffer space we are 1049 * allowed to reserve in KVM and in real terms. The absolute maximum |
884 * is nominally used by buf_daemon. hibufspace is the nominal maximum 885 * used by most other processes. The differential is required to 886 * ensure that buf_daemon is able to run when other processes might 887 * be blocked waiting for buffer space. | 1050 * is nominally used by metadata. hibufspace is the nominal maximum 1051 * used by most other requests. The differential is required to 1052 * ensure that metadata deadlocks don't occur. |
888 * 889 * maxbufspace is based on BKVASIZE. Allocating buffers larger than 890 * this may result in KVM fragmentation which is not handled optimally | 1053 * 1054 * maxbufspace is based on BKVASIZE. Allocating buffers larger than 1055 * this may result in KVM fragmentation which is not handled optimally
891 * by the system. | 1056 * by the system. XXX This is less true with vmem. We could use 1057 * PAGE_SIZE. |
892 */ 893 maxbufspace = (long)nbuf * BKVASIZE; 894 hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); | 1058 */ 1059 maxbufspace = (long)nbuf * BKVASIZE; 1060 hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); |
895 lobufspace = hibufspace - MAXBCACHEBUF; | 1061 lobufspace = (hibufspace / 20) * 19; /* 95% */ 1062 bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2; |
896 897 /* 898 * Note: The 16 MiB upper limit for hirunningspace was chosen 899 * arbitrarily and may need further tuning. It corresponds to 900 * 128 outstanding write IO requests (if IO size is 128 KiB), 901 * which fits with many RAID controllers' tagged queuing limits. 902 * The lower 1 MiB limit is the historical upper limit for 903 * hirunningspace. 904 */ 905 hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), 906 16 * 1024 * 1024), 1024 * 1024); 907 lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); 908 | 1063 1064 /* 1065 * Note: The 16 MiB upper limit for hirunningspace was chosen 1066 * arbitrarily and may need further tuning. It corresponds to 1067 * 128 outstanding write IO requests (if IO size is 128 KiB), 1068 * which fits with many RAID controllers' tagged queuing limits. 1069 * The lower 1 MiB limit is the historical upper limit for 1070 * hirunningspace. 1071 */ 1072 hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), 1073 16 * 1024 * 1024), 1024 * 1024); 1074 lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); 1075 |
909/* 910 * Limit the amount of malloc memory since it is wired permanently into 911 * the kernel space. Even though this is accounted for in the buffer 912 * allocation, we don't want the malloced region to grow uncontrolled. 913 * The malloc scheme improves memory utilization significantly on average 914 * (small) directories. 915 */ | 1076 /* 1077 * Limit the amount of malloc memory since it is wired permanently into 1078 * the kernel space. Even though this is accounted for in the buffer 1079 * allocation, we don't want the malloced region to grow uncontrolled. 1080 * The malloc scheme improves memory utilization significantly on 1081 * average (small) directories. 1082 */ |
916 maxbufmallocspace = hibufspace / 20; 917 | 1083 maxbufmallocspace = hibufspace / 20; 1084 |
918/* 919 * Reduce the chance of a deadlock occurring by limiting the number 920 * of delayed-write dirty buffers we allow to stack up. 921 */ | 1085 /* 1086 * Reduce the chance of a deadlock occurring by limiting the number 1087 * of delayed-write dirty buffers we allow to stack up. 1088 */
922 hidirtybuffers = nbuf / 4 + 20; 923 dirtybufthresh = hidirtybuffers * 9 / 10; 924 numdirtybuffers = 0; | 1089 hidirtybuffers = nbuf / 4 + 20; 1090 dirtybufthresh = hidirtybuffers * 9 / 10; 1091 numdirtybuffers = 0; |
925/* 926 * To support extreme low-memory systems, make sure hidirtybuffers cannot 927 * eat up all available buffer space. This occurs when our minimum cannot 928 * be met. We try to size hidirtybuffers to 3/4 our buffer space assuming 929 * BKVASIZE'd buffers. 930 */ | 1092 /* 1093 * To support extreme low-memory systems, make sure hidirtybuffers 1094 * cannot eat up all available buffer space. This occurs when our 1095 * minimum cannot be met. We try to size hidirtybuffers to 3/4 our 1096 * buffer space assuming BKVASIZE'd buffers. 1097 */ |
931 while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { 932 hidirtybuffers >>= 1; 933 } 934 lodirtybuffers = hidirtybuffers / 2; 935 | 1098 while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { 1099 hidirtybuffers >>= 1; 1100 } 1101 lodirtybuffers = hidirtybuffers / 2; 1102 |
936/* 937 * Try to keep the number of free buffers in the specified range, 938 * and give special processes (e.g. like buf_daemon) access to an 939 * emergency reserve. 940 */ 941 lofreebuffers = nbuf / 18 + 5; 942 hifreebuffers = 2 * lofreebuffers; | 1103 /* 1104 * lofreebuffers should be sufficient to avoid stalling waiting on 1105 * buf headers under heavy utilization. The bufs in per-cpu caches 1106 * are counted as free but will be unavailable to threads executing 1107 * on other cpus. 1108 * 1109 * hifreebuffers is the free target for the bufspace daemon. This 1110 * should be set appropriately to limit work per-iteration. 1111 */ 1112 lofreebuffers = MIN((nbuf / 25) + (20 * mp_ncpus), 128 * mp_ncpus); 1113 hifreebuffers = (3 * lofreebuffers) / 2; |
943 numfreebuffers = nbuf; 944 945 bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | 946 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); | 1114 numfreebuffers = nbuf; 1115 1116 bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | 1117 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); |
1118 1119 /* Setup the kva and free list allocators. */ 1120 vmem_set_reclaim(buffer_arena, bufkva_reclaim); 1121 buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf), 1122 NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0); 1123 1124 /* 1125 * Size the clean queue according to the amount of buffer space. 1126 * One queue per-256mb up to the max. More queues gives better 1127 * concurrency but less accurate LRU. 1128 */ 1129 clean_queues = MIN(howmany(maxbufspace, 256*1024*1024), CLEAN_QUEUES); 1130 |
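The clean-queue count computed at the end of bufinit() scales with buffer space: one queue per 256 MB of maxbufspace, capped at CLEAN_QUEUES. A quick worked example of that computation follows; the 1 GB maxbufspace figure is assumed for illustration and not taken from the diff.

```c
#include <stdio.h>

#define	CLEAN_QUEUES	16
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
/* howmany(x, y): divide rounding up, as defined in sys/param.h. */
#define	howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	long maxbufspace = 1L * 1024 * 1024 * 1024;	/* assume ~1 GB of buffer KVA */
	int clean_queues;

	clean_queues = MIN(howmany(maxbufspace, 256 * 1024 * 1024), CLEAN_QUEUES);
	printf("clean_queues = %d\n", clean_queues);	/* prints 4 for this example */
	return (0);
}
```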
|
947} 948 949#ifdef INVARIANTS 950static inline void 951vfs_buf_check_mapped(struct buf *bp) 952{ 953 954 KASSERT(bp->b_kvabase != unmapped_buf, --- 169 unchanged lines hidden (view full) --- 1124 * 1125 * Insert the buffer into the appropriate free list. 1126 */ 1127static void 1128binsfree(struct buf *bp, int qindex) 1129{ 1130 struct mtx *olock, *nlock; 1131 | 1131} 1132 1133#ifdef INVARIANTS 1134static inline void 1135vfs_buf_check_mapped(struct buf *bp) 1136{ 1137 1138 KASSERT(bp->b_kvabase != unmapped_buf, --- 169 unchanged lines hidden (view full) --- 1308 * 1309 * Insert the buffer into the appropriate free list. 1310 */ 1311static void 1312binsfree(struct buf *bp, int qindex) 1313{ 1314 struct mtx *olock, *nlock; 1315 |
1132 BUF_ASSERT_XLOCKED(bp); | 1316 if (qindex != QUEUE_EMPTY) { 1317 BUF_ASSERT_XLOCKED(bp); 1318 } |
1133 | 1319 |
1320 /* 1321 * Stick to the same clean queue for the lifetime of the buf to 1322 * limit locking below. Otherwise pick one sequentially. 1323 */ 1324 if (qindex == QUEUE_CLEAN) { 1325 if (bqisclean(bp->b_qindex)) 1326 qindex = bp->b_qindex; 1327 else 1328 qindex = bqcleanq(); 1329 } 1330 1331 /* 1332 * Handle delayed bremfree() processing. 1333 */
|
1134 nlock = bqlock(qindex); | 1334 nlock = bqlock(qindex); |
1135 /* Handle delayed bremfree() processing. */ | |
1136 if (bp->b_flags & B_REMFREE) { 1137 olock = bqlock(bp->b_qindex); 1138 mtx_lock(olock); 1139 bremfreel(bp); 1140 if (olock != nlock) { 1141 mtx_unlock(olock); 1142 mtx_lock(nlock); 1143 } --- 7 unchanged lines hidden (view full) --- 1151 if (bp->b_flags & B_AGE) 1152 TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); 1153 else 1154 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); 1155#ifdef INVARIANTS 1156 bq_len[bp->b_qindex]++; 1157#endif 1158 mtx_unlock(nlock); | 1335 if (bp->b_flags & B_REMFREE) { 1336 olock = bqlock(bp->b_qindex); 1337 mtx_lock(olock); 1338 bremfreel(bp); 1339 if (olock != nlock) { 1340 mtx_unlock(olock); 1341 mtx_lock(nlock); 1342 } --- 7 unchanged lines hidden (view full) --- 1350 if (bp->b_flags & B_AGE) 1351 TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); 1352 else 1353 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); 1354#ifdef INVARIANTS 1355 bq_len[bp->b_qindex]++; 1356#endif 1357 mtx_unlock(nlock); |
1358} |
|
1159 | 1359 |
1360/* 1361 * buf_free: 1362 * 1363 * Free a buffer to the buf zone once it no longer has valid contents. 1364 */ 1365static void 1366buf_free(struct buf *bp) 1367{ 1368 1369 if (bp->b_flags & B_REMFREE) 1370 bremfreef(bp); 1371 if (bp->b_vflags & BV_BKGRDINPROG) 1372 panic("losing buffer 1"); 1373 if (bp->b_rcred != NOCRED) { 1374 crfree(bp->b_rcred); 1375 bp->b_rcred = NOCRED; 1376 } 1377 if (bp->b_wcred != NOCRED) { 1378 crfree(bp->b_wcred); 1379 bp->b_wcred = NOCRED; 1380 } 1381 if (!LIST_EMPTY(&bp->b_dep)) 1382 buf_deallocate(bp); 1383 bufkva_free(bp); 1384 BUF_UNLOCK(bp); 1385 uma_zfree(buf_zone, bp); 1386 atomic_add_int(&numfreebuffers, 1); 1387 bufspace_wakeup(); 1388} 1389 1390/* 1391 * buf_import: 1392 * 1393 * Import bufs into the uma cache from the buf list. The system still 1394 * expects a static array of bufs and much of the synchronization 1395 * around bufs assumes type stable storage. As a result, UMA is used 1396 * only as a per-cpu cache of bufs still maintained on a global list. 1397 */ 1398static int 1399buf_import(void *arg, void **store, int cnt, int flags) 1400{ 1401 struct buf *bp; 1402 int i; 1403 1404 mtx_lock(&bqlocks[QUEUE_EMPTY]); 1405 for (i = 0; i < cnt; i++) { 1406 bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); 1407 if (bp == NULL) 1408 break; 1409 bremfreel(bp); 1410 store[i] = bp; 1411 } 1412 mtx_unlock(&bqlocks[QUEUE_EMPTY]); 1413 1414 return (i); 1415} 1416 1417/* 1418 * buf_release: 1419 * 1420 * Release bufs from the uma cache back to the buffer queues. 1421 */ 1422static void 1423buf_release(void *arg, void **store, int cnt) 1424{ 1425 int i; 1426 1427 for (i = 0; i < cnt; i++) 1428 binsfree(store[i], QUEUE_EMPTY); 1429} 1430 1431/* 1432 * buf_alloc: 1433 * 1434 * Allocate an empty buffer header. 1435 */ 1436static struct buf * 1437buf_alloc(void) 1438{ 1439 struct buf *bp; 1440 1441 bp = uma_zalloc(buf_zone, M_NOWAIT); 1442 if (bp == NULL) { 1443 bufspace_daemonwakeup(); 1444 atomic_add_int(&numbufallocfails, 1); 1445 return (NULL); 1446 } 1447 |
|
1160 /* | 1448 /* |
1161 * Something we can maybe free or reuse. | 1449 * Wake-up the bufspace daemon on transition. |
1162 */ | 1450 */ |
1163 if (bp->b_bufsize && !(bp->b_flags & B_DELWRI)) 1164 bufspacewakeup(); | 1451 if (atomic_fetchadd_int(&numfreebuffers, -1) == lofreebuffers) 1452 bufspace_daemonwakeup(); |
1165 | 1453 |
1166 if ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI)) 1167 bufcountadd(bp); | 1454 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) 1455 panic("getnewbuf_empty: Locked buf %p on free queue.", bp); 1456 1457 KASSERT(bp->b_vp == NULL, 1458 ("bp: %p still has vnode %p.", bp, bp->b_vp)); 1459 KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0, 1460 ("invalid buffer %p flags %#x", bp, bp->b_flags)); 1461 KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, 1462 ("bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags)); 1463 KASSERT(bp->b_npages == 0, 1464 ("bp: %p still has %d vm pages\n", bp, bp->b_npages)); 1465 KASSERT(bp->b_kvasize == 0, ("bp: %p still has kva\n", bp)); 1466 KASSERT(bp->b_bufsize == 0, ("bp: %p still has bufspace\n", bp)); 1467 1468 bp->b_flags = 0; 1469 bp->b_ioflags = 0; 1470 bp->b_xflags = 0; 1471 bp->b_vflags = 0; 1472 bp->b_vp = NULL; 1473 bp->b_blkno = bp->b_lblkno = 0; 1474 bp->b_offset = NOOFFSET; 1475 bp->b_iodone = 0; 1476 bp->b_error = 0; 1477 bp->b_resid = 0; 1478 bp->b_bcount = 0; 1479 bp->b_npages = 0; 1480 bp->b_dirtyoff = bp->b_dirtyend = 0; 1481 bp->b_bufobj = NULL; 1482 bp->b_pin_count = 0; 1483 bp->b_data = bp->b_kvabase = unmapped_buf; 1484 bp->b_fsprivate1 = NULL; 1485 bp->b_fsprivate2 = NULL; 1486 bp->b_fsprivate3 = NULL; 1487 LIST_INIT(&bp->b_dep); 1488 1489 return (bp); |
1168} 1169 1170/* | 1490} 1491 1492/* |
1493 * buf_qrecycle: 1494 * 1495 * Free a buffer from the given bufqueue. kva controls whether the 1496 * freed buf must own some kva resources. This is used for 1497 * defragmenting. 1498 */ 1499static int 1500buf_qrecycle(int qindex, bool kva) 1501{ 1502 struct buf *bp, *nbp; 1503 1504 if (kva) 1505 atomic_add_int(&bufdefragcnt, 1); 1506 nbp = NULL; 1507 mtx_lock(&bqlocks[qindex]); 1508 nbp = TAILQ_FIRST(&bufqueues[qindex]); 1509 1510 /* 1511 * Run scan, possibly freeing data and/or kva mappings on the fly 1512 * depending. 1513 */ 1514 while ((bp = nbp) != NULL) { 1515 /* 1516 * Calculate next bp (we can only use it if we do not 1517 * release the bqlock). 1518 */ 1519 nbp = TAILQ_NEXT(bp, b_freelist); 1520 1521 /* 1522 * If we are defragging then we need a buffer with 1523 * some kva to reclaim. 1524 */ 1525 if (kva && bp->b_kvasize == 0) 1526 continue; 1527 1528 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) 1529 continue; 1530 1531 /* 1532 * Skip buffers with background writes in progress. 1533 */ 1534 if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { 1535 BUF_UNLOCK(bp); 1536 continue; 1537 } 1538 1539 KASSERT(bp->b_qindex == qindex, 1540 ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); 1541 /* 1542 * NOTE: nbp is now entirely invalid. We can only restart 1543 * the scan from this point on. 1544 */ 1545 bremfreel(bp); 1546 mtx_unlock(&bqlocks[qindex]); 1547 1548 /* 1549 * Requeue the background write buffer with error and 1550 * restart the scan. 1551 */ 1552 if ((bp->b_vflags & BV_BKGRDERR) != 0) { 1553 bqrelse(bp); 1554 mtx_lock(&bqlocks[qindex]); 1555 nbp = TAILQ_FIRST(&bufqueues[qindex]); 1556 continue; 1557 } 1558 bp->b_flags |= B_INVAL; 1559 brelse(bp); 1560 return (0); 1561 } 1562 mtx_unlock(&bqlocks[qindex]); 1563 1564 return (ENOBUFS); 1565} 1566 1567/* 1568 * buf_recycle: 1569 * 1570 * Iterate through all clean queues until we find a buf to recycle or 1571 * exhaust the search. 1572 */ 1573static int 1574buf_recycle(bool kva) 1575{ 1576 int qindex, first_qindex; 1577 1578 qindex = first_qindex = bqcleanq(); 1579 do { 1580 if (buf_qrecycle(qindex, kva) == 0) 1581 return (0); 1582 if (++qindex == QUEUE_CLEAN + clean_queues) 1583 qindex = QUEUE_CLEAN; 1584 } while (qindex != first_qindex); 1585 1586 return (ENOBUFS); 1587} 1588 1589/* 1590 * buf_scan: 1591 * 1592 * Scan the clean queues looking for a buffer to recycle. needsbuffer 1593 * is set on failure so that the caller may optionally bufspace_wait() 1594 * in a race-free fashion. 1595 */ 1596static int 1597buf_scan(bool defrag) 1598{ 1599 int error; 1600 1601 /* 1602 * To avoid heavy synchronization and wakeup races we set 1603 * needsbuffer and re-poll before failing. This ensures that 1604 * no frees can be missed between an unsuccessful poll and 1605 * going to sleep in a synchronized fashion. 1606 */ 1607 if ((error = buf_recycle(defrag)) != 0) { 1608 atomic_set_int(&needsbuffer, 1); 1609 bufspace_daemonwakeup(); 1610 error = buf_recycle(defrag); 1611 } 1612 if (error == 0) 1613 atomic_add_int(&getnewbufrestarts, 1); 1614 return (error); 1615} 1616 1617/* |
|
1171 * bremfree: 1172 * 1173 * Mark the buffer for removal from the appropriate free list. 1174 * 1175 */ 1176void 1177bremfree(struct buf *bp) 1178{ 1179 1180 CTR3(KTR_BUF, "bremfree(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); 1181 KASSERT((bp->b_flags & B_REMFREE) == 0, 1182 ("bremfree: buffer %p already marked for delayed removal.", bp)); 1183 KASSERT(bp->b_qindex != QUEUE_NONE, 1184 ("bremfree: buffer %p not on a queue.", bp)); 1185 BUF_ASSERT_XLOCKED(bp); 1186 1187 bp->b_flags |= B_REMFREE; | 1618 * bremfree: 1619 * 1620 * Mark the buffer for removal from the appropriate free list. 1621 * 1622 */ 1623void 1624bremfree(struct buf *bp) 1625{ 1626 1627 CTR3(KTR_BUF, "bremfree(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); 1628 KASSERT((bp->b_flags & B_REMFREE) == 0, 1629 ("bremfree: buffer %p already marked for delayed removal.", bp)); 1630 KASSERT(bp->b_qindex != QUEUE_NONE, 1631 ("bremfree: buffer %p not on a queue.", bp)); 1632 BUF_ASSERT_XLOCKED(bp); 1633 1634 bp->b_flags |= B_REMFREE; |
1188 bufcountsub(bp); | |
1189} 1190 1191/* 1192 * bremfreef: 1193 * 1194 * Force an immediate removal from a free list. Used only in nfs when 1195 * it abuses the b_freelist pointer. 1196 */ --- 17 unchanged lines hidden (view full) --- 1214static void 1215bremfreel(struct buf *bp) 1216{ 1217 1218 CTR3(KTR_BUF, "bremfreel(%p) vp %p flags %X", 1219 bp, bp->b_vp, bp->b_flags); 1220 KASSERT(bp->b_qindex != QUEUE_NONE, 1221 ("bremfreel: buffer %p not on a queue.", bp)); | 1635} 1636 1637/* 1638 * bremfreef: 1639 * 1640 * Force an immediate removal from a free list. Used only in nfs when 1641 * it abuses the b_freelist pointer. 1642 */ --- 17 unchanged lines hidden (view full) --- 1660static void 1661bremfreel(struct buf *bp) 1662{ 1663 1664 CTR3(KTR_BUF, "bremfreel(%p) vp %p flags %X", 1665 bp, bp->b_vp, bp->b_flags); 1666 KASSERT(bp->b_qindex != QUEUE_NONE, 1667 ("bremfreel: buffer %p not on a queue.", bp)); |
1222 BUF_ASSERT_XLOCKED(bp); | 1668 if (bp->b_qindex != QUEUE_EMPTY) { 1669 BUF_ASSERT_XLOCKED(bp); 1670 } |
1223 mtx_assert(bqlock(bp->b_qindex), MA_OWNED); 1224 1225 TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); 1226#ifdef INVARIANTS 1227 KASSERT(bq_len[bp->b_qindex] >= 1, ("queue %d underflow", 1228 bp->b_qindex)); 1229 bq_len[bp->b_qindex]--; 1230#endif 1231 bp->b_qindex = QUEUE_NONE; | 1671 mtx_assert(bqlock(bp->b_qindex), MA_OWNED); 1672 1673 TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); 1674#ifdef INVARIANTS 1675 KASSERT(bq_len[bp->b_qindex] >= 1, ("queue %d underflow", 1676 bp->b_qindex)); 1677 bq_len[bp->b_qindex]--; 1678#endif 1679 bp->b_qindex = QUEUE_NONE; |
1232 /* 1233 * If this was a delayed bremfree() we only need to remove the buffer 1234 * from the queue and return the stats are already done. 1235 */ 1236 if (bp->b_flags & B_REMFREE) { 1237 bp->b_flags &= ~B_REMFREE; 1238 return; 1239 } 1240 bufcountsub(bp); | 1680 bp->b_flags &= ~B_REMFREE; |
1241} 1242 1243/* | 1681} 1682 1683/* |
1244 * bufkvafree: | 1684 * bufkva_free: |
1245 * 1246 * Free the kva allocation for a buffer. 1247 * 1248 */ 1249static void | 1685 * 1686 * Free the kva allocation for a buffer. 1687 * 1688 */ 1689static void |
1250bufkvafree(struct buf *bp) | 1690bufkva_free(struct buf *bp) |
1251{ 1252 1253#ifdef INVARIANTS 1254 if (bp->b_kvasize == 0) { 1255 KASSERT(bp->b_kvabase == unmapped_buf && 1256 bp->b_data == unmapped_buf, 1257 ("Leaked KVA space on %p", bp)); 1258 } else if (buf_mapped(bp)) --- 7 unchanged lines hidden (view full) --- 1266 vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize); 1267 atomic_subtract_long(&bufkvaspace, bp->b_kvasize); 1268 atomic_add_int(&buffreekvacnt, 1); 1269 bp->b_data = bp->b_kvabase = unmapped_buf; 1270 bp->b_kvasize = 0; 1271} 1272 1273/* | 1691{ 1692 1693#ifdef INVARIANTS 1694 if (bp->b_kvasize == 0) { 1695 KASSERT(bp->b_kvabase == unmapped_buf && 1696 bp->b_data == unmapped_buf, 1697 ("Leaked KVA space on %p", bp)); 1698 } else if (buf_mapped(bp)) --- 7 unchanged lines hidden (view full) --- 1706 vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize); 1707 atomic_subtract_long(&bufkvaspace, bp->b_kvasize); 1708 atomic_add_int(&buffreekvacnt, 1); 1709 bp->b_data = bp->b_kvabase = unmapped_buf; 1710 bp->b_kvasize = 0; 1711} 1712 1713/* |
1274 * bufkvaalloc: | 1714 * bufkva_alloc: |
1275 * 1276 * Allocate the buffer KVA and set b_kvasize and b_kvabase. 1277 */ 1278static int | 1715 * 1716 * Allocate the buffer KVA and set b_kvasize and b_kvabase. 1717 */ 1718static int |
1279bufkvaalloc(struct buf *bp, int maxsize, int gbflags) | 1719bufkva_alloc(struct buf *bp, int maxsize, int gbflags) |
1280{ 1281 vm_offset_t addr; 1282 int error; 1283 1284 KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0, 1285 ("Invalid gbflags 0x%x in %s", gbflags, __func__)); 1286 | 1720{ 1721 vm_offset_t addr; 1722 int error; 1723 1724 KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0, 1725 ("Invalid gbflags 0x%x in %s", gbflags, __func__)); 1726 |
1287 bufkvafree(bp); | 1727 bufkva_free(bp); |
1288 1289 addr = 0; 1290 error = vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr); 1291 if (error != 0) { 1292 /* 1293 * Buffer map is too fragmented. Request the caller 1294 * to defragment the map. 1295 */ | 1728 1729 addr = 0; 1730 error = vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr); 1731 if (error != 0) { 1732 /* 1733 * Buffer map is too fragmented. Request the caller 1734 * to defragment the map. 1735 */ |
1296 atomic_add_int(&bufdefragcnt, 1); | |
1297 return (error); 1298 } 1299 bp->b_kvabase = (caddr_t)addr; 1300 bp->b_kvasize = maxsize; 1301 atomic_add_long(&bufkvaspace, bp->b_kvasize); 1302 if ((gbflags & GB_UNMAPPED) != 0) { 1303 bp->b_data = unmapped_buf; 1304 BUF_CHECK_UNMAPPED(bp); 1305 } else { 1306 bp->b_data = bp->b_kvabase; 1307 BUF_CHECK_MAPPED(bp); 1308 } 1309 return (0); 1310} 1311 1312/* | 1736 return (error); 1737 } 1738 bp->b_kvabase = (caddr_t)addr; 1739 bp->b_kvasize = maxsize; 1740 atomic_add_long(&bufkvaspace, bp->b_kvasize); 1741 if ((gbflags & GB_UNMAPPED) != 0) { 1742 bp->b_data = unmapped_buf; 1743 BUF_CHECK_UNMAPPED(bp); 1744 } else { 1745 bp->b_data = bp->b_kvabase; 1746 BUF_CHECK_MAPPED(bp); 1747 } 1748 return (0); 1749} 1750 1751/* |
1752 * bufkva_reclaim: 1753 * 1754 * Reclaim buffer kva by freeing buffers holding kva. This is a vmem 1755 * callback that fires to avoid returning failure. 1756 */ 1757static void 1758bufkva_reclaim(vmem_t *vmem, int flags) 1759{ 1760 int i; 1761 1762 for (i = 0; i < 5; i++) 1763 if (buf_scan(true) != 0) 1764 break; 1765 return; 1766} 1767 1768 1769/* |
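The new bufkva_reclaim() above is the hook the buffer arena can call back into when a KVA allocation would otherwise fail: it makes at most five reclaim passes and stops early once a pass returns nonzero, which the getnewbuf() retry loop later in this diff also treats as "the scan made no further progress". The fragment below is a minimal userland sketch of that bounded reclaim-and-retry shape; the pool, reclaim_pass() and arena_alloc() are invented for the example and are not kernel interfaces.

```c
/*
 * Userland sketch (not kernel code) of the bounded reclaim-and-retry
 * shape behind bufkva_reclaim(): when the "arena" cannot satisfy a
 * request, run at most five reclaim passes, stopping early once a pass
 * reports no progress.  The pool and reclaim_pass() are invented.
 */
#include <stdbool.h>
#include <stdio.h>

static int pool_free = 0;               /* units currently free */
static int pool_reclaimable = 12;       /* units a reclaim pass could recover */

/* Pretend to push cached buffers back into the pool; 0 means progress. */
static int
reclaim_pass(void)
{
        if (pool_reclaimable == 0)
                return (1);
        pool_free += 4;
        pool_reclaimable -= 4;
        return (0);
}

static bool
arena_alloc(int size)
{
        int i;

        for (i = 0; i < 5 && pool_free < size; i++)
                if (reclaim_pass() != 0)
                        break;          /* no further progress possible */
        if (pool_free < size)
                return (false);
        pool_free -= size;
        return (true);
}

int
main(void)
{
        printf("alloc 8: %s\n", arena_alloc(8) ? "ok" : "failed");
        printf("alloc 8: %s\n", arena_alloc(8) ? "ok" : "failed");
        return (0);
}
```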
|
1313 * Attempt to initiate asynchronous I/O on read-ahead blocks. We must 1314 * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set, 1315 * the buffer is valid and we do not have to do anything. 1316 */ 1317void 1318breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, 1319 int cnt, struct ucred * cred) 1320{ --- 574 unchanged lines hidden (view full) --- 1895 if (bp->b_flags & B_DELWRI) 1896 bundirty(bp); 1897 if (bp->b_vp) 1898 brelvp(bp); 1899 } 1900 1901 /* buffers with no memory */ 1902 if (bp->b_bufsize == 0) { | 1770 * Attempt to initiate asynchronous I/O on read-ahead blocks. We must 1771 * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set, 1772 * the buffer is valid and we do not have to do anything. 1773 */ 1774void 1775breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, 1776 int cnt, struct ucred * cred) 1777{ --- 574 unchanged lines hidden (view full) --- 2352 if (bp->b_flags & B_DELWRI) 2353 bundirty(bp); 2354 if (bp->b_vp) 2355 brelvp(bp); 2356 } 2357 2358 /* buffers with no memory */ 2359 if (bp->b_bufsize == 0) { |
1903 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); 1904 if (bp->b_vflags & BV_BKGRDINPROG) 1905 panic("losing buffer 1"); 1906 bufkvafree(bp); 1907 qindex = QUEUE_EMPTY; 1908 bp->b_flags |= B_AGE; | 2360 buf_free(bp); 2361 return; 2362 } |
1909 /* buffers with junk contents */ | 2363 /* buffers with junk contents */ |
1910 } else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || | 2364 if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || |
1911 (bp->b_ioflags & BIO_ERROR)) { 1912 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); 1913 if (bp->b_vflags & BV_BKGRDINPROG) 1914 panic("losing buffer 2"); 1915 qindex = QUEUE_CLEAN; 1916 bp->b_flags |= B_AGE; 1917 /* remaining buffers */ 1918 } else if (bp->b_flags & B_DELWRI) 1919 qindex = QUEUE_DIRTY; 1920 else 1921 qindex = QUEUE_CLEAN; 1922 1923 binsfree(bp, qindex); 1924 1925 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT); 1926 if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) 1927 panic("brelse: not dirty"); 1928 /* unlock */ 1929 BUF_UNLOCK(bp); | 2365 (bp->b_ioflags & BIO_ERROR)) { 2366 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); 2367 if (bp->b_vflags & BV_BKGRDINPROG) 2368 panic("losing buffer 2"); 2369 qindex = QUEUE_CLEAN; 2370 bp->b_flags |= B_AGE; 2371 /* remaining buffers */ 2372 } else if (bp->b_flags & B_DELWRI) 2373 qindex = QUEUE_DIRTY; 2374 else 2375 qindex = QUEUE_CLEAN; 2376 2377 binsfree(bp, qindex); 2378 2379 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT); 2380 if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) 2381 panic("brelse: not dirty"); 2382 /* unlock */ 2383 BUF_UNLOCK(bp); |
2384 if (qindex == QUEUE_CLEAN) 2385 bufspace_wakeup(); |
|
1930} 1931 1932/* 1933 * Release a buffer back to the appropriate queue but do not try to free 1934 * it. The buffer is expected to be used again soon. 1935 * 1936 * bqrelse() is used by bdwrite() to requeue a delayed write, and used by 1937 * biodone() to requeue an async I/O on completion. It is also used when --- 6 unchanged lines hidden (view full) --- 1944bqrelse(struct buf *bp) 1945{ 1946 int qindex; 1947 1948 CTR3(KTR_BUF, "bqrelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); 1949 KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), 1950 ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); 1951 | 2386} 2387 2388/* 2389 * Release a buffer back to the appropriate queue but do not try to free 2390 * it. The buffer is expected to be used again soon. 2391 * 2392 * bqrelse() is used by bdwrite() to requeue a delayed write, and used by 2393 * biodone() to requeue an async I/O on completion. It is also used when --- 6 unchanged lines hidden (view full) --- 2400bqrelse(struct buf *bp) 2401{ 2402 int qindex; 2403 2404 CTR3(KTR_BUF, "bqrelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); 2405 KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), 2406 ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); 2407 |
2408 qindex = QUEUE_NONE; |
|
1952 if (BUF_LOCKRECURSED(bp)) { 1953 /* do not release to free list */ 1954 BUF_UNLOCK(bp); 1955 return; 1956 } 1957 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); 1958 1959 if (bp->b_flags & B_MANAGED) { --- 19 unchanged lines hidden (view full) --- 1979 } 1980 qindex = QUEUE_CLEAN; 1981 } 1982 binsfree(bp, qindex); 1983 1984out: 1985 /* unlock */ 1986 BUF_UNLOCK(bp); | 2409 if (BUF_LOCKRECURSED(bp)) { 2410 /* do not release to free list */ 2411 BUF_UNLOCK(bp); 2412 return; 2413 } 2414 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); 2415 2416 if (bp->b_flags & B_MANAGED) { --- 19 unchanged lines hidden (view full) --- 2436 } 2437 qindex = QUEUE_CLEAN; 2438 } 2439 binsfree(bp, qindex); 2440 2441out: 2442 /* unlock */ 2443 BUF_UNLOCK(bp); |
2444 if (qindex == QUEUE_CLEAN) 2445 bufspace_wakeup(); |
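Both brelse() and bqrelse() now finish with a bufspace_wakeup() when the buffer went back onto QUEUE_CLEAN, so space waiters are only poked when clean space may actually have been returned. A rough userland analogue of that release-then-wake discipline, with a mutex and condition variable standing in for the kernel's bufspace accounting and sleep/wakeup (all names and numbers below are invented for the sketch; build with -lpthread):

```c
/*
 * Userland analogue of "release clean space, then wake waiters".  The
 * mutex/condvar pair stands in for the kernel's bufspace accounting and
 * sleep/wakeup; everything here is invented for the sketch.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t space_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t space_cv = PTHREAD_COND_INITIALIZER;
static long space_used = 90;
static const long space_limit = 100;

/* Sleep until 'size' units fit under the limit, then charge them. */
static void
space_wait(long size)
{
        pthread_mutex_lock(&space_lock);
        while (space_used + size > space_limit)
                pthread_cond_wait(&space_cv, &space_lock);
        space_used += size;
        pthread_mutex_unlock(&space_lock);
}

/* Give 'size' units back and wake anyone sleeping for space. */
static void
space_release(long size)
{
        pthread_mutex_lock(&space_lock);
        space_used -= size;
        pthread_cond_broadcast(&space_cv);
        pthread_mutex_unlock(&space_lock);
}

int
main(void)
{
        space_release(30);      /* a clean buffer went back on the queue */
        space_wait(25);         /* a waiter can now be satisfied */
        printf("space_used = %ld\n", space_used);
        return (0);
}
```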
|
1987} 1988 1989/* 1990 * Complete I/O to a VMIO backed page. Validate the pages as appropriate, 1991 * restore bogus pages. 1992 */ 1993static void 1994vfs_vmio_iodone(struct buf *bp) --- 383 unchanged lines hidden (view full) --- 2378 */ 2379 nwritten = bp->b_bufsize; 2380 (void) bwrite(bp); 2381 2382 return (nwritten); 2383} 2384 2385/* | 2446} 2447 2448/* 2449 * Complete I/O to a VMIO backed page. Validate the pages as appropriate, 2450 * restore bogus pages. 2451 */ 2452static void 2453vfs_vmio_iodone(struct buf *bp) --- 383 unchanged lines hidden (view full) --- 2837 */ 2838 nwritten = bp->b_bufsize; 2839 (void) bwrite(bp); 2840 2841 return (nwritten); 2842} 2843 2844/* |
2386 * Ask the bufdaemon for help, or act as bufdaemon itself, when a 2387 * locked vnode is supplied. | 2845 * getnewbuf_kva: 2846 * 2847 * Allocate KVA for an empty buf header according to gbflags. |
2388 */ | 2848 */ |
2389static void 2390getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, 2391 int defrag) | 2849static int 2850getnewbuf_kva(struct buf *bp, int gbflags, int maxsize) |
2392{ | 2851{ |
2393 struct thread *td; 2394 char *waitmsg; 2395 int error, fl, flags, norunbuf; | |
2396 | 2852 |
2397 mtx_assert(&bqclean, MA_OWNED); 2398 2399 if (defrag) { 2400 flags = VFS_BIO_NEED_BUFSPACE; 2401 waitmsg = "nbufkv"; 2402 } else if (bufspace >= hibufspace) { 2403 waitmsg = "nbufbs"; 2404 flags = VFS_BIO_NEED_BUFSPACE; 2405 } else { 2406 waitmsg = "newbuf"; 2407 flags = VFS_BIO_NEED_ANY; 2408 } 2409 atomic_set_int(&needsbuffer, flags); 2410 mtx_unlock(&bqclean); 2411 2412 bd_speedup(); /* heeeelp */ 2413 if ((gbflags & GB_NOWAIT_BD) != 0) 2414 return; 2415 2416 td = curthread; 2417 rw_wlock(&nblock); 2418 while ((needsbuffer & flags) != 0) { 2419 if (vp != NULL && vp->v_type != VCHR && 2420 (td->td_pflags & TDP_BUFNEED) == 0) { 2421 rw_wunlock(&nblock); 2422 /* 2423 * getblk() is called with a vnode locked, and 2424 * some majority of the dirty buffers may as 2425 * well belong to the vnode. Flushing the 2426 * buffers there would make a progress that 2427 * cannot be achieved by the buf_daemon, that 2428 * cannot lock the vnode. 2429 */ 2430 norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | 2431 (td->td_pflags & TDP_NORUNNINGBUF); 2432 2433 /* 2434 * Play bufdaemon. The getnewbuf() function 2435 * may be called while the thread owns lock 2436 * for another dirty buffer for the same 2437 * vnode, which makes it impossible to use 2438 * VOP_FSYNC() there, due to the buffer lock 2439 * recursion. 2440 */ 2441 td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; 2442 fl = buf_flush(vp, flushbufqtarget); 2443 td->td_pflags &= norunbuf; 2444 rw_wlock(&nblock); 2445 if (fl != 0) 2446 continue; 2447 if ((needsbuffer & flags) == 0) 2448 break; 2449 } 2450 error = rw_sleep(__DEVOLATILE(void *, &needsbuffer), &nblock, 2451 (PRIBIO + 4) | slpflag, waitmsg, slptimeo); 2452 if (error != 0) 2453 break; 2454 } 2455 rw_wunlock(&nblock); 2456} 2457 2458static void 2459getnewbuf_reuse_bp(struct buf *bp, int qindex) 2460{ 2461 2462 CTR6(KTR_BUF, "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d " 2463 "queue %d (recycling)", bp, bp->b_vp, bp->b_flags, 2464 bp->b_kvasize, bp->b_bufsize, qindex); 2465 mtx_assert(&bqclean, MA_NOTOWNED); 2466 2467 /* 2468 * Note: we no longer distinguish between VMIO and non-VMIO 2469 * buffers. 2470 */ 2471 KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0, 2472 ("invalid buffer %p flags %#x found in queue %d", bp, bp->b_flags, 2473 qindex)); 2474 2475 /* 2476 * When recycling a clean buffer we have to truncate it and 2477 * release the vnode. 2478 */ 2479 if (qindex == QUEUE_CLEAN) { 2480 allocbuf(bp, 0); 2481 if (bp->b_vp != NULL) 2482 brelvp(bp); 2483 } 2484 2485 /* 2486 * Get the rest of the buffer freed up. b_kva* is still valid 2487 * after this operation. 2488 */ 2489 if (bp->b_rcred != NOCRED) { 2490 crfree(bp->b_rcred); 2491 bp->b_rcred = NOCRED; 2492 } 2493 if (bp->b_wcred != NOCRED) { 2494 crfree(bp->b_wcred); 2495 bp->b_wcred = NOCRED; 2496 } 2497 if (!LIST_EMPTY(&bp->b_dep)) 2498 buf_deallocate(bp); 2499 if (bp->b_vflags & BV_BKGRDINPROG) 2500 panic("losing buffer 3"); 2501 KASSERT(bp->b_vp == NULL, ("bp: %p still has vnode %p. qindex: %d", 2502 bp, bp->b_vp, qindex)); 2503 KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, 2504 ("bp: %p still on a buffer list. 
xflags %X", bp, bp->b_xflags)); 2505 KASSERT(bp->b_npages == 0, 2506 ("bp: %p still has %d vm pages\n", bp, bp->b_npages)); 2507 2508 bp->b_flags = 0; 2509 bp->b_ioflags = 0; 2510 bp->b_xflags = 0; 2511 KASSERT((bp->b_flags & B_INFREECNT) == 0, 2512 ("buf %p still counted as free?", bp)); 2513 bp->b_vflags = 0; 2514 bp->b_vp = NULL; 2515 bp->b_blkno = bp->b_lblkno = 0; 2516 bp->b_offset = NOOFFSET; 2517 bp->b_iodone = 0; 2518 bp->b_error = 0; 2519 bp->b_resid = 0; 2520 bp->b_bcount = 0; 2521 bp->b_npages = 0; 2522 bp->b_dirtyoff = bp->b_dirtyend = 0; 2523 bp->b_bufobj = NULL; 2524 bp->b_pin_count = 0; 2525 bp->b_data = bp->b_kvabase; 2526 bp->b_fsprivate1 = NULL; 2527 bp->b_fsprivate2 = NULL; 2528 bp->b_fsprivate3 = NULL; 2529 2530 LIST_INIT(&bp->b_dep); 2531} 2532 2533static struct buf * 2534getnewbuf_scan(int maxsize, int defrag, int unmapped, int metadata) 2535{ 2536 struct buf *bp, *nbp; 2537 int nqindex, qindex, pass; 2538 2539 KASSERT(!unmapped || !defrag, ("both unmapped and defrag")); 2540 2541 pass = 0; 2542restart: 2543 if (pass != 0) 2544 atomic_add_int(&getnewbufrestarts, 1); 2545 2546 nbp = NULL; 2547 mtx_lock(&bqclean); 2548 /* 2549 * If we're not defragging or low on bufspace attempt to make a new 2550 * buf from a header. 2551 */ 2552 if (defrag == 0 && bufspace + maxsize < hibufspace) { 2553 nqindex = QUEUE_EMPTY; 2554 nbp = TAILQ_FIRST(&bufqueues[nqindex]); 2555 } 2556 /* 2557 * All available buffers might be clean or we need to start recycling. 2558 */ 2559 if (nbp == NULL) { 2560 nqindex = QUEUE_CLEAN; 2561 nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); 2562 } 2563 2564 /* 2565 * Run scan, possibly freeing data and/or kva mappings on the fly 2566 * depending. 2567 */ 2568 while ((bp = nbp) != NULL) { 2569 qindex = nqindex; 2570 | 2853 if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_UNMAPPED) { |
2571 /* | 2854 /* |
2572 * Calculate next bp (we can only use it if we do not 2573 * release the bqlock) | 2855 * In order to keep fragmentation sane we only allocate kva 2856 * in BKVASIZE chunks. XXX with vmem we can do page size. |
2574 */ | 2857 */ |
2575 if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) { 2576 switch (qindex) { 2577 case QUEUE_EMPTY: 2578 nqindex = QUEUE_CLEAN; 2579 nbp = TAILQ_FIRST(&bufqueues[nqindex]); 2580 if (nbp != NULL) 2581 break; 2582 /* FALLTHROUGH */ 2583 case QUEUE_CLEAN: 2584 if (metadata && pass == 0) { 2585 pass = 1; 2586 nqindex = QUEUE_EMPTY; 2587 nbp = TAILQ_FIRST(&bufqueues[nqindex]); 2588 } 2589 /* 2590 * nbp is NULL. 2591 */ 2592 break; 2593 } 2594 } 2595 /* 2596 * If we are defragging then we need a buffer with 2597 * b_kvasize != 0. This situation occurs when we 2598 * have many unmapped bufs. 2599 */ 2600 if (defrag && bp->b_kvasize == 0) 2601 continue; | 2858 maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; |
2602 | 2859 |
2603 /* 2604 * Start freeing the bp. This is somewhat involved. nbp 2605 * remains valid only for QUEUE_EMPTY[KVA] bp's. 2606 */ 2607 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) 2608 continue; 2609 /* 2610 * BKGRDINPROG can only be set with the buf and bufobj 2611 * locks both held. We tolerate a race to clear it here. 2612 */ 2613 if (bp->b_vflags & BV_BKGRDINPROG) { 2614 BUF_UNLOCK(bp); 2615 continue; 2616 } 2617 2618 /* 2619 * Requeue the background write buffer with error. 2620 */ 2621 if ((bp->b_vflags & BV_BKGRDERR) != 0) { 2622 bremfreel(bp); 2623 mtx_unlock(&bqclean); 2624 bqrelse(bp); 2625 continue; 2626 } 2627 2628 KASSERT(bp->b_qindex == qindex, 2629 ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); 2630 2631 bremfreel(bp); 2632 mtx_unlock(&bqclean); 2633 2634 /* 2635 * NOTE: nbp is now entirely invalid. We can only restart 2636 * the scan from this point on. 2637 */ 2638 getnewbuf_reuse_bp(bp, qindex); 2639 mtx_assert(&bqclean, MA_NOTOWNED); 2640 2641 /* 2642 * If we are defragging then free the buffer. 2643 */ 2644 if (defrag) { 2645 bp->b_flags |= B_INVAL; 2646 brelse(bp); 2647 defrag = 0; 2648 goto restart; 2649 } 2650 2651 /* 2652 * Notify any waiters for the buffer lock about 2653 * identity change by freeing the buffer. 2654 */ 2655 if (qindex == QUEUE_CLEAN && BUF_LOCKWAITERS(bp)) { 2656 bp->b_flags |= B_INVAL; 2657 brelse(bp); 2658 goto restart; 2659 } 2660 2661 if (metadata) 2662 break; 2663 2664 /* 2665 * If we are overcomitted then recover the buffer and its 2666 * KVM space. This occurs in rare situations when multiple 2667 * processes are blocked in getnewbuf() or allocbuf(). 2668 */ 2669 if (bufspace >= hibufspace && bp->b_kvasize != 0) { 2670 bp->b_flags |= B_INVAL; 2671 brelse(bp); 2672 goto restart; 2673 } 2674 break; | 2860 if (maxsize != bp->b_kvasize && 2861 bufkva_alloc(bp, maxsize, gbflags)) 2862 return (ENOSPC); |
2675 } | 2863 } |
2676 return (bp); | 2864 return (0); |
2677} 2678 2679/* 2680 * getnewbuf: 2681 * 2682 * Find and initialize a new buffer header, freeing up existing buffers 2683 * in the bufqueues as necessary. The new buffer is returned locked. 2684 * | 2865} 2866 2867/* 2868 * getnewbuf: 2869 * 2870 * Find and initialize a new buffer header, freeing up existing buffers 2871 * in the bufqueues as necessary. The new buffer is returned locked. 2872 * |
2685 * Important: B_INVAL is not set. If the caller wishes to throw the 2686 * buffer away, the caller must set B_INVAL prior to calling brelse(). 2687 * | |
2688 * We block if: 2689 * We have insufficient buffer headers 2690 * We have insufficient buffer space 2691 * buffer_arena is too fragmented ( space reservation fails ) 2692 * If we have to flush dirty buffers ( but we try to avoid this ) | 2873 * We block if: 2874 * We have insufficient buffer headers 2875 * We have insufficient buffer space 2876 * buffer_arena is too fragmented ( space reservation fails ) 2877 * If we have to flush dirty buffers ( but we try to avoid this ) |
2878 * 2879 * The caller is responsible for releasing the reserved bufspace after 2880 * allocbuf() is called. |
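The comment above notes that the caller now owns a bufspace reservation and must release it after allocbuf(); the rewritten body in the rows that follow reserves the space, allocates a header, attaches KVA, and retries after a buf_scan() pass, handing the reservation back if it ultimately fails. The sketch below mirrors only that control flow; reserve(), release(), alloc_header(), alloc_kva() and scan_for_space() are invented stand-ins, not kernel functions.

```c
/*
 * Userland sketch of the reserve -> allocate -> retry-after-scan shape of
 * the rewritten getnewbuf(), with the failure path giving the space
 * reservation back.  All helpers here are made up for the example.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static long space_used;
static const long space_limit = 4;

static bool
reserve(long n)
{
        if (space_used + n > space_limit)
                return (false);
        space_used += n;
        return (true);
}

static void
release(long n)
{
        space_used -= n;
}

static void *
alloc_header(void)
{
        return (malloc(64));            /* may fail under memory pressure */
}

static int
alloc_kva(void *bp, long size)
{
        (void)bp; (void)size;
        return (0);                     /* pretend the mapping succeeded */
}

static int
scan_for_space(void)
{
        return (1);                     /* pretend nothing could be freed */
}

static void *
new_buffer(long size)
{
        void *bp = NULL;
        bool reserved = false;

        do {
                if (!reserved && !reserve(size))
                        continue;       /* let the scan try to free space */
                reserved = true;
                if ((bp = alloc_header()) == NULL)
                        continue;
                if (alloc_kva(bp, size) == 0)
                        return (bp);    /* caller releases the space later */
                break;
        } while (scan_for_space() == 0);

        /* Failure: give back the reservation and discard the header. */
        if (reserved)
                release(size);
        free(bp);
        return (NULL);
}

int
main(void)
{
        void *bp = new_buffer(2);

        printf("%s\n", bp != NULL ? "got buffer" : "no buffer");
        if (bp != NULL)
                release(2);     /* what getblk()/geteblk() do after allocbuf() */
        free(bp);
        return (0);
}
```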
|
2693 */ 2694static struct buf * | 2881 */ 2882static struct buf * |
2695getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize, 2696 int gbflags) | 2883getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int maxsize, int gbflags) |
2697{ 2698 struct buf *bp; | 2884{ 2885 struct buf *bp; |
2699 int defrag, metadata; | 2886 bool metadata, reserved; |
2700 2701 KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, 2702 ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); 2703 if (!unmapped_buf_allowed) 2704 gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); 2705 | 2887 2888 KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, 2889 ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); 2890 if (!unmapped_buf_allowed) 2891 gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); 2892 |
2706 defrag = 0; | |
2707 if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || 2708 vp->v_type == VCHR) | 2893 if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || 2894 vp->v_type == VCHR) |
2709 metadata = 1; | 2895 metadata = true; |
2710 else | 2896 else |
 2711 metadata = 0; 2712 /* 2713 * We can't afford to block since we might be holding a vnode lock, 2714 * which may prevent system daemons from running. We deal with 2715 * low-memory situations by proactively returning memory and running 2716 * async I/O rather than sync I/O. 2717 */ | 2897 metadata = false;
2718 atomic_add_int(&getnewbufcalls, 1); | 2898 atomic_add_int(&getnewbufcalls, 1); |
2719restart: 2720 bp = getnewbuf_scan(maxsize, defrag, (gbflags & (GB_UNMAPPED | 2721 GB_KVAALLOC)) == GB_UNMAPPED, metadata); 2722 if (bp != NULL) 2723 defrag = 0; | 2899 reserved = false; 2900 do { 2901 if (reserved == false && 2902 bufspace_reserve(maxsize, metadata) != 0) 2903 continue; 2904 reserved = true; 2905 if ((bp = buf_alloc()) == NULL) 2906 continue; 2907 if (getnewbuf_kva(bp, gbflags, maxsize) == 0) 2908 return (bp); 2909 break; 2910 } while(buf_scan(false) == 0); |
2724 | 2911 |
2725 /* 2726 * If we exhausted our list, sleep as appropriate. We may have to 2727 * wakeup various daemons and write out some dirty buffers. 2728 * 2729 * Generally we are sleeping due to insufficient buffer space. 2730 */ 2731 if (bp == NULL) { 2732 mtx_assert(&bqclean, MA_OWNED); 2733 getnewbuf_bufd_help(vp, gbflags, slpflag, slptimeo, defrag); 2734 mtx_assert(&bqclean, MA_NOTOWNED); 2735 } else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_UNMAPPED) { 2736 mtx_assert(&bqclean, MA_NOTOWNED); 2737 2738 bufkvafree(bp); 2739 atomic_add_int(&bufreusecnt, 1); 2740 } else { 2741 mtx_assert(&bqclean, MA_NOTOWNED); 2742 2743 /* 2744 * We finally have a valid bp. We aren't quite out of the 2745 * woods, we still have to reserve kva space. In order to 2746 * keep fragmentation sane we only allocate kva in BKVASIZE 2747 * chunks. 2748 */ 2749 maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; 2750 2751 if (maxsize != bp->b_kvasize && 2752 bufkvaalloc(bp, maxsize, gbflags)) { 2753 defrag = 1; 2754 bp->b_flags |= B_INVAL; 2755 brelse(bp); 2756 goto restart; 2757 } else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == 2758 (GB_UNMAPPED | GB_KVAALLOC)) { 2759 bp->b_data = unmapped_buf; 2760 BUF_CHECK_UNMAPPED(bp); 2761 } 2762 atomic_add_int(&bufreusecnt, 1); | 2912 if (reserved) 2913 bufspace_release(maxsize); 2914 if (bp != NULL) { 2915 bp->b_flags |= B_INVAL; 2916 brelse(bp); |
2763 } | 2917 } |
2764 return (bp); | 2918 bufspace_wait(vp, gbflags, slpflag, slptimeo); 2919 2920 return (NULL); |
2765} 2766 2767/* 2768 * buf_daemon: 2769 * 2770 * buffer flushing daemon. Buffers are normally flushed by the 2771 * update daemon but if it cannot keep up this process starts to 2772 * take the load in an attempt to prevent getnewbuf() from blocking. 2773 */ | 2921} 2922 2923/* 2924 * buf_daemon: 2925 * 2926 * buffer flushing daemon. Buffers are normally flushed by the 2927 * update daemon but if it cannot keep up this process starts to 2928 * take the load in an attempt to prevent getnewbuf() from blocking. 2929 */ |
2774 | |
2775static struct kproc_desc buf_kp = { 2776 "bufdaemon", 2777 buf_daemon, 2778 &bufdaemonproc 2779}; 2780SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); 2781 2782static int --- 114 unchanged lines hidden (view full) --- 2897 int error; 2898 bool unlock; 2899 2900 flushed = 0; 2901 queue = QUEUE_DIRTY; 2902 bp = NULL; 2903 sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO); 2904 sentinel->b_qindex = QUEUE_SENTINEL; | 2930static struct kproc_desc buf_kp = { 2931 "bufdaemon", 2932 buf_daemon, 2933 &bufdaemonproc 2934}; 2935SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); 2936 2937static int --- 114 unchanged lines hidden (view full) --- 3052 int error; 3053 bool unlock; 3054 3055 flushed = 0; 3056 queue = QUEUE_DIRTY; 3057 bp = NULL; 3058 sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO); 3059 sentinel->b_qindex = QUEUE_SENTINEL; |
2905 mtx_lock(&bqdirty); | 3060 mtx_lock(&bqlocks[queue]); |
2906 TAILQ_INSERT_HEAD(&bufqueues[queue], sentinel, b_freelist); | 3061 TAILQ_INSERT_HEAD(&bufqueues[queue], sentinel, b_freelist); |
2907 mtx_unlock(&bqdirty); | 3062 mtx_unlock(&bqlocks[queue]); |
2908 while (flushed != target) { 2909 maybe_yield(); | 3063 while (flushed != target) { 3064 maybe_yield(); |
2910 mtx_lock(&bqdirty); | 3065 mtx_lock(&bqlocks[queue]); |
2911 bp = TAILQ_NEXT(sentinel, b_freelist); 2912 if (bp != NULL) { 2913 TAILQ_REMOVE(&bufqueues[queue], sentinel, b_freelist); 2914 TAILQ_INSERT_AFTER(&bufqueues[queue], bp, sentinel, 2915 b_freelist); 2916 } else { | 3066 bp = TAILQ_NEXT(sentinel, b_freelist); 3067 if (bp != NULL) { 3068 TAILQ_REMOVE(&bufqueues[queue], sentinel, b_freelist); 3069 TAILQ_INSERT_AFTER(&bufqueues[queue], bp, sentinel, 3070 b_freelist); 3071 } else { |
2917 mtx_unlock(&bqdirty); | 3072 mtx_unlock(&bqlocks[queue]); |
2918 break; 2919 } 2920 /* 2921 * Skip sentinels inserted by other invocations of the 2922 * flushbufqueues(), taking care to not reorder them. 2923 * 2924 * Only flush the buffers that belong to the 2925 * vnode locked by the curthread. 2926 */ 2927 if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL && 2928 bp->b_vp != lvp)) { | 3073 break; 3074 } 3075 /* 3076 * Skip sentinels inserted by other invocations of the 3077 * flushbufqueues(), taking care to not reorder them. 3078 * 3079 * Only flush the buffers that belong to the 3080 * vnode locked by the curthread. 3081 */ 3082 if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL && 3083 bp->b_vp != lvp)) { |
2929 mtx_unlock(&bqdirty); | 3084 mtx_unlock(&bqlocks[queue]); |
2930 continue; 2931 } 2932 error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL); | 3085 continue; 3086 } 3087 error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL); |
2933 mtx_unlock(&bqdirty); | 3088 mtx_unlock(&bqlocks[queue]); |
2934 if (error != 0) 2935 continue; 2936 if (bp->b_pin_count > 0) { 2937 BUF_UNLOCK(bp); 2938 continue; 2939 } 2940 /* 2941 * BKGRDINPROG can only be set with the buf and bufobj --- 66 unchanged lines hidden (view full) --- 3008 if (curproc == bufdaemonproc && 3009 runningbufspace > hirunningspace) 3010 waitrunningbufspace(); 3011 continue; 3012 } 3013 vn_finished_write(mp); 3014 BUF_UNLOCK(bp); 3015 } | 3089 if (error != 0) 3090 continue; 3091 if (bp->b_pin_count > 0) { 3092 BUF_UNLOCK(bp); 3093 continue; 3094 } 3095 /* 3096 * BKGRDINPROG can only be set with the buf and bufobj --- 66 unchanged lines hidden (view full) --- 3163 if (curproc == bufdaemonproc && 3164 runningbufspace > hirunningspace) 3165 waitrunningbufspace(); 3166 continue; 3167 } 3168 vn_finished_write(mp); 3169 BUF_UNLOCK(bp); 3170 } |
3016 mtx_lock(&bqdirty); | 3171 mtx_lock(&bqlocks[queue]); |
3017 TAILQ_REMOVE(&bufqueues[queue], sentinel, b_freelist); | 3172 TAILQ_REMOVE(&bufqueues[queue], sentinel, b_freelist); |
3018 mtx_unlock(&bqdirty); | 3173 mtx_unlock(&bqlocks[queue]); |
3019 free(sentinel, M_TEMP); 3020 return (flushed); 3021} 3022 3023/* 3024 * Check to see if a block is currently memory resident. 3025 */ 3026struct buf * --- 164 unchanged lines hidden (view full) --- 3191/* 3192 * Allocate the KVA mapping for an existing buffer. 3193 * If an unmapped buffer is provided but a mapped buffer is requested, take 3194 * also care to properly setup mappings between pages and KVA. 3195 */ 3196static void 3197bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags) 3198{ | 3174 free(sentinel, M_TEMP); 3175 return (flushed); 3176} 3177 3178/* 3179 * Check to see if a block is currently memory resident. 3180 */ 3181struct buf * --- 164 unchanged lines hidden (view full) --- 3346/* 3347 * Allocate the KVA mapping for an existing buffer. 3348 * If an unmapped buffer is provided but a mapped buffer is requested, take 3349 * also care to properly setup mappings between pages and KVA. 3350 */ 3351static void 3352bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags) 3353{ |
3199 struct buf *scratch_bp; | |
3200 int bsize, maxsize, need_mapping, need_kva; 3201 off_t offset; 3202 3203 need_mapping = bp->b_data == unmapped_buf && 3204 (gbflags & GB_UNMAPPED) == 0; 3205 need_kva = bp->b_kvabase == unmapped_buf && 3206 bp->b_data == unmapped_buf && 3207 (gbflags & GB_KVAALLOC) != 0; --- 16 unchanged lines hidden (view full) --- 3224 * if the buffer was mapped. 3225 */ 3226 bsize = vn_isdisk(bp->b_vp, NULL) ? DEV_BSIZE : bp->b_bufobj->bo_bsize; 3227 KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); 3228 offset = blkno * bsize; 3229 maxsize = size + (offset & PAGE_MASK); 3230 maxsize = imax(maxsize, bsize); 3231 | 3354 int bsize, maxsize, need_mapping, need_kva; 3355 off_t offset; 3356 3357 need_mapping = bp->b_data == unmapped_buf && 3358 (gbflags & GB_UNMAPPED) == 0; 3359 need_kva = bp->b_kvabase == unmapped_buf && 3360 bp->b_data == unmapped_buf && 3361 (gbflags & GB_KVAALLOC) != 0; --- 16 unchanged lines hidden (view full) --- 3378 * if the buffer was mapped. 3379 */ 3380 bsize = vn_isdisk(bp->b_vp, NULL) ? DEV_BSIZE : bp->b_bufobj->bo_bsize; 3381 KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); 3382 offset = blkno * bsize; 3383 maxsize = size + (offset & PAGE_MASK); 3384 maxsize = imax(maxsize, bsize); 3385 |
3232mapping_loop: 3233 if (bufkvaalloc(bp, maxsize, gbflags)) { 3234 /* 3235 * Request defragmentation. getnewbuf() returns us the 3236 * allocated space by the scratch buffer KVA. 3237 */ 3238 scratch_bp = getnewbuf(bp->b_vp, 0, 0, size, maxsize, gbflags | 3239 (GB_UNMAPPED | GB_KVAALLOC)); 3240 if (scratch_bp == NULL) { 3241 if ((gbflags & GB_NOWAIT_BD) != 0) { 3242 /* 3243 * XXXKIB: defragmentation cannot 3244 * succeed, not sure what else to do. 3245 */ 3246 panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp); 3247 } 3248 atomic_add_int(&mappingrestarts, 1); 3249 goto mapping_loop; | 3386 while (bufkva_alloc(bp, maxsize, gbflags) != 0) { 3387 if ((gbflags & GB_NOWAIT_BD) != 0) { 3388 /* 3389 * XXXKIB: defragmentation cannot 3390 * succeed, not sure what else to do. 3391 */ 3392 panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp); |
3250 } | 3393 } |
3251 KASSERT(scratch_bp->b_kvabase != unmapped_buf, 3252 ("scratch bp has no KVA %p", scratch_bp)); 3253 /* Grab pointers. */ 3254 bp->b_kvabase = scratch_bp->b_kvabase; 3255 bp->b_kvasize = scratch_bp->b_kvasize; 3256 bp->b_data = scratch_bp->b_data; 3257 3258 /* Get rid of the scratch buffer. */ 3259 scratch_bp->b_kvasize = 0; 3260 scratch_bp->b_flags |= B_INVAL; 3261 scratch_bp->b_data = scratch_bp->b_kvabase = unmapped_buf; 3262 brelse(scratch_bp); | 3394 atomic_add_int(&mappingrestarts, 1); 3395 bufspace_wait(bp->b_vp, gbflags, 0, 0); |
3263 } 3264has_addr: 3265 if (need_mapping) { 3266 /* b_offset is handled by bpmap_qenter. */ 3267 bp->b_data = bp->b_kvabase; 3268 BUF_CHECK_MAPPED(bp); 3269 bpmap_qenter(bp); 3270 } --- 210 unchanged lines hidden (view full) --- 3481 maxsize = size + (offset & PAGE_MASK); 3482 } else { 3483 maxsize = size; 3484 /* Do not allow non-VMIO notmapped buffers. */ 3485 flags &= ~(GB_UNMAPPED | GB_KVAALLOC); 3486 } 3487 maxsize = imax(maxsize, bsize); 3488 | 3396 } 3397has_addr: 3398 if (need_mapping) { 3399 /* b_offset is handled by bpmap_qenter. */ 3400 bp->b_data = bp->b_kvabase; 3401 BUF_CHECK_MAPPED(bp); 3402 bpmap_qenter(bp); 3403 } --- 210 unchanged lines hidden (view full) --- 3614 maxsize = size + (offset & PAGE_MASK); 3615 } else { 3616 maxsize = size; 3617 /* Do not allow non-VMIO notmapped buffers. */ 3618 flags &= ~(GB_UNMAPPED | GB_KVAALLOC); 3619 } 3620 maxsize = imax(maxsize, bsize); 3621 |
3489 bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags); | 3622 bp = getnewbuf(vp, slpflag, slptimeo, maxsize, flags); |
3490 if (bp == NULL) { 3491 if (slpflag || slptimeo) 3492 return NULL; 3493 goto loop; 3494 } 3495 3496 /* 3497 * This code is used to make sure that a buffer is not --- 7 unchanged lines hidden (view full) --- 3505 * the splay tree implementation when dealing with duplicate 3506 * lblkno's. 3507 */ 3508 BO_LOCK(bo); 3509 if (gbincore(bo, blkno)) { 3510 BO_UNLOCK(bo); 3511 bp->b_flags |= B_INVAL; 3512 brelse(bp); | 3623 if (bp == NULL) { 3624 if (slpflag || slptimeo) 3625 return NULL; 3626 goto loop; 3627 } 3628 3629 /* 3630 * This code is used to make sure that a buffer is not --- 7 unchanged lines hidden (view full) --- 3638 * the splay tree implementation when dealing with duplicate 3639 * lblkno's. 3640 */ 3641 BO_LOCK(bo); 3642 if (gbincore(bo, blkno)) { 3643 BO_UNLOCK(bo); 3644 bp->b_flags |= B_INVAL; 3645 brelse(bp); |
3646 bufspace_release(maxsize); |
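When the freshly built buffer loses the race against another thread that already installed one for the same block (the gbincore() check above), getblk() now also returns its bufspace reservation before retrying via the goto loop just below. The create-outside-the-lock, re-check-under-the-lock, discard-on-collision pattern looks roughly like this userland model; the table, lookup_or_create() and the sizes are made up for the example.

```c
/*
 * Userland sketch of "create optimistically, re-check under the lock,
 * discard on collision".  The fixed table stands in for the per-bufobj
 * lookup structure; all names and sizes are invented.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NBLOCKS 8

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table[NBLOCKS];            /* block number -> object */

static void *
lookup_or_create(int blkno)
{
        void *new, *found;

        for (;;) {
                pthread_mutex_lock(&table_lock);
                found = table[blkno];
                pthread_mutex_unlock(&table_lock);
                if (found != NULL)
                        return (found);

                /* Build the object without holding the table lock. */
                new = malloc(64);
                if (new == NULL)
                        return (NULL);

                pthread_mutex_lock(&table_lock);
                if (table[blkno] != NULL) {
                        /* Lost the race; discard ours and retry the lookup. */
                        pthread_mutex_unlock(&table_lock);
                        free(new);
                        continue;
                }
                table[blkno] = new;
                pthread_mutex_unlock(&table_lock);
                return (new);
        }
}

int
main(void)
{
        printf("blk 3 -> %p\n", lookup_or_create(3));
        printf("blk 3 -> %p\n", lookup_or_create(3));
        return (0);
}
```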
|
3513 goto loop; 3514 } 3515 3516 /* 3517 * Insert the buffer into the hash, so that it can 3518 * be found by incore. 3519 */ 3520 bp->b_blkno = bp->b_lblkno = blkno; --- 17 unchanged lines hidden (view full) --- 3538 bp->b_flags &= ~B_VMIO; 3539 KASSERT(bp->b_bufobj->bo_object == NULL, 3540 ("ARGH! has b_bufobj->bo_object %p %p\n", 3541 bp, bp->b_bufobj->bo_object)); 3542 BUF_CHECK_MAPPED(bp); 3543 } 3544 3545 allocbuf(bp, size); | 3647 goto loop; 3648 } 3649 3650 /* 3651 * Insert the buffer into the hash, so that it can 3652 * be found by incore. 3653 */ 3654 bp->b_blkno = bp->b_lblkno = blkno; --- 17 unchanged lines hidden (view full) --- 3672 bp->b_flags &= ~B_VMIO; 3673 KASSERT(bp->b_bufobj->bo_object == NULL, 3674 ("ARGH! has b_bufobj->bo_object %p %p\n", 3675 bp, bp->b_bufobj->bo_object)); 3676 BUF_CHECK_MAPPED(bp); 3677 } 3678 3679 allocbuf(bp, size); |
3680 bufspace_release(maxsize); |
|
3546 bp->b_flags &= ~B_DONE; 3547 } 3548 CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); 3549 BUF_ASSERT_HELD(bp); 3550end: 3551 KASSERT(bp->b_bufobj == bo, 3552 ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); 3553 return (bp); --- 5 unchanged lines hidden (view full) --- 3559 */ 3560struct buf * 3561geteblk(int size, int flags) 3562{ 3563 struct buf *bp; 3564 int maxsize; 3565 3566 maxsize = (size + BKVAMASK) & ~BKVAMASK; | 3681 bp->b_flags &= ~B_DONE; 3682 } 3683 CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); 3684 BUF_ASSERT_HELD(bp); 3685end: 3686 KASSERT(bp->b_bufobj == bo, 3687 ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); 3688 return (bp); --- 5 unchanged lines hidden (view full) --- 3694 */ 3695struct buf * 3696geteblk(int size, int flags) 3697{ 3698 struct buf *bp; 3699 int maxsize; 3700 3701 maxsize = (size + BKVAMASK) & ~BKVAMASK; |
3567 while ((bp = getnewbuf(NULL, 0, 0, size, maxsize, flags)) == NULL) { | 3702 while ((bp = getnewbuf(NULL, 0, 0, maxsize, flags)) == NULL) { |
3568 if ((flags & GB_NOWAIT_BD) && 3569 (curthread->td_pflags & TDP_BUFNEED) != 0) 3570 return (NULL); 3571 } 3572 allocbuf(bp, size); | 3703 if ((flags & GB_NOWAIT_BD) && 3704 (curthread->td_pflags & TDP_BUFNEED) != 0) 3705 return (NULL); 3706 } 3707 allocbuf(bp, size); |
3708 bufspace_release(maxsize); |
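Like getnewbuf_kva() earlier in the diff, geteblk() rounds the request up to a BKVASIZE multiple with the (size + BKVAMASK) & ~BKVAMASK idiom before the space is reserved and later released. The same rounding, shown standalone with an arbitrary power-of-two chunk chosen for the example:

```c
/*
 * The "(size + MASK) & ~MASK" round-up idiom, standalone.  CHUNK here is
 * an arbitrary power of two picked for the example; in the kernel the
 * chunk is BKVASIZE and the mask BKVAMASK.
 */
#include <assert.h>
#include <stdio.h>

#define CHUNK   16384UL                 /* must be a power of two */
#define MASK    (CHUNK - 1)

static unsigned long
round_up(unsigned long size)
{
        return ((size + MASK) & ~MASK);
}

int
main(void)
{
        assert(round_up(1) == CHUNK);
        assert(round_up(CHUNK) == CHUNK);
        assert(round_up(CHUNK + 1) == 2 * CHUNK);
        printf("round_up(20000) = %lu\n", round_up(20000));
        return (0);
}
```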
|
3573 bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ 3574 BUF_ASSERT_HELD(bp); 3575 return (bp); 3576} 3577 3578/* 3579 * Truncate the backing store for a non-vmio buffer. 3580 */ --- 9 unchanged lines hidden (view full) --- 3590 bufmallocadjust(bp, 0); 3591 free(bp->b_data, M_BIOBUF); 3592 bp->b_data = bp->b_kvabase; 3593 bp->b_flags &= ~B_MALLOC; 3594 } 3595 return; 3596 } 3597 vm_hold_free_pages(bp, newbsize); | 3709 bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ 3710 BUF_ASSERT_HELD(bp); 3711 return (bp); 3712} 3713 3714/* 3715 * Truncate the backing store for a non-vmio buffer. 3716 */ --- 9 unchanged lines hidden (view full) --- 3726 bufmallocadjust(bp, 0); 3727 free(bp->b_data, M_BIOBUF); 3728 bp->b_data = bp->b_kvabase; 3729 bp->b_flags &= ~B_MALLOC; 3730 } 3731 return; 3732 } 3733 vm_hold_free_pages(bp, newbsize); |
3598 bufspaceadjust(bp, newbsize); | 3734 bufspace_adjust(bp, newbsize); |
3599} 3600 3601/* 3602 * Extend the backing for a non-VMIO buffer. 3603 */ 3604static void 3605vfs_nonvmio_extend(struct buf *bp, int newbsize) 3606{ --- 34 unchanged lines hidden (view full) --- 3641 newbsize = round_page(newbsize); 3642 } 3643 vm_hold_load_pages(bp, (vm_offset_t) bp->b_data + bp->b_bufsize, 3644 (vm_offset_t) bp->b_data + newbsize); 3645 if (origbuf != NULL) { 3646 bcopy(origbuf, bp->b_data, origbufsize); 3647 free(origbuf, M_BIOBUF); 3648 } | 3735} 3736 3737/* 3738 * Extend the backing for a non-VMIO buffer. 3739 */ 3740static void 3741vfs_nonvmio_extend(struct buf *bp, int newbsize) 3742{ --- 34 unchanged lines hidden (view full) --- 3777 newbsize = round_page(newbsize); 3778 } 3779 vm_hold_load_pages(bp, (vm_offset_t) bp->b_data + bp->b_bufsize, 3780 (vm_offset_t) bp->b_data + newbsize); 3781 if (origbuf != NULL) { 3782 bcopy(origbuf, bp->b_data, origbufsize); 3783 free(origbuf, M_BIOBUF); 3784 } |
3649 bufspaceadjust(bp, newbsize); | 3785 bufspace_adjust(bp, newbsize); |
3650} 3651 3652/* 3653 * This code constitutes the buffer memory from either anonymous system 3654 * memory (in the case of non-VMIO operations) or from an associated 3655 * VM object (in the case of VMIO operations). This code is able to 3656 * resize a buffer up or down. 3657 * --- 45 unchanged lines hidden (view full) --- 3703 if (size == 0 || bp->b_bufsize == 0) 3704 bp->b_flags |= B_CACHE; 3705 3706 if (newbsize < bp->b_bufsize) 3707 vfs_vmio_truncate(bp, desiredpages); 3708 /* XXX This looks as if it should be newbsize > b_bufsize */ 3709 else if (size > bp->b_bcount) 3710 vfs_vmio_extend(bp, desiredpages, size); | 3786} 3787 3788/* 3789 * This code constitutes the buffer memory from either anonymous system 3790 * memory (in the case of non-VMIO operations) or from an associated 3791 * VM object (in the case of VMIO operations). This code is able to 3792 * resize a buffer up or down. 3793 * --- 45 unchanged lines hidden (view full) --- 3839 if (size == 0 || bp->b_bufsize == 0) 3840 bp->b_flags |= B_CACHE; 3841 3842 if (newbsize < bp->b_bufsize) 3843 vfs_vmio_truncate(bp, desiredpages); 3844 /* XXX This looks as if it should be newbsize > b_bufsize */ 3845 else if (size > bp->b_bcount) 3846 vfs_vmio_extend(bp, desiredpages, size); |
3711 bufspaceadjust(bp, newbsize); | 3847 bufspace_adjust(bp, newbsize); |
3712 } 3713 bp->b_bcount = size; /* requested buffer size. */ 3714 return (1); 3715} 3716 3717extern int inflight_transient_maps; 3718 3719void --- 871 unchanged lines hidden (view full) --- 4591 4592 if (have_addr) { 4593 db_printf("usage: countfreebufs\n"); 4594 return; 4595 } 4596 4597 for (i = 0; i < nbuf; i++) { 4598 bp = &buf[i]; | 3848 } 3849 bp->b_bcount = size; /* requested buffer size. */ 3850 return (1); 3851} 3852 3853extern int inflight_transient_maps; 3854 3855void --- 871 unchanged lines hidden (view full) --- 4727 4728 if (have_addr) { 4729 db_printf("usage: countfreebufs\n"); 4730 return; 4731 } 4732 4733 for (i = 0; i < nbuf; i++) { 4734 bp = &buf[i]; |
4599 if ((bp->b_flags & B_INFREECNT) != 0) | 4735 if (bp->b_qindex == QUEUE_EMPTY) |
4600 nfree++; 4601 else 4602 used++; 4603 } 4604 4605 db_printf("Counted %d free, %d used (%d tot)\n", nfree, used, 4606 nfree + used); 4607 db_printf("numfreebuffers is %d\n", numfreebuffers); 4608} 4609#endif /* DDB */ | 4736 nfree++; 4737 else 4738 used++; 4739 } 4740 4741 db_printf("Counted %d free, %d used (%d tot)\n", nfree, used, 4742 nfree + used); 4743 db_printf("numfreebuffers is %d\n", numfreebuffers); 4744} 4745#endif /* DDB */ |
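For reference, the flushbufqueues() hunk earlier only swaps the single bqdirty mutex for the per-queue bqlocks[] array; the sentinel walk itself is unchanged. Below is a self-contained userland rendering of that walk using <sys/queue.h>, where the toy item list and the printf stand in for the real buffer queue, locking and flushing.

```c
/*
 * Userland sketch of the sentinel walk flushbufqueues() performs: a
 * sentinel node is inserted at the head and repeatedly moved past the
 * element just visited, so the scan keeps its place even though the
 * queue lock is dropped between iterations.  The item structure and
 * values are invented for the example.
 */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
        int value;                      /* -1 marks a sentinel */
        TAILQ_ENTRY(item) link;
};
TAILQ_HEAD(itemq, item);

int
main(void)
{
        struct itemq q = TAILQ_HEAD_INITIALIZER(q);
        struct item *it, *sentinel, nodes[4];
        int i;

        for (i = 0; i < 4; i++) {
                nodes[i].value = i;
                TAILQ_INSERT_TAIL(&q, &nodes[i], link);
        }
        sentinel = calloc(1, sizeof(*sentinel));
        if (sentinel == NULL)
                return (1);
        sentinel->value = -1;
        TAILQ_INSERT_HEAD(&q, sentinel, link);

        while ((it = TAILQ_NEXT(sentinel, link)) != NULL) {
                /* Keep our place: move the sentinel past the visited item. */
                TAILQ_REMOVE(&q, sentinel, link);
                TAILQ_INSERT_AFTER(&q, it, sentinel, link);
                if (it->value < 0)
                        continue;       /* skip other scans' sentinels */
                printf("visiting %d\n", it->value);
                /* The real code drops the queue lock and flushes here. */
        }
        TAILQ_REMOVE(&q, sentinel, link);
        free(sentinel);
        return (0);
}
```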