1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _LINUX_PAGEMAP_H 3 #define _LINUX_PAGEMAP_H 4 5 /* 6 * Copyright 1995 Linus Torvalds 7 */ 8 #include <linux/mm.h> 9 #include <linux/fs.h> 10 #include <linux/list.h> 11 #include <linux/highmem.h> 12 #include <linux/compiler.h> 13 #include <linux/uaccess.h> 14 #include <linux/gfp.h> 15 #include <linux/bitops.h> 16 #include <linux/hardirq.h> /* for in_interrupt() */ 17 #include <linux/hugetlb_inline.h> 18 19 struct folio_batch; 20 21 unsigned long invalidate_mapping_pages(struct address_space *mapping, 22 pgoff_t start, pgoff_t end); 23 24 static inline void invalidate_remote_inode(struct inode *inode) 25 { 26 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 27 S_ISLNK(inode->i_mode)) 28 invalidate_mapping_pages(inode->i_mapping, 0, -1); 29 } 30 int invalidate_inode_pages2(struct address_space *mapping); 31 int invalidate_inode_pages2_range(struct address_space *mapping, 32 pgoff_t start, pgoff_t end); 33 int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); 34 void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); 35 int filemap_invalidate_pages(struct address_space *mapping, 36 loff_t pos, loff_t end, bool nowait); 37 38 int write_inode_now(struct inode *, int sync); 39 int filemap_fdatawrite(struct address_space *); 40 int filemap_flush(struct address_space *); 41 int filemap_flush_nr(struct address_space *mapping, long *nr_to_write); 42 int filemap_fdatawait_keep_errors(struct address_space *mapping); 43 int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); 44 int filemap_fdatawait_range_keep_errors(struct address_space *mapping, 45 loff_t start_byte, loff_t end_byte); 46 int filemap_invalidate_inode(struct inode *inode, bool flush, 47 loff_t start, loff_t end); 48 49 static inline int filemap_fdatawait(struct address_space *mapping) 50 { 51 return filemap_fdatawait_range(mapping, 0, LLONG_MAX); 52 } 53 54 bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); 55 int filemap_write_and_wait_range(struct address_space *mapping, 56 loff_t lstart, loff_t lend); 57 int filemap_fdatawrite_range(struct address_space *mapping, 58 loff_t start, loff_t end); 59 int filemap_check_errors(struct address_space *mapping); 60 void __filemap_set_wb_err(struct address_space *mapping, int err); 61 int kiocb_write_and_wait(struct kiocb *iocb, size_t count); 62 63 static inline int filemap_write_and_wait(struct address_space *mapping) 64 { 65 return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); 66 } 67 68 /** 69 * filemap_set_wb_err - set a writeback error on an address_space 70 * @mapping: mapping in which to set writeback error 71 * @err: error to be set in mapping 72 * 73 * When writeback fails in some way, we must record that error so that 74 * userspace can be informed when fsync and the like are called. We endeavor 75 * to report errors on any file that was open at the time of the error. Some 76 * internal callers also need to know when writeback errors have occurred. 77 * 78 * When a writeback error occurs, most filesystems will want to call 79 * filemap_set_wb_err to record the error in the mapping so that it will be 80 * automatically reported whenever fsync is called on the file. 81 */ 82 static inline void filemap_set_wb_err(struct address_space *mapping, int err) 83 { 84 /* Fastpath for common case of no error */ 85 if (unlikely(err)) 86 __filemap_set_wb_err(mapping, err); 87 } 88 89 /** 90 * filemap_check_wb_err - has an error occurred since the mark was sampled? 91 * @mapping: mapping to check for writeback errors 92 * @since: previously-sampled errseq_t 93 * 94 * Grab the errseq_t value from the mapping, and see if it has changed "since" 95 * the given value was sampled. 96 * 97 * If it has then report the latest error set, otherwise return 0. 98 */ 99 static inline int filemap_check_wb_err(struct address_space *mapping, 100 errseq_t since) 101 { 102 return errseq_check(&mapping->wb_err, since); 103 } 104 105 /** 106 * filemap_sample_wb_err - sample the current errseq_t to test for later errors 107 * @mapping: mapping to be sampled 108 * 109 * Writeback errors are always reported relative to a particular sample point 110 * in the past. This function provides those sample points. 111 */ 112 static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) 113 { 114 return errseq_sample(&mapping->wb_err); 115 } 116 117 /** 118 * file_sample_sb_err - sample the current errseq_t to test for later errors 119 * @file: file pointer to be sampled 120 * 121 * Grab the most current superblock-level errseq_t value for the given 122 * struct file. 123 */ 124 static inline errseq_t file_sample_sb_err(struct file *file) 125 { 126 return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); 127 } 128 129 /* 130 * Flush file data before changing attributes. Caller must hold any locks 131 * required to prevent further writes to this file until we're done setting 132 * flags. 133 */ 134 static inline int inode_drain_writes(struct inode *inode) 135 { 136 inode_dio_wait(inode); 137 return filemap_write_and_wait(inode->i_mapping); 138 } 139 140 static inline bool mapping_empty(const struct address_space *mapping) 141 { 142 return xa_empty(&mapping->i_pages); 143 } 144 145 /* 146 * mapping_shrinkable - test if page cache state allows inode reclaim 147 * @mapping: the page cache mapping 148 * 149 * This checks the mapping's cache state for the pupose of inode 150 * reclaim and LRU management. 151 * 152 * The caller is expected to hold the i_lock, but is not required to 153 * hold the i_pages lock, which usually protects cache state. That's 154 * because the i_lock and the list_lru lock that protect the inode and 155 * its LRU state don't nest inside the irq-safe i_pages lock. 156 * 157 * Cache deletions are performed under the i_lock, which ensures that 158 * when an inode goes empty, it will reliably get queued on the LRU. 159 * 160 * Cache additions do not acquire the i_lock and may race with this 161 * check, in which case we'll report the inode as shrinkable when it 162 * has cache pages. This is okay: the shrinker also checks the 163 * refcount and the referenced bit, which will be elevated or set in 164 * the process of adding new cache pages to an inode. 165 */ 166 static inline bool mapping_shrinkable(const struct address_space *mapping) 167 { 168 void *head; 169 170 /* 171 * On highmem systems, there could be lowmem pressure from the 172 * inodes before there is highmem pressure from the page 173 * cache. Make inodes shrinkable regardless of cache state. 174 */ 175 if (IS_ENABLED(CONFIG_HIGHMEM)) 176 return true; 177 178 /* Cache completely empty? Shrink away. */ 179 head = rcu_access_pointer(mapping->i_pages.xa_head); 180 if (!head) 181 return true; 182 183 /* 184 * The xarray stores single offset-0 entries directly in the 185 * head pointer, which allows non-resident page cache entries 186 * to escape the shadow shrinker's list of xarray nodes. The 187 * inode shrinker needs to pick them up under memory pressure. 188 */ 189 if (!xa_is_node(head) && xa_is_value(head)) 190 return true; 191 192 return false; 193 } 194 195 /* 196 * Bits in mapping->flags. 197 */ 198 enum mapping_flags { 199 AS_EIO = 0, /* IO error on async write */ 200 AS_ENOSPC = 1, /* ENOSPC on async write */ 201 AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */ 202 AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */ 203 AS_EXITING = 4, /* final truncate in progress */ 204 /* writeback related tags are not used */ 205 AS_NO_WRITEBACK_TAGS = 5, 206 AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */ 207 AS_STABLE_WRITES = 7, /* must wait for writeback before modifying 208 folio contents */ 209 AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ 210 AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, 211 AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't 212 account usage to user cgroups */ 213 /* Bits 16-25 are used for FOLIO_ORDER */ 214 AS_FOLIO_ORDER_BITS = 5, 215 AS_FOLIO_ORDER_MIN = 16, 216 AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, 217 }; 218 219 #define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1) 220 #define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN) 221 #define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX) 222 #define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK) 223 224 /** 225 * mapping_set_error - record a writeback error in the address_space 226 * @mapping: the mapping in which an error should be set 227 * @error: the error to set in the mapping 228 * 229 * When writeback fails in some way, we must record that error so that 230 * userspace can be informed when fsync and the like are called. We endeavor 231 * to report errors on any file that was open at the time of the error. Some 232 * internal callers also need to know when writeback errors have occurred. 233 * 234 * When a writeback error occurs, most filesystems will want to call 235 * mapping_set_error to record the error in the mapping so that it can be 236 * reported when the application calls fsync(2). 237 */ 238 static inline void mapping_set_error(struct address_space *mapping, int error) 239 { 240 if (likely(!error)) 241 return; 242 243 /* Record in wb_err for checkers using errseq_t based tracking */ 244 __filemap_set_wb_err(mapping, error); 245 246 /* Record it in superblock */ 247 if (mapping->host) 248 errseq_set(&mapping->host->i_sb->s_wb_err, error); 249 250 /* Record it in flags for now, for legacy callers */ 251 if (error == -ENOSPC) 252 set_bit(AS_ENOSPC, &mapping->flags); 253 else 254 set_bit(AS_EIO, &mapping->flags); 255 } 256 257 static inline void mapping_set_unevictable(struct address_space *mapping) 258 { 259 set_bit(AS_UNEVICTABLE, &mapping->flags); 260 } 261 262 static inline void mapping_clear_unevictable(struct address_space *mapping) 263 { 264 clear_bit(AS_UNEVICTABLE, &mapping->flags); 265 } 266 267 static inline bool mapping_unevictable(const struct address_space *mapping) 268 { 269 return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); 270 } 271 272 static inline void mapping_set_exiting(struct address_space *mapping) 273 { 274 set_bit(AS_EXITING, &mapping->flags); 275 } 276 277 static inline int mapping_exiting(const struct address_space *mapping) 278 { 279 return test_bit(AS_EXITING, &mapping->flags); 280 } 281 282 static inline void mapping_set_no_writeback_tags(struct address_space *mapping) 283 { 284 set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); 285 } 286 287 static inline int mapping_use_writeback_tags(const struct address_space *mapping) 288 { 289 return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); 290 } 291 292 static inline bool mapping_release_always(const struct address_space *mapping) 293 { 294 return test_bit(AS_RELEASE_ALWAYS, &mapping->flags); 295 } 296 297 static inline void mapping_set_release_always(struct address_space *mapping) 298 { 299 set_bit(AS_RELEASE_ALWAYS, &mapping->flags); 300 } 301 302 static inline void mapping_clear_release_always(struct address_space *mapping) 303 { 304 clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); 305 } 306 307 static inline bool mapping_stable_writes(const struct address_space *mapping) 308 { 309 return test_bit(AS_STABLE_WRITES, &mapping->flags); 310 } 311 312 static inline void mapping_set_stable_writes(struct address_space *mapping) 313 { 314 set_bit(AS_STABLE_WRITES, &mapping->flags); 315 } 316 317 static inline void mapping_clear_stable_writes(struct address_space *mapping) 318 { 319 clear_bit(AS_STABLE_WRITES, &mapping->flags); 320 } 321 322 static inline void mapping_set_inaccessible(struct address_space *mapping) 323 { 324 /* 325 * It's expected inaccessible mappings are also unevictable. Compaction 326 * migrate scanner (isolate_migratepages_block()) relies on this to 327 * reduce page locking. 328 */ 329 set_bit(AS_UNEVICTABLE, &mapping->flags); 330 set_bit(AS_INACCESSIBLE, &mapping->flags); 331 } 332 333 static inline bool mapping_inaccessible(const struct address_space *mapping) 334 { 335 return test_bit(AS_INACCESSIBLE, &mapping->flags); 336 } 337 338 static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping) 339 { 340 set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); 341 } 342 343 static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct address_space *mapping) 344 { 345 return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); 346 } 347 348 static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) 349 { 350 return mapping->gfp_mask; 351 } 352 353 /* Restricts the given gfp_mask to what the mapping allows. */ 354 static inline gfp_t mapping_gfp_constraint(const struct address_space *mapping, 355 gfp_t gfp_mask) 356 { 357 return mapping_gfp_mask(mapping) & gfp_mask; 358 } 359 360 /* 361 * This is non-atomic. Only to be used before the mapping is activated. 362 * Probably needs a barrier... 363 */ 364 static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) 365 { 366 m->gfp_mask = mask; 367 } 368 369 /* 370 * There are some parts of the kernel which assume that PMD entries 371 * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, 372 * limit the maximum allocation order to PMD size. I'm not aware of any 373 * assumptions about maximum order if THP are disabled, but 8 seems like 374 * a good order (that's 1MB if you're using 4kB pages) 375 */ 376 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 377 #define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER 378 #else 379 #define PREFERRED_MAX_PAGECACHE_ORDER 8 380 #endif 381 382 /* 383 * xas_split_alloc() does not support arbitrary orders. This implies no 384 * 512MB THP on ARM64 with 64KB base page size. 385 */ 386 #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) 387 #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) 388 389 /* 390 * mapping_max_folio_size_supported() - Check the max folio size supported 391 * 392 * The filesystem should call this function at mount time if there is a 393 * requirement on the folio mapping size in the page cache. 394 */ 395 static inline size_t mapping_max_folio_size_supported(void) 396 { 397 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 398 return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER); 399 return PAGE_SIZE; 400 } 401 402 /* 403 * mapping_set_folio_order_range() - Set the orders supported by a file. 404 * @mapping: The address space of the file. 405 * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive). 406 * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive). 407 * 408 * The filesystem should call this function in its inode constructor to 409 * indicate which base size (min) and maximum size (max) of folio the VFS 410 * can use to cache the contents of the file. This should only be used 411 * if the filesystem needs special handling of folio sizes (ie there is 412 * something the core cannot know). 413 * Do not tune it based on, eg, i_size. 414 * 415 * Context: This should not be called while the inode is active as it 416 * is non-atomic. 417 */ 418 static inline void mapping_set_folio_order_range(struct address_space *mapping, 419 unsigned int min, 420 unsigned int max) 421 { 422 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 423 return; 424 425 if (min > MAX_PAGECACHE_ORDER) 426 min = MAX_PAGECACHE_ORDER; 427 428 if (max > MAX_PAGECACHE_ORDER) 429 max = MAX_PAGECACHE_ORDER; 430 431 if (max < min) 432 max = min; 433 434 mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) | 435 (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX); 436 } 437 438 static inline void mapping_set_folio_min_order(struct address_space *mapping, 439 unsigned int min) 440 { 441 mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER); 442 } 443 444 /** 445 * mapping_set_large_folios() - Indicate the file supports large folios. 446 * @mapping: The address space of the file. 447 * 448 * The filesystem should call this function in its inode constructor to 449 * indicate that the VFS can use large folios to cache the contents of 450 * the file. 451 * 452 * Context: This should not be called while the inode is active as it 453 * is non-atomic. 454 */ 455 static inline void mapping_set_large_folios(struct address_space *mapping) 456 { 457 mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER); 458 } 459 460 static inline unsigned int 461 mapping_max_folio_order(const struct address_space *mapping) 462 { 463 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 464 return 0; 465 return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX; 466 } 467 468 static inline unsigned int 469 mapping_min_folio_order(const struct address_space *mapping) 470 { 471 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 472 return 0; 473 return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; 474 } 475 476 static inline unsigned long 477 mapping_min_folio_nrpages(const struct address_space *mapping) 478 { 479 return 1UL << mapping_min_folio_order(mapping); 480 } 481 482 static inline unsigned long 483 mapping_min_folio_nrbytes(const struct address_space *mapping) 484 { 485 return mapping_min_folio_nrpages(mapping) << PAGE_SHIFT; 486 } 487 488 /** 489 * mapping_align_index() - Align index for this mapping. 490 * @mapping: The address_space. 491 * @index: The page index. 492 * 493 * The index of a folio must be naturally aligned. If you are adding a 494 * new folio to the page cache and need to know what index to give it, 495 * call this function. 496 */ 497 static inline pgoff_t mapping_align_index(const struct address_space *mapping, 498 pgoff_t index) 499 { 500 return round_down(index, mapping_min_folio_nrpages(mapping)); 501 } 502 503 /* 504 * Large folio support currently depends on THP. These dependencies are 505 * being worked on but are not yet fixed. 506 */ 507 static inline bool mapping_large_folio_support(const struct address_space *mapping) 508 { 509 /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ 510 VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON, 511 "Anonymous mapping always supports large folio"); 512 513 return mapping_max_folio_order(mapping) > 0; 514 } 515 516 /** 517 * mapping_pmd_folio_support() - Check if a mapping supports PMD-sized folio 518 * @mapping: The address_space 519 * 520 * While some mappings support large folios, they might not support PMD-sized 521 * folios. This function checks whether a mapping supports PMD-sized folios. 522 * For example, khugepaged needs this information before attempting to 523 * collapsing THPs. 524 * 525 * Return: True if PMD-sized folios are supported, otherwise false. 526 */ 527 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 528 static inline bool mapping_pmd_folio_support(const struct address_space *mapping) 529 { 530 /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ 531 VM_WARN_ON_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON); 532 533 return mapping_min_folio_order(mapping) <= PMD_ORDER && 534 mapping_max_folio_order(mapping) >= PMD_ORDER; 535 } 536 #else 537 static inline bool mapping_pmd_folio_support(const struct address_space *mapping) 538 { 539 return false; 540 } 541 #endif 542 543 /* Return the maximum folio size for this pagecache mapping, in bytes. */ 544 static inline size_t mapping_max_folio_size(const struct address_space *mapping) 545 { 546 return PAGE_SIZE << mapping_max_folio_order(mapping); 547 } 548 549 struct address_space *folio_mapping(const struct folio *folio); 550 551 /** 552 * folio_flush_mapping - Find the file mapping this folio belongs to. 553 * @folio: The folio. 554 * 555 * For folios which are in the page cache, return the mapping that this 556 * page belongs to. Anonymous folios return NULL, even if they're in 557 * the swap cache. Other kinds of folio also return NULL. 558 * 559 * This is ONLY used by architecture cache flushing code. If you aren't 560 * writing cache flushing code, you want either folio_mapping() or 561 * folio_file_mapping(). 562 */ 563 static inline struct address_space *folio_flush_mapping(struct folio *folio) 564 { 565 if (unlikely(folio_test_swapcache(folio))) 566 return NULL; 567 568 return folio_mapping(folio); 569 } 570 571 /** 572 * folio_inode - Get the host inode for this folio. 573 * @folio: The folio. 574 * 575 * For folios which are in the page cache, return the inode that this folio 576 * belongs to. 577 * 578 * Do not call this for folios which aren't in the page cache. 579 */ 580 static inline struct inode *folio_inode(struct folio *folio) 581 { 582 return folio->mapping->host; 583 } 584 585 /** 586 * folio_attach_private - Attach private data to a folio. 587 * @folio: Folio to attach data to. 588 * @data: Data to attach to folio. 589 * 590 * Attaching private data to a folio increments the page's reference count. 591 * The data must be detached before the folio will be freed. 592 */ 593 static inline void folio_attach_private(struct folio *folio, void *data) 594 { 595 folio_get(folio); 596 folio->private = data; 597 folio_set_private(folio); 598 } 599 600 /** 601 * folio_change_private - Change private data on a folio. 602 * @folio: Folio to change the data on. 603 * @data: Data to set on the folio. 604 * 605 * Change the private data attached to a folio and return the old 606 * data. The page must previously have had data attached and the data 607 * must be detached before the folio will be freed. 608 * 609 * Return: Data that was previously attached to the folio. 610 */ 611 static inline void *folio_change_private(struct folio *folio, void *data) 612 { 613 void *old = folio_get_private(folio); 614 615 folio->private = data; 616 return old; 617 } 618 619 /** 620 * folio_detach_private - Detach private data from a folio. 621 * @folio: Folio to detach data from. 622 * 623 * Removes the data that was previously attached to the folio and decrements 624 * the refcount on the page. 625 * 626 * Return: Data that was attached to the folio. 627 */ 628 static inline void *folio_detach_private(struct folio *folio) 629 { 630 void *data = folio_get_private(folio); 631 632 if (!folio_test_private(folio)) 633 return NULL; 634 folio_clear_private(folio); 635 folio->private = NULL; 636 folio_put(folio); 637 638 return data; 639 } 640 641 static inline void attach_page_private(struct page *page, void *data) 642 { 643 folio_attach_private(page_folio(page), data); 644 } 645 646 static inline void *detach_page_private(struct page *page) 647 { 648 return folio_detach_private(page_folio(page)); 649 } 650 651 #ifdef CONFIG_NUMA 652 struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, 653 struct mempolicy *policy); 654 #else 655 static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order, 656 struct mempolicy *policy) 657 { 658 return folio_alloc_noprof(gfp, order); 659 } 660 #endif 661 662 #define filemap_alloc_folio(...) \ 663 alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__)) 664 665 static inline struct page *__page_cache_alloc(gfp_t gfp) 666 { 667 return &filemap_alloc_folio(gfp, 0, NULL)->page; 668 } 669 670 static inline gfp_t readahead_gfp_mask(struct address_space *x) 671 { 672 return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; 673 } 674 675 typedef int filler_t(struct file *, struct folio *); 676 677 pgoff_t page_cache_next_miss(struct address_space *mapping, 678 pgoff_t index, unsigned long max_scan); 679 pgoff_t page_cache_prev_miss(struct address_space *mapping, 680 pgoff_t index, unsigned long max_scan); 681 682 /** 683 * typedef fgf_t - Flags for getting folios from the page cache. 684 * 685 * Most users of the page cache will not need to use these flags; 686 * there are convenience functions such as filemap_get_folio() and 687 * filemap_lock_folio(). For users which need more control over exactly 688 * what is done with the folios, these flags to __filemap_get_folio() 689 * are available. 690 * 691 * * %FGP_ACCESSED - The folio will be marked accessed. 692 * * %FGP_LOCK - The folio is returned locked. 693 * * %FGP_CREAT - If no folio is present then a new folio is allocated, 694 * added to the page cache and the VM's LRU list. The folio is 695 * returned locked. 696 * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the 697 * folio is already in cache. If the folio was allocated, unlock it 698 * before returning so the caller can do the same dance. 699 * * %FGP_WRITE - The folio will be written to by the caller. 700 * * %FGP_NOFS - __GFP_FS will get cleared in gfp. 701 * * %FGP_NOWAIT - Don't block on the folio lock. 702 * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) 703 * * %FGP_DONTCACHE - Uncached buffered IO 704 * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() 705 * implementation. 706 */ 707 typedef unsigned int __bitwise fgf_t; 708 709 #define FGP_ACCESSED ((__force fgf_t)0x00000001) 710 #define FGP_LOCK ((__force fgf_t)0x00000002) 711 #define FGP_CREAT ((__force fgf_t)0x00000004) 712 #define FGP_WRITE ((__force fgf_t)0x00000008) 713 #define FGP_NOFS ((__force fgf_t)0x00000010) 714 #define FGP_NOWAIT ((__force fgf_t)0x00000020) 715 #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) 716 #define FGP_STABLE ((__force fgf_t)0x00000080) 717 #define FGP_DONTCACHE ((__force fgf_t)0x00000100) 718 #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ 719 720 #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) 721 722 static inline unsigned int filemap_get_order(size_t size) 723 { 724 unsigned int shift = ilog2(size); 725 726 if (shift <= PAGE_SHIFT) 727 return 0; 728 729 return shift - PAGE_SHIFT; 730 } 731 732 /** 733 * fgf_set_order - Encode a length in the fgf_t flags. 734 * @size: The suggested size of the folio to create. 735 * 736 * The caller of __filemap_get_folio() can use this to suggest a preferred 737 * size for the folio that is created. If there is already a folio at 738 * the index, it will be returned, no matter what its size. If a folio 739 * is freshly created, it may be of a different size than requested 740 * due to alignment constraints, memory pressure, or the presence of 741 * other folios at nearby indices. 742 */ 743 static inline fgf_t fgf_set_order(size_t size) 744 { 745 unsigned int order = filemap_get_order(size); 746 747 if (!order) 748 return 0; 749 return (__force fgf_t)(order << 26); 750 } 751 752 void *filemap_get_entry(struct address_space *mapping, pgoff_t index); 753 struct folio *__filemap_get_folio_mpol(struct address_space *mapping, 754 pgoff_t index, fgf_t fgf_flags, gfp_t gfp, struct mempolicy *policy); 755 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, 756 fgf_t fgp_flags, gfp_t gfp); 757 758 static inline struct folio *__filemap_get_folio(struct address_space *mapping, 759 pgoff_t index, fgf_t fgf_flags, gfp_t gfp) 760 { 761 return __filemap_get_folio_mpol(mapping, index, fgf_flags, gfp, NULL); 762 } 763 764 /** 765 * write_begin_get_folio - Get folio for write_begin with flags. 766 * @iocb: The kiocb passed from write_begin (may be NULL). 767 * @mapping: The address space to search. 768 * @index: The page cache index. 769 * @len: Length of data being written. 770 * 771 * This is a helper for filesystem write_begin() implementations. 772 * It wraps __filemap_get_folio(), setting appropriate flags in 773 * the write begin context. 774 * 775 * Return: A folio or an ERR_PTR. 776 */ 777 static inline struct folio *write_begin_get_folio(const struct kiocb *iocb, 778 struct address_space *mapping, pgoff_t index, size_t len) 779 { 780 fgf_t fgp_flags = FGP_WRITEBEGIN; 781 782 fgp_flags |= fgf_set_order(len); 783 784 if (iocb && iocb->ki_flags & IOCB_DONTCACHE) 785 fgp_flags |= FGP_DONTCACHE; 786 787 return __filemap_get_folio(mapping, index, fgp_flags, 788 mapping_gfp_mask(mapping)); 789 } 790 791 /** 792 * filemap_get_folio - Find and get a folio. 793 * @mapping: The address_space to search. 794 * @index: The page index. 795 * 796 * Looks up the page cache entry at @mapping & @index. If a folio is 797 * present, it is returned with an increased refcount. 798 * 799 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for 800 * this index. Will not return a shadow, swap or DAX entry. 801 */ 802 static inline struct folio *filemap_get_folio(struct address_space *mapping, 803 pgoff_t index) 804 { 805 return __filemap_get_folio(mapping, index, 0, 0); 806 } 807 808 /** 809 * filemap_lock_folio - Find and lock a folio. 810 * @mapping: The address_space to search. 811 * @index: The page index. 812 * 813 * Looks up the page cache entry at @mapping & @index. If a folio is 814 * present, it is returned locked with an increased refcount. 815 * 816 * Context: May sleep. 817 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for 818 * this index. Will not return a shadow, swap or DAX entry. 819 */ 820 static inline struct folio *filemap_lock_folio(struct address_space *mapping, 821 pgoff_t index) 822 { 823 return __filemap_get_folio(mapping, index, FGP_LOCK, 0); 824 } 825 826 /** 827 * filemap_grab_folio - grab a folio from the page cache 828 * @mapping: The address space to search 829 * @index: The page index 830 * 831 * Looks up the page cache entry at @mapping & @index. If no folio is found, 832 * a new folio is created. The folio is locked, marked as accessed, and 833 * returned. 834 * 835 * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found 836 * and failed to create a folio. 837 */ 838 static inline struct folio *filemap_grab_folio(struct address_space *mapping, 839 pgoff_t index) 840 { 841 return __filemap_get_folio(mapping, index, 842 FGP_LOCK | FGP_ACCESSED | FGP_CREAT, 843 mapping_gfp_mask(mapping)); 844 } 845 846 /** 847 * find_get_page - find and get a page reference 848 * @mapping: the address_space to search 849 * @offset: the page index 850 * 851 * Looks up the page cache slot at @mapping & @offset. If there is a 852 * page cache page, it is returned with an increased refcount. 853 * 854 * Otherwise, %NULL is returned. 855 */ 856 static inline struct page *find_get_page(struct address_space *mapping, 857 pgoff_t offset) 858 { 859 return pagecache_get_page(mapping, offset, 0, 0); 860 } 861 862 static inline struct page *find_get_page_flags(struct address_space *mapping, 863 pgoff_t offset, fgf_t fgp_flags) 864 { 865 return pagecache_get_page(mapping, offset, fgp_flags, 0); 866 } 867 868 /** 869 * find_lock_page - locate, pin and lock a pagecache page 870 * @mapping: the address_space to search 871 * @index: the page index 872 * 873 * Looks up the page cache entry at @mapping & @index. If there is a 874 * page cache page, it is returned locked and with an increased 875 * refcount. 876 * 877 * Context: May sleep. 878 * Return: A struct page or %NULL if there is no page in the cache for this 879 * index. 880 */ 881 static inline struct page *find_lock_page(struct address_space *mapping, 882 pgoff_t index) 883 { 884 return pagecache_get_page(mapping, index, FGP_LOCK, 0); 885 } 886 887 /** 888 * find_or_create_page - locate or add a pagecache page 889 * @mapping: the page's address_space 890 * @index: the page's index into the mapping 891 * @gfp_mask: page allocation mode 892 * 893 * Looks up the page cache slot at @mapping & @offset. If there is a 894 * page cache page, it is returned locked and with an increased 895 * refcount. 896 * 897 * If the page is not present, a new page is allocated using @gfp_mask 898 * and added to the page cache and the VM's LRU list. The page is 899 * returned locked and with an increased refcount. 900 * 901 * On memory exhaustion, %NULL is returned. 902 * 903 * find_or_create_page() may sleep, even if @gfp_flags specifies an 904 * atomic allocation! 905 */ 906 static inline struct page *find_or_create_page(struct address_space *mapping, 907 pgoff_t index, gfp_t gfp_mask) 908 { 909 return pagecache_get_page(mapping, index, 910 FGP_LOCK|FGP_ACCESSED|FGP_CREAT, 911 gfp_mask); 912 } 913 914 /** 915 * grab_cache_page_nowait - returns locked page at given index in given cache 916 * @mapping: target address_space 917 * @index: the page index 918 * 919 * Returns locked page at given index in given cache, creating it if 920 * needed, but do not wait if the page is locked or to reclaim memory. 921 * This is intended for speculative data generators, where the data can 922 * be regenerated if the page couldn't be grabbed. This routine should 923 * be safe to call while holding the lock for another page. 924 * 925 * Clear __GFP_FS when allocating the page to avoid recursion into the fs 926 * and deadlock against the caller's locked page. 927 */ 928 static inline struct page *grab_cache_page_nowait(struct address_space *mapping, 929 pgoff_t index) 930 { 931 return pagecache_get_page(mapping, index, 932 FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, 933 mapping_gfp_mask(mapping)); 934 } 935 936 /** 937 * folio_next_index - Get the index of the next folio. 938 * @folio: The current folio. 939 * 940 * Return: The index of the folio which follows this folio in the file. 941 */ 942 static inline pgoff_t folio_next_index(const struct folio *folio) 943 { 944 return folio->index + folio_nr_pages(folio); 945 } 946 947 /** 948 * folio_next_pos - Get the file position of the next folio. 949 * @folio: The current folio. 950 * 951 * Return: The position of the folio which follows this folio in the file. 952 */ 953 static inline loff_t folio_next_pos(const struct folio *folio) 954 { 955 return (loff_t)folio_next_index(folio) << PAGE_SHIFT; 956 } 957 958 /** 959 * folio_file_page - The page for a particular index. 960 * @folio: The folio which contains this index. 961 * @index: The index we want to look up. 962 * 963 * Sometimes after looking up a folio in the page cache, we need to 964 * obtain the specific page for an index (eg a page fault). 965 * 966 * Return: The page containing the file data for this index. 967 */ 968 static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) 969 { 970 return folio_page(folio, index & (folio_nr_pages(folio) - 1)); 971 } 972 973 /** 974 * folio_contains - Does this folio contain this index? 975 * @folio: The folio. 976 * @index: The page index within the file. 977 * 978 * Context: The caller should have the folio locked and ensure 979 * e.g., shmem did not move this folio to the swap cache. 980 * Return: true or false. 981 */ 982 static inline bool folio_contains(const struct folio *folio, pgoff_t index) 983 { 984 VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); 985 return index - folio->index < folio_nr_pages(folio); 986 } 987 988 unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, 989 pgoff_t end, struct folio_batch *fbatch); 990 unsigned filemap_get_folios_contig(struct address_space *mapping, 991 pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); 992 unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, 993 pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); 994 unsigned filemap_get_folios_dirty(struct address_space *mapping, 995 pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); 996 997 struct folio *read_cache_folio(struct address_space *, pgoff_t index, 998 filler_t *filler, struct file *file); 999 struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, 1000 gfp_t flags); 1001 struct page *read_cache_page(struct address_space *, pgoff_t index, 1002 filler_t *filler, struct file *file); 1003 extern struct page * read_cache_page_gfp(struct address_space *mapping, 1004 pgoff_t index, gfp_t gfp_mask); 1005 1006 static inline struct page *read_mapping_page(struct address_space *mapping, 1007 pgoff_t index, struct file *file) 1008 { 1009 return read_cache_page(mapping, index, NULL, file); 1010 } 1011 1012 static inline struct folio *read_mapping_folio(struct address_space *mapping, 1013 pgoff_t index, struct file *file) 1014 { 1015 return read_cache_folio(mapping, index, NULL, file); 1016 } 1017 1018 /** 1019 * page_pgoff - Calculate the logical page offset of this page. 1020 * @folio: The folio containing this page. 1021 * @page: The page which we need the offset of. 1022 * 1023 * For file pages, this is the offset from the beginning of the file 1024 * in units of PAGE_SIZE. For anonymous pages, this is the offset from 1025 * the beginning of the anon_vma in units of PAGE_SIZE. This will 1026 * return nonsense for KSM pages. 1027 * 1028 * Context: Caller must have a reference on the folio or otherwise 1029 * prevent it from being split or freed. 1030 * 1031 * Return: The offset in units of PAGE_SIZE. 1032 */ 1033 static inline pgoff_t page_pgoff(const struct folio *folio, 1034 const struct page *page) 1035 { 1036 return folio->index + folio_page_idx(folio, page); 1037 } 1038 1039 /** 1040 * folio_pos - Returns the byte position of this folio in its file. 1041 * @folio: The folio. 1042 */ 1043 static inline loff_t folio_pos(const struct folio *folio) 1044 { 1045 return ((loff_t)folio->index) * PAGE_SIZE; 1046 } 1047 1048 /* 1049 * Return byte-offset into filesystem object for page. 1050 */ 1051 static inline loff_t page_offset(struct page *page) 1052 { 1053 struct folio *folio = page_folio(page); 1054 1055 return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE; 1056 } 1057 1058 /* 1059 * Get the offset in PAGE_SIZE (even for hugetlb folios). 1060 */ 1061 static inline pgoff_t folio_pgoff(const struct folio *folio) 1062 { 1063 return folio->index; 1064 } 1065 1066 static inline pgoff_t linear_page_index(const struct vm_area_struct *vma, 1067 const unsigned long address) 1068 { 1069 pgoff_t pgoff; 1070 pgoff = (address - vma->vm_start) >> PAGE_SHIFT; 1071 pgoff += vma->vm_pgoff; 1072 return pgoff; 1073 } 1074 1075 struct wait_page_key { 1076 struct folio *folio; 1077 int bit_nr; 1078 int page_match; 1079 }; 1080 1081 struct wait_page_queue { 1082 struct folio *folio; 1083 int bit_nr; 1084 wait_queue_entry_t wait; 1085 }; 1086 1087 static inline bool wake_page_match(struct wait_page_queue *wait_page, 1088 struct wait_page_key *key) 1089 { 1090 if (wait_page->folio != key->folio) 1091 return false; 1092 key->page_match = 1; 1093 1094 if (wait_page->bit_nr != key->bit_nr) 1095 return false; 1096 1097 return true; 1098 } 1099 1100 void __folio_lock(struct folio *folio); 1101 int __folio_lock_killable(struct folio *folio); 1102 vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf); 1103 void unlock_page(struct page *page); 1104 void folio_unlock(struct folio *folio); 1105 1106 /** 1107 * folio_trylock() - Attempt to lock a folio. 1108 * @folio: The folio to attempt to lock. 1109 * 1110 * Sometimes it is undesirable to wait for a folio to be unlocked (eg 1111 * when the locks are being taken in the wrong order, or if making 1112 * progress through a batch of folios is more important than processing 1113 * them in order). Usually folio_lock() is the correct function to call. 1114 * 1115 * Context: Any context. 1116 * Return: Whether the lock was successfully acquired. 1117 */ 1118 static inline bool folio_trylock(struct folio *folio) 1119 { 1120 return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); 1121 } 1122 1123 /* 1124 * Return true if the page was successfully locked 1125 */ 1126 static inline bool trylock_page(struct page *page) 1127 { 1128 return folio_trylock(page_folio(page)); 1129 } 1130 1131 /** 1132 * folio_lock() - Lock this folio. 1133 * @folio: The folio to lock. 1134 * 1135 * The folio lock protects against many things, probably more than it 1136 * should. It is primarily held while a folio is being brought uptodate, 1137 * either from its backing file or from swap. It is also held while a 1138 * folio is being truncated from its address_space, so holding the lock 1139 * is sufficient to keep folio->mapping stable. 1140 * 1141 * The folio lock is also held while write() is modifying the page to 1142 * provide POSIX atomicity guarantees (as long as the write does not 1143 * cross a page boundary). Other modifications to the data in the folio 1144 * do not hold the folio lock and can race with writes, eg DMA and stores 1145 * to mapped pages. 1146 * 1147 * Context: May sleep. If you need to acquire the locks of two or 1148 * more folios, they must be in order of ascending index, if they are 1149 * in the same address_space. If they are in different address_spaces, 1150 * acquire the lock of the folio which belongs to the address_space which 1151 * has the lowest address in memory first. 1152 */ 1153 static inline void folio_lock(struct folio *folio) 1154 { 1155 might_sleep(); 1156 if (!folio_trylock(folio)) 1157 __folio_lock(folio); 1158 } 1159 1160 /** 1161 * lock_page() - Lock the folio containing this page. 1162 * @page: The page to lock. 1163 * 1164 * See folio_lock() for a description of what the lock protects. 1165 * This is a legacy function and new code should probably use folio_lock() 1166 * instead. 1167 * 1168 * Context: May sleep. Pages in the same folio share a lock, so do not 1169 * attempt to lock two pages which share a folio. 1170 */ 1171 static inline void lock_page(struct page *page) 1172 { 1173 struct folio *folio; 1174 might_sleep(); 1175 1176 folio = page_folio(page); 1177 if (!folio_trylock(folio)) 1178 __folio_lock(folio); 1179 } 1180 1181 /** 1182 * folio_lock_killable() - Lock this folio, interruptible by a fatal signal. 1183 * @folio: The folio to lock. 1184 * 1185 * Attempts to lock the folio, like folio_lock(), except that the sleep 1186 * to acquire the lock is interruptible by a fatal signal. 1187 * 1188 * Context: May sleep; see folio_lock(). 1189 * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received. 1190 */ 1191 static inline int folio_lock_killable(struct folio *folio) 1192 { 1193 might_sleep(); 1194 if (!folio_trylock(folio)) 1195 return __folio_lock_killable(folio); 1196 return 0; 1197 } 1198 1199 /* 1200 * folio_lock_or_retry - Lock the folio, unless this would block and the 1201 * caller indicated that it can handle a retry. 1202 * 1203 * Return value and mmap_lock implications depend on flags; see 1204 * __folio_lock_or_retry(). 1205 */ 1206 static inline vm_fault_t folio_lock_or_retry(struct folio *folio, 1207 struct vm_fault *vmf) 1208 { 1209 might_sleep(); 1210 if (!folio_trylock(folio)) 1211 return __folio_lock_or_retry(folio, vmf); 1212 return 0; 1213 } 1214 1215 /* 1216 * This is exported only for folio_wait_locked/folio_wait_writeback, etc., 1217 * and should not be used directly. 1218 */ 1219 void folio_wait_bit(struct folio *folio, int bit_nr); 1220 int folio_wait_bit_killable(struct folio *folio, int bit_nr); 1221 1222 /* 1223 * Wait for a folio to be unlocked. 1224 * 1225 * This must be called with the caller "holding" the folio, 1226 * ie with increased folio reference count so that the folio won't 1227 * go away during the wait. 1228 */ 1229 static inline void folio_wait_locked(struct folio *folio) 1230 { 1231 if (folio_test_locked(folio)) 1232 folio_wait_bit(folio, PG_locked); 1233 } 1234 1235 static inline int folio_wait_locked_killable(struct folio *folio) 1236 { 1237 if (!folio_test_locked(folio)) 1238 return 0; 1239 return folio_wait_bit_killable(folio, PG_locked); 1240 } 1241 1242 void folio_end_read(struct folio *folio, bool success); 1243 void wait_on_page_writeback(struct page *page); 1244 void folio_wait_writeback(struct folio *folio); 1245 int folio_wait_writeback_killable(struct folio *folio); 1246 void end_page_writeback(struct page *page); 1247 void folio_end_writeback(struct folio *folio); 1248 void folio_end_writeback_no_dropbehind(struct folio *folio); 1249 void folio_end_dropbehind(struct folio *folio); 1250 void folio_wait_stable(struct folio *folio); 1251 void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); 1252 void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); 1253 void __folio_cancel_dirty(struct folio *folio); 1254 static inline void folio_cancel_dirty(struct folio *folio) 1255 { 1256 /* Avoid atomic ops, locking, etc. when not actually needed. */ 1257 if (folio_test_dirty(folio)) 1258 __folio_cancel_dirty(folio); 1259 } 1260 bool folio_clear_dirty_for_io(struct folio *folio); 1261 bool clear_page_dirty_for_io(struct page *page); 1262 void folio_invalidate(struct folio *folio, size_t offset, size_t length); 1263 bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); 1264 1265 #ifdef CONFIG_MIGRATION 1266 int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, 1267 struct folio *src, enum migrate_mode mode); 1268 #else 1269 #define filemap_migrate_folio NULL 1270 #endif 1271 void folio_end_private_2(struct folio *folio); 1272 void folio_wait_private_2(struct folio *folio); 1273 int folio_wait_private_2_killable(struct folio *folio); 1274 1275 /* 1276 * Fault in userspace address range. 1277 */ 1278 size_t fault_in_writeable(char __user *uaddr, size_t size); 1279 size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); 1280 size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); 1281 size_t fault_in_readable(const char __user *uaddr, size_t size); 1282 1283 int add_to_page_cache_lru(struct page *page, struct address_space *mapping, 1284 pgoff_t index, gfp_t gfp); 1285 int filemap_add_folio(struct address_space *mapping, struct folio *folio, 1286 pgoff_t index, gfp_t gfp); 1287 void filemap_remove_folio(struct folio *folio); 1288 void __filemap_remove_folio(struct folio *folio, void *shadow); 1289 void replace_page_cache_folio(struct folio *old, struct folio *new); 1290 void delete_from_page_cache_batch(struct address_space *mapping, 1291 struct folio_batch *fbatch); 1292 bool filemap_release_folio(struct folio *folio, gfp_t gfp); 1293 loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, 1294 int whence); 1295 1296 /* Must be non-static for BPF error injection */ 1297 int __filemap_add_folio(struct address_space *mapping, struct folio *folio, 1298 pgoff_t index, gfp_t gfp, void **shadowp); 1299 1300 bool filemap_range_has_writeback(struct address_space *mapping, 1301 loff_t start_byte, loff_t end_byte); 1302 1303 /** 1304 * filemap_range_needs_writeback - check if range potentially needs writeback 1305 * @mapping: address space within which to check 1306 * @start_byte: offset in bytes where the range starts 1307 * @end_byte: offset in bytes where the range ends (inclusive) 1308 * 1309 * Find at least one page in the range supplied, usually used to check if 1310 * direct writing in this range will trigger a writeback. Used by O_DIRECT 1311 * read/write with IOCB_NOWAIT, to see if the caller needs to do 1312 * filemap_write_and_wait_range() before proceeding. 1313 * 1314 * Return: %true if the caller should do filemap_write_and_wait_range() before 1315 * doing O_DIRECT to a page in this range, %false otherwise. 1316 */ 1317 static inline bool filemap_range_needs_writeback(struct address_space *mapping, 1318 loff_t start_byte, 1319 loff_t end_byte) 1320 { 1321 if (!mapping->nrpages) 1322 return false; 1323 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && 1324 !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) 1325 return false; 1326 return filemap_range_has_writeback(mapping, start_byte, end_byte); 1327 } 1328 1329 /** 1330 * struct readahead_control - Describes a readahead request. 1331 * 1332 * A readahead request is for consecutive pages. Filesystems which 1333 * implement the ->readahead method should call readahead_folio() or 1334 * __readahead_batch() in a loop and attempt to start reads into each 1335 * folio in the request. 1336 * 1337 * Most of the fields in this struct are private and should be accessed 1338 * by the functions below. 1339 * 1340 * @file: The file, used primarily by network filesystems for authentication. 1341 * May be NULL if invoked internally by the filesystem. 1342 * @mapping: Readahead this filesystem object. 1343 * @ra: File readahead state. May be NULL. 1344 */ 1345 struct readahead_control { 1346 struct file *file; 1347 struct address_space *mapping; 1348 struct file_ra_state *ra; 1349 /* private: use the readahead_* accessors instead */ 1350 pgoff_t _index; 1351 unsigned int _nr_pages; 1352 unsigned int _batch_count; 1353 bool dropbehind; 1354 bool _workingset; 1355 unsigned long _pflags; 1356 }; 1357 1358 #define DEFINE_READAHEAD(ractl, f, r, m, i) \ 1359 struct readahead_control ractl = { \ 1360 .file = f, \ 1361 .mapping = m, \ 1362 .ra = r, \ 1363 ._index = i, \ 1364 } 1365 1366 #define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) 1367 1368 void page_cache_ra_unbounded(struct readahead_control *, 1369 unsigned long nr_to_read, unsigned long lookahead_count); 1370 void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); 1371 void page_cache_async_ra(struct readahead_control *, struct folio *, 1372 unsigned long req_count); 1373 void readahead_expand(struct readahead_control *ractl, 1374 loff_t new_start, size_t new_len); 1375 1376 /** 1377 * page_cache_sync_readahead - generic file readahead 1378 * @mapping: address_space which holds the pagecache and I/O vectors 1379 * @ra: file_ra_state which holds the readahead state 1380 * @file: Used by the filesystem for authentication. 1381 * @index: Index of first page to be read. 1382 * @req_count: Total number of pages being read by the caller. 1383 * 1384 * page_cache_sync_readahead() should be called when a cache miss happened: 1385 * it will submit the read. The readahead logic may decide to piggyback more 1386 * pages onto the read request if access patterns suggest it will improve 1387 * performance. 1388 */ 1389 static inline 1390 void page_cache_sync_readahead(struct address_space *mapping, 1391 struct file_ra_state *ra, struct file *file, pgoff_t index, 1392 unsigned long req_count) 1393 { 1394 DEFINE_READAHEAD(ractl, file, ra, mapping, index); 1395 page_cache_sync_ra(&ractl, req_count); 1396 } 1397 1398 /** 1399 * page_cache_async_readahead - file readahead for marked pages 1400 * @mapping: address_space which holds the pagecache and I/O vectors 1401 * @ra: file_ra_state which holds the readahead state 1402 * @file: Used by the filesystem for authentication. 1403 * @folio: The folio which triggered the readahead call. 1404 * @req_count: Total number of pages being read by the caller. 1405 * 1406 * page_cache_async_readahead() should be called when a page is used which 1407 * is marked as PageReadahead; this is a marker to suggest that the application 1408 * has used up enough of the readahead window that we should start pulling in 1409 * more pages. 1410 */ 1411 static inline 1412 void page_cache_async_readahead(struct address_space *mapping, 1413 struct file_ra_state *ra, struct file *file, 1414 struct folio *folio, unsigned long req_count) 1415 { 1416 DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index); 1417 page_cache_async_ra(&ractl, folio, req_count); 1418 } 1419 1420 static inline struct folio *__readahead_folio(struct readahead_control *ractl) 1421 { 1422 struct folio *folio; 1423 1424 BUG_ON(ractl->_batch_count > ractl->_nr_pages); 1425 ractl->_nr_pages -= ractl->_batch_count; 1426 ractl->_index += ractl->_batch_count; 1427 1428 if (!ractl->_nr_pages) { 1429 ractl->_batch_count = 0; 1430 return NULL; 1431 } 1432 1433 folio = xa_load(&ractl->mapping->i_pages, ractl->_index); 1434 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 1435 ractl->_batch_count = folio_nr_pages(folio); 1436 1437 return folio; 1438 } 1439 1440 /** 1441 * readahead_folio - Get the next folio to read. 1442 * @ractl: The current readahead request. 1443 * 1444 * Context: The folio is locked. The caller should unlock the folio once 1445 * all I/O to that folio has completed. 1446 * Return: A pointer to the next folio, or %NULL if we are done. 1447 */ 1448 static inline struct folio *readahead_folio(struct readahead_control *ractl) 1449 { 1450 struct folio *folio = __readahead_folio(ractl); 1451 1452 if (folio) 1453 folio_put(folio); 1454 return folio; 1455 } 1456 1457 static inline unsigned int __readahead_batch(struct readahead_control *rac, 1458 struct page **array, unsigned int array_sz) 1459 { 1460 unsigned int i = 0; 1461 XA_STATE(xas, &rac->mapping->i_pages, 0); 1462 struct folio *folio; 1463 1464 BUG_ON(rac->_batch_count > rac->_nr_pages); 1465 rac->_nr_pages -= rac->_batch_count; 1466 rac->_index += rac->_batch_count; 1467 rac->_batch_count = 0; 1468 1469 xas_set(&xas, rac->_index); 1470 rcu_read_lock(); 1471 xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) { 1472 if (xas_retry(&xas, folio)) 1473 continue; 1474 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 1475 array[i++] = folio_page(folio, 0); 1476 rac->_batch_count += folio_nr_pages(folio); 1477 if (i == array_sz) 1478 break; 1479 } 1480 rcu_read_unlock(); 1481 1482 return i; 1483 } 1484 1485 /** 1486 * readahead_pos - The byte offset into the file of this readahead request. 1487 * @rac: The readahead request. 1488 */ 1489 static inline loff_t readahead_pos(const struct readahead_control *rac) 1490 { 1491 return (loff_t)rac->_index * PAGE_SIZE; 1492 } 1493 1494 /** 1495 * readahead_length - The number of bytes in this readahead request. 1496 * @rac: The readahead request. 1497 */ 1498 static inline size_t readahead_length(const struct readahead_control *rac) 1499 { 1500 return rac->_nr_pages * PAGE_SIZE; 1501 } 1502 1503 /** 1504 * readahead_index - The index of the first page in this readahead request. 1505 * @rac: The readahead request. 1506 */ 1507 static inline pgoff_t readahead_index(const struct readahead_control *rac) 1508 { 1509 return rac->_index; 1510 } 1511 1512 /** 1513 * readahead_count - The number of pages in this readahead request. 1514 * @rac: The readahead request. 1515 */ 1516 static inline unsigned int readahead_count(const struct readahead_control *rac) 1517 { 1518 return rac->_nr_pages; 1519 } 1520 1521 /** 1522 * readahead_batch_length - The number of bytes in the current batch. 1523 * @rac: The readahead request. 1524 */ 1525 static inline size_t readahead_batch_length(const struct readahead_control *rac) 1526 { 1527 return rac->_batch_count * PAGE_SIZE; 1528 } 1529 1530 static inline unsigned long dir_pages(const struct inode *inode) 1531 { 1532 return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> 1533 PAGE_SHIFT; 1534 } 1535 1536 /** 1537 * folio_mkwrite_check_truncate - check if folio was truncated 1538 * @folio: the folio to check 1539 * @inode: the inode to check the folio against 1540 * 1541 * Return: the number of bytes in the folio up to EOF, 1542 * or -EFAULT if the folio was truncated. 1543 */ 1544 static inline ssize_t folio_mkwrite_check_truncate(const struct folio *folio, 1545 const struct inode *inode) 1546 { 1547 loff_t size = i_size_read(inode); 1548 pgoff_t index = size >> PAGE_SHIFT; 1549 size_t offset = offset_in_folio(folio, size); 1550 1551 if (!folio->mapping) 1552 return -EFAULT; 1553 1554 /* folio is wholly inside EOF */ 1555 if (folio_next_index(folio) - 1 < index) 1556 return folio_size(folio); 1557 /* folio is wholly past EOF */ 1558 if (folio->index > index || !offset) 1559 return -EFAULT; 1560 /* folio is partially inside EOF */ 1561 return offset; 1562 } 1563 1564 /** 1565 * i_blocks_per_folio - How many blocks fit in this folio. 1566 * @inode: The inode which contains the blocks. 1567 * @folio: The folio. 1568 * 1569 * If the block size is larger than the size of this folio, return zero. 1570 * 1571 * Context: The caller should hold a refcount on the folio to prevent it 1572 * from being split. 1573 * Return: The number of filesystem blocks covered by this folio. 1574 */ 1575 static inline 1576 unsigned int i_blocks_per_folio(const struct inode *inode, 1577 const struct folio *folio) 1578 { 1579 return folio_size(folio) >> inode->i_blkbits; 1580 } 1581 #endif /* _LINUX_PAGEMAP_H */ 1582