1 /* 2 * page.c - buffer/page management specific to NILFS 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Written by Ryusuke Konishi <ryusuke@osrg.net>, 21 * Seiji Kihara <kihara@osrg.net>. 22 */ 23 24 #include <linux/pagemap.h> 25 #include <linux/writeback.h> 26 #include <linux/swap.h> 27 #include <linux/bitops.h> 28 #include <linux/page-flags.h> 29 #include <linux/list.h> 30 #include <linux/highmem.h> 31 #include <linux/pagevec.h> 32 #include <linux/gfp.h> 33 #include "nilfs.h" 34 #include "page.h" 35 #include "mdt.h" 36 37 38 #define NILFS_BUFFER_INHERENT_BITS \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked)) 41 42 static struct buffer_head * 43 __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 44 int blkbits, unsigned long b_state) 45 46 { 47 unsigned long first_block; 48 struct buffer_head *bh; 49 50 if (!page_has_buffers(page)) 51 create_empty_buffers(page, 1 << blkbits, b_state); 52 53 first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); 54 bh = nilfs_page_get_nth_block(page, block - first_block); 55 56 touch_buffer(bh); 57 wait_on_buffer(bh); 58 return bh; 59 } 60 61 struct buffer_head *nilfs_grab_buffer(struct inode *inode, 62 struct address_space *mapping, 63 unsigned long blkoff, 64 unsigned long b_state) 65 { 66 int blkbits = inode->i_blkbits; 67 pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits); 68 struct page *page; 69 struct buffer_head *bh; 70 71 page = grab_cache_page(mapping, index); 72 if (unlikely(!page)) 73 return NULL; 74 75 bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); 76 if (unlikely(!bh)) { 77 unlock_page(page); 78 put_page(page); 79 return NULL; 80 } 81 return bh; 82 } 83 84 /** 85 * nilfs_forget_buffer - discard dirty state 86 * @inode: owner inode of the buffer 87 * @bh: buffer head of the buffer to be discarded 88 */ 89 void nilfs_forget_buffer(struct buffer_head *bh) 90 { 91 struct page *page = bh->b_page; 92 const unsigned long clear_bits = 93 (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped | 94 1 << BH_Async_Write | 1 << BH_NILFS_Volatile | 95 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected); 96 97 lock_buffer(bh); 98 set_mask_bits(&bh->b_state, clear_bits, 0); 99 if (nilfs_page_buffers_clean(page)) 100 __nilfs_clear_page_dirty(page); 101 102 bh->b_blocknr = -1; 103 ClearPageUptodate(page); 104 ClearPageMappedToDisk(page); 105 unlock_buffer(bh); 106 brelse(bh); 107 } 108 109 /** 110 * nilfs_copy_buffer -- copy buffer data and flags 111 * @dbh: destination buffer 112 * @sbh: source buffer 113 */ 114 void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) 115 { 116 void *kaddr0, *kaddr1; 117 unsigned long bits; 118 struct page *spage = sbh->b_page, *dpage = dbh->b_page; 119 struct buffer_head *bh; 120 121 kaddr0 = kmap_atomic(spage); 122 kaddr1 = kmap_atomic(dpage); 123 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); 124 kunmap_atomic(kaddr1); 125 kunmap_atomic(kaddr0); 126 127 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; 128 dbh->b_blocknr = sbh->b_blocknr; 129 dbh->b_bdev = sbh->b_bdev; 130 131 bh = dbh; 132 bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); 133 while ((bh = bh->b_this_page) != dbh) { 134 lock_buffer(bh); 135 bits &= bh->b_state; 136 unlock_buffer(bh); 137 } 138 if (bits & (1UL << BH_Uptodate)) 139 SetPageUptodate(dpage); 140 else 141 ClearPageUptodate(dpage); 142 if (bits & (1UL << BH_Mapped)) 143 SetPageMappedToDisk(dpage); 144 else 145 ClearPageMappedToDisk(dpage); 146 } 147 148 /** 149 * nilfs_page_buffers_clean - check if a page has dirty buffers or not. 150 * @page: page to be checked 151 * 152 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. 153 * Otherwise, it returns non-zero value. 154 */ 155 int nilfs_page_buffers_clean(struct page *page) 156 { 157 struct buffer_head *bh, *head; 158 159 bh = head = page_buffers(page); 160 do { 161 if (buffer_dirty(bh)) 162 return 0; 163 bh = bh->b_this_page; 164 } while (bh != head); 165 return 1; 166 } 167 168 void nilfs_page_bug(struct page *page) 169 { 170 struct address_space *m; 171 unsigned long ino; 172 173 if (unlikely(!page)) { 174 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); 175 return; 176 } 177 178 m = page->mapping; 179 ino = m ? m->host->i_ino : 0; 180 181 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " 182 "mapping=%p ino=%lu\n", 183 page, page_ref_count(page), 184 (unsigned long long)page->index, page->flags, m, ino); 185 186 if (page_has_buffers(page)) { 187 struct buffer_head *bh, *head; 188 int i = 0; 189 190 bh = head = page_buffers(page); 191 do { 192 printk(KERN_CRIT 193 " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", 194 i++, bh, atomic_read(&bh->b_count), 195 (unsigned long long)bh->b_blocknr, bh->b_state); 196 bh = bh->b_this_page; 197 } while (bh != head); 198 } 199 } 200 201 /** 202 * nilfs_copy_page -- copy the page with buffers 203 * @dst: destination page 204 * @src: source page 205 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. 206 * 207 * This function is for both data pages and btnode pages. The dirty flag 208 * should be treated by caller. The page must not be under i/o. 209 * Both src and dst page must be locked 210 */ 211 static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) 212 { 213 struct buffer_head *dbh, *dbufs, *sbh, *sbufs; 214 unsigned long mask = NILFS_BUFFER_INHERENT_BITS; 215 216 BUG_ON(PageWriteback(dst)); 217 218 sbh = sbufs = page_buffers(src); 219 if (!page_has_buffers(dst)) 220 create_empty_buffers(dst, sbh->b_size, 0); 221 222 if (copy_dirty) 223 mask |= (1UL << BH_Dirty); 224 225 dbh = dbufs = page_buffers(dst); 226 do { 227 lock_buffer(sbh); 228 lock_buffer(dbh); 229 dbh->b_state = sbh->b_state & mask; 230 dbh->b_blocknr = sbh->b_blocknr; 231 dbh->b_bdev = sbh->b_bdev; 232 sbh = sbh->b_this_page; 233 dbh = dbh->b_this_page; 234 } while (dbh != dbufs); 235 236 copy_highpage(dst, src); 237 238 if (PageUptodate(src) && !PageUptodate(dst)) 239 SetPageUptodate(dst); 240 else if (!PageUptodate(src) && PageUptodate(dst)) 241 ClearPageUptodate(dst); 242 if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) 243 SetPageMappedToDisk(dst); 244 else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) 245 ClearPageMappedToDisk(dst); 246 247 do { 248 unlock_buffer(sbh); 249 unlock_buffer(dbh); 250 sbh = sbh->b_this_page; 251 dbh = dbh->b_this_page; 252 } while (dbh != dbufs); 253 } 254 255 int nilfs_copy_dirty_pages(struct address_space *dmap, 256 struct address_space *smap) 257 { 258 struct pagevec pvec; 259 unsigned int i; 260 pgoff_t index = 0; 261 int err = 0; 262 263 pagevec_init(&pvec, 0); 264 repeat: 265 if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, 266 PAGEVEC_SIZE)) 267 return 0; 268 269 for (i = 0; i < pagevec_count(&pvec); i++) { 270 struct page *page = pvec.pages[i], *dpage; 271 272 lock_page(page); 273 if (unlikely(!PageDirty(page))) 274 NILFS_PAGE_BUG(page, "inconsistent dirty state"); 275 276 dpage = grab_cache_page(dmap, page->index); 277 if (unlikely(!dpage)) { 278 /* No empty page is added to the page cache */ 279 err = -ENOMEM; 280 unlock_page(page); 281 break; 282 } 283 if (unlikely(!page_has_buffers(page))) 284 NILFS_PAGE_BUG(page, 285 "found empty page in dat page cache"); 286 287 nilfs_copy_page(dpage, page, 1); 288 __set_page_dirty_nobuffers(dpage); 289 290 unlock_page(dpage); 291 put_page(dpage); 292 unlock_page(page); 293 } 294 pagevec_release(&pvec); 295 cond_resched(); 296 297 if (likely(!err)) 298 goto repeat; 299 return err; 300 } 301 302 /** 303 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache 304 * @dmap: destination page cache 305 * @smap: source page cache 306 * 307 * No pages must no be added to the cache during this process. 308 * This must be ensured by the caller. 309 */ 310 void nilfs_copy_back_pages(struct address_space *dmap, 311 struct address_space *smap) 312 { 313 struct pagevec pvec; 314 unsigned int i, n; 315 pgoff_t index = 0; 316 int err; 317 318 pagevec_init(&pvec, 0); 319 repeat: 320 n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); 321 if (!n) 322 return; 323 index = pvec.pages[n - 1]->index + 1; 324 325 for (i = 0; i < pagevec_count(&pvec); i++) { 326 struct page *page = pvec.pages[i], *dpage; 327 pgoff_t offset = page->index; 328 329 lock_page(page); 330 dpage = find_lock_page(dmap, offset); 331 if (dpage) { 332 /* override existing page on the destination cache */ 333 WARN_ON(PageDirty(dpage)); 334 nilfs_copy_page(dpage, page, 0); 335 unlock_page(dpage); 336 put_page(dpage); 337 } else { 338 struct page *page2; 339 340 /* move the page to the destination cache */ 341 spin_lock_irq(&smap->tree_lock); 342 page2 = radix_tree_delete(&smap->page_tree, offset); 343 WARN_ON(page2 != page); 344 345 smap->nrpages--; 346 spin_unlock_irq(&smap->tree_lock); 347 348 spin_lock_irq(&dmap->tree_lock); 349 err = radix_tree_insert(&dmap->page_tree, offset, page); 350 if (unlikely(err < 0)) { 351 WARN_ON(err == -EEXIST); 352 page->mapping = NULL; 353 put_page(page); /* for cache */ 354 } else { 355 page->mapping = dmap; 356 dmap->nrpages++; 357 if (PageDirty(page)) 358 radix_tree_tag_set(&dmap->page_tree, 359 offset, 360 PAGECACHE_TAG_DIRTY); 361 } 362 spin_unlock_irq(&dmap->tree_lock); 363 } 364 unlock_page(page); 365 } 366 pagevec_release(&pvec); 367 cond_resched(); 368 369 goto repeat; 370 } 371 372 /** 373 * nilfs_clear_dirty_pages - discard dirty pages in address space 374 * @mapping: address space with dirty pages for discarding 375 * @silent: suppress [true] or print [false] warning messages 376 */ 377 void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) 378 { 379 struct pagevec pvec; 380 unsigned int i; 381 pgoff_t index = 0; 382 383 pagevec_init(&pvec, 0); 384 385 while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, 386 PAGEVEC_SIZE)) { 387 for (i = 0; i < pagevec_count(&pvec); i++) { 388 struct page *page = pvec.pages[i]; 389 390 lock_page(page); 391 nilfs_clear_dirty_page(page, silent); 392 unlock_page(page); 393 } 394 pagevec_release(&pvec); 395 cond_resched(); 396 } 397 } 398 399 /** 400 * nilfs_clear_dirty_page - discard dirty page 401 * @page: dirty page that will be discarded 402 * @silent: suppress [true] or print [false] warning messages 403 */ 404 void nilfs_clear_dirty_page(struct page *page, bool silent) 405 { 406 struct inode *inode = page->mapping->host; 407 struct super_block *sb = inode->i_sb; 408 409 BUG_ON(!PageLocked(page)); 410 411 if (!silent) { 412 nilfs_warning(sb, __func__, 413 "discard page: offset %lld, ino %lu", 414 page_offset(page), inode->i_ino); 415 } 416 417 ClearPageUptodate(page); 418 ClearPageMappedToDisk(page); 419 420 if (page_has_buffers(page)) { 421 struct buffer_head *bh, *head; 422 const unsigned long clear_bits = 423 (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped | 424 1 << BH_Async_Write | 1 << BH_NILFS_Volatile | 425 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected); 426 427 bh = head = page_buffers(page); 428 do { 429 lock_buffer(bh); 430 if (!silent) { 431 nilfs_warning(sb, __func__, 432 "discard block %llu, size %zu", 433 (u64)bh->b_blocknr, bh->b_size); 434 } 435 set_mask_bits(&bh->b_state, clear_bits, 0); 436 unlock_buffer(bh); 437 } while (bh = bh->b_this_page, bh != head); 438 } 439 440 __nilfs_clear_page_dirty(page); 441 } 442 443 unsigned nilfs_page_count_clean_buffers(struct page *page, 444 unsigned from, unsigned to) 445 { 446 unsigned block_start, block_end; 447 struct buffer_head *bh, *head; 448 unsigned nc = 0; 449 450 for (bh = head = page_buffers(page), block_start = 0; 451 bh != head || !block_start; 452 block_start = block_end, bh = bh->b_this_page) { 453 block_end = block_start + bh->b_size; 454 if (block_end > from && block_start < to && !buffer_dirty(bh)) 455 nc++; 456 } 457 return nc; 458 } 459 460 void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) 461 { 462 mapping->host = inode; 463 mapping->flags = 0; 464 mapping_set_gfp_mask(mapping, GFP_NOFS); 465 mapping->private_data = NULL; 466 mapping->a_ops = &empty_aops; 467 } 468 469 /* 470 * NILFS2 needs clear_page_dirty() in the following two cases: 471 * 472 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears 473 * page dirty flags when it copies back pages from the shadow cache 474 * (gcdat->{i_mapping,i_btnode_cache}) to its original cache 475 * (dat->{i_mapping,i_btnode_cache}). 476 * 477 * 2) Some B-tree operations like insertion or deletion may dispose buffers 478 * in dirty state, and this needs to cancel the dirty state of their pages. 479 */ 480 int __nilfs_clear_page_dirty(struct page *page) 481 { 482 struct address_space *mapping = page->mapping; 483 484 if (mapping) { 485 spin_lock_irq(&mapping->tree_lock); 486 if (test_bit(PG_dirty, &page->flags)) { 487 radix_tree_tag_clear(&mapping->page_tree, 488 page_index(page), 489 PAGECACHE_TAG_DIRTY); 490 spin_unlock_irq(&mapping->tree_lock); 491 return clear_page_dirty_for_io(page); 492 } 493 spin_unlock_irq(&mapping->tree_lock); 494 return 0; 495 } 496 return TestClearPageDirty(page); 497 } 498 499 /** 500 * nilfs_find_uncommitted_extent - find extent of uncommitted data 501 * @inode: inode 502 * @start_blk: start block offset (in) 503 * @blkoff: start offset of the found extent (out) 504 * 505 * This function searches an extent of buffers marked "delayed" which 506 * starts from a block offset equal to or larger than @start_blk. If 507 * such an extent was found, this will store the start offset in 508 * @blkoff and return its length in blocks. Otherwise, zero is 509 * returned. 510 */ 511 unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 512 sector_t start_blk, 513 sector_t *blkoff) 514 { 515 unsigned int i; 516 pgoff_t index; 517 unsigned int nblocks_in_page; 518 unsigned long length = 0; 519 sector_t b; 520 struct pagevec pvec; 521 struct page *page; 522 523 if (inode->i_mapping->nrpages == 0) 524 return 0; 525 526 index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); 527 nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits); 528 529 pagevec_init(&pvec, 0); 530 531 repeat: 532 pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, 533 pvec.pages); 534 if (pvec.nr == 0) 535 return length; 536 537 if (length > 0 && pvec.pages[0]->index > index) 538 goto out; 539 540 b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits); 541 i = 0; 542 do { 543 page = pvec.pages[i]; 544 545 lock_page(page); 546 if (page_has_buffers(page)) { 547 struct buffer_head *bh, *head; 548 549 bh = head = page_buffers(page); 550 do { 551 if (b < start_blk) 552 continue; 553 if (buffer_delay(bh)) { 554 if (length == 0) 555 *blkoff = b; 556 length++; 557 } else if (length > 0) { 558 goto out_locked; 559 } 560 } while (++b, bh = bh->b_this_page, bh != head); 561 } else { 562 if (length > 0) 563 goto out_locked; 564 565 b += nblocks_in_page; 566 } 567 unlock_page(page); 568 569 } while (++i < pagevec_count(&pvec)); 570 571 index = page->index + 1; 572 pagevec_release(&pvec); 573 cond_resched(); 574 goto repeat; 575 576 out_locked: 577 unlock_page(page); 578 out: 579 pagevec_release(&pvec); 580 return length; 581 } 582