/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
 *            Seiji Kihara <kihara@osrg.net>.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS  \
	((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
	 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
	 (1UL << BH_NILFS_Checked))

static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
	bh = nilfs_page_get_nth_block(page, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

/*
 * Since the page cache of B-tree node pages or data page cache of pseudo
 * inodes does not have a valid mapping->host pointer, calling
 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
 */
void nilfs_mark_buffer_dirty(struct buffer_head *bh)
{
	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
		__set_page_dirty_nobuffers(bh->b_page);
}

struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
	struct page *page, *opage;
	struct buffer_head *bh, *obh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;
	}
	if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
		/*
		 * Shadow page cache uses assoc_mapping to point its original
		 * page cache.  The following code tries the original cache
		 * if the given cache is a shadow and it didn't hit.
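		 * If the original cache holds an up-to-date copy of the
		 * block, its contents and dirty state are copied into the
		 * buffer obtained above.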
		 */
		opage = find_lock_page(mapping->assoc_mapping, index);
		if (!opage)
			return bh;

		obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
					     b_state);
		if (buffer_uptodate(obh)) {
			nilfs_copy_buffer(bh, obh);
			if (buffer_dirty(obh)) {
				nilfs_mark_buffer_dirty(bh);
				if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
					nilfs_mdt_mark_dirty(inode);
			}
		}
		brelse(obh);
		unlock_page(opage);
		page_cache_release(opage);
	}
	return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;

	lock_buffer(bh);
	clear_buffer_nilfs_volatile(bh);
	clear_buffer_nilfs_checked(bh);
	clear_buffer_dirty(bh);
	if (nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	clear_buffer_uptodate(bh);
	clear_buffer_mapped(bh);
	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_atomic(spage, KM_USER0);
	kaddr1 = kmap_atomic(dpage, KM_USER1);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1, KM_USER1);
	kunmap_atomic(kaddr0, KM_USER0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

	bh = dbh;
	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & (1UL << BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & (1UL << BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
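 *
 * The page must have buffer heads attached when this function is called.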
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			return 0;
		bh = bh->b_this_page;
	} while (bh != head);
	return 1;
}

void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino = 0;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	if (m) {
		struct inode *inode = NILFS_AS_I(m);
		if (inode != NULL)
			ino = inode->i_ino;
	}
	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, atomic_read(&page->_count),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}

/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device the buffer heads are associated with
 * @size: block size of the buffer heads
 * @state: initial buffer state bits
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
				      unsigned long state)
{
	struct buffer_head *bh, *head, *tail;
	struct page *page;

	page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
	if (unlikely(!page))
		return NULL;

	lock_page(page);
	head = alloc_page_buffers(page, size, 0);
	if (unlikely(!head)) {
		unlock_page(page);
		__free_page(page);
		return NULL;
	}

	bh = head;
	do {
		bh->b_state = (1UL << BH_NILFS_Allocated) | state;
		tail = bh;
		bh->b_bdev = bdev;
		bh = bh->b_this_page;
	} while (bh);

	tail->b_this_page = head;
	attach_page_buffers(page, head);

	return page;
}

void nilfs_free_private_page(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping);

	if (page_has_buffers(page) && !try_to_free_buffers(page))
		NILFS_PAGE_BUG(page, "failed to free page");

	unlock_page(page);
	__free_page(page);
}

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The page must not be under I/O.
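 * If @dst has no buffer heads, they are created here with the same block
 * size as those of @src.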
 * Both @src and @dst pages must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= (1UL << BH_Dirty);

	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}

int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	pagevec_init(&pvec, 0);
repeat:
	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
				PAGEVEC_SIZE))
		return 0;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;

		lock_page(page);
		if (unlikely(!PageDirty(page)))
			NILFS_PAGE_BUG(page, "inconsistent dirty state");

		dpage = grab_cache_page(dmap, page->index);
		if (unlikely(!dpage)) {
			/* No empty page is added to the page cache */
			err = -ENOMEM;
			unlock_page(page);
			break;
		}
		if (unlikely(!page_has_buffers(page)))
			NILFS_PAGE_BUG(page,
				       "found empty page in dat page cache");

		nilfs_copy_page(dpage, page, 1);
		__set_page_dirty_nobuffers(dpage);

		unlock_page(dpage);
		page_cache_release(dpage);
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
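 *
 * Pages that already exist in @dmap are overwritten with the contents of
 * the corresponding shadow pages; pages that exist only in @smap are moved
 * over to @dmap by reinserting them into its radix tree.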
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec, 0);
repeat:
	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
	if (!n)
		return;
	index = pvec.pages[n - 1]->index + 1;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* override existing page on the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			page_cache_release(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			spin_lock_irq(&smap->tree_lock);
			page2 = radix_tree_delete(&smap->page_tree, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			spin_unlock_irq(&smap->tree_lock);

			spin_lock_irq(&dmap->tree_lock);
			err = radix_tree_insert(&dmap->page_tree, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				page_cache_release(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->page_tree,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			spin_unlock_irq(&dmap->tree_lock);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}

void nilfs_clear_dirty_pages(struct address_space *mapping)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			lock_page(page);
			ClearPageUptodate(page);
			ClearPageMappedToDisk(page);
			bh = head = page_buffers(page);
			do {
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				clear_buffer_nilfs_volatile(bh);
				clear_buffer_nilfs_checked(bh);
				clear_buffer_uptodate(bh);
				clear_buffer_mapped(bh);
				unlock_buffer(bh);
				bh = bh->b_this_page;
			} while (bh != head);

			__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

unsigned nilfs_page_count_clean_buffers(struct page *page,
					unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose of
 *    buffers in a dirty state, and the dirty state of their pages must be
 *    cancelled.
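 *
 * In these cases the PG_dirty flag cannot simply be cleared: the
 * PAGECACHE_TAG_DIRTY tag in the page cache radix tree has to be cleared
 * as well, which is what __nilfs_clear_page_dirty() below does.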
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		spin_lock_irq(&mapping->tree_lock);
		if (test_bit(PG_dirty, &page->flags)) {
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			spin_unlock_irq(&mapping->tree_lock);
			return clear_page_dirty_for_io(page);
		}
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}
	return TestClearPageDirty(page);
}