xref: /linux/fs/nilfs2/page.c (revision c0e297dc61f8d4453e07afbea1fa8d0e67cd4a34)
1 /*
2  * page.c - buffer/page management specific to NILFS
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Ryusuke Konishi <ryusuke@osrg.net>,
21  *            Seiji Kihara <kihara@osrg.net>.
22  */
23 
24 #include <linux/pagemap.h>
25 #include <linux/writeback.h>
26 #include <linux/swap.h>
27 #include <linux/bitops.h>
28 #include <linux/page-flags.h>
29 #include <linux/list.h>
30 #include <linux/highmem.h>
31 #include <linux/pagevec.h>
32 #include <linux/gfp.h>
33 #include "nilfs.h"
34 #include "page.h"
35 #include "mdt.h"
36 
37 
/*
 * Buffer state bits that are carried over from the source buffer head to
 * the destination buffer head when this file copies buffers or pages
 * (used as the state mask in nilfs_copy_buffer() and nilfs_copy_page()).
 */
#define NILFS_BUFFER_INHERENT_BITS  \
	((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
	 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
41 
42 static struct buffer_head *
43 __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
44 		       int blkbits, unsigned long b_state)
45 
46 {
47 	unsigned long first_block;
48 	struct buffer_head *bh;
49 
50 	if (!page_has_buffers(page))
51 		create_empty_buffers(page, 1 << blkbits, b_state);
52 
53 	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
54 	bh = nilfs_page_get_nth_block(page, block - first_block);
55 
56 	touch_buffer(bh);
57 	wait_on_buffer(bh);
58 	return bh;
59 }
60 
61 struct buffer_head *nilfs_grab_buffer(struct inode *inode,
62 				      struct address_space *mapping,
63 				      unsigned long blkoff,
64 				      unsigned long b_state)
65 {
66 	int blkbits = inode->i_blkbits;
67 	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
68 	struct page *page;
69 	struct buffer_head *bh;
70 
71 	page = grab_cache_page(mapping, index);
72 	if (unlikely(!page))
73 		return NULL;
74 
75 	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
76 	if (unlikely(!bh)) {
77 		unlock_page(page);
78 		page_cache_release(page);
79 		return NULL;
80 	}
81 	return bh;
82 }
83 
84 /**
85  * nilfs_forget_buffer - discard dirty state
86  * @inode: owner inode of the buffer
87  * @bh: buffer head of the buffer to be discarded
88  */
89 void nilfs_forget_buffer(struct buffer_head *bh)
90 {
91 	struct page *page = bh->b_page;
92 	const unsigned long clear_bits =
93 		(1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
94 		 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
95 		 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
96 
97 	lock_buffer(bh);
98 	set_mask_bits(&bh->b_state, clear_bits, 0);
99 	if (nilfs_page_buffers_clean(page))
100 		__nilfs_clear_page_dirty(page);
101 
102 	bh->b_blocknr = -1;
103 	ClearPageUptodate(page);
104 	ClearPageMappedToDisk(page);
105 	unlock_buffer(bh);
106 	brelse(bh);
107 }
108 
109 /**
110  * nilfs_copy_buffer -- copy buffer data and flags
111  * @dbh: destination buffer
112  * @sbh: source buffer
113  */
114 void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
115 {
116 	void *kaddr0, *kaddr1;
117 	unsigned long bits;
118 	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
119 	struct buffer_head *bh;
120 
121 	kaddr0 = kmap_atomic(spage);
122 	kaddr1 = kmap_atomic(dpage);
123 	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
124 	kunmap_atomic(kaddr1);
125 	kunmap_atomic(kaddr0);
126 
127 	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
128 	dbh->b_blocknr = sbh->b_blocknr;
129 	dbh->b_bdev = sbh->b_bdev;
130 
131 	bh = dbh;
132 	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
133 	while ((bh = bh->b_this_page) != dbh) {
134 		lock_buffer(bh);
135 		bits &= bh->b_state;
136 		unlock_buffer(bh);
137 	}
138 	if (bits & (1UL << BH_Uptodate))
139 		SetPageUptodate(dpage);
140 	else
141 		ClearPageUptodate(dpage);
142 	if (bits & (1UL << BH_Mapped))
143 		SetPageMappedToDisk(dpage);
144 	else
145 		ClearPageMappedToDisk(dpage);
146 }
147 
148 /**
149  * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
150  * @page: page to be checked
151  *
152  * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
153  * Otherwise, it returns non-zero value.
154  */
155 int nilfs_page_buffers_clean(struct page *page)
156 {
157 	struct buffer_head *bh, *head;
158 
159 	bh = head = page_buffers(page);
160 	do {
161 		if (buffer_dirty(bh))
162 			return 0;
163 		bh = bh->b_this_page;
164 	} while (bh != head);
165 	return 1;
166 }
167 
168 void nilfs_page_bug(struct page *page)
169 {
170 	struct address_space *m;
171 	unsigned long ino;
172 
173 	if (unlikely(!page)) {
174 		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
175 		return;
176 	}
177 
178 	m = page->mapping;
179 	ino = m ? m->host->i_ino : 0;
180 
181 	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
182 	       "mapping=%p ino=%lu\n",
183 	       page, atomic_read(&page->_count),
184 	       (unsigned long long)page->index, page->flags, m, ino);
185 
186 	if (page_has_buffers(page)) {
187 		struct buffer_head *bh, *head;
188 		int i = 0;
189 
190 		bh = head = page_buffers(page);
191 		do {
192 			printk(KERN_CRIT
193 			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
194 			       i++, bh, atomic_read(&bh->b_count),
195 			       (unsigned long long)bh->b_blocknr, bh->b_state);
196 			bh = bh->b_this_page;
197 		} while (bh != head);
198 	}
199 }
200 
201 /**
202  * nilfs_copy_page -- copy the page with buffers
203  * @dst: destination page
204  * @src: source page
205  * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
206  *
207  * This function is for both data pages and btnode pages.  The dirty flag
208  * should be treated by caller.  The page must not be under i/o.
209  * Both src and dst page must be locked
210  */
211 static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
212 {
213 	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
214 	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
215 
216 	BUG_ON(PageWriteback(dst));
217 
218 	sbh = sbufs = page_buffers(src);
219 	if (!page_has_buffers(dst))
220 		create_empty_buffers(dst, sbh->b_size, 0);
221 
222 	if (copy_dirty)
223 		mask |= (1UL << BH_Dirty);
224 
225 	dbh = dbufs = page_buffers(dst);
226 	do {
227 		lock_buffer(sbh);
228 		lock_buffer(dbh);
229 		dbh->b_state = sbh->b_state & mask;
230 		dbh->b_blocknr = sbh->b_blocknr;
231 		dbh->b_bdev = sbh->b_bdev;
232 		sbh = sbh->b_this_page;
233 		dbh = dbh->b_this_page;
234 	} while (dbh != dbufs);
235 
236 	copy_highpage(dst, src);
237 
238 	if (PageUptodate(src) && !PageUptodate(dst))
239 		SetPageUptodate(dst);
240 	else if (!PageUptodate(src) && PageUptodate(dst))
241 		ClearPageUptodate(dst);
242 	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
243 		SetPageMappedToDisk(dst);
244 	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
245 		ClearPageMappedToDisk(dst);
246 
247 	do {
248 		unlock_buffer(sbh);
249 		unlock_buffer(dbh);
250 		sbh = sbh->b_this_page;
251 		dbh = dbh->b_this_page;
252 	} while (dbh != dbufs);
253 }
254 
255 int nilfs_copy_dirty_pages(struct address_space *dmap,
256 			   struct address_space *smap)
257 {
258 	struct pagevec pvec;
259 	unsigned int i;
260 	pgoff_t index = 0;
261 	int err = 0;
262 
263 	pagevec_init(&pvec, 0);
264 repeat:
265 	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
266 				PAGEVEC_SIZE))
267 		return 0;
268 
269 	for (i = 0; i < pagevec_count(&pvec); i++) {
270 		struct page *page = pvec.pages[i], *dpage;
271 
272 		lock_page(page);
273 		if (unlikely(!PageDirty(page)))
274 			NILFS_PAGE_BUG(page, "inconsistent dirty state");
275 
276 		dpage = grab_cache_page(dmap, page->index);
277 		if (unlikely(!dpage)) {
278 			/* No empty page is added to the page cache */
279 			err = -ENOMEM;
280 			unlock_page(page);
281 			break;
282 		}
283 		if (unlikely(!page_has_buffers(page)))
284 			NILFS_PAGE_BUG(page,
285 				       "found empty page in dat page cache");
286 
287 		nilfs_copy_page(dpage, page, 1);
288 		__set_page_dirty_nobuffers(dpage);
289 
290 		unlock_page(dpage);
291 		page_cache_release(dpage);
292 		unlock_page(page);
293 	}
294 	pagevec_release(&pvec);
295 	cond_resched();
296 
297 	if (likely(!err))
298 		goto repeat;
299 	return err;
300 }
301 
302 /**
303  * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
304  * @dmap: destination page cache
305  * @smap: source page cache
306  *
307  * No pages must no be added to the cache during this process.
308  * This must be ensured by the caller.
309  */
310 void nilfs_copy_back_pages(struct address_space *dmap,
311 			   struct address_space *smap)
312 {
313 	struct pagevec pvec;
314 	unsigned int i, n;
315 	pgoff_t index = 0;
316 	int err;
317 
318 	pagevec_init(&pvec, 0);
319 repeat:
320 	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
321 	if (!n)
322 		return;
323 	index = pvec.pages[n - 1]->index + 1;
324 
325 	for (i = 0; i < pagevec_count(&pvec); i++) {
326 		struct page *page = pvec.pages[i], *dpage;
327 		pgoff_t offset = page->index;
328 
329 		lock_page(page);
330 		dpage = find_lock_page(dmap, offset);
331 		if (dpage) {
332 			/* override existing page on the destination cache */
333 			WARN_ON(PageDirty(dpage));
334 			nilfs_copy_page(dpage, page, 0);
335 			unlock_page(dpage);
336 			page_cache_release(dpage);
337 		} else {
338 			struct page *page2;
339 
340 			/* move the page to the destination cache */
341 			spin_lock_irq(&smap->tree_lock);
342 			page2 = radix_tree_delete(&smap->page_tree, offset);
343 			WARN_ON(page2 != page);
344 
345 			smap->nrpages--;
346 			spin_unlock_irq(&smap->tree_lock);
347 
348 			spin_lock_irq(&dmap->tree_lock);
349 			err = radix_tree_insert(&dmap->page_tree, offset, page);
350 			if (unlikely(err < 0)) {
351 				WARN_ON(err == -EEXIST);
352 				page->mapping = NULL;
353 				page_cache_release(page); /* for cache */
354 			} else {
355 				page->mapping = dmap;
356 				dmap->nrpages++;
357 				if (PageDirty(page))
358 					radix_tree_tag_set(&dmap->page_tree,
359 							   offset,
360 							   PAGECACHE_TAG_DIRTY);
361 			}
362 			spin_unlock_irq(&dmap->tree_lock);
363 		}
364 		unlock_page(page);
365 	}
366 	pagevec_release(&pvec);
367 	cond_resched();
368 
369 	goto repeat;
370 }
371 
372 /**
373  * nilfs_clear_dirty_pages - discard dirty pages in address space
374  * @mapping: address space with dirty pages for discarding
375  * @silent: suppress [true] or print [false] warning messages
376  */
377 void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
378 {
379 	struct pagevec pvec;
380 	unsigned int i;
381 	pgoff_t index = 0;
382 
383 	pagevec_init(&pvec, 0);
384 
385 	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
386 				  PAGEVEC_SIZE)) {
387 		for (i = 0; i < pagevec_count(&pvec); i++) {
388 			struct page *page = pvec.pages[i];
389 
390 			lock_page(page);
391 			nilfs_clear_dirty_page(page, silent);
392 			unlock_page(page);
393 		}
394 		pagevec_release(&pvec);
395 		cond_resched();
396 	}
397 }
398 
399 /**
400  * nilfs_clear_dirty_page - discard dirty page
401  * @page: dirty page that will be discarded
402  * @silent: suppress [true] or print [false] warning messages
403  */
404 void nilfs_clear_dirty_page(struct page *page, bool silent)
405 {
406 	struct inode *inode = page->mapping->host;
407 	struct super_block *sb = inode->i_sb;
408 
409 	BUG_ON(!PageLocked(page));
410 
411 	if (!silent) {
412 		nilfs_warning(sb, __func__,
413 				"discard page: offset %lld, ino %lu",
414 				page_offset(page), inode->i_ino);
415 	}
416 
417 	ClearPageUptodate(page);
418 	ClearPageMappedToDisk(page);
419 
420 	if (page_has_buffers(page)) {
421 		struct buffer_head *bh, *head;
422 		const unsigned long clear_bits =
423 			(1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
424 			 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
425 			 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
426 
427 		bh = head = page_buffers(page);
428 		do {
429 			lock_buffer(bh);
430 			if (!silent) {
431 				nilfs_warning(sb, __func__,
432 					"discard block %llu, size %zu",
433 					(u64)bh->b_blocknr, bh->b_size);
434 			}
435 			set_mask_bits(&bh->b_state, clear_bits, 0);
436 			unlock_buffer(bh);
437 		} while (bh = bh->b_this_page, bh != head);
438 	}
439 
440 	__nilfs_clear_page_dirty(page);
441 }
442 
443 unsigned nilfs_page_count_clean_buffers(struct page *page,
444 					unsigned from, unsigned to)
445 {
446 	unsigned block_start, block_end;
447 	struct buffer_head *bh, *head;
448 	unsigned nc = 0;
449 
450 	for (bh = head = page_buffers(page), block_start = 0;
451 	     bh != head || !block_start;
452 	     block_start = block_end, bh = bh->b_this_page) {
453 		block_end = block_start + bh->b_size;
454 		if (block_end > from && block_start < to && !buffer_dirty(bh))
455 			nc++;
456 	}
457 	return nc;
458 }
459 
460 void nilfs_mapping_init(struct address_space *mapping, struct inode *inode)
461 {
462 	mapping->host = inode;
463 	mapping->flags = 0;
464 	mapping_set_gfp_mask(mapping, GFP_NOFS);
465 	mapping->private_data = NULL;
466 	mapping->a_ops = &empty_aops;
467 }
468 
469 /*
470  * NILFS2 needs clear_page_dirty() in the following two cases:
471  *
472  * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
473  *    page dirty flags when it copies back pages from the shadow cache
474  *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
475  *    (dat->{i_mapping,i_btnode_cache}).
476  *
477  * 2) Some B-tree operations like insertion or deletion may dispose buffers
478  *    in dirty state, and this needs to cancel the dirty state of their pages.
479  */
480 int __nilfs_clear_page_dirty(struct page *page)
481 {
482 	struct address_space *mapping = page->mapping;
483 
484 	if (mapping) {
485 		spin_lock_irq(&mapping->tree_lock);
486 		if (test_bit(PG_dirty, &page->flags)) {
487 			radix_tree_tag_clear(&mapping->page_tree,
488 					     page_index(page),
489 					     PAGECACHE_TAG_DIRTY);
490 			spin_unlock_irq(&mapping->tree_lock);
491 			return clear_page_dirty_for_io(page);
492 		}
493 		spin_unlock_irq(&mapping->tree_lock);
494 		return 0;
495 	}
496 	return TestClearPageDirty(page);
497 }
498 
499 /**
500  * nilfs_find_uncommitted_extent - find extent of uncommitted data
501  * @inode: inode
502  * @start_blk: start block offset (in)
503  * @blkoff: start offset of the found extent (out)
504  *
505  * This function searches an extent of buffers marked "delayed" which
506  * starts from a block offset equal to or larger than @start_blk.  If
507  * such an extent was found, this will store the start offset in
508  * @blkoff and return its length in blocks.  Otherwise, zero is
509  * returned.
510  */
511 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
512 					    sector_t start_blk,
513 					    sector_t *blkoff)
514 {
515 	unsigned int i;
516 	pgoff_t index;
517 	unsigned int nblocks_in_page;
518 	unsigned long length = 0;
519 	sector_t b;
520 	struct pagevec pvec;
521 	struct page *page;
522 
523 	if (inode->i_mapping->nrpages == 0)
524 		return 0;
525 
526 	index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
527 	nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
528 
529 	pagevec_init(&pvec, 0);
530 
531 repeat:
532 	pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
533 					pvec.pages);
534 	if (pvec.nr == 0)
535 		return length;
536 
537 	if (length > 0 && pvec.pages[0]->index > index)
538 		goto out;
539 
540 	b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
541 	i = 0;
542 	do {
543 		page = pvec.pages[i];
544 
545 		lock_page(page);
546 		if (page_has_buffers(page)) {
547 			struct buffer_head *bh, *head;
548 
549 			bh = head = page_buffers(page);
550 			do {
551 				if (b < start_blk)
552 					continue;
553 				if (buffer_delay(bh)) {
554 					if (length == 0)
555 						*blkoff = b;
556 					length++;
557 				} else if (length > 0) {
558 					goto out_locked;
559 				}
560 			} while (++b, bh = bh->b_this_page, bh != head);
561 		} else {
562 			if (length > 0)
563 				goto out_locked;
564 
565 			b += nblocks_in_page;
566 		}
567 		unlock_page(page);
568 
569 	} while (++i < pagevec_count(&pvec));
570 
571 	index = page->index + 1;
572 	pagevec_release(&pvec);
573 	cond_resched();
574 	goto repeat;
575 
576 out_locked:
577 	unlock_page(page);
578 out:
579 	pagevec_release(&pvec);
580 	return length;
581 }
582