/*
 * btnode.c - NILFS B-tree node cache
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * This file was originally written by Seiji Kihara <kihara@osrg.net>
 * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
 * stabilization and simplification.
 *
 */

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "mdt.h"
#include "dat.h"
#include "page.h"
#include "btnode.h"

/**
 * nilfs_btnode_cache_init - initialize a B-tree node cache mapping
 * @btnc: address space to be used as the btnode cache
 * @bdi: backing device info attached to the mapping
 *
 * Thin wrapper around nilfs_mapping_init().
 */
void nilfs_btnode_cache_init(struct address_space *btnc,
			     struct backing_dev_info *bdi)
{
	nilfs_mapping_init(btnc, bdi);
}

/**
 * nilfs_btnode_cache_clear - drop all pages from a btnode cache
 * @btnc: btnode cache address space
 *
 * Invalidates clean pages first, then truncates whatever remains so the
 * mapping ends up empty.
 */
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
	invalidate_mapping_pages(btnc, 0, -1);
	truncate_inode_pages(btnc, 0);
}

/**
 * nilfs_btnode_create_block - allocate a new B-tree node buffer
 * @btnc: btnode cache address space
 * @blocknr: block number (cache index) for the new node
 *
 * Grabs a fresh buffer at @blocknr, zero-fills it, and marks it mapped and
 * uptodate.  The buffer must not already be in use: hitting a mapped,
 * uptodate, or dirty buffer here indicates cache corruption and triggers
 * BUG().
 *
 * Return: the new buffer head with an elevated refcount, or NULL on
 * allocation failure.  The page lock taken by nilfs_grab_buffer() is
 * released before returning.
 */
struct buffer_head *
nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
{
	struct inode *inode = NILFS_BTNC_I(btnc);
	struct buffer_head *bh;

	bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
	if (unlikely(!bh))
		return NULL;

	/* A brand-new node buffer must be completely unused. */
	if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
		     buffer_dirty(bh))) {
		brelse(bh);
		BUG();
	}
	memset(bh->b_data, 0, 1 << inode->i_blkbits);
	bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
	bh->b_blocknr = blocknr;
	set_buffer_mapped(bh);
	set_buffer_uptodate(bh);

	/* Drop the page lock and page reference taken by nilfs_grab_buffer();
	 * the buffer-head reference is handed to the caller. */
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	return bh;
}

/**
 * nilfs_btnode_submit_block - submit a read request for a B-tree node
 * @btnc: btnode cache address space
 * @blocknr: block number (cache index; may be a virtual block number)
 * @pblocknr: physical block number, or 0 to look it up through the DAT
 * @mode: READ or READA
 * @pbh: place to store the resulting buffer head
 * @submit_ptr: last submitted physical block, used to enforce sequential
 *              readahead; updated when a request is actually submitted
 *
 * Return: 0 when a read was submitted, or one of the internal codes
 * -EEXIST (buffer already uptodate or dirty; *pbh is still set) and
 * -EBUSY (READA only: block not contiguous with *submit_ptr + 1, or the
 * buffer lock was contended).  Real errors: -ENOMEM on buffer allocation
 * failure, or the error from nilfs_dat_translate().
 */
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
			      sector_t pblocknr, int mode,
			      struct buffer_head **pbh, sector_t *submit_ptr)
{
	struct buffer_head *bh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	struct page *page;
	int err;

	bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
	if (unlikely(!bh))
		return -ENOMEM;

	err = -EEXIST; /* internal code */
	page = bh->b_page;

	if (buffer_uptodate(bh) || buffer_dirty(bh))
		goto found;

	if (pblocknr == 0) {
		pblocknr = blocknr;
		if (inode->i_ino != NILFS_DAT_INO) {
			/* The DAT maps virtual to physical blocks; the DAT's
			 * own blocks are already physical and skip this. */
			struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;

			/* blocknr is a virtual block number */
			err = nilfs_dat_translate(dat, blocknr, &pblocknr);
			if (unlikely(err)) {
				brelse(bh);
				goto out_locked;
			}
		}
	}

	if (mode == READA) {
		/* Readahead is best-effort: give up rather than block if the
		 * request is not sequential or the buffer lock is taken. */
		if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
			err = -EBUSY; /* internal code */
			brelse(bh);
			goto out_locked;
		}
	} else { /* mode == READ */
		lock_buffer(bh);
	}
	/* Re-check under the buffer lock: another reader may have completed. */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		err = -EEXIST; /* internal code */
		goto found;
	}
	set_buffer_mapped(bh);
	bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
	bh->b_blocknr = pblocknr; /* set block address for read */
	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(mode, bh);
	bh->b_blocknr = blocknr; /* set back to the given block address */
	*submit_ptr = pblocknr;
	err = 0;
 found:
	*pbh = bh;

 out_locked:
	unlock_page(page);
	page_cache_release(page);
	return err;
}

/**
 * nilfs_btnode_delete - delete B-tree node buffer
 * @bh: buffer to be deleted
 *
 * nilfs_btnode_delete() invalidates the specified buffer and delete the page
 * including the buffer if the page gets unbusy.
 */
void nilfs_btnode_delete(struct buffer_head *bh)
{
	struct address_space *mapping;
	struct page *page = bh->b_page;
	pgoff_t index = page_index(page);
	int still_dirty;

	page_cache_get(page);
	lock_page(page);
	wait_on_page_writeback(page);

	nilfs_forget_buffer(bh);
	still_dirty = PageDirty(page);
	mapping = page->mapping;
	unlock_page(page);
	page_cache_release(page);

	/* Only try to drop the whole page once no buffer on it is dirty;
	 * mapping may be NULL if the page was already detached. */
	if (!still_dirty && mapping)
		invalidate_inode_pages2_range(mapping, index, index);
}

/**
 * nilfs_btnode_prepare_change_key
 *	prepare to move contents of the block for old key to one of new key.
 *	the old buffer will not be removed, but might be reused for new buffer.
 *	it might return -ENOMEM because of memory allocation errors,
 *	and might return -EIO because of disk read errors.
 */
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh, *nbh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	int err;

	if (oldkey == newkey)
		return 0;

	obh = ctxt->bh;
	ctxt->newbh = NULL;

	if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
		/* Block size == page size: try to move the existing page to
		 * the new index instead of copying its contents.  The page
		 * stays locked until commit/abort. */
		lock_page(obh->b_page);
		/*
		 * We cannot call radix_tree_preload for the kernels older
		 * than 2.6.23, because it is not exported for modules.
		 */
retry:
		err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
		if (err)
			goto failed_unlock;
		/* BUG_ON(oldkey != obh->b_page->index); */
		if (unlikely(oldkey != obh->b_page->index))
			NILFS_PAGE_BUG(obh->b_page,
				       "invalid oldkey %lld (newkey=%lld)",
				       (unsigned long long)oldkey,
				       (unsigned long long)newkey);

		/* Insert the page at the new key while it is still indexed at
		 * the old key; the old slot is removed at commit time. */
		spin_lock_irq(&btnc->tree_lock);
		err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
		spin_unlock_irq(&btnc->tree_lock);
		/*
		 * Note: page->index will not change to newkey until
		 * nilfs_btnode_commit_change_key() will be called.
		 * To protect the page in intermediate state, the page lock
		 * is held.
		 */
		radix_tree_preload_end();
		if (!err)
			return 0;
		else if (err != -EEXIST)
			goto failed_unlock;

		/* A stale page occupies the new key; invalidate it and retry
		 * the insertion. */
		err = invalidate_inode_pages2_range(btnc, newkey, newkey);
		if (!err)
			goto retry;
		/* fallback to copy mode */
		unlock_page(obh->b_page);
	}

	/* Copy mode: allocate a fresh buffer at the new key; the data is
	 * copied into it at commit time. */
	nbh = nilfs_btnode_create_block(btnc, newkey);
	if (!nbh)
		return -ENOMEM;

	BUG_ON(nbh == obh);
	ctxt->newbh = nbh;
	return 0;

 failed_unlock:
	unlock_page(obh->b_page);
	return err;
}

/**
 * nilfs_btnode_commit_change_key
 *	commit the change_key operation prepared by prepare_change_key().
 */
void nilfs_btnode_commit_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	struct page *opage;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		/* Move mode: the page was inserted at newkey by
		 * prepare_change_key() and is still locked. */
		opage = obh->b_page;
		if (unlikely(oldkey != opage->index))
			NILFS_PAGE_BUG(opage,
				       "invalid oldkey %lld (newkey=%lld)",
				       (unsigned long long)oldkey,
				       (unsigned long long)newkey);
		nilfs_btnode_mark_dirty(obh);

		/* Remove the old slot and propagate the dirty tag to the new
		 * one, since mark_dirty above tagged the page at oldkey. */
		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, oldkey);
		radix_tree_tag_set(&btnc->page_tree, newkey,
				   PAGECACHE_TAG_DIRTY);
		spin_unlock_irq(&btnc->tree_lock);

		opage->index = obh->b_blocknr = newkey;
		unlock_page(opage);
	} else {
		/* Copy mode: duplicate the contents into the new buffer and
		 * retire the old one. */
		nilfs_copy_buffer(nbh, obh);
		nilfs_btnode_mark_dirty(nbh);

		nbh->b_blocknr = newkey;
		ctxt->bh = nbh;
		nilfs_btnode_delete(obh); /* will decrement bh->b_count */
	}
}

/**
 * nilfs_btnode_abort_change_key
 *	abort the change_key operation prepared by prepare_change_key().
 */
void nilfs_btnode_abort_change_key(struct address_space *btnc,
				   struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		/* Move mode: undo the speculative insertion at newkey and
		 * release the page lock held since prepare_change_key(). */
		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, newkey);
		spin_unlock_irq(&btnc->tree_lock);
		unlock_page(ctxt->bh->b_page);
	} else
		/* Copy mode: just drop the buffer created for newkey. */
		brelse(nbh);
}