/*
 * segment.c - NILFS segment constructor.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 *
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
#include "segbuf.h"


/*
 * Segment constructor
 */
#define SC_N_INODEVEC	16	/* Size of locally allocated inode vector */

#define SC_MAX_SEGDELTA 64	/* Upper limit of the number of segments
				   appended in collection retry loop */

/* Construction mode */
enum {
	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
	SC_LSEG_DSYNC,	/* Flush data blocks of a given file and make
			   a logical segment without a super root */
	SC_FLUSH_FILE,	/* Flush data files, leads to segment writes without
			   creating a checkpoint */
	SC_FLUSH_DAT,	/* Flush DAT file.
			   This also creates segments
			   without a checkpoint */
};

/* Stage numbers of dirty block collection */
enum {
	NILFS_ST_INIT = 0,
	NILFS_ST_GC,		/* Collecting dirty blocks for GC */
	NILFS_ST_FILE,
	NILFS_ST_IFILE,
	NILFS_ST_CPFILE,
	NILFS_ST_SUFILE,
	NILFS_ST_DAT,
	NILFS_ST_SR,		/* Super root */
	NILFS_ST_DSYNC,		/* Data sync blocks */
	NILFS_ST_DONE,
};

/* State flags of collection */
#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
#define NILFS_CF_SUFREED	0x0004	/* segment usages have been freed */
#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)

/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	void (*write_data_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
	void (*write_node_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
};

/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
			       int);

#define nilfs_cnt32_gt(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) && \
	 ((__s32)(b) - (__s32)(a) < 0))
#define nilfs_cnt32_ge(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) && \
	 ((__s32)(a) - (__s32)(b) >= 0))
#define nilfs_cnt32_lt(a, b)	nilfs_cnt32_gt(b, a)
#define nilfs_cnt32_le(a, b)	nilfs_cnt32_ge(b, a)

static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *save = NULL;

	if (cur_ti) {
		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
			return ++cur_ti->ti_count;
		else {
			/*
			 * If the journal_info field is occupied by another FS,
			 * it is saved and will be restored on
			 * nilfs_transaction_commit().
			 */
			printk(KERN_WARNING
			       "NILFS warning: journal info from a different "
			       "FS\n");
			save = current->journal_info;
		}
	}
	if (!ti) {
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	} else {
		ti->ti_flags = 0;
	}
	ti->ti_count = 0;
	ti->ti_save = save;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make segment construction and write tasks
 * exclusive.  The function is used with nilfs_transaction_commit() in pairs.
 * The region enclosed by these two functions can be nested.  To avoid a
 * deadlock, the semaphore is only acquired or released in the outermost call.
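 *
 * A typical caller is expected to pair the two calls roughly as in the
 * sketch below (illustrative only, error handling omitted; see also
 * nilfs_transaction_abort() for the failure path):
 *
 *	struct nilfs_transaction_info ti;
 *	int ret;
 *
 *	ret = nilfs_transaction_begin(sb, &ti, 1);
 *	if (!ret) {
 *		... modify and dirty blocks ...
 *		ret = nilfs_transaction_commit(sb);
 *	}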
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it.  It is initialized and hooked onto the current task in
 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
 * instead; otherwise a new struct is assigned from a slab.
 *
 * When the @vacancy_check flag is set, this function will check the amount of
 * free space, and will wait for the GC to reclaim disk space if low capacity.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ENOSPC - No space left on device
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct nilfs_sb_info *sbi;
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(ti);

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0)
		return 0;

	vfs_check_frozen(sb, SB_FREEZE_WRITE);

	sbi = NILFS_SB(sb);
	nilfs = sbi->s_nilfs;
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}
	return 0;

 failed:
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return ret;
}

/**
 * nilfs_transaction_commit - commit indivisible file operations.
 * @sb: super block
 *
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin().  This is only performed
 * in the outermost call of this function.  If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor.  If a sync flag is set, it starts construction
 * directly.
 */
int nilfs_transaction_commit(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_sb_info *sbi;
	struct nilfs_sc_info *sci;
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	ti->ti_flags |= NILFS_TI_COMMIT;
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return 0;
	}
	sbi = NILFS_SB(sb);
	sci = NILFS_SC(sbi);
	if (sci != NULL) {
		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
		if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) >
		    sci->sc_watermark)
			nilfs_segctor_do_flush(sci, 0);
	}
	up_read(&sbi->s_nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;

	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return err;
}

void nilfs_transaction_abort(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return;
	}
	up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem);

	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
}

void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct the_nilfs *nilfs = sbi->s_nilfs;

	if (!sci || !sci->sc_flush_request)
		return;

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	down_write(&nilfs->ns_segctor_sem);
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}

static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;

	WARN_ON(cur_ti);
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	INIT_LIST_HEAD(&ti->ti_garbage);
	current->journal_info = ti;

	for (;;) {
		down_write(&sbi->s_nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags))
			break;

		nilfs_segctor_do_immediate_flush(NILFS_SC(sbi));

		up_write(&sbi->s_nilfs->ns_segctor_sem);
		yield();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;
}

static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);

	up_write(&sbi->s_nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;
	if (!list_empty(&ti->ti_garbage))
		nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
}

static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned blocksize = sci->sc_super->s_blocksize;
	void *p;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		ssp->offset = 0;
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}
	p = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;
	return p;
}

/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *sumbh;
	unsigned sumbytes;
	unsigned flags = 0;
	int err;

	if (nilfs_doing_gc())
		flags = NILFS_SS_GC;
	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
	if (unlikely(err))
		return err;

	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sumbytes = segbuf->sb_sum.sumbytes;
	sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
	sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}

static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /* The current segment is filled up
				  (internal code) */
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}

static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;
	}
	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}

/*
 * Functions for making segment summary and payloads
 */
static int nilfs_segctor_segsum_block_required(
	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
	unsigned binfo_size)
{
	unsigned blocksize = sci->sc_super->s_blocksize;
	/* The sizes of finfo and binfo are small enough compared to blocksize */

	return ssp->offset + binfo_size +
		(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
		blocksize;
}

static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

	if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
	/* skip finfo */
}

static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;
	__u64 cno;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);

	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
		cno = ii->i_cno;
	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
		cno = 0;
	else
		cno = sci->sc_cno;

	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
					       sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(cno);

	segbuf = sci->sc_curseg;
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}

static int nilfs_handle_bmap_error(int err, const char *fname,
				   struct inode *inode, struct super_block *sb)
{
	if (err == -EINVAL) {
		nilfs_error(sb, fname, "broken bmap (inode=%lu)\n",
			    inode->i_ino);
		err = -EIO;
	}
	return err;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
				   struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __func__, inode,
					       sci->sc_super);

	err = nilfs_segctor_add_file_block(sci, bh, inode,
					   sizeof(struct nilfs_binfo_v));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __func__, inode,
					       sci->sc_super);
	return 0;
}

static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}

static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*binfo_v));
	*binfo_v = binfo->bi_v;
}

static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*vblocknr));
	*vblocknr = binfo->bi_v.bi_vblocknr;
}

static struct nilfs_sc_operations nilfs_sc_file_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};

static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __func__, inode,
					       sci->sc_super);

	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}

static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
							sizeof(*blkoff));
	*blkoff = binfo->bi_dat.bi_blkoff;
}

static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *binfo_dat =
		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
	*binfo_dat = binfo->bi_dat;
}

static struct nilfs_sc_operations nilfs_sc_dat_ops = {
	.collect_data = nilfs_collect_dat_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_dat_bmap,
	.write_data_binfo = nilfs_write_dat_data_binfo,
	.write_node_binfo = nilfs_write_dat_node_binfo,
};

static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = NULL,
};

static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
					      struct list_head *listp,
					      size_t nlimit,
					      loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = 0, last = ULONG_MAX;
	size_t ndirties = 0;
	int i;

	if (unlikely(start != 0 || end != LLONG_MAX)) {
		/*
		 * A valid range is given for sync-ing data pages.  The
		 * range is rounded to page boundaries; extra dirty buffers
		 * may be included if blocksize < pagesize.
		 */
		index = start >> PAGE_SHIFT;
		last = end >> PAGE_SHIFT;
	}
	pagevec_init(&pvec, 0);
 repeat:
	if (unlikely(index > last) ||
	    !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min_t(pgoff_t, last - index,
				      PAGEVEC_SIZE - 1) + 1))
		return ndirties;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct buffer_head *bh, *head;
		struct page *page = pvec.pages[i];

		if (unlikely(page->index > last))
			break;

		if (mapping->host) {
			lock_page(page);
			if (!page_has_buffers(page))
				create_empty_buffers(page,
						     1 << inode->i_blkbits, 0);
			unlock_page(page);
		}

		bh = head = page_buffers(page);
		do {
			if (!buffer_dirty(bh))
				continue;
			get_bh(bh);
			list_add_tail(&bh->b_assoc_buffers, listp);
			ndirties++;
			if (unlikely(ndirties >= nlimit)) {
				pagevec_release(&pvec);
				cond_resched();
				return ndirties;
			}
		} while (bh = bh->b_this_page, bh != head);
	}
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;
}

static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
					    struct list_head *listp)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct address_space *mapping = &ii->i_btnode_cache;
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh)) {
					get_bh(bh);
					list_add_tail(&bh->b_assoc_buffers,
						      listp);
				}
				bh = bh->b_this_page;
			} while (bh != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
			       struct list_head *head, int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&sbi->s_inode_lock);
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			list_del_init(&ii->i_dirty);
			if (force) {
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty,
					      &sbi->s_dirty_files);
				continue;
			}
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;
		}
		spin_unlock(&sbi->s_inode_lock);

		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}

static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
				     struct nilfs_root *root)
{
	int ret = 0;

	if (nilfs_mdt_fetch_dirty(root->ifile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
		ret++;
	if (ret || nilfs_doing_gc())
		if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs)))
			ret++;
	return ret;
}

static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
{
	return list_empty(&sci->sc_dirty_files) &&
		!test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
		sci->sc_nfreesegs == 0 &&
		(!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
}

static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	int ret = 0;

	if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	spin_lock(&sbi->s_inode_lock);
	if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci))
		ret++;

	spin_unlock(&sbi->s_inode_lock);
	return ret;
}

static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;

	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
	nilfs_mdt_clear_dirty(nilfs->ns_sufile);
	nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs));
}

static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	/* XXX: this interface will be changed */
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
					  &raw_cp, &bh_cp);
	if (likely(!err)) {
		/* This code is duplicated in cpfile, but it is needed
		   to collect the checkpoint even if it was not newly
		   created */
		nilfs_mdt_mark_buffer_dirty(bh_cp);
		nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
		nilfs_cpfile_put_checkpoint(
			nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	} else
		WARN_ON(err == -EINVAL || err == -ENOENT);

	return err;
}

static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
					  &raw_cp, &bh_cp);
	if (unlikely(err)) {
		WARN_ON(err == -EINVAL || err == -ENOENT);
		goto failed_ibh;
	}
	raw_cp->cp_snapshot_list.ssl_next = 0;
	raw_cp->cp_snapshot_list.ssl_prev = 0;
	raw_cp->cp_inodes_count =
		cpu_to_le64(atomic_read(&sci->sc_root->inodes_count));
	raw_cp->cp_blocks_count =
		cpu_to_le64(atomic_read(&sci->sc_root->blocks_count));
	raw_cp->cp_nblk_inc =
		cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
	raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
	raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);

	if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		nilfs_checkpoint_clear_minor(raw_cp);
	else
		nilfs_checkpoint_set_minor(raw_cp);

	nilfs_write_inode_common(sci->sc_root->ifile,
				 &raw_cp->cp_ifile_inode, 1);
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	return 0;

 failed_ibh:
	return err;
}

static void nilfs_fill_in_file_bmap(struct inode *ifile,
				    struct nilfs_inode_info *ii)
{
	struct buffer_head *ibh;
	struct nilfs_inode *raw_inode;

	if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
		ibh = ii->i_bh;
		BUG_ON(!ibh);
		raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
						  ibh);
		nilfs_bmap_write(ii->i_bmap, raw_inode);
		nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
	}
}

static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
		nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
		set_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *raw_sr;
	unsigned isz = nilfs->ns_inode_size;

	bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;

	raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
	raw_sr->sr_nongc_ctime
		= cpu_to_le64(nilfs_doing_gc() ?
			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
	raw_sr->sr_flags = 0;

	nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr +
				 NILFS_SR_DAT_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
				 NILFS_SR_CPFILE_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
				 NILFS_SR_SUFILE_OFFSET(isz), 1);
}

static void nilfs_redirty_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
			clear_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_drop_collected_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
			continue;

		clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
		set_bit(NILFS_I_UPDATED, &ii->i_state);
	}
}

static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
				       struct inode *inode,
				       struct list_head *listp,
				       int (*collect)(struct nilfs_sc_info *,
						      struct buffer_head *,
						      struct inode *))
{
	struct buffer_head *bh, *n;
	int err = 0;

	if (collect) {
		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
			list_del_init(&bh->b_assoc_buffers);
			err = collect(sci, bh, inode);
			brelse(bh);
			if (unlikely(err))
				goto dispose_buffers;
		}
		return 0;
	}

 dispose_buffers:
	while (!list_empty(listp)) {
		bh = list_entry(listp->next, struct buffer_head,
				b_assoc_buffers);
		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	return err;
}

static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
{
	/* Remaining number of blocks within segment buffer */
	return sci->sc_segbuf_nblocks -
		(sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
}

static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
				   struct inode *inode,
				   struct nilfs_sc_operations *sc_ops)
{
	LIST_HEAD(data_buffers);
	LIST_HEAD(node_buffers);
	int err;

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		size_t n, rest = nilfs_segctor_buffer_rest(sci);

		n = nilfs_lookup_dirty_data_buffers(
			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
		if (n > rest) {
			err = nilfs_segctor_apply_buffers(
				sci, inode, &data_buffers,
				sc_ops->collect_data);
			BUG_ON(!err); /* always receive -E2BIG or true error */
			goto break_or_fail;
		}
	}
	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		err = nilfs_segctor_apply_buffers(
			sci, inode, &data_buffers, sc_ops->collect_data);
		if (unlikely(err)) {
			/* dispose node list */
			nilfs_segctor_apply_buffers(
				sci, inode, &node_buffers, NULL);
			goto break_or_fail;
		}
		sci->sc_stage.flags |= NILFS_CF_NODE;
	}
	/* Collect node */
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_node);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_bmap);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_segctor_end_finfo(sci, inode);
	sci->sc_stage.flags &= ~NILFS_CF_NODE;

 break_or_fail:
	return err;
}

static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
					 struct inode *inode)
{
	LIST_HEAD(data_buffers);
	size_t n, rest = nilfs_segctor_buffer_rest(sci);
	int err;

	n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
					    sci->sc_dsync_start,
					    sci->sc_dsync_end);

	err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
					  nilfs_collect_file_data);
	if (!err) {
		nilfs_segctor_end_finfo(sci, inode);
		BUG_ON(n > rest);
		/* always receive -E2BIG or true error if n > rest */
	}
	return err;
}

static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct list_head *head;
	struct nilfs_inode_info *ii;
	size_t ndone;
	int err = 0;

	switch (sci->sc_stage.scnt) {
	case NILFS_ST_INIT:
		/* Pre-processes */
		sci->sc_stage.flags = 0;

		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
			sci->sc_nblk_inc = 0;
			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
			if (mode == SC_LSEG_DSYNC) {
				sci->sc_stage.scnt = NILFS_ST_DSYNC;
				goto dsync_mode;
			}
		}

		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_stage.gc_inode_ptr = NULL;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DAT;
			goto dat_stage;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_GC:
		if (nilfs_doing_gc()) {
			head = &sci->sc_gc_inodes;
			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
						head, i_dirty);
			list_for_each_entry_continue(ii, head, i_dirty) {
				err = nilfs_segctor_scan_file(
					sci, &ii->vfs_inode,
					&nilfs_sc_file_ops);
				if (unlikely(err)) {
					sci->sc_stage.gc_inode_ptr = list_entry(
						ii->i_dirty.prev,
						struct nilfs_inode_info,
						i_dirty);
					goto break_or_fail;
				}
				set_bit(NILFS_I_COLLECTED, &ii->i_state);
			}
			sci->sc_stage.gc_inode_ptr = NULL;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_FILE:
		head = &sci->sc_dirty_files;
		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
					i_dirty);
		list_for_each_entry_continue(ii, head, i_dirty) {
			clear_bit(NILFS_I_DIRTY, &ii->i_state);

			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
						      &nilfs_sc_file_ops);
			if (unlikely(err)) {
				sci->sc_stage.dirty_file_ptr =
					list_entry(ii->i_dirty.prev,
						   struct nilfs_inode_info,
						   i_dirty);
				goto break_or_fail;
			}
			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
			/* XXX: required ? */
		}
		sci->sc_stage.dirty_file_ptr = NULL;
		if (mode == SC_FLUSH_FILE) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;
		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
		/* Fall through */
	case NILFS_ST_IFILE:
		err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;
		/* Creating a checkpoint */
		err = nilfs_segctor_create_checkpoint(sci);
		if (unlikely(err))
			break;
		/* Fall through */
	case NILFS_ST_CPFILE:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_SUFILE:
		err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
					 sci->sc_nfreesegs, &ndone);
		if (unlikely(err)) {
			nilfs_sufile_cancel_freev(nilfs->ns_sufile,
						  sci->sc_freesegs, ndone,
						  NULL);
			break;
		}
		sci->sc_stage.flags |= NILFS_CF_SUFREED;

		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_DAT:
 dat_stage:
		err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs),
					      &nilfs_sc_dat_ops);
		if (unlikely(err))
			break;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_SR:
		if (mode == SC_LSEG_SR) {
			/* Appending a super root */
			err = nilfs_segctor_add_super_root(sci);
			if (unlikely(err))
				break;
		}
		/* End of a logical segment */
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DSYNC:
 dsync_mode:
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
		ii = sci->sc_dsync_inode;
		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
			break;

		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
		if (unlikely(err))
			break;
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DONE:
		return 0;
	default:
		BUG();
	}

 break_or_fail:
	return err;
}

/**
 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
 * @sci: nilfs_sc_info
 * @nilfs: nilfs object
 */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
					    struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	__u64 nextnum;
	int err, alloc = 0;

	segbuf = nilfs_segbuf_new(sci->sc_super);
	if (unlikely(!segbuf))
		return -ENOMEM;

	if (list_empty(&sci->sc_write_logs)) {
		nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
				 nilfs->ns_pseg_offset, nilfs);
		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
			nilfs_shift_to_next_segment(nilfs);
			nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
		}

		segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
		nextnum = nilfs->ns_nextnum;

		if (nilfs->ns_segnum == nilfs->ns_nextnum)
			/* Start from the head of a new full segment */
			alloc++;
	} else {
		/* Continue logs */
		prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
		nilfs_segbuf_map_cont(segbuf, prev);
		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
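		/* The next segment planned by the previous log is inherited */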
		nextnum = prev->sb_nextnum;

		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
			nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
			segbuf->sb_sum.seg_seq++;
			alloc++;
		}
	}

	err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
	if (err)
		goto failed;

	if (alloc) {
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
		if (err)
			goto failed;
	}
	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);

	BUG_ON(!list_empty(&sci->sc_segbufs));
	list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
	return 0;

 failed:
	nilfs_segbuf_free(segbuf);
	return err;
}

static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
					 struct the_nilfs *nilfs, int nadd)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 nextnextnum;
	LIST_HEAD(list);
	int err, ret, i;

	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	/*
	 * Since the segment specified with nextnum might be allocated during
	 * the previous construction, the buffer including its segusage may
	 * not be dirty.  The following call ensures that the buffer is dirty
	 * and will pin the buffer on memory until the sufile is written.
	 */
	err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
	if (unlikely(err))
		return err;

	for (i = 0; i < nadd; i++) {
		/* extend segment info */
		err = -ENOMEM;
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			goto failed;

		/* map this buffer to region of segment on-disk */
		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;

		/* allocate the next next full segment */
		err = nilfs_sufile_alloc(sufile, &nextnextnum);
		if (unlikely(err))
			goto failed_segbuf;

		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);

		list_add_tail(&segbuf->sb_list, &list);
		prev = segbuf;
	}
	list_splice_tail(&list, &sci->sc_segbufs);
	return 0;

 failed_segbuf:
	nilfs_segbuf_free(segbuf);
 failed:
	list_for_each_entry(segbuf, &list, sb_list) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	nilfs_destroy_logs(&list);
	return err;
}

static void nilfs_free_incomplete_logs(struct list_head *logs,
				       struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	struct inode *sufile = nilfs->ns_sufile;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(logs);
	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	if (atomic_read(&segbuf->sb_err)) {
		/* Case 1: The first segment failed */
		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
			/* Case 1a: Partial segment appended into an existing
			   segment */
			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
						segbuf->sb_fseg_end);
		else /* Case 1b: New full segment */
			set_nilfs_discontinued(nilfs);
	}

	prev = segbuf;
	list_for_each_entry_continue(segbuf, logs, sb_list) {
		if (prev->sb_nextnum != segbuf->sb_nextnum) {
			ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
			WARN_ON(ret); /* never fails */
		}
		if (atomic_read(&segbuf->sb_err) &&
		    segbuf->sb_segnum != nilfs->ns_nextnum)
			/* Case 2: extended segment (!= next) failed */
			nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
		prev = segbuf;
	}
}

static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
					  struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	unsigned long live_blocks;
	int ret;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		live_blocks = segbuf->sb_sum.nblocks +
			(segbuf->sb_pseg_start - segbuf->sb_fseg_start);
		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
						     live_blocks,
						     sci->sc_seg_ctime);
		WARN_ON(ret); /* always succeeds because the segusage is dirty */
	}
}

static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(logs);
	ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
					     segbuf->sb_pseg_start -
					     segbuf->sb_fseg_start, 0);
	WARN_ON(ret); /* always succeeds because the segusage is dirty */

	list_for_each_entry_continue(segbuf, logs, sb_list) {
		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
						     0, 0);
		WARN_ON(ret); /* always succeeds */
	}
}

static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
					    struct nilfs_segment_buffer *last,
					    struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf = last;
	int ret;

	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret);
	}
	nilfs_truncate_logs(&sci->sc_segbufs, last);
}


static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
				 struct the_nilfs *nilfs, int mode)
{
	struct nilfs_cstage prev_stage = sci->sc_stage;
	int err, nadd = 1;

	/* Collection retry loop */
	for (;;) {
		sci->sc_nblk_this_inc = 0;
		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

		err = nilfs_segctor_reset_segment_buffer(sci);
		if (unlikely(err))
			goto failed;

		err = nilfs_segctor_collect_blocks(sci, mode);
		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
		if (!err)
			break;

		if (unlikely(err != -E2BIG))
			goto failed;

		/* The current segment is filled up */
		if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
			break;

		nilfs_clear_logs(&sci->sc_segbufs);

		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
		if (unlikely(err))
			return err;

		if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
			err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
							sci->sc_freesegs,
							sci->sc_nfreesegs,
							NULL);
			WARN_ON(err); /* should not happen */
		}
		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
		sci->sc_stage = prev_stage;
	}
	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
	return 0;

 failed:
	return err;
}

static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
				      struct buffer_head *new_bh)
{
	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));

	list_replace_init(&old_bh->b_assoc_buffers,
			  &new_bh->b_assoc_buffers);
	/* The caller must release old_bh */
}

static int
nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
				     struct nilfs_segment_buffer *segbuf,
				     int mode)
{
	struct inode *inode = NULL;
	sector_t blocknr;
	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
	unsigned long nblocks = 0, ndatablk = 0;
	struct nilfs_sc_operations *sc_op = NULL;
	struct nilfs_segsum_pointer ssp;
	struct nilfs_finfo *finfo = NULL;
	union nilfs_binfo binfo;
	struct buffer_head *bh, *bh_org;
	ino_t ino = 0;
	int err = 0;

	if (!nfinfo)
		goto out;

	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	ssp.offset = sizeof(struct nilfs_segment_summary);

	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		if (bh == segbuf->sb_super_root)
			break;
		if (!finfo) {
			finfo = nilfs_segctor_map_segsum_entry(
				sci, &ssp, sizeof(*finfo));
			ino = le64_to_cpu(finfo->fi_ino);
			nblocks = le32_to_cpu(finfo->fi_nblocks);
			ndatablk = le32_to_cpu(finfo->fi_ndatablk);

			if (buffer_nilfs_node(bh))
				inode = NILFS_BTNC_I(bh->b_page->mapping);
			else
				inode = NILFS_AS_I(bh->b_page->mapping);

			if (mode == SC_LSEG_DSYNC)
				sc_op = &nilfs_sc_dsync_ops;
			else if (ino == NILFS_DAT_INO)
				sc_op = &nilfs_sc_dat_ops;
			else /* file blocks */
				sc_op = &nilfs_sc_file_ops;
		}
		bh_org = bh;
		get_bh(bh_org);
		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
					&binfo);
		if (bh != bh_org)
			nilfs_list_replace_buffer(bh_org, bh);
		brelse(bh_org);
		if (unlikely(err))
			goto failed_bmap;

		if (ndatablk > 0)
			sc_op->write_data_binfo(sci, &ssp, &binfo);
		else
			sc_op->write_node_binfo(sci, &ssp, &binfo);

		blocknr++;
		if (--nblocks == 0) {
			finfo = NULL;
			if (--nfinfo == 0)
				break;
		} else if (ndatablk > 0)
			ndatablk--;
	}
 out:
	return 0;

 failed_bmap:
	err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
	return err;
}

static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_segment_buffer *segbuf;
	int err;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
		if (unlikely(err))
			return err;
		nilfs_segbuf_fill_in_segsum(segbuf);
	}
	return 0;
}

static int
nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
{
	struct page *clone_page;
	struct buffer_head *bh, *head, *bh2;
	void *kaddr;

	bh = head = page_buffers(page);

	clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
	if (unlikely(!clone_page))
		return -ENOMEM;

	bh2 = page_buffers(clone_page);
	kaddr = kmap_atomic(page, KM_USER0);
	do {
		if (list_empty(&bh->b_assoc_buffers))
			continue;
		get_bh(bh2);
		page_cache_get(clone_page); /* for each bh */
		memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
		bh2->b_blocknr = bh->b_blocknr;
		list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
		list_add_tail(&bh->b_assoc_buffers, out);
	} while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
	kunmap_atomic(kaddr, KM_USER0);
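
	/* Put the newly built clone page under writeback like its original */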
	if (!TestSetPageWriteback(clone_page))
		account_page_writeback(clone_page);
	unlock_page(clone_page);

	return 0;
}

static int nilfs_test_page_to_be_frozen(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
		return 0;

	if (page_mapped(page)) {
		ClearPageChecked(page);
		return 1;
	}
	return PageChecked(page);
}

static int nilfs_begin_page_io(struct page *page, struct list_head *out)
{
	if (!page || PageWriteback(page))
		/* For split b-tree node pages, this function may be called
		   twice.  We ignore the 2nd or later calls by this check. */
		return 0;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);

	if (nilfs_test_page_to_be_frozen(page)) {
		int err = nilfs_copy_replace_page_buffers(page, out);
		if (unlikely(err))
			return err;
	}
	return 0;
}

static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
				       struct page **failed_page)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct list_head *list = &sci->sc_copied_buffers;
	int err;

	*failed_page = NULL;
	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page) {
					lock_page(bd_page);
					clear_page_dirty_for_io(bd_page);
					set_page_writeback(bd_page);
					unlock_page(bd_page);
				}
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					lock_page(bd_page);
					clear_page_dirty_for_io(bd_page);
					set_page_writeback(bd_page);
					unlock_page(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				err = nilfs_begin_page_io(fs_page, list);
				if (unlikely(err)) {
					*failed_page = fs_page;
					goto out;
				}
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page) {
		lock_page(bd_page);
		clear_page_dirty_for_io(bd_page);
		set_page_writeback(bd_page);
		unlock_page(bd_page);
	}
	err = nilfs_begin_page_io(fs_page, list);
	if (unlikely(err))
		*failed_page = fs_page;
 out:
	return err;
}

static int nilfs_segctor_write(struct nilfs_sc_info *sci,
			       struct the_nilfs *nilfs)
{
	int ret;

	ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
	list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
	return ret;
}

static void __nilfs_end_page_io(struct page *page, int err)
{
	if (!err) {
		if (!nilfs_page_buffers_clean(page))
			__set_page_dirty_nobuffers(page);
		ClearPageError(page);
	} else {
		__set_page_dirty_nobuffers(page);
		SetPageError(page);
	}

	if (buffer_nilfs_allocated(page_buffers(page))) {
		if (TestClearPageWriteback(page))
			dec_zone_page_state(page, NR_WRITEBACK);
	} else
		end_page_writeback(page);
}

static void nilfs_end_page_io(struct page *page, int err)
{
	if (!page)
		return;

	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
		/*
		 * For b-tree node pages, this function may be called twice
		 * or more because they might be split in a segment.
		 */
		if (PageDirty(page)) {
			/*
			 * For pages holding split b-tree node buffers, dirty
			 * flag on the buffers may be cleared discretely.
			 * In that case, the page is once redirtied for
			 * remaining buffers, and it must be cancelled if
			 * all the buffers get cleaned later.
			 */
			lock_page(page);
			if (nilfs_page_buffers_clean(page))
				__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		return;
	}

	__nilfs_end_page_io(page, err);
}

static void nilfs_clear_copied_buffers(struct list_head *list, int err)
{
	struct buffer_head *bh, *head;
	struct page *page;

	while (!list_empty(list)) {
		bh = list_entry(list->next, struct buffer_head,
				b_assoc_buffers);
		page = bh->b_page;
		page_cache_get(page);
		head = bh = page_buffers(page);
		do {
			if (!list_empty(&bh->b_assoc_buffers)) {
				list_del_init(&bh->b_assoc_buffers);
				if (!err) {
					set_buffer_uptodate(bh);
					clear_buffer_dirty(bh);
					clear_buffer_nilfs_volatile(bh);
				}
				brelse(bh); /* for b_assoc_buffers */
			}
		} while ((bh = bh->b_this_page) != head);

		__nilfs_end_page_io(page, err);
		page_cache_release(page);
	}
}

static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
			     int err)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct buffer_head *bh;

	if (list_empty(logs))
		return;

	list_for_each_entry(segbuf, logs, sb_list) {
		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, err);
				if (fs_page && fs_page == failed_page)
					return;
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, err);
}

static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs, int err)
{
	LIST_HEAD(logs);
	int ret;

	list_splice_tail_init(&sci->sc_write_logs, &logs);
	ret = nilfs_wait_on_logs(&logs);
	nilfs_abort_logs(&logs, NULL, ret ? : err);

	list_splice_tail_init(&sci->sc_segbufs, &logs);
	nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
	nilfs_free_incomplete_logs(&logs, nilfs);
	nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);

	if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
		ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
						sci->sc_freesegs,
						sci->sc_nfreesegs,
						NULL);
		WARN_ON(ret); /* should not happen */
	}

	nilfs_destroy_logs(&logs);
}

static void nilfs_set_next_segment(struct the_nilfs *nilfs,
				   struct nilfs_segment_buffer *segbuf)
{
	nilfs->ns_segnum = segbuf->sb_segnum;
	nilfs->ns_nextnum = segbuf->sb_nextnum;
	nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
		+ segbuf->sb_sum.nblocks;
	nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
	nilfs->ns_ctime = segbuf->sb_sum.ctime;
}

static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
	int update_sr = false;

	list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}
		/*
		 * We assume that the buffers which belong to the same page
		 * continue over the buffer list.
		 * Under this assumption, the last BHs of pages are
		 * identifiable by the discontinuity of bh->b_page
		 * (page != fs_page).
		 *
		 * For B-tree node blocks, however, this assumption is not
		 * guaranteed.  The cleanup code of B-tree node pages needs
		 * special care.
		 */
		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			clear_buffer_nilfs_volatile(bh);
			clear_buffer_nilfs_redirected(bh);
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				update_sr = true;
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, 0);
				fs_page = bh->b_page;
			}
		}

		if (!nilfs_segbuf_simplex(segbuf)) {
			if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				sci->sc_lseg_stime = jiffies;
			}
			if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
		}
	}
	/*
	 * Since pages may continue over multiple segment buffers,
	 * end of the last page must be checked outside of the loop.
1940 */ 1941 if (bd_page) 1942 end_page_writeback(bd_page); 1943 1944 nilfs_end_page_io(fs_page, 0); 1945 1946 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); 1947 1948 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1949 1950 if (nilfs_doing_gc()) 1951 nilfs_drop_collected_inodes(&sci->sc_gc_inodes); 1952 else 1953 nilfs->ns_nongc_ctime = sci->sc_seg_ctime; 1954 1955 sci->sc_nblk_inc += sci->sc_nblk_this_inc; 1956 1957 segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs); 1958 nilfs_set_next_segment(nilfs, segbuf); 1959 1960 if (update_sr) { 1961 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, 1962 segbuf->sb_sum.seg_seq, nilfs->ns_cno++); 1963 1964 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 1965 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); 1966 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 1967 nilfs_segctor_clear_metadata_dirty(sci); 1968 } else 1969 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 1970 } 1971 1972 static int nilfs_segctor_wait(struct nilfs_sc_info *sci) 1973 { 1974 int ret; 1975 1976 ret = nilfs_wait_on_logs(&sci->sc_write_logs); 1977 if (!ret) { 1978 nilfs_segctor_complete_write(sci); 1979 nilfs_destroy_logs(&sci->sc_write_logs); 1980 } 1981 return ret; 1982 } 1983 1984 static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, 1985 struct nilfs_sb_info *sbi) 1986 { 1987 struct nilfs_inode_info *ii, *n; 1988 struct inode *ifile = sci->sc_root->ifile; 1989 1990 spin_lock(&sbi->s_inode_lock); 1991 retry: 1992 list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { 1993 if (!ii->i_bh) { 1994 struct buffer_head *ibh; 1995 int err; 1996 1997 spin_unlock(&sbi->s_inode_lock); 1998 err = nilfs_ifile_get_inode_block( 1999 ifile, ii->vfs_inode.i_ino, &ibh); 2000 if (unlikely(err)) { 2001 nilfs_warning(sbi->s_super, __func__, 2002 "failed to get inode block.\n"); 2003 return err; 2004 } 2005 nilfs_mdt_mark_buffer_dirty(ibh); 2006 nilfs_mdt_mark_dirty(ifile); 2007 spin_lock(&sbi->s_inode_lock); 2008 if (likely(!ii->i_bh)) 2009 ii->i_bh = ibh; 2010 else 2011 brelse(ibh); 2012 goto retry; 2013 } 2014 2015 clear_bit(NILFS_I_QUEUED, &ii->i_state); 2016 set_bit(NILFS_I_BUSY, &ii->i_state); 2017 list_del(&ii->i_dirty); 2018 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); 2019 } 2020 spin_unlock(&sbi->s_inode_lock); 2021 2022 return 0; 2023 } 2024 2025 static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, 2026 struct nilfs_sb_info *sbi) 2027 { 2028 struct nilfs_transaction_info *ti = current->journal_info; 2029 struct nilfs_inode_info *ii, *n; 2030 2031 spin_lock(&sbi->s_inode_lock); 2032 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 2033 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || 2034 test_bit(NILFS_I_DIRTY, &ii->i_state)) 2035 continue; 2036 2037 clear_bit(NILFS_I_BUSY, &ii->i_state); 2038 brelse(ii->i_bh); 2039 ii->i_bh = NULL; 2040 list_del(&ii->i_dirty); 2041 list_add_tail(&ii->i_dirty, &ti->ti_garbage); 2042 } 2043 spin_unlock(&sbi->s_inode_lock); 2044 } 2045 2046 /* 2047 * Main procedure of segment constructor 2048 */ 2049 static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 2050 { 2051 struct nilfs_sb_info *sbi = sci->sc_sbi; 2052 struct the_nilfs *nilfs = sbi->s_nilfs; 2053 struct page *failed_page; 2054 int err; 2055 2056 sci->sc_stage.scnt = NILFS_ST_INIT; 2057 sci->sc_cno = nilfs->ns_cno; 2058 2059 err = nilfs_segctor_check_in_files(sci, sbi); 2060 if (unlikely(err)) 2061 goto out; 2062 2063 if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) 2064 set_bit(NILFS_SC_DIRTY, 
&sci->sc_flags); 2065 2066 if (nilfs_segctor_clean(sci)) 2067 goto out; 2068 2069 do { 2070 sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; 2071 2072 err = nilfs_segctor_begin_construction(sci, nilfs); 2073 if (unlikely(err)) 2074 goto out; 2075 2076 /* Update time stamp */ 2077 sci->sc_seg_ctime = get_seconds(); 2078 2079 err = nilfs_segctor_collect(sci, nilfs, mode); 2080 if (unlikely(err)) 2081 goto failed; 2082 2083 /* Avoid empty segment */ 2084 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2085 nilfs_segbuf_empty(sci->sc_curseg)) { 2086 nilfs_segctor_abort_construction(sci, nilfs, 1); 2087 goto out; 2088 } 2089 2090 err = nilfs_segctor_assign(sci, mode); 2091 if (unlikely(err)) 2092 goto failed; 2093 2094 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2095 nilfs_segctor_fill_in_file_bmap(sci); 2096 2097 if (mode == SC_LSEG_SR && 2098 sci->sc_stage.scnt >= NILFS_ST_CPFILE) { 2099 err = nilfs_segctor_fill_in_checkpoint(sci); 2100 if (unlikely(err)) 2101 goto failed_to_write; 2102 2103 nilfs_segctor_fill_in_super_root(sci, nilfs); 2104 } 2105 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 2106 2107 /* Write partial segments */ 2108 err = nilfs_segctor_prepare_write(sci, &failed_page); 2109 if (err) { 2110 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err); 2111 goto failed_to_write; 2112 } 2113 2114 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 2115 nilfs->ns_crc_seed); 2116 2117 err = nilfs_segctor_write(sci, nilfs); 2118 if (unlikely(err)) 2119 goto failed_to_write; 2120 2121 if (sci->sc_stage.scnt == NILFS_ST_DONE || 2122 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { 2123 /* 2124 * At this point, we avoid double buffering 2125 * for blocksize < pagesize because page dirty 2126 * flag is turned off during write and dirty 2127 * buffers are not properly collected for 2128 * pages crossing over segments. 2129 */ 2130 err = nilfs_segctor_wait(sci); 2131 if (err) 2132 goto failed_to_write; 2133 } 2134 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2135 2136 out: 2137 nilfs_segctor_check_out_files(sci, sbi); 2138 return err; 2139 2140 failed_to_write: 2141 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2142 nilfs_redirty_inodes(&sci->sc_dirty_files); 2143 2144 failed: 2145 if (nilfs_doing_gc()) 2146 nilfs_redirty_inodes(&sci->sc_gc_inodes); 2147 nilfs_segctor_abort_construction(sci, nilfs, err); 2148 goto out; 2149 } 2150 2151 /** 2152 * nilfs_segctor_start_timer - set timer of background write 2153 * @sci: nilfs_sc_info 2154 * 2155 * If the timer has already been set, it ignores the new request. 2156 * This function MUST be called within a section locking the segment 2157 * semaphore. 
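 *
 * The timer is armed to fire sc_interval jiffies later; its handler,
 * nilfs_construction_timeout(), simply wakes up the segctord thread so that
 * the still-unclosed logical segment gets completed by a later construction.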
2158 */
2159 static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2160 {
2161 spin_lock(&sci->sc_state_lock);
2162 if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2163 sci->sc_timer.expires = jiffies + sci->sc_interval;
2164 add_timer(&sci->sc_timer);
2165 sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2166 }
2167 spin_unlock(&sci->sc_state_lock);
2168 }
2169
2170 static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2171 {
2172 spin_lock(&sci->sc_state_lock);
2173 if (!(sci->sc_flush_request & (1 << bn))) {
2174 unsigned long prev_req = sci->sc_flush_request;
2175
2176 sci->sc_flush_request |= (1 << bn);
2177 if (!prev_req)
2178 wake_up(&sci->sc_wait_daemon);
2179 }
2180 spin_unlock(&sci->sc_state_lock);
2181 }
2182
2183 /**
2184 * nilfs_flush_segment - trigger a segment construction for resource control
2185 * @sb: super block
2186 * @ino: inode number of the file to be flushed out.
2187 */
2188 void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2189 {
2190 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2191 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2192
2193 if (!sci || nilfs_doing_construction())
2194 return;
2195 nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2196 /* assign bit 0 to data files */
2197 }
2198
2199 struct nilfs_segctor_wait_request {
2200 wait_queue_t wq;
2201 __u32 seq;
2202 int err;
2203 atomic_t done;
2204 };
2205
2206 static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2207 {
2208 struct nilfs_segctor_wait_request wait_req;
2209 int err = 0;
2210
2211 spin_lock(&sci->sc_state_lock);
2212 init_wait(&wait_req.wq);
2213 wait_req.err = 0;
2214 atomic_set(&wait_req.done, 0);
2215 wait_req.seq = ++sci->sc_seq_request;
2216 spin_unlock(&sci->sc_state_lock);
2217
2218 init_waitqueue_entry(&wait_req.wq, current);
2219 add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2220 set_current_state(TASK_INTERRUPTIBLE);
2221 wake_up(&sci->sc_wait_daemon);
2222
2223 for (;;) {
2224 if (atomic_read(&wait_req.done)) {
2225 err = wait_req.err;
2226 break;
2227 }
2228 if (!signal_pending(current)) {
2229 schedule();
2230 continue;
2231 }
2232 err = -ERESTARTSYS;
2233 break;
2234 }
2235 finish_wait(&sci->sc_wait_request, &wait_req.wq);
2236 return err;
2237 }
2238
2239 static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2240 {
2241 struct nilfs_segctor_wait_request *wrq, *n;
2242 unsigned long flags;
2243
2244 spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2245 list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
2246 wq.task_list) {
2247 if (!atomic_read(&wrq->done) &&
2248 nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2249 wrq->err = err;
2250 atomic_set(&wrq->done, 1);
2251 }
2252 if (atomic_read(&wrq->done)) {
2253 wrq->wq.func(&wrq->wq,
2254 TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2255 0, NULL);
2256 }
2257 }
2258 spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2259 }
2260
2261 /**
2262 * nilfs_construct_segment - construct a logical segment
2263 * @sb: super block
2264 *
2265 * Return Value: On success, 0 is returned. On errors, one of the following
2266 * negative error codes is returned.
2267 *
2268 * %-EROFS - Read only filesystem.
2269 *
2270 * %-EIO - I/O error
2271 *
2272 * %-ENOSPC - No space left on device (only in a panic state).
2273 *
2274 * %-ERESTARTSYS - Interrupted.
2275 *
2276 * %-ENOMEM - Insufficient memory available.
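 *
 * The request is queued to the segment constructor thread (segctord), and
 * the caller sleeps in nilfs_segctor_sync() until segctord has completed a
 * construction covering the request or reports an error.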
2277 */
2278 int nilfs_construct_segment(struct super_block *sb)
2279 {
2280 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2281 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2282 struct nilfs_transaction_info *ti;
2283 int err;
2284
2285 if (!sci)
2286 return -EROFS;
2287
2288 /* A call inside transactions causes a deadlock. */
2289 BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2290
2291 err = nilfs_segctor_sync(sci);
2292 return err;
2293 }
2294
2295 /**
2296 * nilfs_construct_dsync_segment - construct a data-only logical segment
2297 * @sb: super block
2298 * @inode: inode whose data blocks should be written out
2299 * @start: start byte offset
2300 * @end: end byte offset (inclusive)
2301 *
2302 * Return Value: On success, 0 is returned. On errors, one of the following
2303 * negative error codes is returned.
2304 *
2305 * %-EROFS - Read only filesystem.
2306 *
2307 * %-EIO - I/O error
2308 *
2309 * %-ENOSPC - No space left on device (only in a panic state).
2310 *
2311 * %-ERESTARTSYS - Interrupted.
2312 *
2313 * %-ENOMEM - Insufficient memory available.
2314 */
2315 int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2316 loff_t start, loff_t end)
2317 {
2318 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2319 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2320 struct nilfs_inode_info *ii;
2321 struct nilfs_transaction_info ti;
2322 int err = 0;
2323
2324 if (!sci)
2325 return -EROFS;
2326
2327 nilfs_transaction_lock(sbi, &ti, 0);
2328
2329 ii = NILFS_I(inode);
2330 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2331 nilfs_test_opt(sbi, STRICT_ORDER) ||
2332 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2333 nilfs_discontinued(sbi->s_nilfs)) {
2334 nilfs_transaction_unlock(sbi);
2335 err = nilfs_segctor_sync(sci);
2336 return err;
2337 }
2338
2339 spin_lock(&sbi->s_inode_lock);
2340 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2341 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2342 spin_unlock(&sbi->s_inode_lock);
2343 nilfs_transaction_unlock(sbi);
2344 return 0;
2345 }
2346 spin_unlock(&sbi->s_inode_lock);
2347 sci->sc_dsync_inode = ii;
2348 sci->sc_dsync_start = start;
2349 sci->sc_dsync_end = end;
2350
2351 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2352
2353 nilfs_transaction_unlock(sbi);
2354 return err;
2355 }
2356
2357 #define FLUSH_FILE_BIT (0x1) /* data file only */
2358 #define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */
2359
2360 /**
2361 * nilfs_segctor_accept - record accepted sequence count of log-write requests
2362 * @sci: segment constructor object
2363 */
2364 static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2365 {
2366 spin_lock(&sci->sc_state_lock);
2367 sci->sc_seq_accepted = sci->sc_seq_request;
2368 spin_unlock(&sci->sc_state_lock);
2369 del_timer_sync(&sci->sc_timer);
2370 }
2371
2372 /**
2373 * nilfs_segctor_notify - notify the result of request to caller threads
2374 * @sci: segment constructor object
2375 * @mode: mode of log forming
2376 * @err: error code to be notified
2377 */
2378 static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2379 {
2380 /* Clear requests (even when the construction failed) */
2381 spin_lock(&sci->sc_state_lock);
2382
2383 if (mode == SC_LSEG_SR) {
2384 sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2385 sci->sc_seq_done = sci->sc_seq_accepted;
2386 nilfs_segctor_wakeup(sci, err);
2387 sci->sc_flush_request = 0;
2388 } else {
2389 if (mode == SC_FLUSH_FILE)
2390 sci->sc_flush_request &= ~FLUSH_FILE_BIT;
2391 else if (mode == SC_FLUSH_DAT)
2392
sci->sc_flush_request &= ~FLUSH_DAT_BIT; 2393 2394 /* re-enable timer if checkpoint creation was not done */ 2395 if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2396 time_before(jiffies, sci->sc_timer.expires)) 2397 add_timer(&sci->sc_timer); 2398 } 2399 spin_unlock(&sci->sc_state_lock); 2400 } 2401 2402 /** 2403 * nilfs_segctor_construct - form logs and write them to disk 2404 * @sci: segment constructor object 2405 * @mode: mode of log forming 2406 */ 2407 static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) 2408 { 2409 struct nilfs_sb_info *sbi = sci->sc_sbi; 2410 struct the_nilfs *nilfs = sbi->s_nilfs; 2411 struct nilfs_super_block **sbp; 2412 int err = 0; 2413 2414 nilfs_segctor_accept(sci); 2415 2416 if (nilfs_discontinued(nilfs)) 2417 mode = SC_LSEG_SR; 2418 if (!nilfs_segctor_confirm(sci)) 2419 err = nilfs_segctor_do_construct(sci, mode); 2420 2421 if (likely(!err)) { 2422 if (mode != SC_FLUSH_DAT) 2423 atomic_set(&nilfs->ns_ndirtyblks, 0); 2424 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && 2425 nilfs_discontinued(nilfs)) { 2426 down_write(&nilfs->ns_sem); 2427 err = -EIO; 2428 sbp = nilfs_prepare_super(sbi, 2429 nilfs_sb_will_flip(nilfs)); 2430 if (likely(sbp)) { 2431 nilfs_set_log_cursor(sbp[0], nilfs); 2432 err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); 2433 } 2434 up_write(&nilfs->ns_sem); 2435 } 2436 } 2437 2438 nilfs_segctor_notify(sci, mode, err); 2439 return err; 2440 } 2441 2442 static void nilfs_construction_timeout(unsigned long data) 2443 { 2444 struct task_struct *p = (struct task_struct *)data; 2445 wake_up_process(p); 2446 } 2447 2448 static void 2449 nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) 2450 { 2451 struct nilfs_inode_info *ii, *n; 2452 2453 list_for_each_entry_safe(ii, n, head, i_dirty) { 2454 if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) 2455 continue; 2456 list_del_init(&ii->i_dirty); 2457 iput(&ii->vfs_inode); 2458 } 2459 } 2460 2461 int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, 2462 void **kbufs) 2463 { 2464 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2465 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2466 struct the_nilfs *nilfs = sbi->s_nilfs; 2467 struct nilfs_transaction_info ti; 2468 int err; 2469 2470 if (unlikely(!sci)) 2471 return -EROFS; 2472 2473 nilfs_transaction_lock(sbi, &ti, 1); 2474 2475 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); 2476 if (unlikely(err)) 2477 goto out_unlock; 2478 2479 err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); 2480 if (unlikely(err)) { 2481 nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat); 2482 goto out_unlock; 2483 } 2484 2485 sci->sc_freesegs = kbufs[4]; 2486 sci->sc_nfreesegs = argv[4].v_nmembs; 2487 list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes); 2488 2489 for (;;) { 2490 err = nilfs_segctor_construct(sci, SC_LSEG_SR); 2491 nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes); 2492 2493 if (likely(!err)) 2494 break; 2495 2496 nilfs_warning(sb, __func__, 2497 "segment construction failed. 
(err=%d)", err); 2498 set_current_state(TASK_INTERRUPTIBLE); 2499 schedule_timeout(sci->sc_interval); 2500 } 2501 if (nilfs_test_opt(sbi, DISCARD)) { 2502 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, 2503 sci->sc_nfreesegs); 2504 if (ret) { 2505 printk(KERN_WARNING 2506 "NILFS warning: error %d on discard request, " 2507 "turning discards off for the device\n", ret); 2508 nilfs_clear_opt(sbi, DISCARD); 2509 } 2510 } 2511 2512 out_unlock: 2513 sci->sc_freesegs = NULL; 2514 sci->sc_nfreesegs = 0; 2515 nilfs_mdt_clear_shadow_map(nilfs->ns_dat); 2516 nilfs_transaction_unlock(sbi); 2517 return err; 2518 } 2519 2520 static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) 2521 { 2522 struct nilfs_sb_info *sbi = sci->sc_sbi; 2523 struct nilfs_transaction_info ti; 2524 2525 nilfs_transaction_lock(sbi, &ti, 0); 2526 nilfs_segctor_construct(sci, mode); 2527 2528 /* 2529 * Unclosed segment should be retried. We do this using sc_timer. 2530 * Timeout of sc_timer will invoke complete construction which leads 2531 * to close the current logical segment. 2532 */ 2533 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) 2534 nilfs_segctor_start_timer(sci); 2535 2536 nilfs_transaction_unlock(sbi); 2537 } 2538 2539 static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2540 { 2541 int mode = 0; 2542 int err; 2543 2544 spin_lock(&sci->sc_state_lock); 2545 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? 2546 SC_FLUSH_DAT : SC_FLUSH_FILE; 2547 spin_unlock(&sci->sc_state_lock); 2548 2549 if (mode) { 2550 err = nilfs_segctor_do_construct(sci, mode); 2551 2552 spin_lock(&sci->sc_state_lock); 2553 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? 2554 ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT; 2555 spin_unlock(&sci->sc_state_lock); 2556 } 2557 clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); 2558 } 2559 2560 static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) 2561 { 2562 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2563 time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { 2564 if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) 2565 return SC_FLUSH_FILE; 2566 else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) 2567 return SC_FLUSH_DAT; 2568 } 2569 return SC_LSEG_SR; 2570 } 2571 2572 /** 2573 * nilfs_segctor_thread - main loop of the segment constructor thread. 2574 * @arg: pointer to a struct nilfs_sc_info. 2575 * 2576 * nilfs_segctor_thread() initializes a timer and serves as a daemon 2577 * to execute segment constructions. 2578 */ 2579 static int nilfs_segctor_thread(void *arg) 2580 { 2581 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2582 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2583 int timeout = 0; 2584 2585 sci->sc_timer.data = (unsigned long)current; 2586 sci->sc_timer.function = nilfs_construction_timeout; 2587 2588 /* start sync. */ 2589 sci->sc_task = current; 2590 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ 2591 printk(KERN_INFO 2592 "segctord starting. 
Construction interval = %lu seconds, " 2593 "CP frequency < %lu seconds\n", 2594 sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); 2595 2596 spin_lock(&sci->sc_state_lock); 2597 loop: 2598 for (;;) { 2599 int mode; 2600 2601 if (sci->sc_state & NILFS_SEGCTOR_QUIT) 2602 goto end_thread; 2603 2604 if (timeout || sci->sc_seq_request != sci->sc_seq_done) 2605 mode = SC_LSEG_SR; 2606 else if (!sci->sc_flush_request) 2607 break; 2608 else 2609 mode = nilfs_segctor_flush_mode(sci); 2610 2611 spin_unlock(&sci->sc_state_lock); 2612 nilfs_segctor_thread_construct(sci, mode); 2613 spin_lock(&sci->sc_state_lock); 2614 timeout = 0; 2615 } 2616 2617 2618 if (freezing(current)) { 2619 spin_unlock(&sci->sc_state_lock); 2620 refrigerator(); 2621 spin_lock(&sci->sc_state_lock); 2622 } else { 2623 DEFINE_WAIT(wait); 2624 int should_sleep = 1; 2625 2626 prepare_to_wait(&sci->sc_wait_daemon, &wait, 2627 TASK_INTERRUPTIBLE); 2628 2629 if (sci->sc_seq_request != sci->sc_seq_done) 2630 should_sleep = 0; 2631 else if (sci->sc_flush_request) 2632 should_sleep = 0; 2633 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) 2634 should_sleep = time_before(jiffies, 2635 sci->sc_timer.expires); 2636 2637 if (should_sleep) { 2638 spin_unlock(&sci->sc_state_lock); 2639 schedule(); 2640 spin_lock(&sci->sc_state_lock); 2641 } 2642 finish_wait(&sci->sc_wait_daemon, &wait); 2643 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2644 time_after_eq(jiffies, sci->sc_timer.expires)); 2645 2646 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) 2647 set_nilfs_discontinued(nilfs); 2648 } 2649 goto loop; 2650 2651 end_thread: 2652 spin_unlock(&sci->sc_state_lock); 2653 2654 /* end sync. */ 2655 sci->sc_task = NULL; 2656 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ 2657 return 0; 2658 } 2659 2660 static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) 2661 { 2662 struct task_struct *t; 2663 2664 t = kthread_run(nilfs_segctor_thread, sci, "segctord"); 2665 if (IS_ERR(t)) { 2666 int err = PTR_ERR(t); 2667 2668 printk(KERN_ERR "NILFS: error %d creating segctord thread\n", 2669 err); 2670 return err; 2671 } 2672 wait_event(sci->sc_wait_task, sci->sc_task != NULL); 2673 return 0; 2674 } 2675 2676 static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) 2677 __acquires(&sci->sc_state_lock) 2678 __releases(&sci->sc_state_lock) 2679 { 2680 sci->sc_state |= NILFS_SEGCTOR_QUIT; 2681 2682 while (sci->sc_task) { 2683 wake_up(&sci->sc_wait_daemon); 2684 spin_unlock(&sci->sc_state_lock); 2685 wait_event(sci->sc_wait_task, sci->sc_task == NULL); 2686 spin_lock(&sci->sc_state_lock); 2687 } 2688 } 2689 2690 /* 2691 * Setup & clean-up functions 2692 */ 2693 static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, 2694 struct nilfs_root *root) 2695 { 2696 struct nilfs_sc_info *sci; 2697 2698 sci = kzalloc(sizeof(*sci), GFP_KERNEL); 2699 if (!sci) 2700 return NULL; 2701 2702 sci->sc_sbi = sbi; 2703 sci->sc_super = sbi->s_super; 2704 2705 nilfs_get_root(root); 2706 sci->sc_root = root; 2707 2708 init_waitqueue_head(&sci->sc_wait_request); 2709 init_waitqueue_head(&sci->sc_wait_daemon); 2710 init_waitqueue_head(&sci->sc_wait_task); 2711 spin_lock_init(&sci->sc_state_lock); 2712 INIT_LIST_HEAD(&sci->sc_dirty_files); 2713 INIT_LIST_HEAD(&sci->sc_segbufs); 2714 INIT_LIST_HEAD(&sci->sc_write_logs); 2715 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2716 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2717 init_timer(&sci->sc_timer); 2718 2719 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2720 sci->sc_mjcp_freq = HZ 
* NILFS_SC_DEFAULT_SR_FREQ; 2721 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2722 2723 if (sbi->s_interval) 2724 sci->sc_interval = sbi->s_interval; 2725 if (sbi->s_watermark) 2726 sci->sc_watermark = sbi->s_watermark; 2727 return sci; 2728 } 2729 2730 static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) 2731 { 2732 int ret, retrycount = NILFS_SC_CLEANUP_RETRY; 2733 2734 /* The segctord thread was stopped and its timer was removed. 2735 But some tasks remain. */ 2736 do { 2737 struct nilfs_sb_info *sbi = sci->sc_sbi; 2738 struct nilfs_transaction_info ti; 2739 2740 nilfs_transaction_lock(sbi, &ti, 0); 2741 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2742 nilfs_transaction_unlock(sbi); 2743 2744 } while (ret && retrycount-- > 0); 2745 } 2746 2747 /** 2748 * nilfs_segctor_destroy - destroy the segment constructor. 2749 * @sci: nilfs_sc_info 2750 * 2751 * nilfs_segctor_destroy() kills the segctord thread and frees 2752 * the nilfs_sc_info struct. 2753 * Caller must hold the segment semaphore. 2754 */ 2755 static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) 2756 { 2757 struct nilfs_sb_info *sbi = sci->sc_sbi; 2758 int flag; 2759 2760 up_write(&sbi->s_nilfs->ns_segctor_sem); 2761 2762 spin_lock(&sci->sc_state_lock); 2763 nilfs_segctor_kill_thread(sci); 2764 flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request 2765 || sci->sc_seq_request != sci->sc_seq_done); 2766 spin_unlock(&sci->sc_state_lock); 2767 2768 if (flag || !nilfs_segctor_confirm(sci)) 2769 nilfs_segctor_write_out(sci); 2770 2771 WARN_ON(!list_empty(&sci->sc_copied_buffers)); 2772 2773 if (!list_empty(&sci->sc_dirty_files)) { 2774 nilfs_warning(sbi->s_super, __func__, 2775 "dirty file(s) after the final construction\n"); 2776 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); 2777 } 2778 2779 WARN_ON(!list_empty(&sci->sc_segbufs)); 2780 WARN_ON(!list_empty(&sci->sc_write_logs)); 2781 2782 nilfs_put_root(sci->sc_root); 2783 2784 down_write(&sbi->s_nilfs->ns_segctor_sem); 2785 2786 del_timer_sync(&sci->sc_timer); 2787 kfree(sci); 2788 } 2789 2790 /** 2791 * nilfs_attach_segment_constructor - attach a segment constructor 2792 * @sbi: nilfs_sb_info 2793 * @root: root object of the current filesystem tree 2794 * 2795 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, 2796 * initializes it, and starts the segment constructor. 2797 * 2798 * Return Value: On success, 0 is returned. On error, one of the following 2799 * negative error code is returned. 2800 * 2801 * %-ENOMEM - Insufficient memory available. 2802 */ 2803 int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 2804 struct nilfs_root *root) 2805 { 2806 int err; 2807 2808 if (NILFS_SC(sbi)) { 2809 /* 2810 * This happens if the filesystem was remounted 2811 * read/write after nilfs_error degenerated it into a 2812 * read-only mount. 2813 */ 2814 nilfs_detach_segment_constructor(sbi); 2815 } 2816 2817 sbi->s_sc_info = nilfs_segctor_new(sbi, root); 2818 if (!sbi->s_sc_info) 2819 return -ENOMEM; 2820 2821 err = nilfs_segctor_start_thread(NILFS_SC(sbi)); 2822 if (err) { 2823 kfree(sbi->s_sc_info); 2824 sbi->s_sc_info = NULL; 2825 } 2826 return err; 2827 } 2828 2829 /** 2830 * nilfs_detach_segment_constructor - destroy the segment constructor 2831 * @sbi: nilfs_sb_info 2832 * 2833 * nilfs_detach_segment_constructor() kills the segment constructor daemon, 2834 * frees the struct nilfs_sc_info, and destroy the dirty file list. 
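 *
 * The caller must not hold the segment semaphore (ns_segctor_sem); it is
 * taken for writing here while the constructor is torn down, and any dirty
 * files left behind are disposed of afterwards.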
2835 */ 2836 void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) 2837 { 2838 struct the_nilfs *nilfs = sbi->s_nilfs; 2839 LIST_HEAD(garbage_list); 2840 2841 down_write(&nilfs->ns_segctor_sem); 2842 if (NILFS_SC(sbi)) { 2843 nilfs_segctor_destroy(NILFS_SC(sbi)); 2844 sbi->s_sc_info = NULL; 2845 } 2846 2847 /* Force to free the list of dirty files */ 2848 spin_lock(&sbi->s_inode_lock); 2849 if (!list_empty(&sbi->s_dirty_files)) { 2850 list_splice_init(&sbi->s_dirty_files, &garbage_list); 2851 nilfs_warning(sbi->s_super, __func__, 2852 "Non empty dirty list after the last " 2853 "segment construction\n"); 2854 } 2855 spin_unlock(&sbi->s_inode_lock); 2856 up_write(&nilfs->ns_segctor_sem); 2857 2858 nilfs_dispose_list(sbi, &garbage_list, 1); 2859 } 2860