// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS segment constructor.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bitops.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>

#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
#include "segbuf.h"


/*
 * Segment constructor
 */
#define SC_N_INODEVEC	16	/* Size of locally allocated inode vector */

#define SC_MAX_SEGDELTA	64	/*
				 * Upper limit of the number of segments
				 * appended in collection retry loop
				 */

/* Construction mode */
enum {
	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
	SC_LSEG_DSYNC,	/*
			 * Flush data blocks of a given file and make
			 * a logical segment without a super root.
			 */
	SC_FLUSH_FILE,	/*
			 * Flush data files, leads to segment writes without
			 * creating a checkpoint.
			 */
	SC_FLUSH_DAT,	/*
			 * Flush DAT file. This also creates segments
			 * without a checkpoint.
			 */
};

/* Stage numbers of dirty block collection */
enum {
	NILFS_ST_INIT = 0,
	NILFS_ST_GC,	/* Collecting dirty blocks for GC */
	NILFS_ST_FILE,
	NILFS_ST_IFILE,
	NILFS_ST_CPFILE,
	NILFS_ST_SUFILE,
	NILFS_ST_DAT,
	NILFS_ST_SR,	/* Super root */
	NILFS_ST_DSYNC,	/* Data sync blocks */
	NILFS_ST_DONE,
};

#define CREATE_TRACE_POINTS
#include <trace/events/nilfs2.h>

/*
 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
 * wrapper functions of the stage count (nilfs_sc_info->sc_stage.scnt).
 * Users of the variable must use them because every transition of the stage
 * count must involve trace events
 * (trace_nilfs2_collection_stage_transition).
 *
 * nilfs_sc_cstage_get() isn't required for the above purpose because reading
 * the stage count produces no tracepoint event. It is provided just for
 * making the intention clear.
 */
static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
{
	sci->sc_stage.scnt++;
	trace_nilfs2_collection_stage_transition(sci);
}

static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
{
	sci->sc_stage.scnt = next_scnt;
	trace_nilfs2_collection_stage_transition(sci);
}

static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
{
	return sci->sc_stage.scnt;
}

/* State flags of collection */
#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
#define NILFS_CF_SUFREED	0x0004	/* segment usages have been freed */
#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)

/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	void (*write_data_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
	void (*write_node_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
};

/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);

#define nilfs_cnt32_ge(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) &&	\
	 ((__s32)((a) - (b)) >= 0))
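
/*
 * For example, nilfs_cnt32_ge(3, 0xfffffffe) is true: (__s32)(3 - 0xfffffffe)
 * wraps to 5, which is >= 0, even though a plain unsigned comparison would
 * say 3 < 0xfffffffe.  This makes the test safe across 32-bit counter
 * wraparound.
 */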

static int nilfs_prepare_segment_lock(struct super_block *sb,
				      struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *save = NULL;

	if (cur_ti) {
		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
			return ++cur_ti->ti_count;

		/*
		 * If journal_info field is occupied by other FS,
		 * it is saved and will be restored on
		 * nilfs_transaction_commit().
		 */
		nilfs_warn(sb, "journal info from a different FS");
		save = current->journal_info;
	}
	if (!ti) {
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	} else {
		ti->ti_flags = 0;
	}
	ti->ti_count = 0;
	ti->ti_save = save;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make a segment construction and write tasks
 * exclusive. The function is used with nilfs_transaction_commit() in pairs.
 * The region enclosed by these two functions can be nested. To avoid a
 * deadlock, the semaphore is only acquired or released in the outermost call.
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it. It is initialized and hooked onto the current task in
 * the outermost call. If a pre-allocated struct is given to @ti, it is used
 * instead; otherwise a new struct is assigned from a slab.
 *
 * When @vacancy_check flag is set, this function will check the amount of
 * free space, and will wait for the GC to reclaim disk space if low capacity.
 *
 * Return: 0 on success, or one of the following negative error codes on
 * failure:
 * * %-ENOMEM	- Insufficient memory available.
 * * %-ENOSPC	- No space left on device (if checking free space).
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(sb, ti);
	struct nilfs_transaction_info *trace_ti;

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0) {
		trace_ti = current->journal_info;

		trace_nilfs2_transaction_transition(sb, trace_ti,
				trace_ti->ti_count, trace_ti->ti_flags,
				TRACE_NILFS2_TRANSACTION_BEGIN);
		return 0;
	}

	sb_start_intwrite(sb);

	nilfs = sb->s_fs_info;
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}

	trace_ti = current->journal_info;
	trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
					    trace_ti->ti_flags,
					    TRACE_NILFS2_TRANSACTION_BEGIN);
	return 0;

 failed:
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
	return ret;
}
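
/*
 * Typical usage of the transaction API (illustrative sketch only, not code
 * from this file; callers in nilfs2 follow roughly this pattern):
 *
 *	struct nilfs_transaction_info ti;
 *	int err;
 *
 *	err = nilfs_transaction_begin(sb, &ti, 1);
 *	if (err)
 *		return err;
 *	err = do_file_operations();	// hypothetical helper dirtying buffers
 *	if (likely(!err))
 *		err = nilfs_transaction_commit(sb);
 *	else
 *		nilfs_transaction_abort(sb);
 */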

/**
 * nilfs_transaction_commit - commit indivisible file operations.
 * @sb: super block
 *
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin(). This is only performed
 * in outermost call of this function. If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor. If a sync flag is set, it starts construction
 * directly.
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int nilfs_transaction_commit(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	ti->ti_flags |= NILFS_TI_COMMIT;
	if (ti->ti_count > 0) {
		ti->ti_count--;
		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
		return 0;
	}
	if (nilfs->ns_writer) {
		struct nilfs_sc_info *sci = nilfs->ns_writer;

		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
			nilfs_segctor_do_flush(sci, 0);
	}
	up_read(&nilfs->ns_segctor_sem);
	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);

	current->journal_info = ti->ti_save;

	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
	return err;
}

void nilfs_transaction_abort(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	if (ti->ti_count > 0) {
		ti->ti_count--;
		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
		return;
	}
	up_read(&nilfs->ns_segctor_sem);

	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);

	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
}

void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
		return;

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	down_write(&nilfs->ns_segctor_sem);
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}

static void nilfs_transaction_lock(struct super_block *sb,
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	WARN_ON(cur_ti);
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;

	for (;;) {
		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);

		down_write(&nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
			break;

		nilfs_segctor_do_immediate_flush(sci);

		up_write(&nilfs->ns_segctor_sem);
		cond_resched();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;

	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
}

static void nilfs_transaction_unlock(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);

	up_write(&nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;

	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
}

static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned int bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned int blocksize = sci->sc_super->s_blocksize;
	void *p;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		ssp->offset = 0;
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}
	p = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;
	return p;
}

/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 *
 * Return: 0 on success, or a negative error code on failure.
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *sumbh;
	unsigned int sumbytes;
	unsigned int flags = 0;
	int err;

	if (nilfs_doing_gc())
		flags = NILFS_SS_GC;
	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
	if (unlikely(err))
		return err;

	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sumbytes = segbuf->sb_sum.sumbytes;
	sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
	sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}

/**
 * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
 * @sci: segment constructor object
 *
 * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
 * the current segment summary block.
 */
static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
{
	struct nilfs_segsum_pointer *ssp;

	ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
	if (ssp->offset < ssp->bh->b_size)
		memset(ssp->bh->b_data + ssp->offset, 0,
		       ssp->bh->b_size - ssp->offset);
}

static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /*
				* The current segment is filled up
				* (internal code)
				*/
	nilfs_segctor_zeropad_segsum(sci);
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}

static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;
	}
	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}

/*
 * Functions for making segment summary and payloads
 */
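
/*
 * Rough layout of a log (partial segment), for orientation only; see
 * segbuf.c and the on-disk format definitions for the authoritative
 * description:
 *
 *	+------------------------------------------------------------+
 *	| segment summary block(s):                                   |
 *	|   nilfs_segment_summary header                              |
 *	|   nilfs_finfo (file A), binfo, binfo, ...                   |
 *	|   nilfs_finfo (file B), binfo, binfo, ...                   |
 *	+------------------------------------------------------------+
 *	| payload blocks (data blocks, then b-tree node blocks)       |
 *	+------------------------------------------------------------+
 *	| super root block (only when NILFS_SS_SR is set)             |
 *	+------------------------------------------------------------+
 *
 * The helpers below append finfo/binfo entries to the summary area and
 * extend it with extra summary blocks when an entry would not fit.
 */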
static int nilfs_segctor_segsum_block_required(
	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
	unsigned int binfo_size)
{
	unsigned int blocksize = sci->sc_super->s_blocksize;
	/* Sizes of finfo and binfo are small enough compared to blocksize */

	return ssp->offset + binfo_size +
		(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
		blocksize;
}

static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

	if (NILFS_I(inode)->i_root &&
	    !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
	/* skip finfo */
}

static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;
	__u64 cno;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);

	if (ii->i_type & NILFS_I_TYPE_GC)
		cno = ii->i_cno;
	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
		cno = 0;
	else
		cno = sci->sc_cno;

	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
					       sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(cno);

	segbuf = sci->sc_curseg;
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned int binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		nilfs_segctor_zeropad_segsum(sci);
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
				   struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (err < 0)
		return err;

	err = nilfs_segctor_add_file_block(sci, bh, inode,
					   sizeof(struct nilfs_binfo_v));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
}

static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}

static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*binfo_v));
	*binfo_v = binfo->bi_v;
}

static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*vblocknr));
	*vblocknr = binfo->bi_v.bi_vblocknr;
}

static const struct nilfs_sc_operations nilfs_sc_file_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};

static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (err < 0)
		return err;

	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}

static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
							sizeof(*blkoff));
	*blkoff = binfo->bi_dat.bi_blkoff;
}

static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *binfo_dat =
		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
	*binfo_dat = binfo->bi_dat;
}

static const struct
nilfs_sc_operations nilfs_sc_dat_ops = { 683 .collect_data = nilfs_collect_dat_data, 684 .collect_node = nilfs_collect_file_node, 685 .collect_bmap = nilfs_collect_dat_bmap, 686 .write_data_binfo = nilfs_write_dat_data_binfo, 687 .write_node_binfo = nilfs_write_dat_node_binfo, 688 }; 689 690 static const struct nilfs_sc_operations nilfs_sc_dsync_ops = { 691 .collect_data = nilfs_collect_file_data, 692 .collect_node = NULL, 693 .collect_bmap = NULL, 694 .write_data_binfo = nilfs_write_file_data_binfo, 695 .write_node_binfo = NULL, 696 }; 697 698 static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, 699 struct list_head *listp, 700 size_t nlimit, 701 loff_t start, loff_t end) 702 { 703 struct address_space *mapping = inode->i_mapping; 704 struct folio_batch fbatch; 705 pgoff_t index = 0, last = ULONG_MAX; 706 size_t ndirties = 0; 707 int i; 708 709 if (unlikely(start != 0 || end != LLONG_MAX)) { 710 /* 711 * A valid range is given for sync-ing data pages. The 712 * range is rounded to per-page; extra dirty buffers 713 * may be included if blocksize < pagesize. 714 */ 715 index = start >> PAGE_SHIFT; 716 last = end >> PAGE_SHIFT; 717 } 718 folio_batch_init(&fbatch); 719 repeat: 720 if (unlikely(index > last) || 721 !filemap_get_folios_tag(mapping, &index, last, 722 PAGECACHE_TAG_DIRTY, &fbatch)) 723 return ndirties; 724 725 for (i = 0; i < folio_batch_count(&fbatch); i++) { 726 struct buffer_head *bh, *head; 727 struct folio *folio = fbatch.folios[i]; 728 729 folio_lock(folio); 730 if (unlikely(folio->mapping != mapping)) { 731 /* Exclude folios removed from the address space */ 732 folio_unlock(folio); 733 continue; 734 } 735 head = folio_buffers(folio); 736 if (!head) 737 head = create_empty_buffers(folio, 738 i_blocksize(inode), 0); 739 740 bh = head; 741 do { 742 if (!buffer_dirty(bh) || buffer_async_write(bh)) 743 continue; 744 get_bh(bh); 745 list_add_tail(&bh->b_assoc_buffers, listp); 746 ndirties++; 747 if (unlikely(ndirties >= nlimit)) { 748 folio_unlock(folio); 749 folio_batch_release(&fbatch); 750 cond_resched(); 751 return ndirties; 752 } 753 } while (bh = bh->b_this_page, bh != head); 754 755 folio_unlock(folio); 756 } 757 folio_batch_release(&fbatch); 758 cond_resched(); 759 goto repeat; 760 } 761 762 static void nilfs_lookup_dirty_node_buffers(struct inode *inode, 763 struct list_head *listp) 764 { 765 struct nilfs_inode_info *ii = NILFS_I(inode); 766 struct inode *btnc_inode = ii->i_assoc_inode; 767 struct folio_batch fbatch; 768 struct buffer_head *bh, *head; 769 unsigned int i; 770 pgoff_t index = 0; 771 772 if (!btnc_inode) 773 return; 774 folio_batch_init(&fbatch); 775 776 while (filemap_get_folios_tag(btnc_inode->i_mapping, &index, 777 (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { 778 for (i = 0; i < folio_batch_count(&fbatch); i++) { 779 bh = head = folio_buffers(fbatch.folios[i]); 780 do { 781 if (buffer_dirty(bh) && 782 !buffer_async_write(bh)) { 783 get_bh(bh); 784 list_add_tail(&bh->b_assoc_buffers, 785 listp); 786 } 787 bh = bh->b_this_page; 788 } while (bh != head); 789 } 790 folio_batch_release(&fbatch); 791 cond_resched(); 792 } 793 } 794 795 static void nilfs_dispose_list(struct the_nilfs *nilfs, 796 struct list_head *head, int force) 797 { 798 struct nilfs_inode_info *ii, *n; 799 struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii; 800 unsigned int nv = 0; 801 802 while (!list_empty(head)) { 803 spin_lock(&nilfs->ns_inode_lock); 804 list_for_each_entry_safe(ii, n, head, i_dirty) { 805 list_del_init(&ii->i_dirty); 806 if (force) { 807 if 
(unlikely(ii->i_bh)) { 808 brelse(ii->i_bh); 809 ii->i_bh = NULL; 810 } 811 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { 812 set_bit(NILFS_I_QUEUED, &ii->i_state); 813 list_add_tail(&ii->i_dirty, 814 &nilfs->ns_dirty_files); 815 continue; 816 } 817 ivec[nv++] = ii; 818 if (nv == SC_N_INODEVEC) 819 break; 820 } 821 spin_unlock(&nilfs->ns_inode_lock); 822 823 for (pii = ivec; nv > 0; pii++, nv--) 824 iput(&(*pii)->vfs_inode); 825 } 826 } 827 828 static void nilfs_iput_work_func(struct work_struct *work) 829 { 830 struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info, 831 sc_iput_work); 832 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 833 834 nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0); 835 } 836 837 static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, 838 struct nilfs_root *root) 839 { 840 int ret = 0; 841 842 if (nilfs_mdt_fetch_dirty(root->ifile)) 843 ret++; 844 if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) 845 ret++; 846 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) 847 ret++; 848 if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat)) 849 ret++; 850 return ret; 851 } 852 853 static int nilfs_segctor_clean(struct nilfs_sc_info *sci) 854 { 855 return list_empty(&sci->sc_dirty_files) && 856 !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && 857 sci->sc_nfreesegs == 0 && 858 (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); 859 } 860 861 static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) 862 { 863 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 864 int ret = 0; 865 866 if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) 867 set_bit(NILFS_SC_DIRTY, &sci->sc_flags); 868 869 spin_lock(&nilfs->ns_inode_lock); 870 if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci)) 871 ret++; 872 873 spin_unlock(&nilfs->ns_inode_lock); 874 return ret; 875 } 876 877 static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) 878 { 879 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 880 881 nilfs_mdt_clear_dirty(sci->sc_root->ifile); 882 nilfs_mdt_clear_dirty(nilfs->ns_cpfile); 883 nilfs_mdt_clear_dirty(nilfs->ns_sufile); 884 nilfs_mdt_clear_dirty(nilfs->ns_dat); 885 } 886 887 static void nilfs_fill_in_file_bmap(struct inode *ifile, 888 struct nilfs_inode_info *ii) 889 890 { 891 struct buffer_head *ibh; 892 struct nilfs_inode *raw_inode; 893 894 if (test_bit(NILFS_I_BMAP, &ii->i_state)) { 895 ibh = ii->i_bh; 896 BUG_ON(!ibh); 897 raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, 898 ibh); 899 nilfs_bmap_write(ii->i_bmap, raw_inode); 900 nilfs_ifile_unmap_inode(raw_inode); 901 } 902 } 903 904 static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) 905 { 906 struct nilfs_inode_info *ii; 907 908 list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { 909 nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii); 910 set_bit(NILFS_I_COLLECTED, &ii->i_state); 911 } 912 } 913 914 /** 915 * nilfs_write_root_mdt_inode - export root metadata inode information to 916 * the on-disk inode 917 * @inode: inode object of the root metadata file 918 * @raw_inode: on-disk inode 919 * 920 * nilfs_write_root_mdt_inode() writes inode information and bmap data of 921 * @inode to the inode area of the metadata file allocated on the super root 922 * block created to finalize the log. Since super root blocks are configured 923 * each time, this function zero-fills the unused area of @raw_inode. 
924 */ 925 static void nilfs_write_root_mdt_inode(struct inode *inode, 926 struct nilfs_inode *raw_inode) 927 { 928 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 929 930 nilfs_write_inode_common(inode, raw_inode); 931 932 /* zero-fill unused portion of raw_inode */ 933 raw_inode->i_xattr = 0; 934 raw_inode->i_pad = 0; 935 memset((void *)raw_inode + sizeof(*raw_inode), 0, 936 nilfs->ns_inode_size - sizeof(*raw_inode)); 937 938 nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode); 939 } 940 941 static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, 942 struct the_nilfs *nilfs) 943 { 944 struct buffer_head *bh_sr; 945 struct nilfs_super_root *raw_sr; 946 unsigned int isz, srsz; 947 948 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 949 950 lock_buffer(bh_sr); 951 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 952 isz = nilfs->ns_inode_size; 953 srsz = NILFS_SR_BYTES(isz); 954 955 raw_sr->sr_sum = 0; /* Ensure initialization within this update */ 956 raw_sr->sr_bytes = cpu_to_le16(srsz); 957 raw_sr->sr_nongc_ctime 958 = cpu_to_le64(nilfs_doing_gc() ? 959 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 960 raw_sr->sr_flags = 0; 961 962 nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr + 963 NILFS_SR_DAT_OFFSET(isz)); 964 nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr + 965 NILFS_SR_CPFILE_OFFSET(isz)); 966 nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr + 967 NILFS_SR_SUFILE_OFFSET(isz)); 968 969 memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); 970 set_buffer_uptodate(bh_sr); 971 unlock_buffer(bh_sr); 972 } 973 974 static void nilfs_redirty_inodes(struct list_head *head) 975 { 976 struct nilfs_inode_info *ii; 977 978 list_for_each_entry(ii, head, i_dirty) { 979 if (test_bit(NILFS_I_COLLECTED, &ii->i_state)) 980 clear_bit(NILFS_I_COLLECTED, &ii->i_state); 981 } 982 } 983 984 static void nilfs_drop_collected_inodes(struct list_head *head) 985 { 986 struct nilfs_inode_info *ii; 987 988 list_for_each_entry(ii, head, i_dirty) { 989 if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) 990 continue; 991 992 clear_bit(NILFS_I_INODE_SYNC, &ii->i_state); 993 set_bit(NILFS_I_UPDATED, &ii->i_state); 994 } 995 } 996 997 static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, 998 struct inode *inode, 999 struct list_head *listp, 1000 int (*collect)(struct nilfs_sc_info *, 1001 struct buffer_head *, 1002 struct inode *)) 1003 { 1004 struct buffer_head *bh, *n; 1005 int err = 0; 1006 1007 if (collect) { 1008 list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) { 1009 list_del_init(&bh->b_assoc_buffers); 1010 err = collect(sci, bh, inode); 1011 brelse(bh); 1012 if (unlikely(err)) 1013 goto dispose_buffers; 1014 } 1015 return 0; 1016 } 1017 1018 dispose_buffers: 1019 while (!list_empty(listp)) { 1020 bh = list_first_entry(listp, struct buffer_head, 1021 b_assoc_buffers); 1022 list_del_init(&bh->b_assoc_buffers); 1023 brelse(bh); 1024 } 1025 return err; 1026 } 1027 1028 static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) 1029 { 1030 /* Remaining number of blocks within segment buffer */ 1031 return sci->sc_segbuf_nblocks - 1032 (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks); 1033 } 1034 1035 static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, 1036 struct inode *inode, 1037 const struct nilfs_sc_operations *sc_ops) 1038 { 1039 LIST_HEAD(data_buffers); 1040 LIST_HEAD(node_buffers); 1041 int err; 1042 1043 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { 1044 size_t n, rest = 
nilfs_segctor_buffer_rest(sci); 1045 1046 n = nilfs_lookup_dirty_data_buffers( 1047 inode, &data_buffers, rest + 1, 0, LLONG_MAX); 1048 if (n > rest) { 1049 err = nilfs_segctor_apply_buffers( 1050 sci, inode, &data_buffers, 1051 sc_ops->collect_data); 1052 BUG_ON(!err); /* always receive -E2BIG or true error */ 1053 goto break_or_fail; 1054 } 1055 } 1056 nilfs_lookup_dirty_node_buffers(inode, &node_buffers); 1057 1058 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { 1059 err = nilfs_segctor_apply_buffers( 1060 sci, inode, &data_buffers, sc_ops->collect_data); 1061 if (unlikely(err)) { 1062 /* dispose node list */ 1063 nilfs_segctor_apply_buffers( 1064 sci, inode, &node_buffers, NULL); 1065 goto break_or_fail; 1066 } 1067 sci->sc_stage.flags |= NILFS_CF_NODE; 1068 } 1069 /* Collect node */ 1070 err = nilfs_segctor_apply_buffers( 1071 sci, inode, &node_buffers, sc_ops->collect_node); 1072 if (unlikely(err)) 1073 goto break_or_fail; 1074 1075 nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers); 1076 err = nilfs_segctor_apply_buffers( 1077 sci, inode, &node_buffers, sc_ops->collect_bmap); 1078 if (unlikely(err)) 1079 goto break_or_fail; 1080 1081 nilfs_segctor_end_finfo(sci, inode); 1082 sci->sc_stage.flags &= ~NILFS_CF_NODE; 1083 1084 break_or_fail: 1085 return err; 1086 } 1087 1088 static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, 1089 struct inode *inode) 1090 { 1091 LIST_HEAD(data_buffers); 1092 size_t n, rest = nilfs_segctor_buffer_rest(sci); 1093 int err; 1094 1095 n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1, 1096 sci->sc_dsync_start, 1097 sci->sc_dsync_end); 1098 1099 err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, 1100 nilfs_collect_file_data); 1101 if (!err) { 1102 nilfs_segctor_end_finfo(sci, inode); 1103 BUG_ON(n > rest); 1104 /* always receive -E2BIG or true error if n > rest */ 1105 } 1106 return err; 1107 } 1108 1109 /** 1110 * nilfs_free_segments - free the segments given by an array of segment numbers 1111 * @nilfs: nilfs object 1112 * @segnumv: array of segment numbers to be freed 1113 * @nsegs: number of segments to be freed in @segnumv 1114 * 1115 * nilfs_free_segments() wraps nilfs_sufile_freev() and 1116 * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file 1117 * (sufile) to free all segments given by @segnumv and @nsegs at once. If 1118 * it fails midway, it cancels the changes so that none of the segments are 1119 * freed. If @nsegs is 0, this function does nothing. 1120 * 1121 * The freeing of segments is not finalized until the writing of a log with 1122 * a super root block containing this sufile change is complete, and it can 1123 * be canceled with nilfs_sufile_cancel_freev() until then. 1124 * 1125 * Return: 0 on success, or one of the following negative error codes on 1126 * failure: 1127 * * %-EINVAL - Invalid segment number. 1128 * * %-EIO - I/O error (including metadata corruption). 1129 * * %-ENOMEM - Insufficient memory available. 1130 */ 1131 static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, 1132 size_t nsegs) 1133 { 1134 size_t ndone; 1135 int ret; 1136 1137 if (!nsegs) 1138 return 0; 1139 1140 ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); 1141 if (unlikely(ret)) { 1142 nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, 1143 NULL); 1144 /* 1145 * If a segment usage of the segments to be freed is in a 1146 * hole block, nilfs_sufile_freev() will return -ENOENT. 
1147 * In this case, -EINVAL should be returned to the caller 1148 * since there is something wrong with the given segment 1149 * number array. This error can only occur during GC, so 1150 * there is no need to worry about it propagating to other 1151 * callers (such as fsync). 1152 */ 1153 if (ret == -ENOENT) { 1154 nilfs_err(nilfs->ns_sb, 1155 "The segment usage entry %llu to be freed is invalid (in a hole)", 1156 (unsigned long long)segnumv[ndone]); 1157 ret = -EINVAL; 1158 } 1159 } 1160 return ret; 1161 } 1162 1163 static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) 1164 { 1165 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1166 struct list_head *head; 1167 struct nilfs_inode_info *ii; 1168 int err = 0; 1169 1170 switch (nilfs_sc_cstage_get(sci)) { 1171 case NILFS_ST_INIT: 1172 /* Pre-processes */ 1173 sci->sc_stage.flags = 0; 1174 1175 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) { 1176 sci->sc_nblk_inc = 0; 1177 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; 1178 if (mode == SC_LSEG_DSYNC) { 1179 nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); 1180 goto dsync_mode; 1181 } 1182 } 1183 1184 sci->sc_stage.dirty_file_ptr = NULL; 1185 sci->sc_stage.gc_inode_ptr = NULL; 1186 if (mode == SC_FLUSH_DAT) { 1187 nilfs_sc_cstage_set(sci, NILFS_ST_DAT); 1188 goto dat_stage; 1189 } 1190 nilfs_sc_cstage_inc(sci); 1191 fallthrough; 1192 case NILFS_ST_GC: 1193 if (nilfs_doing_gc()) { 1194 head = &sci->sc_gc_inodes; 1195 ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr, 1196 head, i_dirty); 1197 list_for_each_entry_continue(ii, head, i_dirty) { 1198 err = nilfs_segctor_scan_file( 1199 sci, &ii->vfs_inode, 1200 &nilfs_sc_file_ops); 1201 if (unlikely(err)) { 1202 sci->sc_stage.gc_inode_ptr = list_entry( 1203 ii->i_dirty.prev, 1204 struct nilfs_inode_info, 1205 i_dirty); 1206 goto break_or_fail; 1207 } 1208 set_bit(NILFS_I_COLLECTED, &ii->i_state); 1209 } 1210 sci->sc_stage.gc_inode_ptr = NULL; 1211 } 1212 nilfs_sc_cstage_inc(sci); 1213 fallthrough; 1214 case NILFS_ST_FILE: 1215 head = &sci->sc_dirty_files; 1216 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, 1217 i_dirty); 1218 list_for_each_entry_continue(ii, head, i_dirty) { 1219 clear_bit(NILFS_I_DIRTY, &ii->i_state); 1220 1221 err = nilfs_segctor_scan_file(sci, &ii->vfs_inode, 1222 &nilfs_sc_file_ops); 1223 if (unlikely(err)) { 1224 sci->sc_stage.dirty_file_ptr = 1225 list_entry(ii->i_dirty.prev, 1226 struct nilfs_inode_info, 1227 i_dirty); 1228 goto break_or_fail; 1229 } 1230 /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */ 1231 /* XXX: required ? 
*/ 1232 } 1233 sci->sc_stage.dirty_file_ptr = NULL; 1234 if (mode == SC_FLUSH_FILE) { 1235 nilfs_sc_cstage_set(sci, NILFS_ST_DONE); 1236 return 0; 1237 } 1238 nilfs_sc_cstage_inc(sci); 1239 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; 1240 fallthrough; 1241 case NILFS_ST_IFILE: 1242 err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile, 1243 &nilfs_sc_file_ops); 1244 if (unlikely(err)) 1245 break; 1246 nilfs_sc_cstage_inc(sci); 1247 /* Creating a checkpoint */ 1248 err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile, 1249 nilfs->ns_cno); 1250 if (unlikely(err)) 1251 break; 1252 fallthrough; 1253 case NILFS_ST_CPFILE: 1254 err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile, 1255 &nilfs_sc_file_ops); 1256 if (unlikely(err)) 1257 break; 1258 nilfs_sc_cstage_inc(sci); 1259 fallthrough; 1260 case NILFS_ST_SUFILE: 1261 err = nilfs_free_segments(nilfs, sci->sc_freesegs, 1262 sci->sc_nfreesegs); 1263 if (unlikely(err)) 1264 break; 1265 sci->sc_stage.flags |= NILFS_CF_SUFREED; 1266 1267 err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, 1268 &nilfs_sc_file_ops); 1269 if (unlikely(err)) 1270 break; 1271 nilfs_sc_cstage_inc(sci); 1272 fallthrough; 1273 case NILFS_ST_DAT: 1274 dat_stage: 1275 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, 1276 &nilfs_sc_dat_ops); 1277 if (unlikely(err)) 1278 break; 1279 if (mode == SC_FLUSH_DAT) { 1280 nilfs_sc_cstage_set(sci, NILFS_ST_DONE); 1281 return 0; 1282 } 1283 nilfs_sc_cstage_inc(sci); 1284 fallthrough; 1285 case NILFS_ST_SR: 1286 if (mode == SC_LSEG_SR) { 1287 /* Appending a super root */ 1288 err = nilfs_segctor_add_super_root(sci); 1289 if (unlikely(err)) 1290 break; 1291 } 1292 /* End of a logical segment */ 1293 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1294 nilfs_sc_cstage_set(sci, NILFS_ST_DONE); 1295 return 0; 1296 case NILFS_ST_DSYNC: 1297 dsync_mode: 1298 sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT; 1299 ii = sci->sc_dsync_inode; 1300 if (!test_bit(NILFS_I_BUSY, &ii->i_state)) 1301 break; 1302 1303 err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode); 1304 if (unlikely(err)) 1305 break; 1306 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; 1307 nilfs_sc_cstage_set(sci, NILFS_ST_DONE); 1308 return 0; 1309 case NILFS_ST_DONE: 1310 return 0; 1311 default: 1312 BUG(); 1313 } 1314 1315 break_or_fail: 1316 return err; 1317 } 1318 1319 /** 1320 * nilfs_segctor_begin_construction - setup segment buffer to make a new log 1321 * @sci: nilfs_sc_info 1322 * @nilfs: nilfs object 1323 * 1324 * Return: 0 on success, or a negative error code on failure. 
1325 */ 1326 static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, 1327 struct the_nilfs *nilfs) 1328 { 1329 struct nilfs_segment_buffer *segbuf, *prev; 1330 __u64 nextnum; 1331 int err, alloc = 0; 1332 1333 segbuf = nilfs_segbuf_new(sci->sc_super); 1334 if (unlikely(!segbuf)) 1335 return -ENOMEM; 1336 1337 if (list_empty(&sci->sc_write_logs)) { 1338 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 1339 nilfs->ns_pseg_offset, nilfs); 1340 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { 1341 nilfs_shift_to_next_segment(nilfs); 1342 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); 1343 } 1344 1345 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; 1346 nextnum = nilfs->ns_nextnum; 1347 1348 if (nilfs->ns_segnum == nilfs->ns_nextnum) 1349 /* Start from the head of a new full segment */ 1350 alloc++; 1351 } else { 1352 /* Continue logs */ 1353 prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs); 1354 nilfs_segbuf_map_cont(segbuf, prev); 1355 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq; 1356 nextnum = prev->sb_nextnum; 1357 1358 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { 1359 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); 1360 segbuf->sb_sum.seg_seq++; 1361 alloc++; 1362 } 1363 } 1364 1365 err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum); 1366 if (err) 1367 goto failed; 1368 1369 if (alloc) { 1370 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); 1371 if (err) 1372 goto failed; 1373 } 1374 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); 1375 1376 BUG_ON(!list_empty(&sci->sc_segbufs)); 1377 list_add_tail(&segbuf->sb_list, &sci->sc_segbufs); 1378 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; 1379 return 0; 1380 1381 failed: 1382 nilfs_segbuf_free(segbuf); 1383 return err; 1384 } 1385 1386 static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, 1387 struct the_nilfs *nilfs, int nadd) 1388 { 1389 struct nilfs_segment_buffer *segbuf, *prev; 1390 struct inode *sufile = nilfs->ns_sufile; 1391 __u64 nextnextnum; 1392 LIST_HEAD(list); 1393 int err, ret, i; 1394 1395 prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs); 1396 /* 1397 * Since the segment specified with nextnum might be allocated during 1398 * the previous construction, the buffer including its segusage may 1399 * not be dirty. The following call ensures that the buffer is dirty 1400 * and will pin the buffer on memory until the sufile is written. 
1401 */ 1402 err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum); 1403 if (unlikely(err)) 1404 return err; 1405 1406 for (i = 0; i < nadd; i++) { 1407 /* extend segment info */ 1408 err = -ENOMEM; 1409 segbuf = nilfs_segbuf_new(sci->sc_super); 1410 if (unlikely(!segbuf)) 1411 goto failed; 1412 1413 /* map this buffer to region of segment on-disk */ 1414 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); 1415 sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks; 1416 1417 /* allocate the next next full segment */ 1418 err = nilfs_sufile_alloc(sufile, &nextnextnum); 1419 if (unlikely(err)) 1420 goto failed_segbuf; 1421 1422 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1; 1423 nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs); 1424 1425 list_add_tail(&segbuf->sb_list, &list); 1426 prev = segbuf; 1427 } 1428 list_splice_tail(&list, &sci->sc_segbufs); 1429 return 0; 1430 1431 failed_segbuf: 1432 nilfs_segbuf_free(segbuf); 1433 failed: 1434 list_for_each_entry(segbuf, &list, sb_list) { 1435 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1436 WARN_ON(ret); /* never fails */ 1437 } 1438 nilfs_destroy_logs(&list); 1439 return err; 1440 } 1441 1442 static void nilfs_free_incomplete_logs(struct list_head *logs, 1443 struct the_nilfs *nilfs) 1444 { 1445 struct nilfs_segment_buffer *segbuf, *prev; 1446 struct inode *sufile = nilfs->ns_sufile; 1447 int ret; 1448 1449 segbuf = NILFS_FIRST_SEGBUF(logs); 1450 if (nilfs->ns_nextnum != segbuf->sb_nextnum) { 1451 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1452 WARN_ON(ret); /* never fails */ 1453 } 1454 if (atomic_read(&segbuf->sb_err)) { 1455 /* Case 1: The first segment failed */ 1456 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) 1457 /* 1458 * Case 1a: Partial segment appended into an existing 1459 * segment 1460 */ 1461 nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start, 1462 segbuf->sb_fseg_end); 1463 else /* Case 1b: New full segment */ 1464 set_nilfs_discontinued(nilfs); 1465 } 1466 1467 prev = segbuf; 1468 list_for_each_entry_continue(segbuf, logs, sb_list) { 1469 if (prev->sb_nextnum != segbuf->sb_nextnum) { 1470 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1471 WARN_ON(ret); /* never fails */ 1472 } 1473 if (atomic_read(&segbuf->sb_err) && 1474 segbuf->sb_segnum != nilfs->ns_nextnum) 1475 /* Case 2: extended segment (!= next) failed */ 1476 nilfs_sufile_set_error(sufile, segbuf->sb_segnum); 1477 prev = segbuf; 1478 } 1479 } 1480 1481 static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, 1482 struct inode *sufile) 1483 { 1484 struct nilfs_segment_buffer *segbuf; 1485 unsigned long live_blocks; 1486 int ret; 1487 1488 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1489 live_blocks = segbuf->sb_sum.nblocks + 1490 (segbuf->sb_pseg_start - segbuf->sb_fseg_start); 1491 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 1492 live_blocks, 1493 sci->sc_seg_ctime); 1494 WARN_ON(ret); /* always succeed because the segusage is dirty */ 1495 } 1496 } 1497 1498 static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile) 1499 { 1500 struct nilfs_segment_buffer *segbuf; 1501 int ret; 1502 1503 segbuf = NILFS_FIRST_SEGBUF(logs); 1504 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 1505 segbuf->sb_pseg_start - 1506 segbuf->sb_fseg_start, 0); 1507 WARN_ON(ret); /* always succeed because the segusage is dirty */ 1508 1509 list_for_each_entry_continue(segbuf, logs, sb_list) { 1510 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 1511 0, 
0); 1512 WARN_ON(ret); /* always succeed */ 1513 } 1514 } 1515 1516 static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, 1517 struct nilfs_segment_buffer *last, 1518 struct inode *sufile) 1519 { 1520 struct nilfs_segment_buffer *segbuf = last; 1521 int ret; 1522 1523 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { 1524 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; 1525 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1526 WARN_ON(ret); 1527 } 1528 nilfs_truncate_logs(&sci->sc_segbufs, last); 1529 } 1530 1531 1532 static int nilfs_segctor_collect(struct nilfs_sc_info *sci, 1533 struct the_nilfs *nilfs, int mode) 1534 { 1535 struct nilfs_cstage prev_stage = sci->sc_stage; 1536 int err, nadd = 1; 1537 1538 /* Collection retry loop */ 1539 for (;;) { 1540 sci->sc_nblk_this_inc = 0; 1541 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1542 1543 err = nilfs_segctor_reset_segment_buffer(sci); 1544 if (unlikely(err)) 1545 goto failed; 1546 1547 err = nilfs_segctor_collect_blocks(sci, mode); 1548 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; 1549 if (!err) 1550 break; 1551 1552 if (unlikely(err != -E2BIG)) 1553 goto failed; 1554 1555 /* The current segment is filled up */ 1556 if (mode != SC_LSEG_SR || 1557 nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE) 1558 break; 1559 1560 nilfs_clear_logs(&sci->sc_segbufs); 1561 1562 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1563 err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1564 sci->sc_freesegs, 1565 sci->sc_nfreesegs, 1566 NULL); 1567 WARN_ON(err); /* do not happen */ 1568 sci->sc_stage.flags &= ~NILFS_CF_SUFREED; 1569 } 1570 1571 err = nilfs_segctor_extend_segments(sci, nilfs, nadd); 1572 if (unlikely(err)) 1573 return err; 1574 1575 nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); 1576 sci->sc_stage = prev_stage; 1577 } 1578 nilfs_segctor_zeropad_segsum(sci); 1579 nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); 1580 return 0; 1581 1582 failed: 1583 return err; 1584 } 1585 1586 static void nilfs_list_replace_buffer(struct buffer_head *old_bh, 1587 struct buffer_head *new_bh) 1588 { 1589 BUG_ON(!list_empty(&new_bh->b_assoc_buffers)); 1590 1591 list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers); 1592 /* The caller must release old_bh */ 1593 } 1594 1595 static int 1596 nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, 1597 struct nilfs_segment_buffer *segbuf, 1598 int mode) 1599 { 1600 struct inode *inode = NULL; 1601 sector_t blocknr; 1602 unsigned long nfinfo = segbuf->sb_sum.nfinfo; 1603 unsigned long nblocks = 0, ndatablk = 0; 1604 const struct nilfs_sc_operations *sc_op = NULL; 1605 struct nilfs_segsum_pointer ssp; 1606 struct nilfs_finfo *finfo = NULL; 1607 union nilfs_binfo binfo; 1608 struct buffer_head *bh, *bh_org; 1609 ino_t ino = 0; 1610 int err = 0; 1611 1612 if (!nfinfo) 1613 goto out; 1614 1615 blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk; 1616 ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); 1617 ssp.offset = sizeof(struct nilfs_segment_summary); 1618 1619 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 1620 if (bh == segbuf->sb_super_root) 1621 break; 1622 if (!finfo) { 1623 finfo = nilfs_segctor_map_segsum_entry( 1624 sci, &ssp, sizeof(*finfo)); 1625 ino = le64_to_cpu(finfo->fi_ino); 1626 nblocks = le32_to_cpu(finfo->fi_nblocks); 1627 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1628 1629 inode = bh->b_folio->mapping->host; 1630 1631 if (mode == SC_LSEG_DSYNC) 1632 sc_op = 
&nilfs_sc_dsync_ops; 1633 else if (ino == NILFS_DAT_INO) 1634 sc_op = &nilfs_sc_dat_ops; 1635 else /* file blocks */ 1636 sc_op = &nilfs_sc_file_ops; 1637 } 1638 bh_org = bh; 1639 get_bh(bh_org); 1640 err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr, 1641 &binfo); 1642 if (bh != bh_org) 1643 nilfs_list_replace_buffer(bh_org, bh); 1644 brelse(bh_org); 1645 if (unlikely(err)) 1646 goto failed_bmap; 1647 1648 if (ndatablk > 0) 1649 sc_op->write_data_binfo(sci, &ssp, &binfo); 1650 else 1651 sc_op->write_node_binfo(sci, &ssp, &binfo); 1652 1653 blocknr++; 1654 if (--nblocks == 0) { 1655 finfo = NULL; 1656 if (--nfinfo == 0) 1657 break; 1658 } else if (ndatablk > 0) 1659 ndatablk--; 1660 } 1661 out: 1662 return 0; 1663 1664 failed_bmap: 1665 return err; 1666 } 1667 1668 static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) 1669 { 1670 struct nilfs_segment_buffer *segbuf; 1671 int err; 1672 1673 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1674 err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode); 1675 if (unlikely(err)) 1676 return err; 1677 nilfs_segbuf_fill_in_segsum(segbuf); 1678 } 1679 return 0; 1680 } 1681 1682 static void nilfs_begin_folio_io(struct folio *folio) 1683 { 1684 if (!folio || folio_test_writeback(folio)) 1685 /* 1686 * For split b-tree node pages, this function may be called 1687 * twice. We ignore the 2nd or later calls by this check. 1688 */ 1689 return; 1690 1691 folio_lock(folio); 1692 folio_clear_dirty_for_io(folio); 1693 folio_start_writeback(folio); 1694 folio_unlock(folio); 1695 } 1696 1697 /** 1698 * nilfs_prepare_write_logs - prepare to write logs 1699 * @logs: logs to prepare for writing 1700 * @seed: checksum seed value 1701 * 1702 * nilfs_prepare_write_logs() adds checksums and prepares the block 1703 * buffers/folios for writing logs. In order to stabilize folios of 1704 * memory-mapped file blocks by putting them in writeback state before 1705 * calculating the checksums, first prepare to write payload blocks other 1706 * than segment summary and super root blocks in which the checksums will 1707 * be embedded. 
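 *
 * In outline (summary added for clarity; the steps mirror the code below):
 *   1. put the payload blocks under writeback (async-write flag, folio
 *      writeback),
 *   2. compute and embed the log checksums over the now-stabilized blocks,
 *   3. mark the segment summary blocks dirty and start their writeback,
 *   4. do the same for the super root block, if this log has one.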
1708 */ 1709 static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) 1710 { 1711 struct nilfs_segment_buffer *segbuf; 1712 struct folio *bd_folio = NULL, *fs_folio = NULL; 1713 struct buffer_head *bh; 1714 1715 /* Prepare to write payload blocks */ 1716 list_for_each_entry(segbuf, logs, sb_list) { 1717 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1718 b_assoc_buffers) { 1719 if (bh == segbuf->sb_super_root) 1720 break; 1721 set_buffer_async_write(bh); 1722 if (bh->b_folio != fs_folio) { 1723 nilfs_begin_folio_io(fs_folio); 1724 fs_folio = bh->b_folio; 1725 } 1726 } 1727 } 1728 nilfs_begin_folio_io(fs_folio); 1729 1730 nilfs_add_checksums_on_logs(logs, seed); 1731 1732 /* Prepare to write segment summary blocks */ 1733 list_for_each_entry(segbuf, logs, sb_list) { 1734 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1735 b_assoc_buffers) { 1736 mark_buffer_dirty(bh); 1737 if (bh->b_folio == bd_folio) 1738 continue; 1739 if (bd_folio) { 1740 folio_lock(bd_folio); 1741 folio_wait_writeback(bd_folio); 1742 folio_clear_dirty_for_io(bd_folio); 1743 folio_start_writeback(bd_folio); 1744 folio_unlock(bd_folio); 1745 } 1746 bd_folio = bh->b_folio; 1747 } 1748 } 1749 1750 /* Prepare to write super root block */ 1751 bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; 1752 if (bh) { 1753 mark_buffer_dirty(bh); 1754 if (bh->b_folio != bd_folio) { 1755 folio_lock(bd_folio); 1756 folio_wait_writeback(bd_folio); 1757 folio_clear_dirty_for_io(bd_folio); 1758 folio_start_writeback(bd_folio); 1759 folio_unlock(bd_folio); 1760 bd_folio = bh->b_folio; 1761 } 1762 } 1763 1764 if (bd_folio) { 1765 folio_lock(bd_folio); 1766 folio_wait_writeback(bd_folio); 1767 folio_clear_dirty_for_io(bd_folio); 1768 folio_start_writeback(bd_folio); 1769 folio_unlock(bd_folio); 1770 } 1771 } 1772 1773 static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1774 struct the_nilfs *nilfs) 1775 { 1776 int ret; 1777 1778 ret = nilfs_write_logs(&sci->sc_segbufs, nilfs); 1779 list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs); 1780 return ret; 1781 } 1782 1783 static void nilfs_end_folio_io(struct folio *folio, int err) 1784 { 1785 if (!folio) 1786 return; 1787 1788 if (buffer_nilfs_node(folio_buffers(folio)) && 1789 !folio_test_writeback(folio)) { 1790 /* 1791 * For b-tree node pages, this function may be called twice 1792 * or more because they might be split in a segment. 1793 */ 1794 if (folio_test_dirty(folio)) { 1795 /* 1796 * For pages holding split b-tree node buffers, dirty 1797 * flag on the buffers may be cleared discretely. 1798 * In that case, the page is once redirtied for 1799 * remaining buffers, and it must be cancelled if 1800 * all the buffers get cleaned later. 
1801 */ 1802 folio_lock(folio); 1803 if (nilfs_folio_buffers_clean(folio)) 1804 __nilfs_clear_folio_dirty(folio); 1805 folio_unlock(folio); 1806 } 1807 return; 1808 } 1809 1810 if (err || !nilfs_folio_buffers_clean(folio)) 1811 filemap_dirty_folio(folio->mapping, folio); 1812 1813 folio_end_writeback(folio); 1814 } 1815 1816 static void nilfs_abort_logs(struct list_head *logs, int err) 1817 { 1818 struct nilfs_segment_buffer *segbuf; 1819 struct folio *bd_folio = NULL, *fs_folio = NULL; 1820 struct buffer_head *bh; 1821 1822 if (list_empty(logs)) 1823 return; 1824 1825 list_for_each_entry(segbuf, logs, sb_list) { 1826 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1827 b_assoc_buffers) { 1828 clear_buffer_uptodate(bh); 1829 if (bh->b_folio != bd_folio) { 1830 if (bd_folio) 1831 folio_end_writeback(bd_folio); 1832 bd_folio = bh->b_folio; 1833 } 1834 } 1835 1836 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1837 b_assoc_buffers) { 1838 if (bh == segbuf->sb_super_root) { 1839 clear_buffer_uptodate(bh); 1840 if (bh->b_folio != bd_folio) { 1841 folio_end_writeback(bd_folio); 1842 bd_folio = bh->b_folio; 1843 } 1844 break; 1845 } 1846 clear_buffer_async_write(bh); 1847 if (bh->b_folio != fs_folio) { 1848 nilfs_end_folio_io(fs_folio, err); 1849 fs_folio = bh->b_folio; 1850 } 1851 } 1852 } 1853 if (bd_folio) 1854 folio_end_writeback(bd_folio); 1855 1856 nilfs_end_folio_io(fs_folio, err); 1857 } 1858 1859 static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, 1860 struct the_nilfs *nilfs, int err) 1861 { 1862 LIST_HEAD(logs); 1863 int ret; 1864 1865 list_splice_tail_init(&sci->sc_write_logs, &logs); 1866 ret = nilfs_wait_on_logs(&logs); 1867 nilfs_abort_logs(&logs, ret ? : err); 1868 1869 list_splice_tail_init(&sci->sc_segbufs, &logs); 1870 if (list_empty(&logs)) 1871 return; /* if the first segment buffer preparation failed */ 1872 1873 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1874 nilfs_free_incomplete_logs(&logs, nilfs); 1875 1876 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1877 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1878 sci->sc_freesegs, 1879 sci->sc_nfreesegs, 1880 NULL); 1881 WARN_ON(ret); /* do not happen */ 1882 } 1883 1884 nilfs_destroy_logs(&logs); 1885 } 1886 1887 static void nilfs_set_next_segment(struct the_nilfs *nilfs, 1888 struct nilfs_segment_buffer *segbuf) 1889 { 1890 nilfs->ns_segnum = segbuf->sb_segnum; 1891 nilfs->ns_nextnum = segbuf->sb_nextnum; 1892 nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start 1893 + segbuf->sb_sum.nblocks; 1894 nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq; 1895 nilfs->ns_ctime = segbuf->sb_sum.ctime; 1896 } 1897 1898 static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) 1899 { 1900 struct nilfs_segment_buffer *segbuf; 1901 struct folio *bd_folio = NULL, *fs_folio = NULL; 1902 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1903 int update_sr = false; 1904 1905 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { 1906 struct buffer_head *bh; 1907 1908 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1909 b_assoc_buffers) { 1910 set_buffer_uptodate(bh); 1911 clear_buffer_dirty(bh); 1912 if (bh->b_folio != bd_folio) { 1913 if (bd_folio) 1914 folio_end_writeback(bd_folio); 1915 bd_folio = bh->b_folio; 1916 } 1917 } 1918 /* 1919 * We assume that the buffers which belong to the same folio 1920 * continue over the buffer list. 1921 * Under this assumption, the last BHs of folios is 1922 * identifiable by the discontinuity of bh->b_folio 1923 * (folio != fs_folio). 
1924 * 1925 * For B-tree node blocks, however, this assumption is not 1926 * guaranteed. The cleanup code of B-tree node folios needs 1927 * special care. 1928 */ 1929 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1930 b_assoc_buffers) { 1931 const unsigned long set_bits = BIT(BH_Uptodate); 1932 const unsigned long clear_bits = 1933 (BIT(BH_Dirty) | BIT(BH_Async_Write) | 1934 BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | 1935 BIT(BH_NILFS_Redirected)); 1936 1937 if (bh == segbuf->sb_super_root) { 1938 set_buffer_uptodate(bh); 1939 clear_buffer_dirty(bh); 1940 if (bh->b_folio != bd_folio) { 1941 folio_end_writeback(bd_folio); 1942 bd_folio = bh->b_folio; 1943 } 1944 update_sr = true; 1945 break; 1946 } 1947 set_mask_bits(&bh->b_state, clear_bits, set_bits); 1948 if (bh->b_folio != fs_folio) { 1949 nilfs_end_folio_io(fs_folio, 0); 1950 fs_folio = bh->b_folio; 1951 } 1952 } 1953 1954 if (!nilfs_segbuf_simplex(segbuf)) { 1955 if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) { 1956 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); 1957 sci->sc_lseg_stime = jiffies; 1958 } 1959 if (segbuf->sb_sum.flags & NILFS_SS_LOGEND) 1960 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); 1961 } 1962 } 1963 /* 1964 * Since folios may continue over multiple segment buffers, 1965 * end of the last folio must be checked outside of the loop. 1966 */ 1967 if (bd_folio) 1968 folio_end_writeback(bd_folio); 1969 1970 nilfs_end_folio_io(fs_folio, 0); 1971 1972 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1973 1974 if (nilfs_doing_gc()) 1975 nilfs_drop_collected_inodes(&sci->sc_gc_inodes); 1976 else 1977 nilfs->ns_nongc_ctime = sci->sc_seg_ctime; 1978 1979 sci->sc_nblk_inc += sci->sc_nblk_this_inc; 1980 1981 segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs); 1982 nilfs_set_next_segment(nilfs, segbuf); 1983 1984 if (update_sr) { 1985 nilfs->ns_flushed_device = 0; 1986 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, 1987 segbuf->sb_sum.seg_seq, nilfs->ns_cno++); 1988 1989 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 1990 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); 1991 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 1992 nilfs_segctor_clear_metadata_dirty(sci); 1993 } else 1994 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 1995 } 1996 1997 static int nilfs_segctor_wait(struct nilfs_sc_info *sci) 1998 { 1999 int ret; 2000 2001 ret = nilfs_wait_on_logs(&sci->sc_write_logs); 2002 if (!ret) { 2003 nilfs_segctor_complete_write(sci); 2004 nilfs_destroy_logs(&sci->sc_write_logs); 2005 } 2006 return ret; 2007 } 2008 2009 static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, 2010 struct the_nilfs *nilfs) 2011 { 2012 struct nilfs_inode_info *ii, *n; 2013 struct inode *ifile = sci->sc_root->ifile; 2014 2015 spin_lock(&nilfs->ns_inode_lock); 2016 retry: 2017 list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) { 2018 if (!ii->i_bh) { 2019 struct buffer_head *ibh; 2020 int err; 2021 2022 spin_unlock(&nilfs->ns_inode_lock); 2023 err = nilfs_ifile_get_inode_block( 2024 ifile, ii->vfs_inode.i_ino, &ibh); 2025 if (unlikely(err)) { 2026 nilfs_warn(sci->sc_super, 2027 "log writer: error %d getting inode block (ino=%lu)", 2028 err, ii->vfs_inode.i_ino); 2029 return err; 2030 } 2031 spin_lock(&nilfs->ns_inode_lock); 2032 if (likely(!ii->i_bh)) 2033 ii->i_bh = ibh; 2034 else 2035 brelse(ibh); 2036 goto retry; 2037 } 2038 2039 // Always redirty the buffer to avoid race condition 2040 mark_buffer_dirty(ii->i_bh); 2041 nilfs_mdt_mark_dirty(ifile); 2042 2043 clear_bit(NILFS_I_QUEUED, &ii->i_state); 2044 
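/*
 * The inode leaves the "queued" state and becomes "busy": from here on
 * it is owned by this construction and is moved onto the writer's
 * sc_dirty_files list below.
 */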
set_bit(NILFS_I_BUSY, &ii->i_state); 2045 list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); 2046 } 2047 spin_unlock(&nilfs->ns_inode_lock); 2048 2049 return 0; 2050 } 2051 2052 static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, 2053 struct the_nilfs *nilfs) 2054 { 2055 struct nilfs_inode_info *ii, *n; 2056 int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE); 2057 int defer_iput = false; 2058 2059 spin_lock(&nilfs->ns_inode_lock); 2060 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 2061 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || 2062 test_bit(NILFS_I_DIRTY, &ii->i_state)) 2063 continue; 2064 2065 clear_bit(NILFS_I_BUSY, &ii->i_state); 2066 brelse(ii->i_bh); 2067 ii->i_bh = NULL; 2068 list_del_init(&ii->i_dirty); 2069 if (!ii->vfs_inode.i_nlink || during_mount) { 2070 /* 2071 * Defer calling iput() to avoid deadlocks if 2072 * i_nlink == 0 or mount is not yet finished. 2073 */ 2074 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); 2075 defer_iput = true; 2076 } else { 2077 spin_unlock(&nilfs->ns_inode_lock); 2078 iput(&ii->vfs_inode); 2079 spin_lock(&nilfs->ns_inode_lock); 2080 } 2081 } 2082 spin_unlock(&nilfs->ns_inode_lock); 2083 2084 if (defer_iput) 2085 schedule_work(&sci->sc_iput_work); 2086 } 2087 2088 /* 2089 * Main procedure of segment constructor 2090 */ 2091 static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 2092 { 2093 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 2094 int err; 2095 2096 if (sb_rdonly(sci->sc_super)) 2097 return -EROFS; 2098 2099 nilfs_sc_cstage_set(sci, NILFS_ST_INIT); 2100 sci->sc_cno = nilfs->ns_cno; 2101 2102 err = nilfs_segctor_collect_dirty_files(sci, nilfs); 2103 if (unlikely(err)) 2104 goto out; 2105 2106 if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) 2107 set_bit(NILFS_SC_DIRTY, &sci->sc_flags); 2108 2109 if (nilfs_segctor_clean(sci)) 2110 goto out; 2111 2112 do { 2113 sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; 2114 2115 err = nilfs_segctor_begin_construction(sci, nilfs); 2116 if (unlikely(err)) 2117 goto failed; 2118 2119 /* Update time stamp */ 2120 sci->sc_seg_ctime = ktime_get_real_seconds(); 2121 2122 err = nilfs_segctor_collect(sci, nilfs, mode); 2123 if (unlikely(err)) 2124 goto failed; 2125 2126 /* Avoid empty segment */ 2127 if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE && 2128 nilfs_segbuf_empty(sci->sc_curseg)) { 2129 nilfs_segctor_abort_construction(sci, nilfs, 1); 2130 goto out; 2131 } 2132 2133 err = nilfs_segctor_assign(sci, mode); 2134 if (unlikely(err)) 2135 goto failed; 2136 2137 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2138 nilfs_segctor_fill_in_file_bmap(sci); 2139 2140 if (mode == SC_LSEG_SR && 2141 nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) { 2142 err = nilfs_cpfile_finalize_checkpoint( 2143 nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root, 2144 sci->sc_nblk_inc + sci->sc_nblk_this_inc, 2145 sci->sc_seg_ctime, 2146 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)); 2147 if (unlikely(err)) 2148 goto failed_to_write; 2149 2150 nilfs_segctor_fill_in_super_root(sci, nilfs); 2151 } 2152 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 2153 2154 /* Write partial segments */ 2155 nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); 2156 2157 err = nilfs_segctor_write(sci, nilfs); 2158 if (unlikely(err)) 2159 goto failed_to_write; 2160 2161 if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE || 2162 nilfs->ns_blocksize_bits != PAGE_SHIFT) { 2163 /* 2164 * At this point, we avoid double buffering 2165 * for blocksize < pagesize 
because page dirty 2166 * flag is turned off during write and dirty 2167 * buffers are not properly collected for 2168 * pages crossing over segments. 2169 */ 2170 err = nilfs_segctor_wait(sci); 2171 if (err) 2172 goto failed_to_write; 2173 } 2174 } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE); 2175 2176 out: 2177 nilfs_segctor_drop_written_files(sci, nilfs); 2178 return err; 2179 2180 failed_to_write: 2181 failed: 2182 if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) 2183 nilfs_redirty_inodes(&sci->sc_dirty_files); 2184 if (nilfs_doing_gc()) 2185 nilfs_redirty_inodes(&sci->sc_gc_inodes); 2186 nilfs_segctor_abort_construction(sci, nilfs, err); 2187 goto out; 2188 } 2189 2190 /** 2191 * nilfs_segctor_start_timer - set timer of background write 2192 * @sci: nilfs_sc_info 2193 * 2194 * If the timer has already been set, it ignores the new request. 2195 * This function MUST be called within a section locking the segment 2196 * semaphore. 2197 */ 2198 static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) 2199 { 2200 spin_lock(&sci->sc_state_lock); 2201 if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { 2202 if (sci->sc_task) { 2203 sci->sc_timer.expires = jiffies + sci->sc_interval; 2204 add_timer(&sci->sc_timer); 2205 } 2206 sci->sc_state |= NILFS_SEGCTOR_COMMIT; 2207 } 2208 spin_unlock(&sci->sc_state_lock); 2209 } 2210 2211 static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) 2212 { 2213 spin_lock(&sci->sc_state_lock); 2214 if (!(sci->sc_flush_request & BIT(bn))) { 2215 unsigned long prev_req = sci->sc_flush_request; 2216 2217 sci->sc_flush_request |= BIT(bn); 2218 if (!prev_req) 2219 wake_up(&sci->sc_wait_daemon); 2220 } 2221 spin_unlock(&sci->sc_state_lock); 2222 } 2223 2224 /** 2225 * nilfs_flush_segment - trigger a segment construction for resource control 2226 * @sb: super block 2227 * @ino: inode number of the file to be flushed out. 2228 */ 2229 void nilfs_flush_segment(struct super_block *sb, ino_t ino) 2230 { 2231 struct the_nilfs *nilfs = sb->s_fs_info; 2232 struct nilfs_sc_info *sci = nilfs->ns_writer; 2233 2234 if (!sci || nilfs_doing_construction()) 2235 return; 2236 nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); 2237 /* assign bit 0 to data files */ 2238 } 2239 2240 struct nilfs_segctor_wait_request { 2241 wait_queue_entry_t wq; 2242 __u32 seq; 2243 int err; 2244 atomic_t done; 2245 }; 2246 2247 static int nilfs_segctor_sync(struct nilfs_sc_info *sci) 2248 { 2249 struct nilfs_segctor_wait_request wait_req; 2250 int err = 0; 2251 2252 init_wait(&wait_req.wq); 2253 wait_req.err = 0; 2254 atomic_set(&wait_req.done, 0); 2255 init_waitqueue_entry(&wait_req.wq, current); 2256 2257 /* 2258 * To prevent a race issue where completion notifications from the 2259 * log writer thread are missed, increment the request sequence count 2260 * "sc_seq_request" and insert a wait queue entry using the current 2261 * sequence number into the "sc_wait_request" queue at the same time 2262 * within the lock section of "sc_state_lock". 2263 */ 2264 spin_lock(&sci->sc_state_lock); 2265 wait_req.seq = ++sci->sc_seq_request; 2266 add_wait_queue(&sci->sc_wait_request, &wait_req.wq); 2267 spin_unlock(&sci->sc_state_lock); 2268 2269 wake_up(&sci->sc_wait_daemon); 2270 2271 for (;;) { 2272 set_current_state(TASK_INTERRUPTIBLE); 2273 2274 /* 2275 * Synchronize only while the log writer thread is alive. 2276 * Leave flushing out after the log writer thread exits to 2277 * the cleanup work in nilfs_segctor_destroy(). 
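 *
 * The wait request is completed by nilfs_segctor_wakeup() once the
 * writer's finished sequence count (sc_seq_done) has caught up with the
 * sequence number recorded above, or when a forced wakeup is issued.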
2278 */ 2279 if (!sci->sc_task) 2280 break; 2281 2282 if (atomic_read(&wait_req.done)) { 2283 err = wait_req.err; 2284 break; 2285 } 2286 if (!signal_pending(current)) { 2287 schedule(); 2288 continue; 2289 } 2290 err = -ERESTARTSYS; 2291 break; 2292 } 2293 finish_wait(&sci->sc_wait_request, &wait_req.wq); 2294 return err; 2295 } 2296 2297 static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) 2298 { 2299 struct nilfs_segctor_wait_request *wrq, *n; 2300 unsigned long flags; 2301 2302 spin_lock_irqsave(&sci->sc_wait_request.lock, flags); 2303 list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { 2304 if (!atomic_read(&wrq->done) && 2305 (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { 2306 wrq->err = err; 2307 atomic_set(&wrq->done, 1); 2308 } 2309 if (atomic_read(&wrq->done)) { 2310 wrq->wq.func(&wrq->wq, 2311 TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 2312 0, NULL); 2313 } 2314 } 2315 spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags); 2316 } 2317 2318 /** 2319 * nilfs_construct_segment - construct a logical segment 2320 * @sb: super block 2321 * 2322 * Return: 0 on success, or one of the following negative error codes on 2323 * failure: 2324 * * %-EIO - I/O error (including metadata corruption). 2325 * * %-ENOMEM - Insufficient memory available. 2326 * * %-ENOSPC - No space left on device (only in a panic state). 2327 * * %-ERESTARTSYS - Interrupted. 2328 * * %-EROFS - Read only filesystem. 2329 */ 2330 int nilfs_construct_segment(struct super_block *sb) 2331 { 2332 struct the_nilfs *nilfs = sb->s_fs_info; 2333 struct nilfs_sc_info *sci = nilfs->ns_writer; 2334 struct nilfs_transaction_info *ti; 2335 2336 if (sb_rdonly(sb) || unlikely(!sci)) 2337 return -EROFS; 2338 2339 /* A call inside transactions causes a deadlock. */ 2340 BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); 2341 2342 return nilfs_segctor_sync(sci); 2343 } 2344 2345 /** 2346 * nilfs_construct_dsync_segment - construct a data-only logical segment 2347 * @sb: super block 2348 * @inode: inode whose data blocks should be written out 2349 * @start: start byte offset 2350 * @end: end byte offset (inclusive) 2351 * 2352 * Return: 0 on success, or one of the following negative error codes on 2353 * failure: 2354 * * %-EIO - I/O error (including metadata corruption). 2355 * * %-ENOMEM - Insufficient memory available. 2356 * * %-ENOSPC - No space left on device (only in a panic state). 2357 * * %-ERESTARTSYS - Interrupted. 2358 * * %-EROFS - Read only filesystem. 
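 *
 * If the inode requires full synchronization (NILFS_I_INODE_SYNC), the
 * filesystem is mounted with STRICT_ORDER, the current log is still
 * unclosed, or the log position has become discontinued, the request
 * falls back to a full segment construction via nilfs_segctor_sync().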
2359 */ 2360 int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, 2361 loff_t start, loff_t end) 2362 { 2363 struct the_nilfs *nilfs = sb->s_fs_info; 2364 struct nilfs_sc_info *sci = nilfs->ns_writer; 2365 struct nilfs_inode_info *ii; 2366 struct nilfs_transaction_info ti; 2367 int err = 0; 2368 2369 if (sb_rdonly(sb) || unlikely(!sci)) 2370 return -EROFS; 2371 2372 nilfs_transaction_lock(sb, &ti, 0); 2373 2374 ii = NILFS_I(inode); 2375 if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) || 2376 nilfs_test_opt(nilfs, STRICT_ORDER) || 2377 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2378 nilfs_discontinued(nilfs)) { 2379 nilfs_transaction_unlock(sb); 2380 err = nilfs_segctor_sync(sci); 2381 return err; 2382 } 2383 2384 spin_lock(&nilfs->ns_inode_lock); 2385 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 2386 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 2387 spin_unlock(&nilfs->ns_inode_lock); 2388 nilfs_transaction_unlock(sb); 2389 return 0; 2390 } 2391 spin_unlock(&nilfs->ns_inode_lock); 2392 sci->sc_dsync_inode = ii; 2393 sci->sc_dsync_start = start; 2394 sci->sc_dsync_end = end; 2395 2396 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); 2397 if (!err) 2398 nilfs->ns_flushed_device = 0; 2399 2400 nilfs_transaction_unlock(sb); 2401 return err; 2402 } 2403 2404 #define FLUSH_FILE_BIT (0x1) /* data file only */ 2405 #define FLUSH_DAT_BIT BIT(NILFS_DAT_INO) /* DAT only */ 2406 2407 /** 2408 * nilfs_segctor_accept - record accepted sequence count of log-write requests 2409 * @sci: segment constructor object 2410 */ 2411 static void nilfs_segctor_accept(struct nilfs_sc_info *sci) 2412 { 2413 bool thread_is_alive; 2414 2415 spin_lock(&sci->sc_state_lock); 2416 sci->sc_seq_accepted = sci->sc_seq_request; 2417 thread_is_alive = (bool)sci->sc_task; 2418 spin_unlock(&sci->sc_state_lock); 2419 2420 /* 2421 * This function does not race with the log writer thread's 2422 * termination. Therefore, deleting sc_timer, which should not be 2423 * done after the log writer thread exits, can be done safely outside 2424 * the area protected by sc_state_lock. 2425 */ 2426 if (thread_is_alive) 2427 del_timer_sync(&sci->sc_timer); 2428 } 2429 2430 /** 2431 * nilfs_segctor_notify - notify the result of request to caller threads 2432 * @sci: segment constructor object 2433 * @mode: mode of log forming 2434 * @err: error code to be notified 2435 */ 2436 static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) 2437 { 2438 /* Clear requests (even when the construction failed) */ 2439 spin_lock(&sci->sc_state_lock); 2440 2441 if (mode == SC_LSEG_SR) { 2442 sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; 2443 sci->sc_seq_done = sci->sc_seq_accepted; 2444 nilfs_segctor_wakeup(sci, err, false); 2445 sci->sc_flush_request = 0; 2446 } else { 2447 if (mode == SC_FLUSH_FILE) 2448 sci->sc_flush_request &= ~FLUSH_FILE_BIT; 2449 else if (mode == SC_FLUSH_DAT) 2450 sci->sc_flush_request &= ~FLUSH_DAT_BIT; 2451 2452 /* re-enable timer if checkpoint creation was not done */ 2453 if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && 2454 time_before(jiffies, sci->sc_timer.expires)) 2455 add_timer(&sci->sc_timer); 2456 } 2457 spin_unlock(&sci->sc_state_lock); 2458 } 2459 2460 /** 2461 * nilfs_segctor_construct - form logs and write them to disk 2462 * @sci: segment constructor object 2463 * @mode: mode of log forming 2464 * 2465 * Return: 0 on success, or a negative error code on failure. 
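 *
 * Every caller in this file takes the transaction lock
 * (nilfs_transaction_lock()) before invoking this function and releases
 * it afterwards.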
2466 */ 2467 static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) 2468 { 2469 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 2470 struct nilfs_super_block **sbp; 2471 int err = 0; 2472 2473 nilfs_segctor_accept(sci); 2474 2475 if (nilfs_discontinued(nilfs)) 2476 mode = SC_LSEG_SR; 2477 if (!nilfs_segctor_confirm(sci)) 2478 err = nilfs_segctor_do_construct(sci, mode); 2479 2480 if (likely(!err)) { 2481 if (mode != SC_FLUSH_DAT) 2482 atomic_set(&nilfs->ns_ndirtyblks, 0); 2483 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && 2484 nilfs_discontinued(nilfs)) { 2485 down_write(&nilfs->ns_sem); 2486 err = -EIO; 2487 sbp = nilfs_prepare_super(sci->sc_super, 2488 nilfs_sb_will_flip(nilfs)); 2489 if (likely(sbp)) { 2490 nilfs_set_log_cursor(sbp[0], nilfs); 2491 err = nilfs_commit_super(sci->sc_super, 2492 NILFS_SB_COMMIT); 2493 } 2494 up_write(&nilfs->ns_sem); 2495 } 2496 } 2497 2498 nilfs_segctor_notify(sci, mode, err); 2499 return err; 2500 } 2501 2502 static void nilfs_construction_timeout(struct timer_list *t) 2503 { 2504 struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); 2505 2506 wake_up_process(sci->sc_task); 2507 } 2508 2509 static void 2510 nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) 2511 { 2512 struct nilfs_inode_info *ii, *n; 2513 2514 list_for_each_entry_safe(ii, n, head, i_dirty) { 2515 if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) 2516 continue; 2517 list_del_init(&ii->i_dirty); 2518 truncate_inode_pages(&ii->vfs_inode.i_data, 0); 2519 nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); 2520 iput(&ii->vfs_inode); 2521 } 2522 } 2523 2524 int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, 2525 void **kbufs) 2526 { 2527 struct the_nilfs *nilfs = sb->s_fs_info; 2528 struct nilfs_sc_info *sci = nilfs->ns_writer; 2529 struct nilfs_transaction_info ti; 2530 int err; 2531 2532 if (unlikely(!sci)) 2533 return -EROFS; 2534 2535 nilfs_transaction_lock(sb, &ti, 1); 2536 2537 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); 2538 if (unlikely(err)) 2539 goto out_unlock; 2540 2541 err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); 2542 if (unlikely(err)) { 2543 nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat); 2544 goto out_unlock; 2545 } 2546 2547 sci->sc_freesegs = kbufs[4]; 2548 sci->sc_nfreesegs = argv[4].v_nmembs; 2549 list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes); 2550 2551 for (;;) { 2552 err = nilfs_segctor_construct(sci, SC_LSEG_SR); 2553 nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes); 2554 2555 if (likely(!err)) 2556 break; 2557 2558 nilfs_warn(sb, "error %d cleaning segments", err); 2559 set_current_state(TASK_INTERRUPTIBLE); 2560 schedule_timeout(sci->sc_interval); 2561 } 2562 if (nilfs_test_opt(nilfs, DISCARD)) { 2563 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, 2564 sci->sc_nfreesegs); 2565 if (ret) { 2566 nilfs_warn(sb, 2567 "error %d on discard request, turning discards off for the device", 2568 ret); 2569 nilfs_clear_opt(nilfs, DISCARD); 2570 } 2571 } 2572 2573 out_unlock: 2574 sci->sc_freesegs = NULL; 2575 sci->sc_nfreesegs = 0; 2576 nilfs_mdt_clear_shadow_map(nilfs->ns_dat); 2577 nilfs_transaction_unlock(sb); 2578 return err; 2579 } 2580 2581 static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) 2582 { 2583 struct nilfs_transaction_info ti; 2584 2585 nilfs_transaction_lock(sci->sc_super, &ti, 0); 2586 nilfs_segctor_construct(sci, mode); 2587 2588 /* 2589 * Unclosed segment should be retried. 
We do this using sc_timer. 2590 * A timeout of sc_timer invokes a complete construction, which 2591 * closes the current logical segment. 2592 */ 2593 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) 2594 nilfs_segctor_start_timer(sci); 2595 2596 nilfs_transaction_unlock(sci->sc_super); 2597 } 2598 2599 static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2600 { 2601 int mode = 0; 2602 2603 spin_lock(&sci->sc_state_lock); 2604 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? 2605 SC_FLUSH_DAT : SC_FLUSH_FILE; 2606 spin_unlock(&sci->sc_state_lock); 2607 2608 if (mode) { 2609 nilfs_segctor_do_construct(sci, mode); 2610 2611 spin_lock(&sci->sc_state_lock); 2612 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? 2613 ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT; 2614 spin_unlock(&sci->sc_state_lock); 2615 } 2616 clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); 2617 } 2618 2619 static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) 2620 { 2621 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2622 time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { 2623 if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) 2624 return SC_FLUSH_FILE; 2625 else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) 2626 return SC_FLUSH_DAT; 2627 } 2628 return SC_LSEG_SR; 2629 } 2630 2631 /** 2632 * nilfs_log_write_required - determine whether log writing is required 2633 * @sci: nilfs_sc_info struct 2634 * @modep: location for storing log writing mode 2635 * 2636 * Return: true if log writing is required, false otherwise. If log writing 2637 * is required, the mode is stored in the location pointed to by @modep. 2638 */ 2639 static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) 2640 { 2641 bool timedout, ret = true; 2642 2643 spin_lock(&sci->sc_state_lock); 2644 timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2645 time_after_eq(jiffies, sci->sc_timer.expires)); 2646 if (timedout || sci->sc_seq_request != sci->sc_seq_done) 2647 *modep = SC_LSEG_SR; 2648 else if (sci->sc_flush_request) 2649 *modep = nilfs_segctor_flush_mode(sci); 2650 else 2651 ret = false; 2652 2653 spin_unlock(&sci->sc_state_lock); 2654 return ret; 2655 } 2656 2657 /** 2658 * nilfs_segctor_thread - main loop of the log writer thread 2659 * @arg: pointer to a struct nilfs_sc_info. 2660 * 2661 * nilfs_segctor_thread() is the main loop function of the log writer kernel 2662 * thread, which determines whether log writing is necessary, performs the 2663 * log write in the background if so, and waits otherwise. It also decides 2664 * whether the superblock needs background writeback. 2665 * 2666 * Return: Always 0. 2667 */ 2668 static int nilfs_segctor_thread(void *arg) 2669 { 2670 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2671 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 2672 2673 nilfs_info(sci->sc_super, 2674 "segctord starting.
Construction interval = %lu seconds, CP frequency < %lu seconds", 2675 sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); 2676 2677 set_freezable(); 2678 2679 while (!kthread_should_stop()) { 2680 DEFINE_WAIT(wait); 2681 bool should_write; 2682 int mode; 2683 2684 if (freezing(current)) { 2685 try_to_freeze(); 2686 continue; 2687 } 2688 2689 prepare_to_wait(&sci->sc_wait_daemon, &wait, 2690 TASK_INTERRUPTIBLE); 2691 should_write = nilfs_log_write_required(sci, &mode); 2692 if (!should_write) 2693 schedule(); 2694 finish_wait(&sci->sc_wait_daemon, &wait); 2695 2696 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) 2697 set_nilfs_discontinued(nilfs); 2698 2699 if (should_write) 2700 nilfs_segctor_thread_construct(sci, mode); 2701 } 2702 2703 /* end sync. */ 2704 spin_lock(&sci->sc_state_lock); 2705 sci->sc_task = NULL; 2706 timer_shutdown_sync(&sci->sc_timer); 2707 spin_unlock(&sci->sc_state_lock); 2708 return 0; 2709 } 2710 2711 /* 2712 * Setup & clean-up functions 2713 */ 2714 static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, 2715 struct nilfs_root *root) 2716 { 2717 struct the_nilfs *nilfs = sb->s_fs_info; 2718 struct nilfs_sc_info *sci; 2719 2720 sci = kzalloc(sizeof(*sci), GFP_KERNEL); 2721 if (!sci) 2722 return NULL; 2723 2724 sci->sc_super = sb; 2725 2726 nilfs_get_root(root); 2727 sci->sc_root = root; 2728 2729 init_waitqueue_head(&sci->sc_wait_request); 2730 init_waitqueue_head(&sci->sc_wait_daemon); 2731 spin_lock_init(&sci->sc_state_lock); 2732 INIT_LIST_HEAD(&sci->sc_dirty_files); 2733 INIT_LIST_HEAD(&sci->sc_segbufs); 2734 INIT_LIST_HEAD(&sci->sc_write_logs); 2735 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2736 INIT_LIST_HEAD(&sci->sc_iput_queue); 2737 INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); 2738 2739 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2740 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2741 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2742 2743 if (nilfs->ns_interval) 2744 sci->sc_interval = HZ * nilfs->ns_interval; 2745 if (nilfs->ns_watermark) 2746 sci->sc_watermark = nilfs->ns_watermark; 2747 return sci; 2748 } 2749 2750 static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) 2751 { 2752 int ret, retrycount = NILFS_SC_CLEANUP_RETRY; 2753 2754 /* 2755 * The segctord thread was stopped and its timer was removed. 2756 * But some tasks remain. 2757 */ 2758 do { 2759 struct nilfs_transaction_info ti; 2760 2761 nilfs_transaction_lock(sci->sc_super, &ti, 0); 2762 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2763 nilfs_transaction_unlock(sci->sc_super); 2764 2765 flush_work(&sci->sc_iput_work); 2766 2767 } while (ret && ret != -EROFS && retrycount-- > 0); 2768 } 2769 2770 /** 2771 * nilfs_segctor_destroy - destroy the segment constructor. 2772 * @sci: nilfs_sc_info 2773 * 2774 * nilfs_segctor_destroy() kills the segctord thread and frees 2775 * the nilfs_sc_info struct. 2776 * Caller must hold the segment semaphore. 
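 *
 * The segment semaphore is released while the log writer thread is
 * stopped and remaining requests are written out, and is re-acquired
 * before the nilfs_sc_info struct is freed.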
2777 */ 2778 static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) 2779 { 2780 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 2781 int flag; 2782 2783 up_write(&nilfs->ns_segctor_sem); 2784 2785 if (sci->sc_task) { 2786 wake_up(&sci->sc_wait_daemon); 2787 kthread_stop(sci->sc_task); 2788 } 2789 2790 spin_lock(&sci->sc_state_lock); 2791 flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request 2792 || sci->sc_seq_request != sci->sc_seq_done); 2793 spin_unlock(&sci->sc_state_lock); 2794 2795 /* 2796 * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can 2797 * be called from delayed iput() via nilfs_evict_inode() and can race 2798 * with the above log writer thread termination. 2799 */ 2800 nilfs_segctor_wakeup(sci, 0, true); 2801 2802 if (flush_work(&sci->sc_iput_work)) 2803 flag = true; 2804 2805 if (flag || !nilfs_segctor_confirm(sci)) 2806 nilfs_segctor_write_out(sci); 2807 2808 if (!list_empty(&sci->sc_dirty_files)) { 2809 nilfs_warn(sci->sc_super, 2810 "disposed unprocessed dirty file(s) when stopping log writer"); 2811 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); 2812 } 2813 2814 if (!list_empty(&sci->sc_iput_queue)) { 2815 nilfs_warn(sci->sc_super, 2816 "disposed unprocessed inode(s) in iput queue when stopping log writer"); 2817 nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); 2818 } 2819 2820 WARN_ON(!list_empty(&sci->sc_segbufs)); 2821 WARN_ON(!list_empty(&sci->sc_write_logs)); 2822 2823 nilfs_put_root(sci->sc_root); 2824 2825 down_write(&nilfs->ns_segctor_sem); 2826 2827 kfree(sci); 2828 } 2829 2830 /** 2831 * nilfs_attach_log_writer - attach log writer 2832 * @sb: super block instance 2833 * @root: root object of the current filesystem tree 2834 * 2835 * This allocates a log writer object, initializes it, and starts the 2836 * log writer. 2837 * 2838 * Return: 0 on success, or one of the following negative error codes on 2839 * failure: 2840 * * %-EINTR - Log writer thread creation failed due to interruption. 2841 * * %-ENOMEM - Insufficient memory available. 2842 */ 2843 int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) 2844 { 2845 struct the_nilfs *nilfs = sb->s_fs_info; 2846 struct nilfs_sc_info *sci; 2847 struct task_struct *t; 2848 int err; 2849 2850 if (nilfs->ns_writer) { 2851 /* 2852 * This happens if the filesystem is made read-only by 2853 * __nilfs_error or nilfs_remount and then remounted 2854 * read/write. In these cases, reuse the existing 2855 * writer. 2856 */ 2857 return 0; 2858 } 2859 2860 sci = nilfs_segctor_new(sb, root); 2861 if (unlikely(!sci)) 2862 return -ENOMEM; 2863 2864 nilfs->ns_writer = sci; 2865 t = kthread_create(nilfs_segctor_thread, sci, "segctord"); 2866 if (IS_ERR(t)) { 2867 err = PTR_ERR(t); 2868 nilfs_err(sb, "error %d creating segctord thread", err); 2869 nilfs_detach_log_writer(sb); 2870 return err; 2871 } 2872 sci->sc_task = t; 2873 timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); 2874 2875 wake_up_process(sci->sc_task); 2876 return 0; 2877 } 2878 2879 /** 2880 * nilfs_detach_log_writer - destroy log writer 2881 * @sb: super block instance 2882 * 2883 * This kills log writer daemon, frees the log writer object, and 2884 * destroys list of dirty files. 
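 *
 * Dirty files that are still attached at this point can no longer be
 * written out, so they are disposed of with a warning.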
2885 */ 2886 void nilfs_detach_log_writer(struct super_block *sb) 2887 { 2888 struct the_nilfs *nilfs = sb->s_fs_info; 2889 LIST_HEAD(garbage_list); 2890 2891 down_write(&nilfs->ns_segctor_sem); 2892 if (nilfs->ns_writer) { 2893 nilfs_segctor_destroy(nilfs->ns_writer); 2894 nilfs->ns_writer = NULL; 2895 } 2896 set_nilfs_purging(nilfs); 2897 2898 /* Forcibly free the list of dirty files */ 2899 spin_lock(&nilfs->ns_inode_lock); 2900 if (!list_empty(&nilfs->ns_dirty_files)) { 2901 list_splice_init(&nilfs->ns_dirty_files, &garbage_list); 2902 nilfs_warn(sb, 2903 "disposed unprocessed dirty file(s) when detaching log writer"); 2904 } 2905 spin_unlock(&nilfs->ns_inode_lock); 2906 up_write(&nilfs->ns_segctor_sem); 2907 2908 nilfs_dispose_list(nilfs, &garbage_list, 1); 2909 clear_nilfs_purging(nilfs); 2910 } 2911