1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * NILFS inode operations.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Ryusuke Konishi.
8 *
9 */
10
11 #include <linux/buffer_head.h>
12 #include <linux/gfp.h>
13 #include <linux/mpage.h>
14 #include <linux/pagemap.h>
15 #include <linux/writeback.h>
16 #include <linux/uio.h>
17 #include <linux/fiemap.h>
18 #include <linux/random.h>
19 #include "nilfs.h"
20 #include "btnode.h"
21 #include "segment.h"
22 #include "page.h"
23 #include "mdt.h"
24 #include "cpfile.h"
25 #include "ifile.h"
26
/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer on NILFS root object (mounted checkpoint)
 * @type: inode type (NILFS_I_TYPE_* value compared against ii->i_type)
 *
 * An instance of this structure is handed as the opaque argument to
 * nilfs_iget_test() and nilfs_iget_set() through the inode cache lookup
 * helpers used in this file.
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	unsigned int type;
};
40
41 static int nilfs_iget_test(struct inode *inode, void *opaque);
42
nilfs_inode_add_blocks(struct inode * inode,int n)43 void nilfs_inode_add_blocks(struct inode *inode, int n)
44 {
45 struct nilfs_root *root = NILFS_I(inode)->i_root;
46
47 inode_add_bytes(inode, i_blocksize(inode) * n);
48 if (root)
49 atomic64_add(n, &root->blocks_count);
50 }
51
nilfs_inode_sub_blocks(struct inode * inode,int n)52 void nilfs_inode_sub_blocks(struct inode *inode, int n)
53 {
54 struct nilfs_root *root = NILFS_I(inode)->i_root;
55
56 inode_sub_bytes(inode, i_blocksize(inode) * n);
57 if (root)
58 atomic64_sub(n, &root->blocks_count);
59 }
60
61 /**
62 * nilfs_get_block() - get a file block on the filesystem (callback function)
63 * @inode: inode struct of the target file
64 * @blkoff: file block number
65 * @bh_result: buffer head to be mapped on
66 * @create: indicate whether allocating the block or not when it has not
67 * been allocated yet.
68 *
69 * This function does not issue actual read request of the specified data
70 * block. It is done by VFS.
71 */
nilfs_get_block(struct inode * inode,sector_t blkoff,struct buffer_head * bh_result,int create)72 int nilfs_get_block(struct inode *inode, sector_t blkoff,
73 struct buffer_head *bh_result, int create)
74 {
75 struct nilfs_inode_info *ii = NILFS_I(inode);
76 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
77 __u64 blknum = 0;
78 int err = 0, ret;
79 unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;
80
81 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
82 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
83 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
84 if (ret >= 0) { /* found */
85 map_bh(bh_result, inode->i_sb, blknum);
86 if (ret > 0)
87 bh_result->b_size = (ret << inode->i_blkbits);
88 goto out;
89 }
90 /* data block was not found */
91 if (ret == -ENOENT && create) {
92 struct nilfs_transaction_info ti;
93
94 bh_result->b_blocknr = 0;
95 err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
96 if (unlikely(err))
97 goto out;
98 err = nilfs_bmap_insert(ii->i_bmap, blkoff,
99 (unsigned long)bh_result);
100 if (unlikely(err != 0)) {
101 if (err == -EEXIST) {
102 /*
103 * The get_block() function could be called
104 * from multiple callers for an inode.
105 * However, the page having this block must
106 * be locked in this case.
107 */
108 nilfs_warn(inode->i_sb,
109 "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
110 __func__, inode->i_ino,
111 (unsigned long long)blkoff);
112 err = -EAGAIN;
113 }
114 nilfs_transaction_abort(inode->i_sb);
115 goto out;
116 }
117 nilfs_mark_inode_dirty_sync(inode);
118 nilfs_transaction_commit(inode->i_sb); /* never fails */
119 /* Error handling should be detailed */
120 set_buffer_new(bh_result);
121 set_buffer_delay(bh_result);
122 map_bh(bh_result, inode->i_sb, 0);
123 /* Disk block number must be changed to proper value */
124
125 } else if (ret == -ENOENT) {
126 /*
127 * not found is not error (e.g. hole); must return without
128 * the mapped state flag.
129 */
130 ;
131 } else {
132 err = ret;
133 }
134
135 out:
136 return err;
137 }
138
/**
 * nilfs_read_folio() - implement read_folio() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read (not used by this function)
 * @folio: the folio to be read
 *
 * Hands @folio to mpage_read_folio() with nilfs_get_block() as the block
 * mapping callback.
 *
 * Return: the value returned by mpage_read_folio().
 */
static int nilfs_read_folio(struct file *file, struct folio *folio)
{
	return mpage_read_folio(folio, nilfs_get_block);
}
149
/**
 * nilfs_readahead() - implement readahead() method of nilfs_aops {}
 * @rac: readahead control describing the request
 *
 * Hands @rac to mpage_readahead() with nilfs_get_block() as the block
 * mapping callback.
 */
static void nilfs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, nilfs_get_block);
}
154
nilfs_writepages(struct address_space * mapping,struct writeback_control * wbc)155 static int nilfs_writepages(struct address_space *mapping,
156 struct writeback_control *wbc)
157 {
158 struct inode *inode = mapping->host;
159 int err = 0;
160
161 if (sb_rdonly(inode->i_sb)) {
162 nilfs_clear_dirty_pages(mapping);
163 return -EROFS;
164 }
165
166 if (wbc->sync_mode == WB_SYNC_ALL)
167 err = nilfs_construct_dsync_segment(inode->i_sb, inode,
168 wbc->range_start,
169 wbc->range_end);
170 return err;
171 }
172
nilfs_writepage(struct page * page,struct writeback_control * wbc)173 static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
174 {
175 struct folio *folio = page_folio(page);
176 struct inode *inode = folio->mapping->host;
177 int err;
178
179 if (sb_rdonly(inode->i_sb)) {
180 /*
181 * It means that filesystem was remounted in read-only
182 * mode because of error or metadata corruption. But we
183 * have dirty pages that try to be flushed in background.
184 * So, here we simply discard this dirty page.
185 */
186 nilfs_clear_folio_dirty(folio);
187 folio_unlock(folio);
188 return -EROFS;
189 }
190
191 folio_redirty_for_writepage(wbc, folio);
192 folio_unlock(folio);
193
194 if (wbc->sync_mode == WB_SYNC_ALL) {
195 err = nilfs_construct_segment(inode->i_sb);
196 if (unlikely(err))
197 return err;
198 } else if (wbc->for_reclaim)
199 nilfs_flush_segment(inode->i_sb, inode->i_ino);
200
201 return 0;
202 }
203
nilfs_dirty_folio(struct address_space * mapping,struct folio * folio)204 static bool nilfs_dirty_folio(struct address_space *mapping,
205 struct folio *folio)
206 {
207 struct inode *inode = mapping->host;
208 struct buffer_head *head;
209 unsigned int nr_dirty = 0;
210 bool ret = filemap_dirty_folio(mapping, folio);
211
212 /*
213 * The page may not be locked, eg if called from try_to_unmap_one()
214 */
215 spin_lock(&mapping->i_private_lock);
216 head = folio_buffers(folio);
217 if (head) {
218 struct buffer_head *bh = head;
219
220 do {
221 /* Do not mark hole blocks dirty */
222 if (buffer_dirty(bh) || !buffer_mapped(bh))
223 continue;
224
225 set_buffer_dirty(bh);
226 nr_dirty++;
227 } while (bh = bh->b_this_page, bh != head);
228 } else if (ret) {
229 nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
230 }
231 spin_unlock(&mapping->i_private_lock);
232
233 if (nr_dirty)
234 nilfs_set_file_dirty(inode, nr_dirty);
235 return ret;
236 }
237
/**
 * nilfs_write_failed() - undo the effects of a failed write past i_size
 * @mapping: address space of the file that was being written
 * @to: end offset the failed write would have reached
 *
 * Does nothing unless @to lies beyond the current inode size.  In that case
 * the page cache is truncated back to the inode size and nilfs_truncate()
 * is called for the inode.
 */
void nilfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to <= inode->i_size)
		return;

	truncate_pagecache(inode, inode->i_size);
	nilfs_truncate(inode);
}
247
/**
 * nilfs_write_begin() - implement write_begin() method of nilfs_aops {}
 * @file: file being written (not used by this function)
 * @mapping: address space of the file
 * @pos: byte offset of the write
 * @len: length of the write
 * @foliop: place to return the folio prepared for the write
 * @fsdata: filesystem private data (not used by this function)
 *
 * Starts a transaction and prepares the folio with block_write_begin().
 * On success the transaction is left open.  On failure the partial write
 * is undone and the transaction is aborted.
 *
 * Return: 0 on success, or the error from nilfs_transaction_begin() or
 * block_write_begin().
 */
static int nilfs_write_begin(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned len,
			     struct folio **foliop, void **fsdata)
{
	struct super_block *sb = mapping->host->i_sb;
	int err;

	err = nilfs_transaction_begin(sb, NULL, 1);
	if (unlikely(err))
		return err;

	err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
	if (unlikely(err)) {
		nilfs_write_failed(mapping, pos + len);
		nilfs_transaction_abort(sb);
	}
	return err;
}
266
/**
 * nilfs_write_end() - implement write_end() method of nilfs_aops {}
 * @file: file being written
 * @mapping: address space of the file
 * @pos: byte offset of the write
 * @len: length requested at write_begin time
 * @copied: number of bytes actually copied into the folio
 * @folio: folio that was written to
 * @fsdata: filesystem private data passed through to generic_write_end()
 *
 * Counts the clean buffers covered by the copied range before the generic
 * code dirties them, completes the write with generic_write_end(), reports
 * the count to nilfs_set_file_dirty() and commits the transaction.
 *
 * Return: the error from nilfs_transaction_commit() if it is non-zero,
 * otherwise the byte count returned by generic_write_end().
 */
static int nilfs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct folio *folio, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned int from = pos & (PAGE_SIZE - 1);
	unsigned int nr_clean;
	int err;

	nr_clean = nilfs_page_count_clean_buffers(&folio->page, from,
						  from + copied);
	copied = generic_write_end(file, mapping, pos, len, copied, folio,
				   fsdata);
	nilfs_set_file_dirty(inode, nr_clean);

	err = nilfs_transaction_commit(inode->i_sb);
	if (err)
		return err;
	return copied;
}
284
285 static ssize_t
nilfs_direct_IO(struct kiocb * iocb,struct iov_iter * iter)286 nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
287 {
288 struct inode *inode = file_inode(iocb->ki_filp);
289
290 if (iov_iter_rw(iter) == WRITE)
291 return 0;
292
293 /* Needs synchronization with the cleaner */
294 return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
295 }
296
/*
 * Address space operations for NILFS inodes.  __nilfs_read_inode() installs
 * this table on i_mapping->a_ops for regular files, directories and
 * symbolic links.
 */
const struct address_space_operations nilfs_aops = {
	.writepage		= nilfs_writepage,
	.read_folio		= nilfs_read_folio,
	.writepages		= nilfs_writepages,
	.dirty_folio		= nilfs_dirty_folio,
	.readahead		= nilfs_readahead,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	.invalidate_folio	= block_invalidate_folio,
	.direct_IO		= nilfs_direct_IO,
	.is_partially_uptodate	= block_is_partially_uptodate,
};
309
nilfs_insert_inode_locked(struct inode * inode,struct nilfs_root * root,unsigned long ino)310 static int nilfs_insert_inode_locked(struct inode *inode,
311 struct nilfs_root *root,
312 unsigned long ino)
313 {
314 struct nilfs_iget_args args = {
315 .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
316 };
317
318 return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
319 }
320
/**
 * nilfs_new_inode() - allocate and initialize a new inode
 * @dir: parent directory inode; supplies the super block, the root object
 *       and the inherited flags
 * @mode: file mode of the new inode
 *
 * Allocates an in-memory inode, creates its entry in the ifile of @dir's
 * root, initializes ownership, timestamps, flags and (for regular files,
 * directories and symlinks) the bmap, and inserts the inode into the inode
 * cache.
 *
 * Return: the new inode on success, or an ERR_PTR() carrying -ENOMEM when
 * the inode could not be allocated, -EIO when it could not be inserted into
 * the inode cache, or the error reported by nilfs_ifile_create_inode(),
 * nilfs_bmap_read() or nilfs_init_acl().
 */
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode;
	struct nilfs_inode_info *ii;
	struct nilfs_root *root;
	struct buffer_head *bh;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	/* Page cache allocations for this mapping must not use __GFP_FS */
	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));

	root = NILFS_I(dir)->i_root;
	ii = NILFS_I(inode);
	ii->i_state = BIT(NILFS_I_NEW);
	ii->i_type = NILFS_I_TYPE_NORMAL;
	ii->i_root = root;

	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
	ii->i_bh = bh;

	atomic64_inc(&root->inodes_count);
	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	inode->i_ino = ino;
	simple_inode_init_ts(inode);

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_after_creation;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	/* Inherit the inheritable flags of the parent, masked by @mode */
	ii->i_flags = nilfs_mask_flags(
		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
	nilfs_set_inode_flags(inode);
	inode->i_generation = get_random_u32();
	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
		err = -EIO;
		goto failed_after_creation;
	}

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		/*
		 * Never occur.  When supporting nilfs_init_acl(),
		 * proper cancellation of above jobs should be considered.
		 */
		goto failed_after_creation;

	return inode;

 failed_after_creation:
	/* Drop the link count before the final iput() below */
	clear_nlink(inode);
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
	iput(inode);  /*
		       * raw_inode will be deleted through
		       * nilfs_evict_inode().
		       */
	goto failed;

 failed_ifile_create_inode:
	/* No ifile entry exists yet; mark the inode bad before dropping it */
	make_bad_inode(inode);
	iput(inode);
 failed:
	return ERR_PTR(err);
}
403
nilfs_set_inode_flags(struct inode * inode)404 void nilfs_set_inode_flags(struct inode *inode)
405 {
406 unsigned int flags = NILFS_I(inode)->i_flags;
407 unsigned int new_fl = 0;
408
409 if (flags & FS_SYNC_FL)
410 new_fl |= S_SYNC;
411 if (flags & FS_APPEND_FL)
412 new_fl |= S_APPEND;
413 if (flags & FS_IMMUTABLE_FL)
414 new_fl |= S_IMMUTABLE;
415 if (flags & FS_NOATIME_FL)
416 new_fl |= S_NOATIME;
417 if (flags & FS_DIRSYNC_FL)
418 new_fl |= S_DIRSYNC;
419 inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
420 S_NOATIME | S_DIRSYNC);
421 }
422
nilfs_read_inode_common(struct inode * inode,struct nilfs_inode * raw_inode)423 int nilfs_read_inode_common(struct inode *inode,
424 struct nilfs_inode *raw_inode)
425 {
426 struct nilfs_inode_info *ii = NILFS_I(inode);
427 int err;
428
429 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
430 i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
431 i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
432 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
433 inode->i_size = le64_to_cpu(raw_inode->i_size);
434 inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
435 le32_to_cpu(raw_inode->i_mtime_nsec));
436 inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
437 le32_to_cpu(raw_inode->i_ctime_nsec));
438 inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
439 le32_to_cpu(raw_inode->i_mtime_nsec));
440 if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
441 return -EIO; /* this inode is for metadata and corrupted */
442 if (inode->i_nlink == 0)
443 return -ESTALE; /* this inode is deleted */
444
445 inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
446 ii->i_flags = le32_to_cpu(raw_inode->i_flags);
447 #if 0
448 ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
449 ii->i_dir_acl = S_ISREG(inode->i_mode) ?
450 0 : le32_to_cpu(raw_inode->i_dir_acl);
451 #endif
452 ii->i_dir_start_lookup = 0;
453 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
454
455 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
456 S_ISLNK(inode->i_mode)) {
457 err = nilfs_bmap_read(ii->i_bmap, raw_inode);
458 if (err < 0)
459 return err;
460 set_bit(NILFS_I_BMAP, &ii->i_state);
461 /* No lock is needed; iget() ensures it. */
462 }
463 return 0;
464 }
465
__nilfs_read_inode(struct super_block * sb,struct nilfs_root * root,unsigned long ino,struct inode * inode)466 static int __nilfs_read_inode(struct super_block *sb,
467 struct nilfs_root *root, unsigned long ino,
468 struct inode *inode)
469 {
470 struct the_nilfs *nilfs = sb->s_fs_info;
471 struct buffer_head *bh;
472 struct nilfs_inode *raw_inode;
473 int err;
474
475 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
476 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
477 if (unlikely(err))
478 goto bad_inode;
479
480 raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);
481
482 err = nilfs_read_inode_common(inode, raw_inode);
483 if (err)
484 goto failed_unmap;
485
486 if (S_ISREG(inode->i_mode)) {
487 inode->i_op = &nilfs_file_inode_operations;
488 inode->i_fop = &nilfs_file_operations;
489 inode->i_mapping->a_ops = &nilfs_aops;
490 } else if (S_ISDIR(inode->i_mode)) {
491 inode->i_op = &nilfs_dir_inode_operations;
492 inode->i_fop = &nilfs_dir_operations;
493 inode->i_mapping->a_ops = &nilfs_aops;
494 } else if (S_ISLNK(inode->i_mode)) {
495 inode->i_op = &nilfs_symlink_inode_operations;
496 inode_nohighmem(inode);
497 inode->i_mapping->a_ops = &nilfs_aops;
498 } else {
499 inode->i_op = &nilfs_special_inode_operations;
500 init_special_inode(
501 inode, inode->i_mode,
502 huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
503 }
504 nilfs_ifile_unmap_inode(raw_inode);
505 brelse(bh);
506 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
507 nilfs_set_inode_flags(inode);
508 mapping_set_gfp_mask(inode->i_mapping,
509 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
510 return 0;
511
512 failed_unmap:
513 nilfs_ifile_unmap_inode(raw_inode);
514 brelse(bh);
515
516 bad_inode:
517 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
518 return err;
519 }
520
nilfs_iget_test(struct inode * inode,void * opaque)521 static int nilfs_iget_test(struct inode *inode, void *opaque)
522 {
523 struct nilfs_iget_args *args = opaque;
524 struct nilfs_inode_info *ii;
525
526 if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
527 return 0;
528
529 ii = NILFS_I(inode);
530 if (ii->i_type != args->type)
531 return 0;
532
533 return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
534 }
535
nilfs_iget_set(struct inode * inode,void * opaque)536 static int nilfs_iget_set(struct inode *inode, void *opaque)
537 {
538 struct nilfs_iget_args *args = opaque;
539
540 inode->i_ino = args->ino;
541 NILFS_I(inode)->i_cno = args->cno;
542 NILFS_I(inode)->i_root = args->root;
543 NILFS_I(inode)->i_type = args->type;
544 if (args->root && args->ino == NILFS_ROOT_INO)
545 nilfs_get_root(args->root);
546 return 0;
547 }
548
nilfs_ilookup(struct super_block * sb,struct nilfs_root * root,unsigned long ino)549 struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
550 unsigned long ino)
551 {
552 struct nilfs_iget_args args = {
553 .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
554 };
555
556 return ilookup5(sb, ino, nilfs_iget_test, &args);
557 }
558
nilfs_iget_locked(struct super_block * sb,struct nilfs_root * root,unsigned long ino)559 struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
560 unsigned long ino)
561 {
562 struct nilfs_iget_args args = {
563 .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
564 };
565
566 return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
567 }
568
nilfs_iget(struct super_block * sb,struct nilfs_root * root,unsigned long ino)569 struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
570 unsigned long ino)
571 {
572 struct inode *inode;
573 int err;
574
575 inode = nilfs_iget_locked(sb, root, ino);
576 if (unlikely(!inode))
577 return ERR_PTR(-ENOMEM);
578 if (!(inode->i_state & I_NEW))
579 return inode;
580
581 err = __nilfs_read_inode(sb, root, ino, inode);
582 if (unlikely(err)) {
583 iget_failed(inode);
584 return ERR_PTR(err);
585 }
586 unlock_new_inode(inode);
587 return inode;
588 }
589
/**
 * nilfs_iget_for_gc() - obtain an inode used for garbage collection
 * @sb: super block instance
 * @ino: inode number
 * @cno: checkpoint number
 *
 * Return: the inode on success, ERR_PTR(-ENOMEM) when no inode could be
 * obtained from the cache, or an ERR_PTR() carrying the error returned by
 * nilfs_init_gcinode().
 */
struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args key = {
		.ino	= ino,
		.cno	= cno,
		.root	= NULL,
		.type	= NILFS_I_TYPE_GC,
	};
	struct inode *inode;
	int err;

	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &key);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);

	/* Only a freshly allocated inode needs GC initialization */
	if (inode->i_state & I_NEW) {
		err = nilfs_init_gcinode(inode);
		if (unlikely(err)) {
			iget_failed(inode);
			return ERR_PTR(err);
		}
		unlock_new_inode(inode);
	}
	return inode;
}
613
614 /**
615 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
616 * @inode: inode object
617 *
618 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
619 * or does nothing if the inode already has it. This function allocates
620 * an additional inode to maintain page cache of B-tree nodes one-on-one.
621 *
622 * Return Value: On success, 0 is returned. On errors, one of the following
623 * negative error code is returned.
624 *
625 * %-ENOMEM - Insufficient memory available.
626 */
nilfs_attach_btree_node_cache(struct inode * inode)627 int nilfs_attach_btree_node_cache(struct inode *inode)
628 {
629 struct nilfs_inode_info *ii = NILFS_I(inode);
630 struct inode *btnc_inode;
631 struct nilfs_iget_args args;
632
633 if (ii->i_assoc_inode)
634 return 0;
635
636 args.ino = inode->i_ino;
637 args.root = ii->i_root;
638 args.cno = ii->i_cno;
639 args.type = ii->i_type | NILFS_I_TYPE_BTNC;
640
641 btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
642 nilfs_iget_set, &args);
643 if (unlikely(!btnc_inode))
644 return -ENOMEM;
645 if (btnc_inode->i_state & I_NEW) {
646 nilfs_init_btnc_inode(btnc_inode);
647 unlock_new_inode(btnc_inode);
648 }
649 NILFS_I(btnc_inode)->i_assoc_inode = inode;
650 NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
651 ii->i_assoc_inode = btnc_inode;
652
653 return 0;
654 }
655
656 /**
657 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
658 * @inode: inode object
659 *
660 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
661 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
662 */
nilfs_detach_btree_node_cache(struct inode * inode)663 void nilfs_detach_btree_node_cache(struct inode *inode)
664 {
665 struct nilfs_inode_info *ii = NILFS_I(inode);
666 struct inode *btnc_inode = ii->i_assoc_inode;
667
668 if (btnc_inode) {
669 NILFS_I(btnc_inode)->i_assoc_inode = NULL;
670 ii->i_assoc_inode = NULL;
671 iput(btnc_inode);
672 }
673 }
674
675 /**
676 * nilfs_iget_for_shadow - obtain inode for shadow mapping
677 * @inode: inode object that uses shadow mapping
678 *
679 * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
680 * caches for shadow mapping. The page cache for data pages is set up
681 * in one inode and the one for b-tree node pages is set up in the
682 * other inode, which is attached to the former inode.
683 *
684 * Return Value: On success, a pointer to the inode for data pages is
685 * returned. On errors, one of the following negative error code is returned
686 * in a pointer type.
687 *
688 * %-ENOMEM - Insufficient memory available.
689 */
nilfs_iget_for_shadow(struct inode * inode)690 struct inode *nilfs_iget_for_shadow(struct inode *inode)
691 {
692 struct nilfs_iget_args args = {
693 .ino = inode->i_ino, .root = NULL, .cno = 0,
694 .type = NILFS_I_TYPE_SHADOW
695 };
696 struct inode *s_inode;
697 int err;
698
699 s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
700 nilfs_iget_set, &args);
701 if (unlikely(!s_inode))
702 return ERR_PTR(-ENOMEM);
703 if (!(s_inode->i_state & I_NEW))
704 return inode;
705
706 NILFS_I(s_inode)->i_flags = 0;
707 memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
708 mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
709
710 err = nilfs_attach_btree_node_cache(s_inode);
711 if (unlikely(err)) {
712 iget_failed(s_inode);
713 return ERR_PTR(err);
714 }
715 unlock_new_inode(s_inode);
716 return s_inode;
717 }
718
719 /**
720 * nilfs_write_inode_common - export common inode information to on-disk inode
721 * @inode: inode object
722 * @raw_inode: on-disk inode
723 *
724 * This function writes standard information from the on-memory inode @inode
725 * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap
726 * data is not exported, nilfs_bmap_write() must be called separately during
727 * log writing.
728 */
nilfs_write_inode_common(struct inode * inode,struct nilfs_inode * raw_inode)729 void nilfs_write_inode_common(struct inode *inode,
730 struct nilfs_inode *raw_inode)
731 {
732 struct nilfs_inode_info *ii = NILFS_I(inode);
733
734 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
735 raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
736 raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
737 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
738 raw_inode->i_size = cpu_to_le64(inode->i_size);
739 raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
740 raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
741 raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
742 raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
743 raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
744
745 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
746 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
747
748 /*
749 * When extending inode, nilfs->ns_inode_size should be checked
750 * for substitutions of appended fields.
751 */
752 }
753
nilfs_update_inode(struct inode * inode,struct buffer_head * ibh,int flags)754 void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
755 {
756 ino_t ino = inode->i_ino;
757 struct nilfs_inode_info *ii = NILFS_I(inode);
758 struct inode *ifile = ii->i_root->ifile;
759 struct nilfs_inode *raw_inode;
760
761 raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);
762
763 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
764 memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
765 if (flags & I_DIRTY_DATASYNC)
766 set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
767
768 nilfs_write_inode_common(inode, raw_inode);
769
770 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
771 raw_inode->i_device_code =
772 cpu_to_le64(huge_encode_dev(inode->i_rdev));
773
774 nilfs_ifile_unmap_inode(raw_inode);
775 }
776
777 #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */
778
nilfs_truncate_bmap(struct nilfs_inode_info * ii,unsigned long from)779 static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
780 unsigned long from)
781 {
782 __u64 b;
783 int ret;
784
785 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
786 return;
787 repeat:
788 ret = nilfs_bmap_last_key(ii->i_bmap, &b);
789 if (ret == -ENOENT)
790 return;
791 else if (ret < 0)
792 goto failed;
793
794 if (b < from)
795 return;
796
797 b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
798 ret = nilfs_bmap_truncate(ii->i_bmap, b);
799 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
800 if (!ret || (ret == -ENOMEM &&
801 nilfs_bmap_truncate(ii->i_bmap, b) == 0))
802 goto repeat;
803
804 failed:
805 nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
806 ret, ii->vfs_inode.i_ino);
807 }
808
nilfs_truncate(struct inode * inode)809 void nilfs_truncate(struct inode *inode)
810 {
811 unsigned long blkoff;
812 unsigned int blocksize;
813 struct nilfs_transaction_info ti;
814 struct super_block *sb = inode->i_sb;
815 struct nilfs_inode_info *ii = NILFS_I(inode);
816
817 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
818 return;
819 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
820 return;
821
822 blocksize = sb->s_blocksize;
823 blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
824 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
825
826 block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);
827
828 nilfs_truncate_bmap(ii, blkoff);
829
830 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
831 if (IS_SYNC(inode))
832 nilfs_set_transaction_flag(NILFS_TI_SYNC);
833
834 nilfs_mark_inode_dirty(inode);
835 nilfs_set_file_dirty(inode, 0);
836 nilfs_transaction_commit(sb);
837 /*
838 * May construct a logical segment and may fail in sync mode.
839 * But truncate has no return value.
840 */
841 }
842
nilfs_clear_inode(struct inode * inode)843 static void nilfs_clear_inode(struct inode *inode)
844 {
845 struct nilfs_inode_info *ii = NILFS_I(inode);
846
847 /*
848 * Free resources allocated in nilfs_read_inode(), here.
849 */
850 BUG_ON(!list_empty(&ii->i_dirty));
851 brelse(ii->i_bh);
852 ii->i_bh = NULL;
853
854 if (nilfs_is_metadata_file_inode(inode))
855 nilfs_mdt_clear(inode);
856
857 if (test_bit(NILFS_I_BMAP, &ii->i_state))
858 nilfs_bmap_clear(ii->i_bmap);
859
860 if (!(ii->i_type & NILFS_I_TYPE_BTNC))
861 nilfs_detach_btree_node_cache(inode);
862
863 if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
864 nilfs_put_root(ii->i_root);
865 }
866
/**
 * nilfs_evict_inode() - dispose of an inode that is leaving the inode cache
 * @inode: inode being evicted
 *
 * Inodes that still have links, have no root object, or are bad are only
 * cleared.  Otherwise the inode is deleted inside a transaction: its bmap
 * is truncated to zero, its ifile entry is deleted and the root's inode
 * counter is decremented when that deletion succeeds.  If the super block
 * is read-only or the writer is detached, the inode is cleared without
 * making changes and the transaction is aborted.
 */
void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs;
	int ret;

	/* Nothing to delete on disk: just drop the in-memory state */
	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	truncate_inode_pages_final(&inode->i_data);

	nilfs = sb->s_fs_info;
	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
		/*
		 * If this inode is about to be disposed after the file system
		 * has been degraded to read-only due to file system corruption
		 * or after the writer has been detached, do not make any
		 * changes that cause writes, just clear it.
		 * Do this check after read-locking ns_segctor_sem by
		 * nilfs_transaction_begin() in order to avoid a race with
		 * the writer detach operation.
		 */
		clear_inode(inode);
		nilfs_clear_inode(inode);
		nilfs_transaction_abort(sb);
		return;
	}

	/* TODO: some of the following operations may fail.  */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	clear_inode(inode);

	/* Only count the inode as gone if its ifile entry was deleted */
	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic64_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But delete_inode has no return value.
	 */
}
921
nilfs_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * iattr)922 int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
923 struct iattr *iattr)
924 {
925 struct nilfs_transaction_info ti;
926 struct inode *inode = d_inode(dentry);
927 struct super_block *sb = inode->i_sb;
928 int err;
929
930 err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
931 if (err)
932 return err;
933
934 err = nilfs_transaction_begin(sb, &ti, 0);
935 if (unlikely(err))
936 return err;
937
938 if ((iattr->ia_valid & ATTR_SIZE) &&
939 iattr->ia_size != i_size_read(inode)) {
940 inode_dio_wait(inode);
941 truncate_setsize(inode, iattr->ia_size);
942 nilfs_truncate(inode);
943 }
944
945 setattr_copy(&nop_mnt_idmap, inode, iattr);
946 mark_inode_dirty(inode);
947
948 if (iattr->ia_valid & ATTR_MODE) {
949 err = nilfs_acl_chmod(inode);
950 if (unlikely(err))
951 goto out_err;
952 }
953
954 return nilfs_transaction_commit(sb);
955
956 out_err:
957 nilfs_transaction_abort(sb);
958 return err;
959 }
960
nilfs_permission(struct mnt_idmap * idmap,struct inode * inode,int mask)961 int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
962 int mask)
963 {
964 struct nilfs_root *root = NILFS_I(inode)->i_root;
965
966 if ((mask & MAY_WRITE) && root &&
967 root->cno != NILFS_CPTREE_CURRENT_CNO)
968 return -EROFS; /* snapshot is not writable */
969
970 return generic_permission(&nop_mnt_idmap, inode, mask);
971 }
972
/**
 * nilfs_load_inode_block() - get the buffer holding the on-disk inode
 * @inode: inode whose ifile block is wanted
 * @pbh: place to return the buffer head
 *
 * Returns the buffer cached in the NILFS inode info if there is one and it
 * is up to date.  Otherwise the block is read through the ifile with
 * ns_inode_lock dropped, and the cache is updated after the lock has been
 * retaken.  In either case an additional reference is taken on the buffer
 * returned in @pbh.
 *
 * Return: 0 on success, or the negative error returned by
 * nilfs_ifile_get_inode_block().
 */
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
		/* The read below is done without holding the spinlock */
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		/* Re-check: i_bh may have changed while the lock was dropped */
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
			/* Replace the stale cached buffer with the new one */
			__brelse(ii->i_bh);
			ii->i_bh = *pbh;
		} else {
			/* A valid buffer is cached; drop the one just read */
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	/* Extra reference for the caller, on top of the cached one */
	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}
1003
nilfs_inode_dirty(struct inode * inode)1004 int nilfs_inode_dirty(struct inode *inode)
1005 {
1006 struct nilfs_inode_info *ii = NILFS_I(inode);
1007 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1008 int ret = 0;
1009
1010 if (!list_empty(&ii->i_dirty)) {
1011 spin_lock(&nilfs->ns_inode_lock);
1012 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
1013 test_bit(NILFS_I_BUSY, &ii->i_state);
1014 spin_unlock(&nilfs->ns_inode_lock);
1015 }
1016 return ret;
1017 }
1018
nilfs_set_file_dirty(struct inode * inode,unsigned int nr_dirty)1019 int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
1020 {
1021 struct nilfs_inode_info *ii = NILFS_I(inode);
1022 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1023
1024 atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
1025
1026 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
1027 return 0;
1028
1029 spin_lock(&nilfs->ns_inode_lock);
1030 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
1031 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
1032 /*
1033 * Because this routine may race with nilfs_dispose_list(),
1034 * we have to check NILFS_I_QUEUED here, too.
1035 */
1036 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
1037 /*
1038 * This will happen when somebody is freeing
1039 * this inode.
1040 */
1041 nilfs_warn(inode->i_sb,
1042 "cannot set file dirty (ino=%lu): the file is being freed",
1043 inode->i_ino);
1044 spin_unlock(&nilfs->ns_inode_lock);
1045 return -EINVAL; /*
1046 * NILFS_I_DIRTY may remain for
1047 * freeing inode.
1048 */
1049 }
1050 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
1051 set_bit(NILFS_I_QUEUED, &ii->i_state);
1052 }
1053 spin_unlock(&nilfs->ns_inode_lock);
1054 return 0;
1055 }
1056
__nilfs_mark_inode_dirty(struct inode * inode,int flags)1057 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
1058 {
1059 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1060 struct buffer_head *ibh;
1061 int err;
1062
1063 /*
1064 * Do not dirty inodes after the log writer has been detached
1065 * and its nilfs_root struct has been freed.
1066 */
1067 if (unlikely(nilfs_purging(nilfs)))
1068 return 0;
1069
1070 err = nilfs_load_inode_block(inode, &ibh);
1071 if (unlikely(err)) {
1072 nilfs_warn(inode->i_sb,
1073 "cannot mark inode dirty (ino=%lu): error %d loading inode block",
1074 inode->i_ino, err);
1075 return err;
1076 }
1077 nilfs_update_inode(inode, ibh, flags);
1078 mark_buffer_dirty(ibh);
1079 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
1080 brelse(ibh);
1081 return 0;
1082 }
1083
1084 /**
1085 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
1086 * @inode: inode of the file to be registered.
1087 * @flags: flags to determine the dirty state of the inode
1088 *
1089 * nilfs_dirty_inode() loads a inode block containing the specified
1090 * @inode and copies data from a nilfs_inode to a corresponding inode
1091 * entry in the inode block. This operation is excluded from the segment
1092 * construction. This function can be called both as a single operation
1093 * and as a part of indivisible file operations.
1094 */
nilfs_dirty_inode(struct inode * inode,int flags)1095 void nilfs_dirty_inode(struct inode *inode, int flags)
1096 {
1097 struct nilfs_transaction_info ti;
1098 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
1099
1100 if (is_bad_inode(inode)) {
1101 nilfs_warn(inode->i_sb,
1102 "tried to mark bad_inode dirty. ignored.");
1103 dump_stack();
1104 return;
1105 }
1106 if (mdi) {
1107 nilfs_mdt_mark_dirty(inode);
1108 return;
1109 }
1110 nilfs_transaction_begin(inode->i_sb, &ti, 0);
1111 __nilfs_mark_inode_dirty(inode, flags);
1112 nilfs_transaction_commit(inode->i_sb); /* never fails */
1113 }
1114
/**
 * nilfs_fiemap - report the extent mapping of a file
 * @inode: inode of the file to be mapped
 * @fieinfo: fiemap descriptor that receives extents through
 *           fiemap_fill_next_extent()
 * @start: byte offset at which the mapping starts
 * @len: length in bytes of the range to be mapped
 *
 * Walks the file from the block containing @start while holding the inode
 * lock.  Ranges found by nilfs_find_uncommitted_extent() are reported with
 * FIEMAP_EXTENT_DELALLOC and a physical address of 0.  Other blocks are
 * looked up with nilfs_bmap_lookup_contig() and physically contiguous runs
 * are merged into a single extent.  The extent that is followed by a hole
 * lying at or beyond i_size is flagged FIEMAP_EXTENT_LAST.
 *
 * Return: 0 on success, or a negative error code from fiemap_prep(),
 * fiemap_fill_next_extent() or nilfs_bmap_lookup_contig().
 */
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 __u64 start, __u64 len)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 logical = 0, phys = 0, size = 0;
	__u32 flags = 0;
	loff_t isize;
	sector_t blkoff, end_blkoff;
	sector_t delalloc_blkoff;
	unsigned long delalloc_blklen;
	unsigned int blkbits = inode->i_blkbits;
	int ret, n;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (ret)
		return ret;

	inode_lock(inode);

	isize = i_size_read(inode);

	blkoff = start >> blkbits;
	end_blkoff = (start + len - 1) >> blkbits;

	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
							&delalloc_blkoff);

	do {
		__u64 blkphy;
		unsigned int maxblocks;

		if (delalloc_blklen && blkoff == delalloc_blkoff) {
			if (size) {
				/* End of the current extent */
				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
			}
			if (blkoff > end_blkoff)
				break;

			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
			logical = blkoff << blkbits;
			phys = 0;
			/* Widen before shifting so the byte count cannot wrap */
			size = (__u64)delalloc_blklen << blkbits;

			blkoff = delalloc_blkoff + delalloc_blklen;
			delalloc_blklen = nilfs_find_uncommitted_extent(
				inode, blkoff, &delalloc_blkoff);
			continue;
		}

		/*
		 * Limit the number of blocks that we look up so as
		 * not to get into the next delayed allocation extent.
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT)) {
				/*
				 * Lookup failure: hand the error back to the
				 * caller instead of returning a truncated
				 * map as if it were complete.
				 */
				ret = n;
				break;
			}

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			/*
			 * n is a positive block count here; convert it to
			 * bytes in 64 bits, since shifting it as an int
			 * could overflow for long runs.
			 */
			__u64 nbytes = (__u64)n << blkbits;

			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += nbytes;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = nbytes;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = nbytes;
			}
			blkoff += n;
		}
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}
1245