1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * NILFS inode operations.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Ryusuke Konishi.
8 *
9 */
10
11 #include <linux/buffer_head.h>
12 #include <linux/gfp.h>
13 #include <linux/mpage.h>
14 #include <linux/pagemap.h>
15 #include <linux/writeback.h>
16 #include <linux/uio.h>
17 #include <linux/fiemap.h>
18 #include <linux/random.h>
19 #include "nilfs.h"
20 #include "btnode.h"
21 #include "segment.h"
22 #include "page.h"
23 #include "mdt.h"
24 #include "cpfile.h"
25 #include "ifile.h"
26
27 /**
28 * struct nilfs_iget_args - arguments used during comparison between inodes
29 * @ino: inode number
30 * @cno: checkpoint number
31 * @root: pointer to the NILFS root object (mounted checkpoint)
32 * @type: inode type
33 */
34 struct nilfs_iget_args {
35 u64 ino;
36 __u64 cno;
37 struct nilfs_root *root;
38 unsigned int type;
39 };
40
41 static int nilfs_iget_test(struct inode *inode, void *opaque);
42
43 void nilfs_inode_add_blocks(struct inode *inode, int n)
44 {
45 struct nilfs_root *root = NILFS_I(inode)->i_root;
46
47 inode_add_bytes(inode, i_blocksize(inode) * n);
48 if (root)
49 atomic64_add(n, &root->blocks_count);
50 }
51
52 void nilfs_inode_sub_blocks(struct inode *inode, int n)
53 {
54 struct nilfs_root *root = NILFS_I(inode)->i_root;
55
56 inode_sub_bytes(inode, i_blocksize(inode) * n);
57 if (root)
58 atomic64_sub(n, &root->blocks_count);
59 }
60
61 /**
62 * nilfs_get_block() - get a file block on the filesystem (callback function)
63 * @inode: inode struct of the target file
64 * @blkoff: file block number
65 * @bh_result: buffer head to be mapped on
66 * @create: whether to allocate the block if it has not been
67 * allocated yet.
68 *
69 * This function does not issue an actual read request for the specified
70 * data block; that is done by the VFS.
71 *
72 * Return: 0 on success, or a negative error code on failure.
73 */
74 int nilfs_get_block(struct inode *inode, sector_t blkoff,
75 struct buffer_head *bh_result, int create)
76 {
77 struct nilfs_inode_info *ii = NILFS_I(inode);
78 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
79 __u64 blknum = 0;
80 int err = 0, ret;
81 unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;
82
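/* The lookup translates virtual block numbers through the DAT file, so hold the DAT mutex for the duration of the lookup. */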
83 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
84 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
85 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
86 if (ret >= 0) { /* found */
87 map_bh(bh_result, inode->i_sb, blknum);
88 if (ret > 0)
89 bh_result->b_size = (ret << inode->i_blkbits);
90 goto out;
91 }
92 /* data block was not found */
93 if (ret == -ENOENT && create) {
94 struct nilfs_transaction_info ti;
95
96 bh_result->b_blocknr = 0;
97 err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
98 if (unlikely(err))
99 goto out;
100 err = nilfs_bmap_insert(ii->i_bmap, blkoff,
101 (unsigned long)bh_result);
102 if (unlikely(err != 0)) {
103 if (err == -EEXIST) {
104 /*
105 * The get_block() function could be called
106 * from multiple callers for an inode.
107 * However, the page having this block must
108 * be locked in this case.
109 */
110 nilfs_warn(inode->i_sb,
111 "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
112 __func__, inode->i_ino,
113 (unsigned long long)blkoff);
114 err = -EAGAIN;
115 }
116 nilfs_transaction_abort(inode->i_sb);
117 goto out;
118 }
119 nilfs_mark_inode_dirty_sync(inode);
120 nilfs_transaction_commit(inode->i_sb); /* never fails */
121 /* Error handling should be detailed */
122 set_buffer_new(bh_result);
123 set_buffer_delay(bh_result);
124 map_bh(bh_result, inode->i_sb, 0);
125 /* Disk block number must be changed to proper value */
126
127 } else if (ret == -ENOENT) {
128 /*
129 * not found is not an error (e.g. a hole); must return without
130 * the mapped state flag.
131 */
132 ;
133 } else {
134 err = ret;
135 }
136
137 out:
138 return err;
139 }
140
141 /**
142 * nilfs_read_folio() - implement the read_folio() method of the nilfs_aops
143 * address_space_operations.
144 * @file: file struct of the file to be read
145 * @folio: the folio to be read
146 *
147 * Return: 0 on success, or a negative error code on failure.
148 */
149 static int nilfs_read_folio(struct file *file, struct folio *folio)
150 {
151 return mpage_read_folio(folio, nilfs_get_block);
152 }
153
154 static void nilfs_readahead(struct readahead_control *rac)
155 {
156 mpage_readahead(rac, nilfs_get_block);
157 }
158
159 static int nilfs_writepages(struct address_space *mapping,
160 struct writeback_control *wbc)
161 {
162 struct inode *inode = mapping->host;
163 int err = 0;
164
165 if (sb_rdonly(inode->i_sb)) {
166 nilfs_clear_dirty_pages(mapping);
167 return -EROFS;
168 }
169
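/* Dirty data is normally written out by the segment constructor; only an explicit sync request triggers a dsync segment here. */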
170 if (wbc->sync_mode == WB_SYNC_ALL)
171 err = nilfs_construct_dsync_segment(inode->i_sb, inode,
172 wbc->range_start,
173 wbc->range_end);
174 return err;
175 }
176
177 static bool nilfs_dirty_folio(struct address_space *mapping,
178 struct folio *folio)
179 {
180 struct inode *inode = mapping->host;
181 struct buffer_head *head;
182 unsigned int nr_dirty = 0;
183 bool ret = filemap_dirty_folio(mapping, folio);
184
185 /*
186 * The folio may not be locked, e.g. if called from try_to_unmap_one().
187 */
188 spin_lock(&mapping->i_private_lock);
189 head = folio_buffers(folio);
190 if (head) {
191 struct buffer_head *bh = head;
192
193 do {
194 /* Do not mark hole blocks dirty */
195 if (buffer_dirty(bh) || !buffer_mapped(bh))
196 continue;
197
198 set_buffer_dirty(bh);
199 nr_dirty++;
200 } while (bh = bh->b_this_page, bh != head);
201 } else if (ret) {
202 nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
203 }
204 spin_unlock(&mapping->i_private_lock);
205
206 if (nr_dirty)
207 nilfs_set_file_dirty(inode, nr_dirty);
208 return ret;
209 }
210
211 void nilfs_write_failed(struct address_space *mapping, loff_t to)
212 {
213 struct inode *inode = mapping->host;
214
215 if (to > inode->i_size) {
216 truncate_pagecache(inode, inode->i_size);
217 nilfs_truncate(inode);
218 }
219 }
220
221 static int nilfs_write_begin(const struct kiocb *iocb,
222 struct address_space *mapping,
223 loff_t pos, unsigned len,
224 struct folio **foliop, void **fsdata)
225
226 {
227 struct inode *inode = mapping->host;
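/* Each write_begin opens a transaction; nilfs_write_end() commits it, or it is aborted below if preparing the write fails. */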
228 int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);
229
230 if (unlikely(err))
231 return err;
232
233 err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
234 if (unlikely(err)) {
235 nilfs_write_failed(mapping, pos + len);
236 nilfs_transaction_abort(inode->i_sb);
237 }
238 return err;
239 }
240
241 static int nilfs_write_end(const struct kiocb *iocb,
242 struct address_space *mapping,
243 loff_t pos, unsigned len, unsigned copied,
244 struct folio *folio, void *fsdata)
245 {
246 struct inode *inode = mapping->host;
247 unsigned int start = pos & (PAGE_SIZE - 1);
248 unsigned int nr_dirty;
249 int err;
250
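/* Count the buffers that become newly dirty before generic_write_end() marks the folio dirty. */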
251 nr_dirty = nilfs_page_count_clean_buffers(folio, start,
252 start + copied);
253 copied = generic_write_end(iocb, mapping, pos, len, copied, folio,
254 fsdata);
255 nilfs_set_file_dirty(inode, nr_dirty);
256 err = nilfs_transaction_commit(inode->i_sb);
257 return err ? : copied;
258 }
259
260 static ssize_t
261 nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
262 {
263 struct inode *inode = file_inode(iocb->ki_filp);
264
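/* Returning zero for a write makes the VFS fall back to buffered write. */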
265 if (iov_iter_rw(iter) == WRITE)
266 return 0;
267
268 /* Needs synchronization with the cleaner */
269 return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
270 }
271
272 const struct address_space_operations nilfs_aops = {
273 .read_folio = nilfs_read_folio,
274 .writepages = nilfs_writepages,
275 .dirty_folio = nilfs_dirty_folio,
276 .readahead = nilfs_readahead,
277 .write_begin = nilfs_write_begin,
278 .write_end = nilfs_write_end,
279 .invalidate_folio = block_invalidate_folio,
280 .direct_IO = nilfs_direct_IO,
281 .migrate_folio = buffer_migrate_folio_norefs,
282 .is_partially_uptodate = block_is_partially_uptodate,
283 };
284
285 const struct address_space_operations nilfs_buffer_cache_aops = {
286 .invalidate_folio = block_invalidate_folio,
287 };
288
289 static int nilfs_insert_inode_locked(struct inode *inode,
290 struct nilfs_root *root,
291 unsigned long ino)
292 {
293 struct nilfs_iget_args args = {
294 .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
295 };
296
297 return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
298 }
299
300 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
301 {
302 struct super_block *sb = dir->i_sb;
303 struct inode *inode;
304 struct nilfs_inode_info *ii;
305 struct nilfs_root *root;
306 struct buffer_head *bh;
307 int err = -ENOMEM;
308 ino_t ino;
309
310 inode = new_inode(sb);
311 if (unlikely(!inode))
312 goto failed;
313
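/* Drop __GFP_FS from page cache allocations to avoid recursing into the filesystem under memory pressure. */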
314 mapping_set_gfp_mask(inode->i_mapping,
315 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
316
317 root = NILFS_I(dir)->i_root;
318 ii = NILFS_I(inode);
319 ii->i_state = BIT(NILFS_I_NEW);
320 ii->i_type = NILFS_I_TYPE_NORMAL;
321 ii->i_root = root;
322
323 err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
324 if (unlikely(err))
325 goto failed_ifile_create_inode;
326 /* reference count of i_bh inherits from nilfs_mdt_read_block() */
327 ii->i_bh = bh;
328
329 atomic64_inc(&root->inodes_count);
330 inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
331 inode->i_ino = ino;
332 simple_inode_init_ts(inode);
333
334 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
335 err = nilfs_bmap_read(ii->i_bmap, NULL);
336 if (err < 0)
337 goto failed_after_creation;
338
339 set_bit(NILFS_I_BMAP, &ii->i_state);
340 /* No lock is needed; iget() ensures it. */
341 }
342
343 ii->i_flags = nilfs_mask_flags(
344 mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
345
346 /* ii->i_file_acl = 0; */
347 /* ii->i_dir_acl = 0; */
348 ii->i_dir_start_lookup = 0;
349 nilfs_set_inode_flags(inode);
350 inode->i_generation = get_random_u32();
351 if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
352 err = -EIO;
353 goto failed_after_creation;
354 }
355
356 err = nilfs_init_acl(inode, dir);
357 if (unlikely(err))
358 /*
359 * This never occurs.  When nilfs_init_acl() is supported,
360 * proper cancellation of the jobs above should be considered.
361 */
362 goto failed_after_creation;
363
364 return inode;
365
366 failed_after_creation:
367 clear_nlink(inode);
368 if (inode->i_state & I_NEW)
369 unlock_new_inode(inode);
370 iput(inode); /*
371 * raw_inode will be deleted through
372 * nilfs_evict_inode().
373 */
374 goto failed;
375
376 failed_ifile_create_inode:
377 make_bad_inode(inode);
378 iput(inode);
379 failed:
380 return ERR_PTR(err);
381 }
382
383 void nilfs_set_inode_flags(struct inode *inode)
384 {
385 unsigned int flags = NILFS_I(inode)->i_flags;
386 unsigned int new_fl = 0;
387
388 if (flags & FS_SYNC_FL)
389 new_fl |= S_SYNC;
390 if (flags & FS_APPEND_FL)
391 new_fl |= S_APPEND;
392 if (flags & FS_IMMUTABLE_FL)
393 new_fl |= S_IMMUTABLE;
394 if (flags & FS_NOATIME_FL)
395 new_fl |= S_NOATIME;
396 if (flags & FS_DIRSYNC_FL)
397 new_fl |= S_DIRSYNC;
398 inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
399 S_NOATIME | S_DIRSYNC);
400 }
401
402 int nilfs_read_inode_common(struct inode *inode,
403 struct nilfs_inode *raw_inode)
404 {
405 struct nilfs_inode_info *ii = NILFS_I(inode);
406 int err;
407
408 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
409 i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
410 i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
411 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
412 inode->i_size = le64_to_cpu(raw_inode->i_size);
413 inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
414 le32_to_cpu(raw_inode->i_mtime_nsec));
415 inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
416 le32_to_cpu(raw_inode->i_ctime_nsec));
417 inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
418 le32_to_cpu(raw_inode->i_mtime_nsec));
419 if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
420 return -EIO; /* this inode is for metadata and corrupted */
421 if (inode->i_nlink == 0)
422 return -ESTALE; /* this inode is deleted */
423
424 inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
425 ii->i_flags = le32_to_cpu(raw_inode->i_flags);
426 #if 0
427 ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
428 ii->i_dir_acl = S_ISREG(inode->i_mode) ?
429 0 : le32_to_cpu(raw_inode->i_dir_acl);
430 #endif
431 ii->i_dir_start_lookup = 0;
432 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
433
434 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
435 S_ISLNK(inode->i_mode)) {
436 err = nilfs_bmap_read(ii->i_bmap, raw_inode);
437 if (err < 0)
438 return err;
439 set_bit(NILFS_I_BMAP, &ii->i_state);
440 /* No lock is needed; iget() ensures it. */
441 }
442 return 0;
443 }
444
445 static int __nilfs_read_inode(struct super_block *sb,
446 struct nilfs_root *root, unsigned long ino,
447 struct inode *inode)
448 {
449 struct the_nilfs *nilfs = sb->s_fs_info;
450 struct buffer_head *bh;
451 struct nilfs_inode *raw_inode;
452 int err;
453
454 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
455 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
456 if (unlikely(err))
457 goto bad_inode;
458
459 raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);
460
461 err = nilfs_read_inode_common(inode, raw_inode);
462 if (err)
463 goto failed_unmap;
464
465 if (S_ISREG(inode->i_mode)) {
466 inode->i_op = &nilfs_file_inode_operations;
467 inode->i_fop = &nilfs_file_operations;
468 inode->i_mapping->a_ops = &nilfs_aops;
469 } else if (S_ISDIR(inode->i_mode)) {
470 inode->i_op = &nilfs_dir_inode_operations;
471 inode->i_fop = &nilfs_dir_operations;
472 inode->i_mapping->a_ops = &nilfs_aops;
473 } else if (S_ISLNK(inode->i_mode)) {
474 inode->i_op = &nilfs_symlink_inode_operations;
475 inode_nohighmem(inode);
476 inode->i_mapping->a_ops = &nilfs_aops;
477 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
478 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
479 inode->i_op = &nilfs_special_inode_operations;
480 init_special_inode(
481 inode, inode->i_mode,
482 huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
483 } else {
484 nilfs_error(sb,
485 "invalid file type bits in mode 0%o for inode %lu",
486 inode->i_mode, ino);
487 err = -EIO;
488 goto failed_unmap;
489 }
490 nilfs_ifile_unmap_inode(raw_inode);
491 brelse(bh);
492 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
493 nilfs_set_inode_flags(inode);
494 mapping_set_gfp_mask(inode->i_mapping,
495 mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
496 return 0;
497
498 failed_unmap:
499 nilfs_ifile_unmap_inode(raw_inode);
500 brelse(bh);
501
502 bad_inode:
503 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
504 return err;
505 }
506
507 static int nilfs_iget_test(struct inode *inode, void *opaque)
508 {
509 struct nilfs_iget_args *args = opaque;
510 struct nilfs_inode_info *ii;
511
512 if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
513 return 0;
514
515 ii = NILFS_I(inode);
516 if (ii->i_type != args->type)
517 return 0;
518
519 return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
520 }
521
522 static int nilfs_iget_set(struct inode *inode, void *opaque)
523 {
524 struct nilfs_iget_args *args = opaque;
525
526 inode->i_ino = args->ino;
527 NILFS_I(inode)->i_cno = args->cno;
528 NILFS_I(inode)->i_root = args->root;
529 NILFS_I(inode)->i_type = args->type;
530 if (args->root && args->ino == NILFS_ROOT_INO)
531 nilfs_get_root(args->root);
532 return 0;
533 }
534
535 struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
536 unsigned long ino)
537 {
538 struct nilfs_iget_args args = {
539 .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
540 };
541
542 return ilookup5(sb, ino, nilfs_iget_test, &args);
543 }
544
545 struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
546 unsigned long ino)
547 {
548 struct nilfs_iget_args args = {
549 .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
550 };
551
552 return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
553 }
554
555 struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
556 unsigned long ino)
557 {
558 struct inode *inode;
559 int err;
560
561 inode = nilfs_iget_locked(sb, root, ino);
562 if (unlikely(!inode))
563 return ERR_PTR(-ENOMEM);
564
565 if (!(inode->i_state & I_NEW)) {
566 if (!inode->i_nlink) {
567 iput(inode);
568 return ERR_PTR(-ESTALE);
569 }
570 return inode;
571 }
572
573 err = __nilfs_read_inode(sb, root, ino, inode);
574 if (unlikely(err)) {
575 iget_failed(inode);
576 return ERR_PTR(err);
577 }
578 unlock_new_inode(inode);
579 return inode;
580 }
581
582 struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
583 __u64 cno)
584 {
585 struct nilfs_iget_args args = {
586 .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
587 };
588 struct inode *inode;
589 int err;
590
591 inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
592 if (unlikely(!inode))
593 return ERR_PTR(-ENOMEM);
594 if (!(inode->i_state & I_NEW))
595 return inode;
596
597 err = nilfs_init_gcinode(inode);
598 if (unlikely(err)) {
599 iget_failed(inode);
600 return ERR_PTR(err);
601 }
602 unlock_new_inode(inode);
603 return inode;
604 }
605
606 /**
607 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
608 * @inode: inode object
609 *
610 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
611 * or does nothing if the inode already has it. This function allocates
612 * an additional inode to maintain the page cache of B-tree nodes one-to-one.
613 *
614 * Return: 0 on success, or %-ENOMEM if memory is insufficient.
615 */
616 int nilfs_attach_btree_node_cache(struct inode *inode)
617 {
618 struct nilfs_inode_info *ii = NILFS_I(inode);
619 struct inode *btnc_inode;
620 struct nilfs_iget_args args;
621
622 if (ii->i_assoc_inode)
623 return 0;
624
625 args.ino = inode->i_ino;
626 args.root = ii->i_root;
627 args.cno = ii->i_cno;
628 args.type = ii->i_type | NILFS_I_TYPE_BTNC;
629
630 btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
631 nilfs_iget_set, &args);
632 if (unlikely(!btnc_inode))
633 return -ENOMEM;
634 if (btnc_inode->i_state & I_NEW) {
635 nilfs_init_btnc_inode(btnc_inode);
636 unlock_new_inode(btnc_inode);
637 }
638 NILFS_I(btnc_inode)->i_assoc_inode = inode;
639 NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
640 ii->i_assoc_inode = btnc_inode;
641
642 return 0;
643 }
644
645 /**
646 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
647 * @inode: inode object
648 *
649 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
650 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
651 */
652 void nilfs_detach_btree_node_cache(struct inode *inode)
653 {
654 struct nilfs_inode_info *ii = NILFS_I(inode);
655 struct inode *btnc_inode = ii->i_assoc_inode;
656
657 if (btnc_inode) {
658 NILFS_I(btnc_inode)->i_assoc_inode = NULL;
659 ii->i_assoc_inode = NULL;
660 iput(btnc_inode);
661 }
662 }
663
664 /**
665 * nilfs_iget_for_shadow - obtain inode for shadow mapping
666 * @inode: inode object that uses shadow mapping
667 *
668 * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
669 * caches for shadow mapping. The page cache for data pages is set up
670 * in one inode and the one for b-tree node pages is set up in the
671 * other inode, which is attached to the former inode.
672 *
673 * Return: a pointer to the inode for data pages on success, or %-ENOMEM
674 * if memory is insufficient.
675 */
676 struct inode *nilfs_iget_for_shadow(struct inode *inode)
677 {
678 struct nilfs_iget_args args = {
679 .ino = inode->i_ino, .root = NULL, .cno = 0,
680 .type = NILFS_I_TYPE_SHADOW
681 };
682 struct inode *s_inode;
683 int err;
684
685 s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
686 nilfs_iget_set, &args);
687 if (unlikely(!s_inode))
688 return ERR_PTR(-ENOMEM);
689 if (!(s_inode->i_state & I_NEW))
690 return inode;
691
692 NILFS_I(s_inode)->i_flags = 0;
693 memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
694 mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
695 s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
696
697 err = nilfs_attach_btree_node_cache(s_inode);
698 if (unlikely(err)) {
699 iget_failed(s_inode);
700 return ERR_PTR(err);
701 }
702 unlock_new_inode(s_inode);
703 return s_inode;
704 }
705
706 /**
707 * nilfs_write_inode_common - export common inode information to on-disk inode
708 * @inode: inode object
709 * @raw_inode: on-disk inode
710 *
711 * This function writes standard information from the on-memory inode @inode
712 * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap
713 * data is not exported, nilfs_bmap_write() must be called separately during
714 * log writing.
715 */
716 void nilfs_write_inode_common(struct inode *inode,
717 struct nilfs_inode *raw_inode)
718 {
719 struct nilfs_inode_info *ii = NILFS_I(inode);
720
721 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
722 raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
723 raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
724 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
725 raw_inode->i_size = cpu_to_le64(inode->i_size);
726 raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
727 raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
728 raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
729 raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
730 raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
731
732 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
733 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
734
735 /*
736 * When extending the on-disk inode, nilfs->ns_inode_size should be
737 * checked before substituting values into any appended fields.
738 */
739 }
740
741 void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
742 {
743 ino_t ino = inode->i_ino;
744 struct nilfs_inode_info *ii = NILFS_I(inode);
745 struct inode *ifile = ii->i_root->ifile;
746 struct nilfs_inode *raw_inode;
747
748 raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);
749
750 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
751 memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
752 if (flags & I_DIRTY_DATASYNC)
753 set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
754
755 nilfs_write_inode_common(inode, raw_inode);
756
757 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
758 raw_inode->i_device_code =
759 cpu_to_le64(huge_encode_dev(inode->i_rdev));
760
761 nilfs_ifile_unmap_inode(raw_inode);
762 }
763
764 #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */
765
766 static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
767 unsigned long from)
768 {
769 __u64 b;
770 int ret;
771
772 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
773 return;
774 repeat:
775 ret = nilfs_bmap_last_key(ii->i_bmap, &b);
776 if (ret == -ENOENT)
777 return;
778 else if (ret < 0)
779 goto failed;
780
781 if (b < from)
782 return;
783
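/* Cut back at most NILFS_MAX_TRUNCATE_BLOCKS blocks per pass so memory pressure can be relieved between passes. */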
784 b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
785 ret = nilfs_bmap_truncate(ii->i_bmap, b);
786 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
787 if (!ret || (ret == -ENOMEM &&
788 nilfs_bmap_truncate(ii->i_bmap, b) == 0))
789 goto repeat;
790
791 failed:
792 nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
793 ret, ii->vfs_inode.i_ino);
794 }
795
796 void nilfs_truncate(struct inode *inode)
797 {
798 unsigned long blkoff;
799 unsigned int blocksize;
800 struct nilfs_transaction_info ti;
801 struct super_block *sb = inode->i_sb;
802 struct nilfs_inode_info *ii = NILFS_I(inode);
803
804 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
805 return;
806 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
807 return;
808
809 blocksize = sb->s_blocksize;
810 blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
811 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
812
813 block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);
814
815 nilfs_truncate_bmap(ii, blkoff);
816
817 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
818 if (IS_SYNC(inode))
819 nilfs_set_transaction_flag(NILFS_TI_SYNC);
820
821 nilfs_mark_inode_dirty(inode);
822 nilfs_set_file_dirty(inode, 0);
823 nilfs_transaction_commit(sb);
824 /*
825 * May construct a logical segment and may fail in sync mode.
826 * But truncate has no return value.
827 */
828 }
829
830 static void nilfs_clear_inode(struct inode *inode)
831 {
832 struct nilfs_inode_info *ii = NILFS_I(inode);
833
834 /*
835 * Free the resources allocated in nilfs_read_inode() here.
836 */
837 BUG_ON(!list_empty(&ii->i_dirty));
838 brelse(ii->i_bh);
839 ii->i_bh = NULL;
840
841 if (nilfs_is_metadata_file_inode(inode))
842 nilfs_mdt_clear(inode);
843
844 if (test_bit(NILFS_I_BMAP, &ii->i_state))
845 nilfs_bmap_clear(ii->i_bmap);
846
847 if (!(ii->i_type & NILFS_I_TYPE_BTNC))
848 nilfs_detach_btree_node_cache(inode);
849
850 if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
851 nilfs_put_root(ii->i_root);
852 }
853
854 void nilfs_evict_inode(struct inode *inode)
855 {
856 struct nilfs_transaction_info ti;
857 struct super_block *sb = inode->i_sb;
858 struct nilfs_inode_info *ii = NILFS_I(inode);
859 struct the_nilfs *nilfs;
860 int ret;
861
862 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
863 truncate_inode_pages_final(&inode->i_data);
864 clear_inode(inode);
865 nilfs_clear_inode(inode);
866 return;
867 }
868 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
869
870 truncate_inode_pages_final(&inode->i_data);
871
872 nilfs = sb->s_fs_info;
873 if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
874 /*
875 * If this inode is about to be disposed after the file system
876 * has been degraded to read-only due to file system corruption
877 * or after the writer has been detached, do not make any
878 * changes that cause writes, just clear it.
879 * Do this check after read-locking ns_segctor_sem by
880 * nilfs_transaction_begin() in order to avoid a race with
881 * the writer detach operation.
882 */
883 clear_inode(inode);
884 nilfs_clear_inode(inode);
885 nilfs_transaction_abort(sb);
886 return;
887 }
888
889 /* TODO: some of the following operations may fail. */
890 nilfs_truncate_bmap(ii, 0);
891 nilfs_mark_inode_dirty(inode);
892 clear_inode(inode);
893
894 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
895 if (!ret)
896 atomic64_dec(&ii->i_root->inodes_count);
897
898 nilfs_clear_inode(inode);
899
900 if (IS_SYNC(inode))
901 nilfs_set_transaction_flag(NILFS_TI_SYNC);
902 nilfs_transaction_commit(sb);
903 /*
904 * May construct a logical segment and may fail in sync mode.
905 * But delete_inode has no return value.
906 */
907 }
908
909 int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
910 struct iattr *iattr)
911 {
912 struct nilfs_transaction_info ti;
913 struct inode *inode = d_inode(dentry);
914 struct super_block *sb = inode->i_sb;
915 int err;
916
917 err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
918 if (err)
919 return err;
920
921 err = nilfs_transaction_begin(sb, &ti, 0);
922 if (unlikely(err))
923 return err;
924
925 if ((iattr->ia_valid & ATTR_SIZE) &&
926 iattr->ia_size != i_size_read(inode)) {
927 inode_dio_wait(inode);
928 truncate_setsize(inode, iattr->ia_size);
929 nilfs_truncate(inode);
930 }
931
932 setattr_copy(&nop_mnt_idmap, inode, iattr);
933 mark_inode_dirty(inode);
934
935 if (iattr->ia_valid & ATTR_MODE) {
936 err = nilfs_acl_chmod(inode);
937 if (unlikely(err))
938 goto out_err;
939 }
940
941 return nilfs_transaction_commit(sb);
942
943 out_err:
944 nilfs_transaction_abort(sb);
945 return err;
946 }
947
948 int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
949 int mask)
950 {
951 struct nilfs_root *root = NILFS_I(inode)->i_root;
952
953 if ((mask & MAY_WRITE) && root &&
954 root->cno != NILFS_CPTREE_CURRENT_CNO)
955 return -EROFS; /* snapshot is not writable */
956
957 return generic_permission(&nop_mnt_idmap, inode, mask);
958 }
959
960 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
961 {
962 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
963 struct nilfs_inode_info *ii = NILFS_I(inode);
964 int err;
965
966 spin_lock(&nilfs->ns_inode_lock);
967 if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
968 spin_unlock(&nilfs->ns_inode_lock);
969 err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
970 inode->i_ino, pbh);
971 if (unlikely(err))
972 return err;
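/* Recheck i_bh under the lock; another task may have installed it while the lock was dropped. */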
973 spin_lock(&nilfs->ns_inode_lock);
974 if (ii->i_bh == NULL)
975 ii->i_bh = *pbh;
976 else if (unlikely(!buffer_uptodate(ii->i_bh))) {
977 __brelse(ii->i_bh);
978 ii->i_bh = *pbh;
979 } else {
980 brelse(*pbh);
981 *pbh = ii->i_bh;
982 }
983 } else
984 *pbh = ii->i_bh;
985
986 get_bh(*pbh);
987 spin_unlock(&nilfs->ns_inode_lock);
988 return 0;
989 }
990
991 int nilfs_inode_dirty(struct inode *inode)
992 {
993 struct nilfs_inode_info *ii = NILFS_I(inode);
994 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
995 int ret = 0;
996
997 if (!list_empty(&ii->i_dirty)) {
998 spin_lock(&nilfs->ns_inode_lock);
999 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
1000 test_bit(NILFS_I_BUSY, &ii->i_state);
1001 spin_unlock(&nilfs->ns_inode_lock);
1002 }
1003 return ret;
1004 }
1005
1006 int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
1007 {
1008 struct nilfs_inode_info *ii = NILFS_I(inode);
1009 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1010
1011 atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
1012
1013 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
1014 return 0;
1015
1016 spin_lock(&nilfs->ns_inode_lock);
1017 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
1018 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
1019 /*
1020 * Because this routine may race with nilfs_dispose_list(),
1021 * we have to check NILFS_I_QUEUED here, too.
1022 */
1023 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
1024 /*
1025 * This will happen when somebody is freeing
1026 * this inode.
1027 */
1028 nilfs_warn(inode->i_sb,
1029 "cannot set file dirty (ino=%lu): the file is being freed",
1030 inode->i_ino);
1031 spin_unlock(&nilfs->ns_inode_lock);
1032 return -EINVAL; /*
1033 * NILFS_I_DIRTY may remain for
1034 * freeing inode.
1035 */
1036 }
1037 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
1038 set_bit(NILFS_I_QUEUED, &ii->i_state);
1039 }
1040 spin_unlock(&nilfs->ns_inode_lock);
1041 return 0;
1042 }
1043
1044 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
1045 {
1046 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1047 struct buffer_head *ibh;
1048 int err;
1049
1050 /*
1051 * Do not dirty inodes after the log writer has been detached
1052 * and its nilfs_root struct has been freed.
1053 */
1054 if (unlikely(nilfs_purging(nilfs)))
1055 return 0;
1056
1057 err = nilfs_load_inode_block(inode, &ibh);
1058 if (unlikely(err)) {
1059 nilfs_warn(inode->i_sb,
1060 "cannot mark inode dirty (ino=%lu): error %d loading inode block",
1061 inode->i_ino, err);
1062 return err;
1063 }
1064 nilfs_update_inode(inode, ibh, flags);
1065 mark_buffer_dirty(ibh);
1066 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
1067 brelse(ibh);
1068 return 0;
1069 }
1070
1071 /**
1072 * nilfs_dirty_inode - reflect changes on the given inode to an inode block.
1073 * @inode: inode of the file to be registered.
1074 * @flags: flags to determine the dirty state of the inode
1075 *
1076 * nilfs_dirty_inode() loads an inode block containing the specified
1077 * @inode and copies data from a nilfs_inode to a corresponding inode
1078 * entry in the inode block. This operation is excluded from the segment
1079 * construction. This function can be called both as a single operation
1080 * and as a part of indivisible file operations.
1081 */
1082 void nilfs_dirty_inode(struct inode *inode, int flags)
1083 {
1084 struct nilfs_transaction_info ti;
1085 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
1086
1087 if (is_bad_inode(inode)) {
1088 nilfs_warn(inode->i_sb,
1089 "tried to mark bad_inode dirty. ignored.");
1090 dump_stack();
1091 return;
1092 }
1093 if (mdi) {
1094 nilfs_mdt_mark_dirty(inode);
1095 return;
1096 }
1097 nilfs_transaction_begin(inode->i_sb, &ti, 0);
1098 __nilfs_mark_inode_dirty(inode, flags);
1099 nilfs_transaction_commit(inode->i_sb); /* never fails */
1100 }
1101
1102 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1103 __u64 start, __u64 len)
1104 {
1105 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1106 __u64 logical = 0, phys = 0, size = 0;
1107 __u32 flags = 0;
1108 loff_t isize;
1109 sector_t blkoff, end_blkoff;
1110 sector_t delalloc_blkoff;
1111 unsigned long delalloc_blklen;
1112 unsigned int blkbits = inode->i_blkbits;
1113 int ret, n;
1114
1115 ret = fiemap_prep(inode, fieinfo, start, &len, 0);
1116 if (ret)
1117 return ret;
1118
1119 inode_lock(inode);
1120
1121 isize = i_size_read(inode);
1122
1123 blkoff = start >> blkbits;
1124 end_blkoff = (start + len - 1) >> blkbits;
1125
1126 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
1127 &delalloc_blkoff);
1128
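/* Walk the range, reporting uncommitted (delayed allocation) extents found in the page cache and committed extents found via the bmap. */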
1129 do {
1130 __u64 blkphy;
1131 unsigned int maxblocks;
1132
1133 if (delalloc_blklen && blkoff == delalloc_blkoff) {
1134 if (size) {
1135 /* End of the current extent */
1136 ret = fiemap_fill_next_extent(
1137 fieinfo, logical, phys, size, flags);
1138 if (ret)
1139 break;
1140 }
1141 if (blkoff > end_blkoff)
1142 break;
1143
1144 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
1145 logical = blkoff << blkbits;
1146 phys = 0;
1147 size = delalloc_blklen << blkbits;
1148
1149 blkoff = delalloc_blkoff + delalloc_blklen;
1150 delalloc_blklen = nilfs_find_uncommitted_extent(
1151 inode, blkoff, &delalloc_blkoff);
1152 continue;
1153 }
1154
1155 /*
1156 * Limit the number of blocks that we look up so as
1157 * not to get into the next delayed allocation extent.
1158 */
1159 maxblocks = INT_MAX;
1160 if (delalloc_blklen)
1161 maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
1162 maxblocks);
1163 blkphy = 0;
1164
1165 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
1166 n = nilfs_bmap_lookup_contig(
1167 NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
1168 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
1169
1170 if (n < 0) {
1171 int past_eof;
1172
1173 if (unlikely(n != -ENOENT))
1174 break; /* error */
1175
1176 /* HOLE */
1177 blkoff++;
1178 past_eof = ((blkoff << blkbits) >= isize);
1179
1180 if (size) {
1181 /* End of the current extent */
1182
1183 if (past_eof)
1184 flags |= FIEMAP_EXTENT_LAST;
1185
1186 ret = fiemap_fill_next_extent(
1187 fieinfo, logical, phys, size, flags);
1188 if (ret)
1189 break;
1190 size = 0;
1191 }
1192 if (blkoff > end_blkoff || past_eof)
1193 break;
1194 } else {
1195 if (size) {
1196 if (phys && blkphy << blkbits == phys + size) {
1197 /* The current extent goes on */
1198 size += (u64)n << blkbits;
1199 } else {
1200 /* Terminate the current extent */
1201 ret = fiemap_fill_next_extent(
1202 fieinfo, logical, phys, size,
1203 flags);
1204 if (ret || blkoff > end_blkoff)
1205 break;
1206
1207 /* Start another extent */
1208 flags = FIEMAP_EXTENT_MERGED;
1209 logical = blkoff << blkbits;
1210 phys = blkphy << blkbits;
1211 size = (u64)n << blkbits;
1212 }
1213 } else {
1214 /* Start a new extent */
1215 flags = FIEMAP_EXTENT_MERGED;
1216 logical = blkoff << blkbits;
1217 phys = blkphy << blkbits;
1218 size = (u64)n << blkbits;
1219 }
1220 blkoff += n;
1221 }
1222 cond_resched();
1223 } while (true);
1224
1225 /* If ret is 1 then we just hit the end of the extent array */
1226 if (ret == 1)
1227 ret = 0;
1228
1229 inode_unlock(inode);
1230 return ret;
1231 }
1232