xref: /linux/fs/nilfs2/inode.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * NILFS inode operations.
4  *
5  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6  *
7  * Written by Ryusuke Konishi.
8  *
9  */
10 
11 #include <linux/buffer_head.h>
12 #include <linux/gfp.h>
13 #include <linux/mpage.h>
14 #include <linux/pagemap.h>
15 #include <linux/writeback.h>
16 #include <linux/uio.h>
17 #include <linux/fiemap.h>
18 #include <linux/random.h>
19 #include "nilfs.h"
20 #include "btnode.h"
21 #include "segment.h"
22 #include "page.h"
23 #include "mdt.h"
24 #include "cpfile.h"
25 #include "ifile.h"
26 
27 /**
28  * struct nilfs_iget_args - arguments used during comparison between inodes
29  * @ino: inode number
30  * @cno: checkpoint number
31  * @root: pointer on NILFS root object (mounted checkpoint)
32  * @type: inode type
33  */
34 struct nilfs_iget_args {
35 	u64 ino;
36 	__u64 cno;
37 	struct nilfs_root *root;
38 	unsigned int type;
39 };
40 
41 static int nilfs_iget_test(struct inode *inode, void *opaque);
42 
43 void nilfs_inode_add_blocks(struct inode *inode, int n)
44 {
45 	struct nilfs_root *root = NILFS_I(inode)->i_root;
46 
47 	inode_add_bytes(inode, i_blocksize(inode) * n);
48 	if (root)
49 		atomic64_add(n, &root->blocks_count);
50 }
51 
52 void nilfs_inode_sub_blocks(struct inode *inode, int n)
53 {
54 	struct nilfs_root *root = NILFS_I(inode)->i_root;
55 
56 	inode_sub_bytes(inode, i_blocksize(inode) * n);
57 	if (root)
58 		atomic64_sub(n, &root->blocks_count);
59 }
60 
61 /**
62  * nilfs_get_block() - get a file block on the filesystem (callback function)
63  * @inode: inode struct of the target file
64  * @blkoff: file block number
65  * @bh_result: buffer head to be mapped on
66  * @create: indicate whether allocating the block or not when it has not
67  *      been allocated yet.
68  *
69  * This function does not issue actual read request of the specified data
70  * block. It is done by VFS.
71  */
72 int nilfs_get_block(struct inode *inode, sector_t blkoff,
73 		    struct buffer_head *bh_result, int create)
74 {
75 	struct nilfs_inode_info *ii = NILFS_I(inode);
76 	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
77 	__u64 blknum = 0;
78 	int err = 0, ret;
79 	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;
80 
81 	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
82 	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
83 	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
84 	if (ret >= 0) {	/* found */
85 		map_bh(bh_result, inode->i_sb, blknum);
86 		if (ret > 0)
87 			bh_result->b_size = (ret << inode->i_blkbits);
88 		goto out;
89 	}
90 	/* data block was not found */
91 	if (ret == -ENOENT && create) {
92 		struct nilfs_transaction_info ti;
93 
94 		bh_result->b_blocknr = 0;
95 		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
96 		if (unlikely(err))
97 			goto out;
98 		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
99 					(unsigned long)bh_result);
100 		if (unlikely(err != 0)) {
101 			if (err == -EEXIST) {
102 				/*
103 				 * The get_block() function could be called
104 				 * from multiple callers for an inode.
105 				 * However, the page having this block must
106 				 * be locked in this case.
107 				 */
108 				nilfs_warn(inode->i_sb,
109 					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
110 					   __func__, inode->i_ino,
111 					   (unsigned long long)blkoff);
112 				err = -EAGAIN;
113 			}
114 			nilfs_transaction_abort(inode->i_sb);
115 			goto out;
116 		}
117 		nilfs_mark_inode_dirty_sync(inode);
118 		nilfs_transaction_commit(inode->i_sb); /* never fails */
119 		/* Error handling should be detailed */
120 		set_buffer_new(bh_result);
121 		set_buffer_delay(bh_result);
122 		map_bh(bh_result, inode->i_sb, 0);
123 		/* Disk block number must be changed to proper value */
124 
125 	} else if (ret == -ENOENT) {
126 		/*
127 		 * not found is not error (e.g. hole); must return without
128 		 * the mapped state flag.
129 		 */
130 		;
131 	} else {
132 		err = ret;
133 	}
134 
135  out:
136 	return err;
137 }
138 
139 /**
140  * nilfs_read_folio() - implement read_folio() method of nilfs_aops {}
141  * address_space_operations.
142  * @file: file struct of the file to be read
143  * @folio: the folio to be read
144  */
145 static int nilfs_read_folio(struct file *file, struct folio *folio)
146 {
147 	return mpage_read_folio(folio, nilfs_get_block);
148 }
149 
150 static void nilfs_readahead(struct readahead_control *rac)
151 {
152 	mpage_readahead(rac, nilfs_get_block);
153 }
154 
155 static int nilfs_writepages(struct address_space *mapping,
156 			    struct writeback_control *wbc)
157 {
158 	struct inode *inode = mapping->host;
159 	int err = 0;
160 
161 	if (sb_rdonly(inode->i_sb)) {
162 		nilfs_clear_dirty_pages(mapping);
163 		return -EROFS;
164 	}
165 
166 	if (wbc->sync_mode == WB_SYNC_ALL)
167 		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
168 						    wbc->range_start,
169 						    wbc->range_end);
170 	return err;
171 }
172 
173 static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
174 {
175 	struct folio *folio = page_folio(page);
176 	struct inode *inode = folio->mapping->host;
177 	int err;
178 
179 	if (sb_rdonly(inode->i_sb)) {
180 		/*
181 		 * It means that filesystem was remounted in read-only
182 		 * mode because of error or metadata corruption. But we
183 		 * have dirty pages that try to be flushed in background.
184 		 * So, here we simply discard this dirty page.
185 		 */
186 		nilfs_clear_folio_dirty(folio);
187 		folio_unlock(folio);
188 		return -EROFS;
189 	}
190 
191 	folio_redirty_for_writepage(wbc, folio);
192 	folio_unlock(folio);
193 
194 	if (wbc->sync_mode == WB_SYNC_ALL) {
195 		err = nilfs_construct_segment(inode->i_sb);
196 		if (unlikely(err))
197 			return err;
198 	} else if (wbc->for_reclaim)
199 		nilfs_flush_segment(inode->i_sb, inode->i_ino);
200 
201 	return 0;
202 }
203 
204 static bool nilfs_dirty_folio(struct address_space *mapping,
205 		struct folio *folio)
206 {
207 	struct inode *inode = mapping->host;
208 	struct buffer_head *head;
209 	unsigned int nr_dirty = 0;
210 	bool ret = filemap_dirty_folio(mapping, folio);
211 
212 	/*
213 	 * The page may not be locked, eg if called from try_to_unmap_one()
214 	 */
215 	spin_lock(&mapping->i_private_lock);
216 	head = folio_buffers(folio);
217 	if (head) {
218 		struct buffer_head *bh = head;
219 
220 		do {
221 			/* Do not mark hole blocks dirty */
222 			if (buffer_dirty(bh) || !buffer_mapped(bh))
223 				continue;
224 
225 			set_buffer_dirty(bh);
226 			nr_dirty++;
227 		} while (bh = bh->b_this_page, bh != head);
228 	} else if (ret) {
229 		nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
230 	}
231 	spin_unlock(&mapping->i_private_lock);
232 
233 	if (nr_dirty)
234 		nilfs_set_file_dirty(inode, nr_dirty);
235 	return ret;
236 }
237 
238 void nilfs_write_failed(struct address_space *mapping, loff_t to)
239 {
240 	struct inode *inode = mapping->host;
241 
242 	if (to > inode->i_size) {
243 		truncate_pagecache(inode, inode->i_size);
244 		nilfs_truncate(inode);
245 	}
246 }
247 
248 static int nilfs_write_begin(struct file *file, struct address_space *mapping,
249 			     loff_t pos, unsigned len,
250 			     struct folio **foliop, void **fsdata)
251 
252 {
253 	struct inode *inode = mapping->host;
254 	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);
255 
256 	if (unlikely(err))
257 		return err;
258 
259 	err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
260 	if (unlikely(err)) {
261 		nilfs_write_failed(mapping, pos + len);
262 		nilfs_transaction_abort(inode->i_sb);
263 	}
264 	return err;
265 }
266 
267 static int nilfs_write_end(struct file *file, struct address_space *mapping,
268 			   loff_t pos, unsigned len, unsigned copied,
269 			   struct folio *folio, void *fsdata)
270 {
271 	struct inode *inode = mapping->host;
272 	unsigned int start = pos & (PAGE_SIZE - 1);
273 	unsigned int nr_dirty;
274 	int err;
275 
276 	nr_dirty = nilfs_page_count_clean_buffers(&folio->page, start,
277 						  start + copied);
278 	copied = generic_write_end(file, mapping, pos, len, copied, folio,
279 				   fsdata);
280 	nilfs_set_file_dirty(inode, nr_dirty);
281 	err = nilfs_transaction_commit(inode->i_sb);
282 	return err ? : copied;
283 }
284 
285 static ssize_t
286 nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
287 {
288 	struct inode *inode = file_inode(iocb->ki_filp);
289 
290 	if (iov_iter_rw(iter) == WRITE)
291 		return 0;
292 
293 	/* Needs synchronization with the cleaner */
294 	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
295 }
296 
297 const struct address_space_operations nilfs_aops = {
298 	.writepage		= nilfs_writepage,
299 	.read_folio		= nilfs_read_folio,
300 	.writepages		= nilfs_writepages,
301 	.dirty_folio		= nilfs_dirty_folio,
302 	.readahead		= nilfs_readahead,
303 	.write_begin		= nilfs_write_begin,
304 	.write_end		= nilfs_write_end,
305 	.invalidate_folio	= block_invalidate_folio,
306 	.direct_IO		= nilfs_direct_IO,
307 	.is_partially_uptodate  = block_is_partially_uptodate,
308 };
309 
310 static int nilfs_insert_inode_locked(struct inode *inode,
311 				     struct nilfs_root *root,
312 				     unsigned long ino)
313 {
314 	struct nilfs_iget_args args = {
315 		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
316 	};
317 
318 	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
319 }
320 
321 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
322 {
323 	struct super_block *sb = dir->i_sb;
324 	struct inode *inode;
325 	struct nilfs_inode_info *ii;
326 	struct nilfs_root *root;
327 	struct buffer_head *bh;
328 	int err = -ENOMEM;
329 	ino_t ino;
330 
331 	inode = new_inode(sb);
332 	if (unlikely(!inode))
333 		goto failed;
334 
335 	mapping_set_gfp_mask(inode->i_mapping,
336 			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
337 
338 	root = NILFS_I(dir)->i_root;
339 	ii = NILFS_I(inode);
340 	ii->i_state = BIT(NILFS_I_NEW);
341 	ii->i_type = NILFS_I_TYPE_NORMAL;
342 	ii->i_root = root;
343 
344 	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
345 	if (unlikely(err))
346 		goto failed_ifile_create_inode;
347 	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
348 	ii->i_bh = bh;
349 
350 	atomic64_inc(&root->inodes_count);
351 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
352 	inode->i_ino = ino;
353 	simple_inode_init_ts(inode);
354 
355 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
356 		err = nilfs_bmap_read(ii->i_bmap, NULL);
357 		if (err < 0)
358 			goto failed_after_creation;
359 
360 		set_bit(NILFS_I_BMAP, &ii->i_state);
361 		/* No lock is needed; iget() ensures it. */
362 	}
363 
364 	ii->i_flags = nilfs_mask_flags(
365 		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
366 
367 	/* ii->i_file_acl = 0; */
368 	/* ii->i_dir_acl = 0; */
369 	ii->i_dir_start_lookup = 0;
370 	nilfs_set_inode_flags(inode);
371 	inode->i_generation = get_random_u32();
372 	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
373 		err = -EIO;
374 		goto failed_after_creation;
375 	}
376 
377 	err = nilfs_init_acl(inode, dir);
378 	if (unlikely(err))
379 		/*
380 		 * Never occur.  When supporting nilfs_init_acl(),
381 		 * proper cancellation of above jobs should be considered.
382 		 */
383 		goto failed_after_creation;
384 
385 	return inode;
386 
387  failed_after_creation:
388 	clear_nlink(inode);
389 	if (inode->i_state & I_NEW)
390 		unlock_new_inode(inode);
391 	iput(inode);  /*
392 		       * raw_inode will be deleted through
393 		       * nilfs_evict_inode().
394 		       */
395 	goto failed;
396 
397  failed_ifile_create_inode:
398 	make_bad_inode(inode);
399 	iput(inode);
400  failed:
401 	return ERR_PTR(err);
402 }
403 
404 void nilfs_set_inode_flags(struct inode *inode)
405 {
406 	unsigned int flags = NILFS_I(inode)->i_flags;
407 	unsigned int new_fl = 0;
408 
409 	if (flags & FS_SYNC_FL)
410 		new_fl |= S_SYNC;
411 	if (flags & FS_APPEND_FL)
412 		new_fl |= S_APPEND;
413 	if (flags & FS_IMMUTABLE_FL)
414 		new_fl |= S_IMMUTABLE;
415 	if (flags & FS_NOATIME_FL)
416 		new_fl |= S_NOATIME;
417 	if (flags & FS_DIRSYNC_FL)
418 		new_fl |= S_DIRSYNC;
419 	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
420 			S_NOATIME | S_DIRSYNC);
421 }
422 
423 int nilfs_read_inode_common(struct inode *inode,
424 			    struct nilfs_inode *raw_inode)
425 {
426 	struct nilfs_inode_info *ii = NILFS_I(inode);
427 	int err;
428 
429 	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
430 	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
431 	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
432 	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
433 	inode->i_size = le64_to_cpu(raw_inode->i_size);
434 	inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
435 			le32_to_cpu(raw_inode->i_mtime_nsec));
436 	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
437 			le32_to_cpu(raw_inode->i_ctime_nsec));
438 	inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
439 			le32_to_cpu(raw_inode->i_mtime_nsec));
440 	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
441 		return -EIO; /* this inode is for metadata and corrupted */
442 	if (inode->i_nlink == 0)
443 		return -ESTALE; /* this inode is deleted */
444 
445 	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
446 	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
447 #if 0
448 	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
449 	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
450 		0 : le32_to_cpu(raw_inode->i_dir_acl);
451 #endif
452 	ii->i_dir_start_lookup = 0;
453 	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
454 
455 	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
456 	    S_ISLNK(inode->i_mode)) {
457 		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
458 		if (err < 0)
459 			return err;
460 		set_bit(NILFS_I_BMAP, &ii->i_state);
461 		/* No lock is needed; iget() ensures it. */
462 	}
463 	return 0;
464 }
465 
466 static int __nilfs_read_inode(struct super_block *sb,
467 			      struct nilfs_root *root, unsigned long ino,
468 			      struct inode *inode)
469 {
470 	struct the_nilfs *nilfs = sb->s_fs_info;
471 	struct buffer_head *bh;
472 	struct nilfs_inode *raw_inode;
473 	int err;
474 
475 	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
476 	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
477 	if (unlikely(err))
478 		goto bad_inode;
479 
480 	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);
481 
482 	err = nilfs_read_inode_common(inode, raw_inode);
483 	if (err)
484 		goto failed_unmap;
485 
486 	if (S_ISREG(inode->i_mode)) {
487 		inode->i_op = &nilfs_file_inode_operations;
488 		inode->i_fop = &nilfs_file_operations;
489 		inode->i_mapping->a_ops = &nilfs_aops;
490 	} else if (S_ISDIR(inode->i_mode)) {
491 		inode->i_op = &nilfs_dir_inode_operations;
492 		inode->i_fop = &nilfs_dir_operations;
493 		inode->i_mapping->a_ops = &nilfs_aops;
494 	} else if (S_ISLNK(inode->i_mode)) {
495 		inode->i_op = &nilfs_symlink_inode_operations;
496 		inode_nohighmem(inode);
497 		inode->i_mapping->a_ops = &nilfs_aops;
498 	} else {
499 		inode->i_op = &nilfs_special_inode_operations;
500 		init_special_inode(
501 			inode, inode->i_mode,
502 			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
503 	}
504 	nilfs_ifile_unmap_inode(raw_inode);
505 	brelse(bh);
506 	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
507 	nilfs_set_inode_flags(inode);
508 	mapping_set_gfp_mask(inode->i_mapping,
509 			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
510 	return 0;
511 
512  failed_unmap:
513 	nilfs_ifile_unmap_inode(raw_inode);
514 	brelse(bh);
515 
516  bad_inode:
517 	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
518 	return err;
519 }
520 
521 static int nilfs_iget_test(struct inode *inode, void *opaque)
522 {
523 	struct nilfs_iget_args *args = opaque;
524 	struct nilfs_inode_info *ii;
525 
526 	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
527 		return 0;
528 
529 	ii = NILFS_I(inode);
530 	if (ii->i_type != args->type)
531 		return 0;
532 
533 	return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
534 }
535 
536 static int nilfs_iget_set(struct inode *inode, void *opaque)
537 {
538 	struct nilfs_iget_args *args = opaque;
539 
540 	inode->i_ino = args->ino;
541 	NILFS_I(inode)->i_cno = args->cno;
542 	NILFS_I(inode)->i_root = args->root;
543 	NILFS_I(inode)->i_type = args->type;
544 	if (args->root && args->ino == NILFS_ROOT_INO)
545 		nilfs_get_root(args->root);
546 	return 0;
547 }
548 
549 struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
550 			    unsigned long ino)
551 {
552 	struct nilfs_iget_args args = {
553 		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
554 	};
555 
556 	return ilookup5(sb, ino, nilfs_iget_test, &args);
557 }
558 
559 struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
560 				unsigned long ino)
561 {
562 	struct nilfs_iget_args args = {
563 		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
564 	};
565 
566 	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
567 }
568 
569 struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
570 			 unsigned long ino)
571 {
572 	struct inode *inode;
573 	int err;
574 
575 	inode = nilfs_iget_locked(sb, root, ino);
576 	if (unlikely(!inode))
577 		return ERR_PTR(-ENOMEM);
578 	if (!(inode->i_state & I_NEW))
579 		return inode;
580 
581 	err = __nilfs_read_inode(sb, root, ino, inode);
582 	if (unlikely(err)) {
583 		iget_failed(inode);
584 		return ERR_PTR(err);
585 	}
586 	unlock_new_inode(inode);
587 	return inode;
588 }
589 
590 struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
591 				__u64 cno)
592 {
593 	struct nilfs_iget_args args = {
594 		.ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
595 	};
596 	struct inode *inode;
597 	int err;
598 
599 	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
600 	if (unlikely(!inode))
601 		return ERR_PTR(-ENOMEM);
602 	if (!(inode->i_state & I_NEW))
603 		return inode;
604 
605 	err = nilfs_init_gcinode(inode);
606 	if (unlikely(err)) {
607 		iget_failed(inode);
608 		return ERR_PTR(err);
609 	}
610 	unlock_new_inode(inode);
611 	return inode;
612 }
613 
614 /**
615  * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
616  * @inode: inode object
617  *
618  * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
619  * or does nothing if the inode already has it.  This function allocates
620  * an additional inode to maintain page cache of B-tree nodes one-on-one.
621  *
622  * Return Value: On success, 0 is returned. On errors, one of the following
623  * negative error code is returned.
624  *
625  * %-ENOMEM - Insufficient memory available.
626  */
627 int nilfs_attach_btree_node_cache(struct inode *inode)
628 {
629 	struct nilfs_inode_info *ii = NILFS_I(inode);
630 	struct inode *btnc_inode;
631 	struct nilfs_iget_args args;
632 
633 	if (ii->i_assoc_inode)
634 		return 0;
635 
636 	args.ino = inode->i_ino;
637 	args.root = ii->i_root;
638 	args.cno = ii->i_cno;
639 	args.type = ii->i_type | NILFS_I_TYPE_BTNC;
640 
641 	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
642 				  nilfs_iget_set, &args);
643 	if (unlikely(!btnc_inode))
644 		return -ENOMEM;
645 	if (btnc_inode->i_state & I_NEW) {
646 		nilfs_init_btnc_inode(btnc_inode);
647 		unlock_new_inode(btnc_inode);
648 	}
649 	NILFS_I(btnc_inode)->i_assoc_inode = inode;
650 	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
651 	ii->i_assoc_inode = btnc_inode;
652 
653 	return 0;
654 }
655 
656 /**
657  * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
658  * @inode: inode object
659  *
660  * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
661  * holder inode bound to @inode, or does nothing if @inode doesn't have it.
662  */
663 void nilfs_detach_btree_node_cache(struct inode *inode)
664 {
665 	struct nilfs_inode_info *ii = NILFS_I(inode);
666 	struct inode *btnc_inode = ii->i_assoc_inode;
667 
668 	if (btnc_inode) {
669 		NILFS_I(btnc_inode)->i_assoc_inode = NULL;
670 		ii->i_assoc_inode = NULL;
671 		iput(btnc_inode);
672 	}
673 }
674 
675 /**
676  * nilfs_iget_for_shadow - obtain inode for shadow mapping
677  * @inode: inode object that uses shadow mapping
678  *
679  * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
680  * caches for shadow mapping.  The page cache for data pages is set up
681  * in one inode and the one for b-tree node pages is set up in the
682  * other inode, which is attached to the former inode.
683  *
684  * Return Value: On success, a pointer to the inode for data pages is
685  * returned. On errors, one of the following negative error code is returned
686  * in a pointer type.
687  *
688  * %-ENOMEM - Insufficient memory available.
689  */
690 struct inode *nilfs_iget_for_shadow(struct inode *inode)
691 {
692 	struct nilfs_iget_args args = {
693 		.ino = inode->i_ino, .root = NULL, .cno = 0,
694 		.type = NILFS_I_TYPE_SHADOW
695 	};
696 	struct inode *s_inode;
697 	int err;
698 
699 	s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
700 			       nilfs_iget_set, &args);
701 	if (unlikely(!s_inode))
702 		return ERR_PTR(-ENOMEM);
703 	if (!(s_inode->i_state & I_NEW))
704 		return inode;
705 
706 	NILFS_I(s_inode)->i_flags = 0;
707 	memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
708 	mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
709 
710 	err = nilfs_attach_btree_node_cache(s_inode);
711 	if (unlikely(err)) {
712 		iget_failed(s_inode);
713 		return ERR_PTR(err);
714 	}
715 	unlock_new_inode(s_inode);
716 	return s_inode;
717 }
718 
719 /**
720  * nilfs_write_inode_common - export common inode information to on-disk inode
721  * @inode:     inode object
722  * @raw_inode: on-disk inode
723  *
724  * This function writes standard information from the on-memory inode @inode
725  * to @raw_inode on ifile, cpfile or a super root block.  Since inode bmap
726  * data is not exported, nilfs_bmap_write() must be called separately during
727  * log writing.
728  */
729 void nilfs_write_inode_common(struct inode *inode,
730 			      struct nilfs_inode *raw_inode)
731 {
732 	struct nilfs_inode_info *ii = NILFS_I(inode);
733 
734 	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
735 	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
736 	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
737 	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
738 	raw_inode->i_size = cpu_to_le64(inode->i_size);
739 	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
740 	raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
741 	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
742 	raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
743 	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
744 
745 	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
746 	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
747 
748 	/*
749 	 * When extending inode, nilfs->ns_inode_size should be checked
750 	 * for substitutions of appended fields.
751 	 */
752 }
753 
754 void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
755 {
756 	ino_t ino = inode->i_ino;
757 	struct nilfs_inode_info *ii = NILFS_I(inode);
758 	struct inode *ifile = ii->i_root->ifile;
759 	struct nilfs_inode *raw_inode;
760 
761 	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);
762 
763 	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
764 		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
765 	if (flags & I_DIRTY_DATASYNC)
766 		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
767 
768 	nilfs_write_inode_common(inode, raw_inode);
769 
770 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
771 		raw_inode->i_device_code =
772 			cpu_to_le64(huge_encode_dev(inode->i_rdev));
773 
774 	nilfs_ifile_unmap_inode(raw_inode);
775 }
776 
777 #define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */
778 
779 static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
780 				unsigned long from)
781 {
782 	__u64 b;
783 	int ret;
784 
785 	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
786 		return;
787 repeat:
788 	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
789 	if (ret == -ENOENT)
790 		return;
791 	else if (ret < 0)
792 		goto failed;
793 
794 	if (b < from)
795 		return;
796 
797 	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
798 	ret = nilfs_bmap_truncate(ii->i_bmap, b);
799 	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
800 	if (!ret || (ret == -ENOMEM &&
801 		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
802 		goto repeat;
803 
804 failed:
805 	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
806 		   ret, ii->vfs_inode.i_ino);
807 }
808 
809 void nilfs_truncate(struct inode *inode)
810 {
811 	unsigned long blkoff;
812 	unsigned int blocksize;
813 	struct nilfs_transaction_info ti;
814 	struct super_block *sb = inode->i_sb;
815 	struct nilfs_inode_info *ii = NILFS_I(inode);
816 
817 	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
818 		return;
819 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
820 		return;
821 
822 	blocksize = sb->s_blocksize;
823 	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
824 	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
825 
826 	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);
827 
828 	nilfs_truncate_bmap(ii, blkoff);
829 
830 	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
831 	if (IS_SYNC(inode))
832 		nilfs_set_transaction_flag(NILFS_TI_SYNC);
833 
834 	nilfs_mark_inode_dirty(inode);
835 	nilfs_set_file_dirty(inode, 0);
836 	nilfs_transaction_commit(sb);
837 	/*
838 	 * May construct a logical segment and may fail in sync mode.
839 	 * But truncate has no return value.
840 	 */
841 }
842 
843 static void nilfs_clear_inode(struct inode *inode)
844 {
845 	struct nilfs_inode_info *ii = NILFS_I(inode);
846 
847 	/*
848 	 * Free resources allocated in nilfs_read_inode(), here.
849 	 */
850 	BUG_ON(!list_empty(&ii->i_dirty));
851 	brelse(ii->i_bh);
852 	ii->i_bh = NULL;
853 
854 	if (nilfs_is_metadata_file_inode(inode))
855 		nilfs_mdt_clear(inode);
856 
857 	if (test_bit(NILFS_I_BMAP, &ii->i_state))
858 		nilfs_bmap_clear(ii->i_bmap);
859 
860 	if (!(ii->i_type & NILFS_I_TYPE_BTNC))
861 		nilfs_detach_btree_node_cache(inode);
862 
863 	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
864 		nilfs_put_root(ii->i_root);
865 }
866 
867 void nilfs_evict_inode(struct inode *inode)
868 {
869 	struct nilfs_transaction_info ti;
870 	struct super_block *sb = inode->i_sb;
871 	struct nilfs_inode_info *ii = NILFS_I(inode);
872 	struct the_nilfs *nilfs;
873 	int ret;
874 
875 	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
876 		truncate_inode_pages_final(&inode->i_data);
877 		clear_inode(inode);
878 		nilfs_clear_inode(inode);
879 		return;
880 	}
881 	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
882 
883 	truncate_inode_pages_final(&inode->i_data);
884 
885 	nilfs = sb->s_fs_info;
886 	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
887 		/*
888 		 * If this inode is about to be disposed after the file system
889 		 * has been degraded to read-only due to file system corruption
890 		 * or after the writer has been detached, do not make any
891 		 * changes that cause writes, just clear it.
892 		 * Do this check after read-locking ns_segctor_sem by
893 		 * nilfs_transaction_begin() in order to avoid a race with
894 		 * the writer detach operation.
895 		 */
896 		clear_inode(inode);
897 		nilfs_clear_inode(inode);
898 		nilfs_transaction_abort(sb);
899 		return;
900 	}
901 
902 	/* TODO: some of the following operations may fail.  */
903 	nilfs_truncate_bmap(ii, 0);
904 	nilfs_mark_inode_dirty(inode);
905 	clear_inode(inode);
906 
907 	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
908 	if (!ret)
909 		atomic64_dec(&ii->i_root->inodes_count);
910 
911 	nilfs_clear_inode(inode);
912 
913 	if (IS_SYNC(inode))
914 		nilfs_set_transaction_flag(NILFS_TI_SYNC);
915 	nilfs_transaction_commit(sb);
916 	/*
917 	 * May construct a logical segment and may fail in sync mode.
918 	 * But delete_inode has no return value.
919 	 */
920 }
921 
922 int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
923 		  struct iattr *iattr)
924 {
925 	struct nilfs_transaction_info ti;
926 	struct inode *inode = d_inode(dentry);
927 	struct super_block *sb = inode->i_sb;
928 	int err;
929 
930 	err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
931 	if (err)
932 		return err;
933 
934 	err = nilfs_transaction_begin(sb, &ti, 0);
935 	if (unlikely(err))
936 		return err;
937 
938 	if ((iattr->ia_valid & ATTR_SIZE) &&
939 	    iattr->ia_size != i_size_read(inode)) {
940 		inode_dio_wait(inode);
941 		truncate_setsize(inode, iattr->ia_size);
942 		nilfs_truncate(inode);
943 	}
944 
945 	setattr_copy(&nop_mnt_idmap, inode, iattr);
946 	mark_inode_dirty(inode);
947 
948 	if (iattr->ia_valid & ATTR_MODE) {
949 		err = nilfs_acl_chmod(inode);
950 		if (unlikely(err))
951 			goto out_err;
952 	}
953 
954 	return nilfs_transaction_commit(sb);
955 
956 out_err:
957 	nilfs_transaction_abort(sb);
958 	return err;
959 }
960 
961 int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
962 		     int mask)
963 {
964 	struct nilfs_root *root = NILFS_I(inode)->i_root;
965 
966 	if ((mask & MAY_WRITE) && root &&
967 	    root->cno != NILFS_CPTREE_CURRENT_CNO)
968 		return -EROFS; /* snapshot is not writable */
969 
970 	return generic_permission(&nop_mnt_idmap, inode, mask);
971 }
972 
973 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
974 {
975 	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
976 	struct nilfs_inode_info *ii = NILFS_I(inode);
977 	int err;
978 
979 	spin_lock(&nilfs->ns_inode_lock);
980 	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
981 		spin_unlock(&nilfs->ns_inode_lock);
982 		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
983 						  inode->i_ino, pbh);
984 		if (unlikely(err))
985 			return err;
986 		spin_lock(&nilfs->ns_inode_lock);
987 		if (ii->i_bh == NULL)
988 			ii->i_bh = *pbh;
989 		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
990 			__brelse(ii->i_bh);
991 			ii->i_bh = *pbh;
992 		} else {
993 			brelse(*pbh);
994 			*pbh = ii->i_bh;
995 		}
996 	} else
997 		*pbh = ii->i_bh;
998 
999 	get_bh(*pbh);
1000 	spin_unlock(&nilfs->ns_inode_lock);
1001 	return 0;
1002 }
1003 
1004 int nilfs_inode_dirty(struct inode *inode)
1005 {
1006 	struct nilfs_inode_info *ii = NILFS_I(inode);
1007 	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1008 	int ret = 0;
1009 
1010 	if (!list_empty(&ii->i_dirty)) {
1011 		spin_lock(&nilfs->ns_inode_lock);
1012 		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
1013 			test_bit(NILFS_I_BUSY, &ii->i_state);
1014 		spin_unlock(&nilfs->ns_inode_lock);
1015 	}
1016 	return ret;
1017 }
1018 
1019 int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
1020 {
1021 	struct nilfs_inode_info *ii = NILFS_I(inode);
1022 	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1023 
1024 	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
1025 
1026 	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
1027 		return 0;
1028 
1029 	spin_lock(&nilfs->ns_inode_lock);
1030 	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
1031 	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
1032 		/*
1033 		 * Because this routine may race with nilfs_dispose_list(),
1034 		 * we have to check NILFS_I_QUEUED here, too.
1035 		 */
1036 		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
1037 			/*
1038 			 * This will happen when somebody is freeing
1039 			 * this inode.
1040 			 */
1041 			nilfs_warn(inode->i_sb,
1042 				   "cannot set file dirty (ino=%lu): the file is being freed",
1043 				   inode->i_ino);
1044 			spin_unlock(&nilfs->ns_inode_lock);
1045 			return -EINVAL; /*
1046 					 * NILFS_I_DIRTY may remain for
1047 					 * freeing inode.
1048 					 */
1049 		}
1050 		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
1051 		set_bit(NILFS_I_QUEUED, &ii->i_state);
1052 	}
1053 	spin_unlock(&nilfs->ns_inode_lock);
1054 	return 0;
1055 }
1056 
1057 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
1058 {
1059 	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1060 	struct buffer_head *ibh;
1061 	int err;
1062 
1063 	/*
1064 	 * Do not dirty inodes after the log writer has been detached
1065 	 * and its nilfs_root struct has been freed.
1066 	 */
1067 	if (unlikely(nilfs_purging(nilfs)))
1068 		return 0;
1069 
1070 	err = nilfs_load_inode_block(inode, &ibh);
1071 	if (unlikely(err)) {
1072 		nilfs_warn(inode->i_sb,
1073 			   "cannot mark inode dirty (ino=%lu): error %d loading inode block",
1074 			   inode->i_ino, err);
1075 		return err;
1076 	}
1077 	nilfs_update_inode(inode, ibh, flags);
1078 	mark_buffer_dirty(ibh);
1079 	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
1080 	brelse(ibh);
1081 	return 0;
1082 }
1083 
1084 /**
1085  * nilfs_dirty_inode - reflect changes on given inode to an inode block.
1086  * @inode: inode of the file to be registered.
1087  * @flags: flags to determine the dirty state of the inode
1088  *
1089  * nilfs_dirty_inode() loads a inode block containing the specified
1090  * @inode and copies data from a nilfs_inode to a corresponding inode
1091  * entry in the inode block. This operation is excluded from the segment
1092  * construction. This function can be called both as a single operation
1093  * and as a part of indivisible file operations.
1094  */
1095 void nilfs_dirty_inode(struct inode *inode, int flags)
1096 {
1097 	struct nilfs_transaction_info ti;
1098 	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
1099 
1100 	if (is_bad_inode(inode)) {
1101 		nilfs_warn(inode->i_sb,
1102 			   "tried to mark bad_inode dirty. ignored.");
1103 		dump_stack();
1104 		return;
1105 	}
1106 	if (mdi) {
1107 		nilfs_mdt_mark_dirty(inode);
1108 		return;
1109 	}
1110 	nilfs_transaction_begin(inode->i_sb, &ti, 0);
1111 	__nilfs_mark_inode_dirty(inode, flags);
1112 	nilfs_transaction_commit(inode->i_sb); /* never fails */
1113 }
1114 
1115 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1116 		 __u64 start, __u64 len)
1117 {
1118 	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1119 	__u64 logical = 0, phys = 0, size = 0;
1120 	__u32 flags = 0;
1121 	loff_t isize;
1122 	sector_t blkoff, end_blkoff;
1123 	sector_t delalloc_blkoff;
1124 	unsigned long delalloc_blklen;
1125 	unsigned int blkbits = inode->i_blkbits;
1126 	int ret, n;
1127 
1128 	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
1129 	if (ret)
1130 		return ret;
1131 
1132 	inode_lock(inode);
1133 
1134 	isize = i_size_read(inode);
1135 
1136 	blkoff = start >> blkbits;
1137 	end_blkoff = (start + len - 1) >> blkbits;
1138 
1139 	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
1140 							&delalloc_blkoff);
1141 
1142 	do {
1143 		__u64 blkphy;
1144 		unsigned int maxblocks;
1145 
1146 		if (delalloc_blklen && blkoff == delalloc_blkoff) {
1147 			if (size) {
1148 				/* End of the current extent */
1149 				ret = fiemap_fill_next_extent(
1150 					fieinfo, logical, phys, size, flags);
1151 				if (ret)
1152 					break;
1153 			}
1154 			if (blkoff > end_blkoff)
1155 				break;
1156 
1157 			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
1158 			logical = blkoff << blkbits;
1159 			phys = 0;
1160 			size = delalloc_blklen << blkbits;
1161 
1162 			blkoff = delalloc_blkoff + delalloc_blklen;
1163 			delalloc_blklen = nilfs_find_uncommitted_extent(
1164 				inode, blkoff, &delalloc_blkoff);
1165 			continue;
1166 		}
1167 
1168 		/*
1169 		 * Limit the number of blocks that we look up so as
1170 		 * not to get into the next delayed allocation extent.
1171 		 */
1172 		maxblocks = INT_MAX;
1173 		if (delalloc_blklen)
1174 			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
1175 					  maxblocks);
1176 		blkphy = 0;
1177 
1178 		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
1179 		n = nilfs_bmap_lookup_contig(
1180 			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
1181 		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
1182 
1183 		if (n < 0) {
1184 			int past_eof;
1185 
1186 			if (unlikely(n != -ENOENT))
1187 				break; /* error */
1188 
1189 			/* HOLE */
1190 			blkoff++;
1191 			past_eof = ((blkoff << blkbits) >= isize);
1192 
1193 			if (size) {
1194 				/* End of the current extent */
1195 
1196 				if (past_eof)
1197 					flags |= FIEMAP_EXTENT_LAST;
1198 
1199 				ret = fiemap_fill_next_extent(
1200 					fieinfo, logical, phys, size, flags);
1201 				if (ret)
1202 					break;
1203 				size = 0;
1204 			}
1205 			if (blkoff > end_blkoff || past_eof)
1206 				break;
1207 		} else {
1208 			if (size) {
1209 				if (phys && blkphy << blkbits == phys + size) {
1210 					/* The current extent goes on */
1211 					size += n << blkbits;
1212 				} else {
1213 					/* Terminate the current extent */
1214 					ret = fiemap_fill_next_extent(
1215 						fieinfo, logical, phys, size,
1216 						flags);
1217 					if (ret || blkoff > end_blkoff)
1218 						break;
1219 
1220 					/* Start another extent */
1221 					flags = FIEMAP_EXTENT_MERGED;
1222 					logical = blkoff << blkbits;
1223 					phys = blkphy << blkbits;
1224 					size = n << blkbits;
1225 				}
1226 			} else {
1227 				/* Start a new extent */
1228 				flags = FIEMAP_EXTENT_MERGED;
1229 				logical = blkoff << blkbits;
1230 				phys = blkphy << blkbits;
1231 				size = n << blkbits;
1232 			}
1233 			blkoff += n;
1234 		}
1235 		cond_resched();
1236 	} while (true);
1237 
1238 	/* If ret is 1 then we just hit the end of the extent array */
1239 	if (ret == 1)
1240 		ret = 0;
1241 
1242 	inode_unlock(inode);
1243 	return ret;
1244 }
1245