1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6 #include <linux/iversion.h>
7 #include "xfs.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_inode.h"
16 #include "xfs_inode_util.h"
17 #include "xfs_trans.h"
18 #include "xfs_ialloc.h"
19 #include "xfs_health.h"
20 #include "xfs_bmap.h"
21 #include "xfs_error.h"
22 #include "xfs_trace.h"
23 #include "xfs_ag.h"
24 #include "xfs_iunlink_item.h"
25 #include "xfs_inode_item.h"
26
27 uint16_t
xfs_flags2diflags(struct xfs_inode * ip,unsigned int xflags)28 xfs_flags2diflags(
29 struct xfs_inode *ip,
30 unsigned int xflags)
31 {
32 /* can't set PREALLOC this way, just preserve it */
33 uint16_t di_flags =
34 (ip->i_diflags & XFS_DIFLAG_PREALLOC);
35
36 if (xflags & FS_XFLAG_IMMUTABLE)
37 di_flags |= XFS_DIFLAG_IMMUTABLE;
38 if (xflags & FS_XFLAG_APPEND)
39 di_flags |= XFS_DIFLAG_APPEND;
40 if (xflags & FS_XFLAG_SYNC)
41 di_flags |= XFS_DIFLAG_SYNC;
42 if (xflags & FS_XFLAG_NOATIME)
43 di_flags |= XFS_DIFLAG_NOATIME;
44 if (xflags & FS_XFLAG_NODUMP)
45 di_flags |= XFS_DIFLAG_NODUMP;
46 if (xflags & FS_XFLAG_NODEFRAG)
47 di_flags |= XFS_DIFLAG_NODEFRAG;
48 if (xflags & FS_XFLAG_FILESTREAM)
49 di_flags |= XFS_DIFLAG_FILESTREAM;
50 if (S_ISDIR(VFS_I(ip)->i_mode)) {
51 if (xflags & FS_XFLAG_RTINHERIT)
52 di_flags |= XFS_DIFLAG_RTINHERIT;
53 if (xflags & FS_XFLAG_NOSYMLINKS)
54 di_flags |= XFS_DIFLAG_NOSYMLINKS;
55 if (xflags & FS_XFLAG_EXTSZINHERIT)
56 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
57 if (xflags & FS_XFLAG_PROJINHERIT)
58 di_flags |= XFS_DIFLAG_PROJINHERIT;
59 } else if (S_ISREG(VFS_I(ip)->i_mode)) {
60 if (xflags & FS_XFLAG_REALTIME)
61 di_flags |= XFS_DIFLAG_REALTIME;
62 if (xflags & FS_XFLAG_EXTSIZE)
63 di_flags |= XFS_DIFLAG_EXTSIZE;
64 }
65
66 return di_flags;
67 }
68
69 uint64_t
xfs_flags2diflags2(struct xfs_inode * ip,unsigned int xflags)70 xfs_flags2diflags2(
71 struct xfs_inode *ip,
72 unsigned int xflags)
73 {
74 uint64_t di_flags2 =
75 (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
76 XFS_DIFLAG2_BIGTIME |
77 XFS_DIFLAG2_NREXT64));
78
79 if (xflags & FS_XFLAG_DAX)
80 di_flags2 |= XFS_DIFLAG2_DAX;
81 if (xflags & FS_XFLAG_COWEXTSIZE)
82 di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
83
84 return di_flags2;
85 }
86
87 uint32_t
xfs_ip2xflags(struct xfs_inode * ip)88 xfs_ip2xflags(
89 struct xfs_inode *ip)
90 {
91 uint32_t flags = 0;
92
93 if (ip->i_diflags & XFS_DIFLAG_ANY) {
94 if (ip->i_diflags & XFS_DIFLAG_REALTIME)
95 flags |= FS_XFLAG_REALTIME;
96 if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
97 flags |= FS_XFLAG_PREALLOC;
98 if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
99 flags |= FS_XFLAG_IMMUTABLE;
100 if (ip->i_diflags & XFS_DIFLAG_APPEND)
101 flags |= FS_XFLAG_APPEND;
102 if (ip->i_diflags & XFS_DIFLAG_SYNC)
103 flags |= FS_XFLAG_SYNC;
104 if (ip->i_diflags & XFS_DIFLAG_NOATIME)
105 flags |= FS_XFLAG_NOATIME;
106 if (ip->i_diflags & XFS_DIFLAG_NODUMP)
107 flags |= FS_XFLAG_NODUMP;
108 if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
109 flags |= FS_XFLAG_RTINHERIT;
110 if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
111 flags |= FS_XFLAG_PROJINHERIT;
112 if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
113 flags |= FS_XFLAG_NOSYMLINKS;
114 if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
115 flags |= FS_XFLAG_EXTSIZE;
116 if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
117 flags |= FS_XFLAG_EXTSZINHERIT;
118 if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
119 flags |= FS_XFLAG_NODEFRAG;
120 if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
121 flags |= FS_XFLAG_FILESTREAM;
122 }
123
124 if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
125 if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
126 flags |= FS_XFLAG_DAX;
127 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
128 flags |= FS_XFLAG_COWEXTSIZE;
129 }
130
131 if (xfs_inode_has_attr_fork(ip))
132 flags |= FS_XFLAG_HASATTR;
133 return flags;
134 }
135
136 prid_t
xfs_get_initial_prid(struct xfs_inode * dp)137 xfs_get_initial_prid(struct xfs_inode *dp)
138 {
139 if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
140 return dp->i_projid;
141
142 /* Assign to the root project by default. */
143 return 0;
144 }
145
146 /* Propagate di_flags from a parent inode to a child inode. */
147 static inline void
xfs_inode_inherit_flags(struct xfs_inode * ip,const struct xfs_inode * pip)148 xfs_inode_inherit_flags(
149 struct xfs_inode *ip,
150 const struct xfs_inode *pip)
151 {
152 unsigned int di_flags = 0;
153 xfs_failaddr_t failaddr;
154 umode_t mode = VFS_I(ip)->i_mode;
155
156 if (S_ISDIR(mode)) {
157 if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
158 di_flags |= XFS_DIFLAG_RTINHERIT;
159 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
160 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
161 ip->i_extsize = pip->i_extsize;
162 }
163 if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
164 di_flags |= XFS_DIFLAG_PROJINHERIT;
165 } else if (S_ISREG(mode)) {
166 if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
167 xfs_has_realtime(ip->i_mount))
168 di_flags |= XFS_DIFLAG_REALTIME;
169 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
170 di_flags |= XFS_DIFLAG_EXTSIZE;
171 ip->i_extsize = pip->i_extsize;
172 }
173 }
174 if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
175 xfs_inherit_noatime)
176 di_flags |= XFS_DIFLAG_NOATIME;
177 if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
178 xfs_inherit_nodump)
179 di_flags |= XFS_DIFLAG_NODUMP;
180 if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
181 xfs_inherit_sync)
182 di_flags |= XFS_DIFLAG_SYNC;
183 if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
184 xfs_inherit_nosymlinks)
185 di_flags |= XFS_DIFLAG_NOSYMLINKS;
186 if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
187 xfs_inherit_nodefrag)
188 di_flags |= XFS_DIFLAG_NODEFRAG;
189 if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
190 di_flags |= XFS_DIFLAG_FILESTREAM;
191
192 ip->i_diflags |= di_flags;
193
194 /*
195 * Inode verifiers on older kernels only check that the extent size
196 * hint is an integer multiple of the rt extent size on realtime files.
197 * They did not check the hint alignment on a directory with both
198 * rtinherit and extszinherit flags set. If the misaligned hint is
199 * propagated from a directory into a new realtime file, new file
200 * allocations will fail due to math errors in the rt allocator and/or
201 * trip the verifiers. Validate the hint settings in the new file so
202 * that we don't let broken hints propagate.
203 */
204 failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
205 VFS_I(ip)->i_mode, ip->i_diflags);
206 if (failaddr) {
207 ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
208 XFS_DIFLAG_EXTSZINHERIT);
209 ip->i_extsize = 0;
210 }
211 }
212
213 /* Propagate di_flags2 from a parent inode to a child inode. */
214 static inline void
xfs_inode_inherit_flags2(struct xfs_inode * ip,const struct xfs_inode * pip)215 xfs_inode_inherit_flags2(
216 struct xfs_inode *ip,
217 const struct xfs_inode *pip)
218 {
219 xfs_failaddr_t failaddr;
220
221 if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
222 ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
223 ip->i_cowextsize = pip->i_cowextsize;
224 }
225 if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
226 ip->i_diflags2 |= XFS_DIFLAG2_DAX;
227 if (xfs_is_metadir_inode(pip))
228 ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
229
230 /* Don't let invalid cowextsize hints propagate. */
231 failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
232 VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
233 if (failaddr) {
234 ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
235 ip->i_cowextsize = 0;
236 }
237 }
238
239 /*
240 * If we need to create attributes immediately after allocating the inode,
241 * initialise an empty attribute fork right now. We use the default fork offset
242 * for attributes here as we don't know exactly what size or how many
243 * attributes we might be adding. We can do this safely here because we know
244 * the data fork is completely empty and this saves us from needing to run a
245 * separate transaction to set the fork offset in the immediate future.
246 *
247 * If we have parent pointers and the caller hasn't told us that the file will
248 * never be linked into a directory tree, we /must/ create the attr fork.
249 */
250 static inline bool
xfs_icreate_want_attrfork(struct xfs_mount * mp,const struct xfs_icreate_args * args)251 xfs_icreate_want_attrfork(
252 struct xfs_mount *mp,
253 const struct xfs_icreate_args *args)
254 {
255 if (args->flags & XFS_ICREATE_INIT_XATTRS)
256 return true;
257
258 if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp))
259 return true;
260
261 return false;
262 }
263
264 /* Initialise an inode's attributes. */
265 void
xfs_inode_init(struct xfs_trans * tp,const struct xfs_icreate_args * args,struct xfs_inode * ip)266 xfs_inode_init(
267 struct xfs_trans *tp,
268 const struct xfs_icreate_args *args,
269 struct xfs_inode *ip)
270 {
271 struct xfs_inode *pip = args->pip;
272 struct inode *dir = pip ? VFS_I(pip) : NULL;
273 struct xfs_mount *mp = tp->t_mountp;
274 struct inode *inode = VFS_I(ip);
275 unsigned int flags;
276 int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
277 XFS_ICHGTIME_ACCESS;
278
279 if (args->flags & XFS_ICREATE_TMPFILE)
280 set_nlink(inode, 0);
281 else if (S_ISDIR(args->mode))
282 set_nlink(inode, 2);
283 else
284 set_nlink(inode, 1);
285 inode->i_rdev = args->rdev;
286
287 if (!args->idmap || pip == NULL) {
288 /* creating a tree root, sb rooted, or detached file */
289 inode->i_uid = GLOBAL_ROOT_UID;
290 inode->i_gid = GLOBAL_ROOT_GID;
291 ip->i_projid = 0;
292 inode->i_mode = args->mode;
293 } else {
294 /* creating a child in the directory tree */
295 if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
296 inode_fsuid_set(inode, args->idmap);
297 inode->i_gid = dir->i_gid;
298 inode->i_mode = args->mode;
299 } else {
300 inode_init_owner(args->idmap, inode, dir, args->mode);
301 }
302
303 /*
304 * If the group ID of the new file does not match the effective
305 * group ID or one of the supplementary group IDs, the S_ISGID
306 * bit is cleared (and only if the irix_sgid_inherit
307 * compatibility variable is set).
308 */
309 if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
310 !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
311 inode->i_mode &= ~S_ISGID;
312
313 ip->i_projid = xfs_get_initial_prid(pip);
314 }
315
316 ip->i_disk_size = 0;
317 ip->i_df.if_nextents = 0;
318 ASSERT(ip->i_nblocks == 0);
319
320 ip->i_extsize = 0;
321 ip->i_diflags = 0;
322
323 if (xfs_has_v3inodes(mp)) {
324 inode_set_iversion(inode, 1);
325 ip->i_cowextsize = 0;
326 times |= XFS_ICHGTIME_CREATE;
327 }
328
329 xfs_trans_ichgtime(tp, ip, times);
330
331 flags = XFS_ILOG_CORE;
332 switch (args->mode & S_IFMT) {
333 case S_IFIFO:
334 case S_IFCHR:
335 case S_IFBLK:
336 case S_IFSOCK:
337 ip->i_df.if_format = XFS_DINODE_FMT_DEV;
338 flags |= XFS_ILOG_DEV;
339 break;
340 case S_IFREG:
341 case S_IFDIR:
342 if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
343 xfs_inode_inherit_flags(ip, pip);
344 if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
345 xfs_inode_inherit_flags2(ip, pip);
346 fallthrough;
347 case S_IFLNK:
348 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
349 ip->i_df.if_bytes = 0;
350 ip->i_df.if_data = NULL;
351 break;
352 default:
353 ASSERT(0);
354 }
355
356 if (xfs_icreate_want_attrfork(mp, args)) {
357 ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
358 xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
359
360 if (!xfs_has_attr(mp)) {
361 spin_lock(&mp->m_sb_lock);
362 xfs_add_attr(mp);
363 spin_unlock(&mp->m_sb_lock);
364 xfs_log_sb(tp);
365 }
366 }
367
368 xfs_trans_log_inode(tp, ip, flags);
369 }
370
371 /*
372 * In-Core Unlinked List Lookups
373 * =============================
374 *
375 * Every inode is supposed to be reachable from some other piece of metadata
376 * with the exception of the root directory. Inodes with a connection to a
377 * file descriptor but not linked from anywhere in the on-disk directory tree
378 * are collectively known as unlinked inodes, though the filesystem itself
379 * maintains links to these inodes so that on-disk metadata are consistent.
380 *
381 * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
382 * header contains a number of buckets that point to an inode, and each inode
383 * record has a pointer to the next inode in the hash chain. This
384 * singly-linked list causes scaling problems in the iunlink remove function
385 * because we must walk that list to find the inode that points to the inode
386 * being removed from the unlinked hash bucket list.
387 *
388 * Hence we keep an in-memory double linked list to link each inode on an
389 * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
390 * based lists would require having 64 list heads in the perag, one for each
391 * list. This is expensive in terms of memory (think millions of AGs) and cache
392 * misses on lookups. Instead, use the fact that inodes on the unlinked list
393 * must be referenced at the VFS level to keep them on the list and hence we
394 * have an existence guarantee for inodes on the unlinked list.
395 *
396 * Given we have an existence guarantee, we can use lockless inode cache lookups
397 * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
398 * for the double linked unlinked list, and we don't need any extra locking to
399 * keep the list safe as all manipulations are done under the AGI buffer lock.
400 * Keeping the list up to date does not require memory allocation, just finding
401 * the XFS inode and updating the next/prev unlinked list aginos.
402 */
403
404 /*
405 * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
406 * is not in cache.
407 */
408 static int
xfs_iunlink_update_backref(struct xfs_perag * pag,xfs_agino_t prev_agino,xfs_agino_t next_agino)409 xfs_iunlink_update_backref(
410 struct xfs_perag *pag,
411 xfs_agino_t prev_agino,
412 xfs_agino_t next_agino)
413 {
414 struct xfs_inode *ip;
415
416 /* No update necessary if we are at the end of the list. */
417 if (next_agino == NULLAGINO)
418 return 0;
419
420 ip = xfs_iunlink_lookup(pag, next_agino);
421 if (!ip)
422 return -ENOLINK;
423
424 ip->i_prev_unlinked = prev_agino;
425 return 0;
426 }
427
428 /*
429 * Point the AGI unlinked bucket at an inode and log the results. The caller
430 * is responsible for validating the old value.
431 */
432 STATIC int
xfs_iunlink_update_bucket(struct xfs_trans * tp,struct xfs_perag * pag,struct xfs_buf * agibp,unsigned int bucket_index,xfs_agino_t new_agino)433 xfs_iunlink_update_bucket(
434 struct xfs_trans *tp,
435 struct xfs_perag *pag,
436 struct xfs_buf *agibp,
437 unsigned int bucket_index,
438 xfs_agino_t new_agino)
439 {
440 struct xfs_agi *agi = agibp->b_addr;
441 xfs_agino_t old_value;
442 int offset;
443
444 ASSERT(xfs_verify_agino_or_null(pag, new_agino));
445
446 old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
447 trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value,
448 new_agino);
449
450 /*
451 * We should never find the head of the list already set to the value
452 * passed in because either we're adding or removing ourselves from the
453 * head of the list.
454 */
455 if (old_value == new_agino) {
456 xfs_buf_mark_corrupt(agibp);
457 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
458 return -EFSCORRUPTED;
459 }
460
461 agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
462 offset = offsetof(struct xfs_agi, agi_unlinked) +
463 (sizeof(xfs_agino_t) * bucket_index);
464 xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
465 return 0;
466 }
467
468 static int
xfs_iunlink_insert_inode(struct xfs_trans * tp,struct xfs_perag * pag,struct xfs_buf * agibp,struct xfs_inode * ip)469 xfs_iunlink_insert_inode(
470 struct xfs_trans *tp,
471 struct xfs_perag *pag,
472 struct xfs_buf *agibp,
473 struct xfs_inode *ip)
474 {
475 struct xfs_mount *mp = tp->t_mountp;
476 struct xfs_agi *agi = agibp->b_addr;
477 xfs_agino_t next_agino;
478 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
479 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
480 int error;
481
482 /*
483 * Get the index into the agi hash table for the list this inode will
484 * go on. Make sure the pointer isn't garbage and that this inode
485 * isn't already on the list.
486 */
487 next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
488 if (next_agino == agino ||
489 !xfs_verify_agino_or_null(pag, next_agino)) {
490 xfs_buf_mark_corrupt(agibp);
491 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
492 return -EFSCORRUPTED;
493 }
494
495 /*
496 * Update the prev pointer in the next inode to point back to this
497 * inode.
498 */
499 error = xfs_iunlink_update_backref(pag, agino, next_agino);
500 if (error == -ENOLINK)
501 error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
502 if (error)
503 return error;
504
505 if (next_agino != NULLAGINO) {
506 /*
507 * There is already another inode in the bucket, so point this
508 * inode to the current head of the list.
509 */
510 error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
511 if (error)
512 return error;
513 ip->i_next_unlinked = next_agino;
514 }
515
516 /* Point the head of the list to point to this inode. */
517 ip->i_prev_unlinked = NULLAGINO;
518 return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
519 }
520
521 /*
522 * This is called when the inode's link count has gone to 0 or we are creating
523 * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
524 *
525 * We place the on-disk inode on a list in the AGI. It will be pulled from this
526 * list when the inode is freed.
527 */
528 int
xfs_iunlink(struct xfs_trans * tp,struct xfs_inode * ip)529 xfs_iunlink(
530 struct xfs_trans *tp,
531 struct xfs_inode *ip)
532 {
533 struct xfs_mount *mp = tp->t_mountp;
534 struct xfs_perag *pag;
535 struct xfs_buf *agibp;
536 int error;
537
538 ASSERT(VFS_I(ip)->i_nlink == 0);
539 ASSERT(VFS_I(ip)->i_mode != 0);
540 trace_xfs_iunlink(ip);
541
542 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
543
544 /* Get the agi buffer first. It ensures lock ordering on the list. */
545 error = xfs_read_agi(pag, tp, 0, &agibp);
546 if (error)
547 goto out;
548
549 error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
550 out:
551 xfs_perag_put(pag);
552 return error;
553 }
554
555 static int
xfs_iunlink_remove_inode(struct xfs_trans * tp,struct xfs_perag * pag,struct xfs_buf * agibp,struct xfs_inode * ip)556 xfs_iunlink_remove_inode(
557 struct xfs_trans *tp,
558 struct xfs_perag *pag,
559 struct xfs_buf *agibp,
560 struct xfs_inode *ip)
561 {
562 struct xfs_mount *mp = tp->t_mountp;
563 struct xfs_agi *agi = agibp->b_addr;
564 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
565 xfs_agino_t head_agino;
566 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
567 int error;
568
569 trace_xfs_iunlink_remove(ip);
570
571 /*
572 * Get the index into the agi hash table for the list this inode will
573 * go on. Make sure the head pointer isn't garbage.
574 */
575 head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
576 if (!xfs_verify_agino(pag, head_agino)) {
577 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
578 agi, sizeof(*agi));
579 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
580 return -EFSCORRUPTED;
581 }
582
583 /*
584 * Set our inode's next_unlinked pointer to NULL and then return
585 * the old pointer value so that we can update whatever was previous
586 * to us in the list to point to whatever was next in the list.
587 */
588 error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
589 if (error)
590 return error;
591
592 /*
593 * Update the prev pointer in the next inode to point back to previous
594 * inode in the chain.
595 */
596 error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
597 ip->i_next_unlinked);
598 if (error == -ENOLINK)
599 error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
600 ip->i_next_unlinked);
601 if (error)
602 return error;
603
604 if (head_agino != agino) {
605 struct xfs_inode *prev_ip;
606
607 prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
608 if (!prev_ip) {
609 xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
610 return -EFSCORRUPTED;
611 }
612
613 error = xfs_iunlink_log_inode(tp, prev_ip, pag,
614 ip->i_next_unlinked);
615 prev_ip->i_next_unlinked = ip->i_next_unlinked;
616 } else {
617 /* Point the head of the list to the next unlinked inode. */
618 error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
619 ip->i_next_unlinked);
620 }
621
622 ip->i_next_unlinked = NULLAGINO;
623 ip->i_prev_unlinked = 0;
624 return error;
625 }
626
/*
 * Pull the on-disk inode from the AGI unlinked list.
 *
 * Reads the AGI buffer for @pag and then removes @ip from its unlinked
 * bucket list.  Returns 0 on success or a negative errno.
 */
int
xfs_iunlink_remove(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	struct xfs_buf		*agibp;
	int			error;

	/*
	 * NOTE(review): xfs_iunlink_remove_inode() fires this same tracepoint
	 * again, so each removal is traced twice -- confirm this is intended.
	 */
	trace_xfs_iunlink_remove(ip);

	/* Get the agi buffer first.  It ensures lock ordering on the list. */
	error = xfs_read_agi(pag, tp, 0, &agibp);
	if (!error)
		error = xfs_iunlink_remove_inode(tp, pag, agibp, ip);

	return error;
}
648
649 /*
650 * Decrement the link count on an inode & log the change. If this causes the
651 * link count to go to zero, move the inode to AGI unlinked list so that it can
652 * be freed when the last active reference goes away via xfs_inactive().
653 */
654 int
xfs_droplink(struct xfs_trans * tp,struct xfs_inode * ip)655 xfs_droplink(
656 struct xfs_trans *tp,
657 struct xfs_inode *ip)
658 {
659 struct inode *inode = VFS_I(ip);
660
661 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
662
663 if (inode->i_nlink == 0) {
664 xfs_info_ratelimited(tp->t_mountp,
665 "Inode 0x%llx link count dropped below zero. Pinning link count.",
666 ip->i_ino);
667 set_nlink(inode, XFS_NLINK_PINNED);
668 }
669 if (inode->i_nlink != XFS_NLINK_PINNED)
670 drop_nlink(inode);
671
672 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
673
674 if (inode->i_nlink)
675 return 0;
676
677 return xfs_iunlink(tp, ip);
678 }
679
680 /*
681 * Increment the link count on an inode & log the change.
682 */
683 void
xfs_bumplink(struct xfs_trans * tp,struct xfs_inode * ip)684 xfs_bumplink(
685 struct xfs_trans *tp,
686 struct xfs_inode *ip)
687 {
688 struct inode *inode = VFS_I(ip);
689
690 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
691
692 if (inode->i_nlink == XFS_NLINK_PINNED - 1)
693 xfs_info_ratelimited(tp->t_mountp,
694 "Inode 0x%llx link count exceeded maximum. Pinning link count.",
695 ip->i_ino);
696 if (inode->i_nlink != XFS_NLINK_PINNED)
697 inc_nlink(inode);
698
699 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
700 }
701
702 /* Free an inode in the ondisk index and zero it out. */
703 int
xfs_inode_uninit(struct xfs_trans * tp,struct xfs_perag * pag,struct xfs_inode * ip,struct xfs_icluster * xic)704 xfs_inode_uninit(
705 struct xfs_trans *tp,
706 struct xfs_perag *pag,
707 struct xfs_inode *ip,
708 struct xfs_icluster *xic)
709 {
710 struct xfs_mount *mp = ip->i_mount;
711 int error;
712
713 /*
714 * Free the inode first so that we guarantee that the AGI lock is going
715 * to be taken before we remove the inode from the unlinked list. This
716 * makes the AGI lock -> unlinked list modification order the same as
717 * used in O_TMPFILE creation.
718 */
719 error = xfs_difree(tp, pag, ip->i_ino, xic);
720 if (error)
721 return error;
722
723 error = xfs_iunlink_remove(tp, pag, ip);
724 if (error)
725 return error;
726
727 /*
728 * Free any local-format data sitting around before we reset the
729 * data fork to extents format. Note that the attr fork data has
730 * already been freed by xfs_attr_inactive.
731 */
732 if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
733 kfree(ip->i_df.if_data);
734 ip->i_df.if_data = NULL;
735 ip->i_df.if_bytes = 0;
736 }
737
738 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
739 ip->i_diflags = 0;
740 ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
741 ip->i_forkoff = 0; /* mark the attr fork not in use */
742 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
743
744 /*
745 * Bump the generation count so no one will be confused
746 * by reincarnations of this inode.
747 */
748 VFS_I(ip)->i_generation++;
749 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
750 return 0;
751 }
752