xref: /linux/fs/xfs/xfs_itable.c (revision 954ea91fb68b771dba6d87cfa61b68e09cc2497f)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4   * All Rights Reserved.
5   */
6  #include "xfs.h"
7  #include "xfs_fs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_log_format.h"
11  #include "xfs_trans_resv.h"
12  #include "xfs_mount.h"
13  #include "xfs_inode.h"
14  #include "xfs_btree.h"
15  #include "xfs_ialloc.h"
16  #include "xfs_ialloc_btree.h"
17  #include "xfs_iwalk.h"
18  #include "xfs_itable.h"
19  #include "xfs_error.h"
20  #include "xfs_icache.h"
21  #include "xfs_health.h"
22  #include "xfs_trans.h"
23  
24  /*
25   * Bulk Stat
26   * =========
27   *
28   * Use the inode walking functions to fill out struct xfs_bulkstat for every
29   * allocated inode, then pass the stat information to some externally provided
30   * iteration function.
31   */
32  
33  struct xfs_bstat_chunk {
34  	bulkstat_one_fmt_pf	formatter;
35  	struct xfs_ibulk	*breq;
36  	struct xfs_bulkstat	*buf;
37  };
38  
39  /*
40   * Fill out the bulkstat info for a single inode and report it somewhere.
41   *
42   * bc->breq->lastino is effectively the inode cursor as we walk through the
43   * filesystem.  Therefore, we update it any time we need to move the cursor
44   * forward, regardless of whether or not we're sending any bstat information
45   * back to userspace.  If the inode is internal metadata or, has been freed
46   * out from under us, we just simply keep going.
47   *
48   * However, if any other type of error happens we want to stop right where we
49   * are so that userspace will call back with exact number of the bad inode and
50   * we can send back an error code.
51   *
52   * Note that if the formatter tells us there's no space left in the buffer we
53   * move the cursor forward and abort the walk.
54   */
55  STATIC int
56  xfs_bulkstat_one_int(
57  	struct xfs_mount	*mp,
58  	struct mnt_idmap	*idmap,
59  	struct xfs_trans	*tp,
60  	xfs_ino_t		ino,
61  	struct xfs_bstat_chunk	*bc)
62  {
63  	struct user_namespace	*sb_userns = mp->m_super->s_user_ns;
64  	struct xfs_inode	*ip;		/* incore inode pointer */
65  	struct inode		*inode;
66  	struct xfs_bulkstat	*buf = bc->buf;
67  	xfs_extnum_t		nextents;
68  	int			error = -EINVAL;
69  	vfsuid_t		vfsuid;
70  	vfsgid_t		vfsgid;
71  
72  	if (xfs_internal_inum(mp, ino))
73  		goto out_advance;
74  
75  	error = xfs_iget(mp, tp, ino,
76  			 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
77  			 XFS_ILOCK_SHARED, &ip);
78  	if (error == -ENOENT || error == -EINVAL)
79  		goto out_advance;
80  	if (error)
81  		goto out;
82  
83  	ASSERT(ip != NULL);
84  	ASSERT(ip->i_imap.im_blkno != 0);
85  	inode = VFS_I(ip);
86  	vfsuid = i_uid_into_vfsuid(idmap, inode);
87  	vfsgid = i_gid_into_vfsgid(idmap, inode);
88  
89  	/* xfs_iget returns the following without needing
90  	 * further change.
91  	 */
92  	buf->bs_projectid = ip->i_projid;
93  	buf->bs_ino = ino;
94  	buf->bs_uid = from_kuid(sb_userns, vfsuid_into_kuid(vfsuid));
95  	buf->bs_gid = from_kgid(sb_userns, vfsgid_into_kgid(vfsgid));
96  	buf->bs_size = ip->i_disk_size;
97  
98  	buf->bs_nlink = inode->i_nlink;
99  	buf->bs_atime = inode->i_atime.tv_sec;
100  	buf->bs_atime_nsec = inode->i_atime.tv_nsec;
101  	buf->bs_mtime = inode->i_mtime.tv_sec;
102  	buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
103  	buf->bs_ctime = inode->i_ctime.tv_sec;
104  	buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
105  	buf->bs_gen = inode->i_generation;
106  	buf->bs_mode = inode->i_mode;
107  
108  	buf->bs_xflags = xfs_ip2xflags(ip);
109  	buf->bs_extsize_blks = ip->i_extsize;
110  
111  	nextents = xfs_ifork_nextents(&ip->i_df);
112  	if (!(bc->breq->flags & XFS_IBULK_NREXT64))
113  		buf->bs_extents = min(nextents, XFS_MAX_EXTCNT_DATA_FORK_SMALL);
114  	else
115  		buf->bs_extents64 = nextents;
116  
117  	xfs_bulkstat_health(ip, buf);
118  	buf->bs_aextents = xfs_ifork_nextents(&ip->i_af);
119  	buf->bs_forkoff = xfs_inode_fork_boff(ip);
120  	buf->bs_version = XFS_BULKSTAT_VERSION_V5;
121  
122  	if (xfs_has_v3inodes(mp)) {
123  		buf->bs_btime = ip->i_crtime.tv_sec;
124  		buf->bs_btime_nsec = ip->i_crtime.tv_nsec;
125  		if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
126  			buf->bs_cowextsize_blks = ip->i_cowextsize;
127  	}
128  
129  	switch (ip->i_df.if_format) {
130  	case XFS_DINODE_FMT_DEV:
131  		buf->bs_rdev = sysv_encode_dev(inode->i_rdev);
132  		buf->bs_blksize = BLKDEV_IOSIZE;
133  		buf->bs_blocks = 0;
134  		break;
135  	case XFS_DINODE_FMT_LOCAL:
136  		buf->bs_rdev = 0;
137  		buf->bs_blksize = mp->m_sb.sb_blocksize;
138  		buf->bs_blocks = 0;
139  		break;
140  	case XFS_DINODE_FMT_EXTENTS:
141  	case XFS_DINODE_FMT_BTREE:
142  		buf->bs_rdev = 0;
143  		buf->bs_blksize = mp->m_sb.sb_blocksize;
144  		buf->bs_blocks = ip->i_nblocks + ip->i_delayed_blks;
145  		break;
146  	}
147  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
148  	xfs_irele(ip);
149  
150  	error = bc->formatter(bc->breq, buf);
151  	if (error == -ECANCELED)
152  		goto out_advance;
153  	if (error)
154  		goto out;
155  
156  out_advance:
157  	/*
158  	 * Advance the cursor to the inode that comes after the one we just
159  	 * looked at.  We want the caller to move along if the bulkstat
160  	 * information was copied successfully; if we tried to grab the inode
161  	 * but it's no longer allocated; or if it's internal metadata.
162  	 */
163  	bc->breq->startino = ino + 1;
164  out:
165  	return error;
166  }
167  
168  /* Bulkstat a single inode. */
169  int
170  xfs_bulkstat_one(
171  	struct xfs_ibulk	*breq,
172  	bulkstat_one_fmt_pf	formatter)
173  {
174  	struct xfs_bstat_chunk	bc = {
175  		.formatter	= formatter,
176  		.breq		= breq,
177  	};
178  	struct xfs_trans	*tp;
179  	int			error;
180  
181  	if (breq->idmap != &nop_mnt_idmap) {
182  		xfs_warn_ratelimited(breq->mp,
183  			"bulkstat not supported inside of idmapped mounts.");
184  		return -EINVAL;
185  	}
186  
187  	ASSERT(breq->icount == 1);
188  
189  	bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
190  			KM_MAYFAIL);
191  	if (!bc.buf)
192  		return -ENOMEM;
193  
194  	/*
195  	 * Grab an empty transaction so that we can use its recursive buffer
196  	 * locking abilities to detect cycles in the inobt without deadlocking.
197  	 */
198  	error = xfs_trans_alloc_empty(breq->mp, &tp);
199  	if (error)
200  		goto out;
201  
202  	error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp,
203  			breq->startino, &bc);
204  	xfs_trans_cancel(tp);
205  out:
206  	kmem_free(bc.buf);
207  
208  	/*
209  	 * If we reported one inode to userspace then we abort because we hit
210  	 * the end of the buffer.  Don't leak that back to userspace.
211  	 */
212  	if (error == -ECANCELED)
213  		error = 0;
214  
215  	return error;
216  }
217  
218  static int
219  xfs_bulkstat_iwalk(
220  	struct xfs_mount	*mp,
221  	struct xfs_trans	*tp,
222  	xfs_ino_t		ino,
223  	void			*data)
224  {
225  	struct xfs_bstat_chunk	*bc = data;
226  	int			error;
227  
228  	error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data);
229  	/* bulkstat just skips over missing inodes */
230  	if (error == -ENOENT || error == -EINVAL)
231  		return 0;
232  	return error;
233  }
234  
235  /*
236   * Check the incoming lastino parameter.
237   *
238   * We allow any inode value that could map to physical space inside the
239   * filesystem because if there are no inodes there, bulkstat moves on to the
240   * next chunk.  In other words, the magic agino value of zero takes us to the
241   * first chunk in the AG, and an agino value past the end of the AG takes us to
242   * the first chunk in the next AG.
243   *
244   * Therefore we can end early if the requested inode is beyond the end of the
245   * filesystem or doesn't map properly.
246   */
247  static inline bool
248  xfs_bulkstat_already_done(
249  	struct xfs_mount	*mp,
250  	xfs_ino_t		startino)
251  {
252  	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
253  	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, startino);
254  
255  	return agno >= mp->m_sb.sb_agcount ||
256  	       startino != XFS_AGINO_TO_INO(mp, agno, agino);
257  }
258  
259  /* Return stat information in bulk (by-inode) for the filesystem. */
260  int
261  xfs_bulkstat(
262  	struct xfs_ibulk	*breq,
263  	bulkstat_one_fmt_pf	formatter)
264  {
265  	struct xfs_bstat_chunk	bc = {
266  		.formatter	= formatter,
267  		.breq		= breq,
268  	};
269  	struct xfs_trans	*tp;
270  	unsigned int		iwalk_flags = 0;
271  	int			error;
272  
273  	if (breq->idmap != &nop_mnt_idmap) {
274  		xfs_warn_ratelimited(breq->mp,
275  			"bulkstat not supported inside of idmapped mounts.");
276  		return -EINVAL;
277  	}
278  	if (xfs_bulkstat_already_done(breq->mp, breq->startino))
279  		return 0;
280  
281  	bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
282  			KM_MAYFAIL);
283  	if (!bc.buf)
284  		return -ENOMEM;
285  
286  	/*
287  	 * Grab an empty transaction so that we can use its recursive buffer
288  	 * locking abilities to detect cycles in the inobt without deadlocking.
289  	 */
290  	error = xfs_trans_alloc_empty(breq->mp, &tp);
291  	if (error)
292  		goto out;
293  
294  	if (breq->flags & XFS_IBULK_SAME_AG)
295  		iwalk_flags |= XFS_IWALK_SAME_AG;
296  
297  	error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags,
298  			xfs_bulkstat_iwalk, breq->icount, &bc);
299  	xfs_trans_cancel(tp);
300  out:
301  	kmem_free(bc.buf);
302  
303  	/*
304  	 * We found some inodes, so clear the error status and return them.
305  	 * The lastino pointer will point directly at the inode that triggered
306  	 * any error that occurred, so on the next call the error will be
307  	 * triggered again and propagated to userspace as there will be no
308  	 * formatted inodes in the buffer.
309  	 */
310  	if (breq->ocount > 0)
311  		error = 0;
312  
313  	return error;
314  }
315  
316  /* Convert bulkstat (v5) to bstat (v1). */
317  void
318  xfs_bulkstat_to_bstat(
319  	struct xfs_mount		*mp,
320  	struct xfs_bstat		*bs1,
321  	const struct xfs_bulkstat	*bstat)
322  {
323  	/* memset is needed here because of padding holes in the structure. */
324  	memset(bs1, 0, sizeof(struct xfs_bstat));
325  	bs1->bs_ino = bstat->bs_ino;
326  	bs1->bs_mode = bstat->bs_mode;
327  	bs1->bs_nlink = bstat->bs_nlink;
328  	bs1->bs_uid = bstat->bs_uid;
329  	bs1->bs_gid = bstat->bs_gid;
330  	bs1->bs_rdev = bstat->bs_rdev;
331  	bs1->bs_blksize = bstat->bs_blksize;
332  	bs1->bs_size = bstat->bs_size;
333  	bs1->bs_atime.tv_sec = bstat->bs_atime;
334  	bs1->bs_mtime.tv_sec = bstat->bs_mtime;
335  	bs1->bs_ctime.tv_sec = bstat->bs_ctime;
336  	bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
337  	bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
338  	bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
339  	bs1->bs_blocks = bstat->bs_blocks;
340  	bs1->bs_xflags = bstat->bs_xflags;
341  	bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
342  	bs1->bs_extents = bstat->bs_extents;
343  	bs1->bs_gen = bstat->bs_gen;
344  	bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
345  	bs1->bs_forkoff = bstat->bs_forkoff;
346  	bs1->bs_projid_hi = bstat->bs_projectid >> 16;
347  	bs1->bs_sick = bstat->bs_sick;
348  	bs1->bs_checked = bstat->bs_checked;
349  	bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
350  	bs1->bs_dmevmask = 0;
351  	bs1->bs_dmstate = 0;
352  	bs1->bs_aextents = bstat->bs_aextents;
353  }
354  
355  struct xfs_inumbers_chunk {
356  	inumbers_fmt_pf		formatter;
357  	struct xfs_ibulk	*breq;
358  };
359  
360  /*
361   * INUMBERS
362   * ========
363   * This is how we export inode btree records to userspace, so that XFS tools
364   * can figure out where inodes are allocated.
365   */
366  
367  /*
368   * Format the inode group structure and report it somewhere.
369   *
370   * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk
371   * through the filesystem so we move it forward unless there was a runtime
372   * error.  If the formatter tells us the buffer is now full we also move the
373   * cursor forward and abort the walk.
374   */
375  STATIC int
376  xfs_inumbers_walk(
377  	struct xfs_mount	*mp,
378  	struct xfs_trans	*tp,
379  	xfs_agnumber_t		agno,
380  	const struct xfs_inobt_rec_incore *irec,
381  	void			*data)
382  {
383  	struct xfs_inumbers	inogrp = {
384  		.xi_startino	= XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
385  		.xi_alloccount	= irec->ir_count - irec->ir_freecount,
386  		.xi_allocmask	= ~irec->ir_free,
387  		.xi_version	= XFS_INUMBERS_VERSION_V5,
388  	};
389  	struct xfs_inumbers_chunk *ic = data;
390  	int			error;
391  
392  	error = ic->formatter(ic->breq, &inogrp);
393  	if (error && error != -ECANCELED)
394  		return error;
395  
396  	ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
397  			XFS_INODES_PER_CHUNK;
398  	return error;
399  }
400  
401  /*
402   * Return inode number table for the filesystem.
403   */
404  int
405  xfs_inumbers(
406  	struct xfs_ibulk	*breq,
407  	inumbers_fmt_pf		formatter)
408  {
409  	struct xfs_inumbers_chunk ic = {
410  		.formatter	= formatter,
411  		.breq		= breq,
412  	};
413  	struct xfs_trans	*tp;
414  	int			error = 0;
415  
416  	if (xfs_bulkstat_already_done(breq->mp, breq->startino))
417  		return 0;
418  
419  	/*
420  	 * Grab an empty transaction so that we can use its recursive buffer
421  	 * locking abilities to detect cycles in the inobt without deadlocking.
422  	 */
423  	error = xfs_trans_alloc_empty(breq->mp, &tp);
424  	if (error)
425  		goto out;
426  
427  	error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
428  			xfs_inumbers_walk, breq->icount, &ic);
429  	xfs_trans_cancel(tp);
430  out:
431  
432  	/*
433  	 * We found some inode groups, so clear the error status and return
434  	 * them.  The lastino pointer will point directly at the inode that
435  	 * triggered any error that occurred, so on the next call the error
436  	 * will be triggered again and propagated to userspace as there will be
437  	 * no formatted inode groups in the buffer.
438  	 */
439  	if (breq->ocount > 0)
440  		error = 0;
441  
442  	return error;
443  }
444  
445  /* Convert an inumbers (v5) struct to a inogrp (v1) struct. */
446  void
447  xfs_inumbers_to_inogrp(
448  	struct xfs_inogrp		*ig1,
449  	const struct xfs_inumbers	*ig)
450  {
451  	/* memset is needed here because of padding holes in the structure. */
452  	memset(ig1, 0, sizeof(struct xfs_inogrp));
453  	ig1->xi_startino = ig->xi_startino;
454  	ig1->xi_alloccount = ig->xi_alloccount;
455  	ig1->xi_allocmask = ig->xi_allocmask;
456  }
457