xref: /linux/fs/file.c (revision 44e694958b95395bd1c41508c88c8ca141bf9bd7)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  linux/fs/file.c
4   *
5   *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
6   *
7   *  Manage the dynamic fd arrays in the process files_struct.
8   */
9  
10  #include <linux/syscalls.h>
11  #include <linux/export.h>
12  #include <linux/fs.h>
13  #include <linux/kernel.h>
14  #include <linux/mm.h>
15  #include <linux/sched/signal.h>
16  #include <linux/slab.h>
17  #include <linux/file.h>
18  #include <linux/fdtable.h>
19  #include <linux/bitops.h>
20  #include <linux/spinlock.h>
21  #include <linux/rcupdate.h>
22  #include <linux/close_range.h>
23  #include <net/sock.h>
24  
25  #include "internal.h"
26  
27  unsigned int sysctl_nr_open __read_mostly = 1024*1024;
28  unsigned int sysctl_nr_open_min = BITS_PER_LONG;
29  /* our min() is unusable in constant expressions ;-/ */
30  #define __const_min(x, y) ((x) < (y) ? (x) : (y))
31  unsigned int sysctl_nr_open_max =
32  	__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
33  
34  static void __free_fdtable(struct fdtable *fdt)
35  {
36  	kvfree(fdt->fd);
37  	kvfree(fdt->open_fds);
38  	kfree(fdt);
39  }
40  
41  static void free_fdtable_rcu(struct rcu_head *rcu)
42  {
43  	__free_fdtable(container_of(rcu, struct fdtable, rcu));
44  }
45  
46  #define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
47  #define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
48  
49  /*
50   * Copy 'count' fd bits from the old table to the new table and clear the extra
51   * space if any.  This does not copy the file pointers.  Called with the files
52   * spinlock held for write.
53   */
54  static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
55  			    unsigned int count)
56  {
57  	unsigned int cpy, set;
58  
59  	cpy = count / BITS_PER_BYTE;
60  	set = (nfdt->max_fds - count) / BITS_PER_BYTE;
61  	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
62  	memset((char *)nfdt->open_fds + cpy, 0, set);
63  	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
64  	memset((char *)nfdt->close_on_exec + cpy, 0, set);
65  
66  	cpy = BITBIT_SIZE(count);
67  	set = BITBIT_SIZE(nfdt->max_fds) - cpy;
68  	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
69  	memset((char *)nfdt->full_fds_bits + cpy, 0, set);
70  }
71  
72  /*
73   * Copy all file descriptors from the old table to the new, expanded table and
74   * clear the extra space.  Called with the files spinlock held for write.
75   */
76  static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
77  {
78  	size_t cpy, set;
79  
80  	BUG_ON(nfdt->max_fds < ofdt->max_fds);
81  
82  	cpy = ofdt->max_fds * sizeof(struct file *);
83  	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
84  	memcpy(nfdt->fd, ofdt->fd, cpy);
85  	memset((char *)nfdt->fd + cpy, 0, set);
86  
87  	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
88  }
89  
90  /*
91   * Note how the fdtable bitmap allocations very much have to be a multiple of
92   * BITS_PER_LONG. This is not only because we walk those things in chunks of
93   * 'unsigned long' in some places, but simply because that is how the Linux
94   * kernel bitmaps are defined to work: they are not "bits in an array of bytes",
95   * they are very much "bits in an array of unsigned long".
96   *
97   * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
98   * by that "1024/sizeof(ptr)" before, we already know there are sufficient
99   * clear low bits. Clang seems to realize that, gcc ends up being confused.
100   *
101   * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
102   * let's consider it documentation (and maybe a test-case for gcc to improve
103   * its code generation ;)
104   */
105  static struct fdtable * alloc_fdtable(unsigned int nr)
106  {
107  	struct fdtable *fdt;
108  	void *data;
109  
110  	/*
111  	 * Figure out how many fds we actually want to support in this fdtable.
112  	 * Allocation steps are keyed to the size of the fdarray, since it
113  	 * grows far faster than any of the other dynamic data. We try to fit
114  	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
115  	 * and growing in powers of two from there on.
116  	 */
117  	nr /= (1024 / sizeof(struct file *));
118  	nr = roundup_pow_of_two(nr + 1);
119  	nr *= (1024 / sizeof(struct file *));
120  	nr = ALIGN(nr, BITS_PER_LONG);
121  	/*
122  	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
123  	 * had been set lower between the check in expand_files() and here.  Deal
124  	 * with that in the caller; it's cheaper that way.
125  	 *
126  	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
127  	 * bitmaps handling below becomes unpleasant, to put it mildly...
128  	 */
129  	if (unlikely(nr > sysctl_nr_open))
130  		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
131  
132  	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
133  	if (!fdt)
134  		goto out;
135  	fdt->max_fds = nr;
136  	data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
137  	if (!data)
138  		goto out_fdt;
139  	fdt->fd = data;
140  
141  	data = kvmalloc(max_t(size_t,
142  				 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
143  				 GFP_KERNEL_ACCOUNT);
144  	if (!data)
145  		goto out_arr;
146  	fdt->open_fds = data;
147  	data += nr / BITS_PER_BYTE;
148  	fdt->close_on_exec = data;
149  	data += nr / BITS_PER_BYTE;
150  	fdt->full_fds_bits = data;
151  
152  	return fdt;
153  
154  out_arr:
155  	kvfree(fdt->fd);
156  out_fdt:
157  	kfree(fdt);
158  out:
159  	return NULL;
160  }
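
/*
 * Illustrative sketch, not part of this file: the sizing arithmetic above,
 * redone as a stand-alone userspace program so the growth steps are easy to
 * follow.  roundup_pow_of_two() and ALIGN() are replaced by local
 * equivalents, and fd 300 is an arbitrary example; on a 64-bit machine it
 * ends up with a 512-slot table.
 */
#include <stdio.h>

static unsigned long example_roundup_pow_of_two(unsigned long n)
{
	unsigned long r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int nr = 300;				/* highest fd requested */
	const unsigned int slots_per_1k = 1024 / sizeof(void *);
	const unsigned int bits_per_long = 8 * sizeof(long);

	nr /= slots_per_1k;				/* 300 / 128 -> 2  */
	nr = example_roundup_pow_of_two(nr + 1);	/* 3 -> 4          */
	nr *= slots_per_1k;				/* 4 * 128 -> 512  */
	nr = (nr + bits_per_long - 1) & ~(bits_per_long - 1);	/* ALIGN() */
	printf("max_fds = %u\n", nr);			/* 512 on 64-bit   */
	return 0;
}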
161  
162  /*
163   * Expand the file descriptor table.
164   * This function will allocate a new fdtable and both fd array and fdset, of
165   * the given size.
166   * Return <0 error code on error; 1 on successful completion.
167   * The files->file_lock should be held on entry, and will be held on exit.
168   */
169  static int expand_fdtable(struct files_struct *files, unsigned int nr)
170  	__releases(files->file_lock)
171  	__acquires(files->file_lock)
172  {
173  	struct fdtable *new_fdt, *cur_fdt;
174  
175  	spin_unlock(&files->file_lock);
176  	new_fdt = alloc_fdtable(nr);
177  
178  	/* Make sure that all fd_install() callers have seen resize_in_progress
179  	 * or have finished their rcu_read_lock_sched() section.
180  	 */
181  	if (atomic_read(&files->count) > 1)
182  		synchronize_rcu();
183  
184  	spin_lock(&files->file_lock);
185  	if (!new_fdt)
186  		return -ENOMEM;
187  	/*
188  	 * extremely unlikely race - sysctl_nr_open decreased between the check in
189  	 * caller and alloc_fdtable().  Cheaper to catch it here...
190  	 */
191  	if (unlikely(new_fdt->max_fds <= nr)) {
192  		__free_fdtable(new_fdt);
193  		return -EMFILE;
194  	}
195  	cur_fdt = files_fdtable(files);
196  	BUG_ON(nr < cur_fdt->max_fds);
197  	copy_fdtable(new_fdt, cur_fdt);
198  	rcu_assign_pointer(files->fdt, new_fdt);
199  	if (cur_fdt != &files->fdtab)
200  		call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
201  	/* coupled with smp_rmb() in fd_install() */
202  	smp_wmb();
203  	return 1;
204  }
205  
206  /*
207   * Expand files.
208   * This function will expand the file structures, if the requested size exceeds
209   * the current capacity and there is room for expansion.
210   * Return <0 error code on error; 0 when nothing done; 1 when files were
211   * expanded and execution may have blocked.
212   * The files->file_lock should be held on entry, and will be held on exit.
213   */
214  static int expand_files(struct files_struct *files, unsigned int nr)
215  	__releases(files->file_lock)
216  	__acquires(files->file_lock)
217  {
218  	struct fdtable *fdt;
219  	int expanded = 0;
220  
221  repeat:
222  	fdt = files_fdtable(files);
223  
224  	/* Do we need to expand? */
225  	if (nr < fdt->max_fds)
226  		return expanded;
227  
228  	/* Can we expand? */
229  	if (nr >= sysctl_nr_open)
230  		return -EMFILE;
231  
232  	if (unlikely(files->resize_in_progress)) {
233  		spin_unlock(&files->file_lock);
234  		expanded = 1;
235  		wait_event(files->resize_wait, !files->resize_in_progress);
236  		spin_lock(&files->file_lock);
237  		goto repeat;
238  	}
239  
240  	/* All good, so we try */
241  	files->resize_in_progress = true;
242  	expanded = expand_fdtable(files, nr);
243  	files->resize_in_progress = false;
244  
245  	wake_up_all(&files->resize_wait);
246  	return expanded;
247  }
248  
249  static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
250  {
251  	__set_bit(fd, fdt->close_on_exec);
252  }
253  
254  static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
255  {
256  	if (test_bit(fd, fdt->close_on_exec))
257  		__clear_bit(fd, fdt->close_on_exec);
258  }
259  
260  static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
261  {
262  	__set_bit(fd, fdt->open_fds);
263  	fd /= BITS_PER_LONG;
264  	if (!~fdt->open_fds[fd])
265  		__set_bit(fd, fdt->full_fds_bits);
266  }
267  
268  static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
269  {
270  	__clear_bit(fd, fdt->open_fds);
271  	__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
272  }
273  
274  static unsigned int count_open_files(struct fdtable *fdt)
275  {
276  	unsigned int size = fdt->max_fds;
277  	unsigned int i;
278  
279  	/* Find the last open fd */
280  	for (i = size / BITS_PER_LONG; i > 0; ) {
281  		if (fdt->open_fds[--i])
282  			break;
283  	}
284  	i = (i + 1) * BITS_PER_LONG;
285  	return i;
286  }
287  
288  /*
289   * Note that a sane fdtable size always has to be a multiple of
290   * BITS_PER_LONG, since we have bitmaps that are sized by this.
291   *
292   * 'max_fds' will normally already be properly aligned, but it
293   * turns out that in the close_range() -> __close_range() ->
294   * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
295   * up having a 'max_fds' value that isn't already aligned.
296   *
297   * Rather than make close_range() have to worry about this,
298   * just make that BITS_PER_LONG alignment be part of a sane
299   * fdtable size. Because that's really what it is.
300   */
301  static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
302  {
303  	unsigned int count;
304  
305  	count = count_open_files(fdt);
306  	if (max_fds < NR_OPEN_DEFAULT)
307  		max_fds = NR_OPEN_DEFAULT;
308  	return ALIGN(min(count, max_fds), BITS_PER_LONG);
309  }
310  
311  /*
312   * Allocate a new files structure and copy contents from the
313   * passed in files structure.
314   * errorp will be valid only when the returned files_struct is NULL.
315   */
316  struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
317  {
318  	struct files_struct *newf;
319  	struct file **old_fds, **new_fds;
320  	unsigned int open_files, i;
321  	struct fdtable *old_fdt, *new_fdt;
322  
323  	*errorp = -ENOMEM;
324  	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
325  	if (!newf)
326  		goto out;
327  
328  	atomic_set(&newf->count, 1);
329  
330  	spin_lock_init(&newf->file_lock);
331  	newf->resize_in_progress = false;
332  	init_waitqueue_head(&newf->resize_wait);
333  	newf->next_fd = 0;
334  	new_fdt = &newf->fdtab;
335  	new_fdt->max_fds = NR_OPEN_DEFAULT;
336  	new_fdt->close_on_exec = newf->close_on_exec_init;
337  	new_fdt->open_fds = newf->open_fds_init;
338  	new_fdt->full_fds_bits = newf->full_fds_bits_init;
339  	new_fdt->fd = &newf->fd_array[0];
340  
341  	spin_lock(&oldf->file_lock);
342  	old_fdt = files_fdtable(oldf);
343  	open_files = sane_fdtable_size(old_fdt, max_fds);
344  
345  	/*
346  	 * Check whether we need to allocate a larger fd array and fd set.
347  	 */
348  	while (unlikely(open_files > new_fdt->max_fds)) {
349  		spin_unlock(&oldf->file_lock);
350  
351  		if (new_fdt != &newf->fdtab)
352  			__free_fdtable(new_fdt);
353  
354  		new_fdt = alloc_fdtable(open_files - 1);
355  		if (!new_fdt) {
356  			*errorp = -ENOMEM;
357  			goto out_release;
358  		}
359  
360  		/* beyond sysctl_nr_open; nothing to do */
361  		if (unlikely(new_fdt->max_fds < open_files)) {
362  			__free_fdtable(new_fdt);
363  			*errorp = -EMFILE;
364  			goto out_release;
365  		}
366  
367  		/*
368  		 * Reacquire the oldf lock and a pointer to its fd table;
369  		 * it may have grown a new, bigger fd table meanwhile, so
370  		 * we need the latest pointer.
371  		 */
372  		spin_lock(&oldf->file_lock);
373  		old_fdt = files_fdtable(oldf);
374  		open_files = sane_fdtable_size(old_fdt, max_fds);
375  	}
376  
377  	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
378  
379  	old_fds = old_fdt->fd;
380  	new_fds = new_fdt->fd;
381  
382  	for (i = open_files; i != 0; i--) {
383  		struct file *f = *old_fds++;
384  		if (f) {
385  			get_file(f);
386  		} else {
387  			/*
388  			 * The fd may be claimed in the fd bitmap but not yet
389  			 * instantiated in the files array if a sibling thread
390  			 * is partway through open().  So make sure that this
391  			 * fd is available to the new process.
392  			 */
393  			__clear_open_fd(open_files - i, new_fdt);
394  		}
395  		rcu_assign_pointer(*new_fds++, f);
396  	}
397  	spin_unlock(&oldf->file_lock);
398  
399  	/* clear the remainder */
400  	memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
401  
402  	rcu_assign_pointer(newf->fdt, new_fdt);
403  
404  	return newf;
405  
406  out_release:
407  	kmem_cache_free(files_cachep, newf);
408  out:
409  	return NULL;
410  }
411  
412  static struct fdtable *close_files(struct files_struct * files)
413  {
414  	/*
415  	 * It is safe to dereference the fd table without RCU or
416  	 * ->file_lock because this is the last reference to the
417  	 * files structure.
418  	 */
419  	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
420  	unsigned int i, j = 0;
421  
422  	for (;;) {
423  		unsigned long set;
424  		i = j * BITS_PER_LONG;
425  		if (i >= fdt->max_fds)
426  			break;
427  		set = fdt->open_fds[j++];
428  		while (set) {
429  			if (set & 1) {
430  				struct file * file = xchg(&fdt->fd[i], NULL);
431  				if (file) {
432  					filp_close(file, files);
433  					cond_resched();
434  				}
435  			}
436  			i++;
437  			set >>= 1;
438  		}
439  	}
440  
441  	return fdt;
442  }
443  
444  void put_files_struct(struct files_struct *files)
445  {
446  	if (atomic_dec_and_test(&files->count)) {
447  		struct fdtable *fdt = close_files(files);
448  
449  		/* free the arrays if they are not embedded */
450  		if (fdt != &files->fdtab)
451  			__free_fdtable(fdt);
452  		kmem_cache_free(files_cachep, files);
453  	}
454  }
455  
456  void exit_files(struct task_struct *tsk)
457  {
458  	struct files_struct * files = tsk->files;
459  
460  	if (files) {
461  		task_lock(tsk);
462  		tsk->files = NULL;
463  		task_unlock(tsk);
464  		put_files_struct(files);
465  	}
466  }
467  
468  struct files_struct init_files = {
469  	.count		= ATOMIC_INIT(1),
470  	.fdt		= &init_files.fdtab,
471  	.fdtab		= {
472  		.max_fds	= NR_OPEN_DEFAULT,
473  		.fd		= &init_files.fd_array[0],
474  		.close_on_exec	= init_files.close_on_exec_init,
475  		.open_fds	= init_files.open_fds_init,
476  		.full_fds_bits	= init_files.full_fds_bits_init,
477  	},
478  	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
479  	.resize_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
480  };
481  
482  static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
483  {
484  	unsigned int maxfd = fdt->max_fds;
485  	unsigned int maxbit = maxfd / BITS_PER_LONG;
486  	unsigned int bitbit = start / BITS_PER_LONG;
487  
488  	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
489  	if (bitbit > maxfd)
490  		return maxfd;
491  	if (bitbit > start)
492  		start = bitbit;
493  	return find_next_zero_bit(fdt->open_fds, maxfd, start);
494  }
495  
496  /*
497   * allocate a file descriptor, mark it busy.
498   */
499  static int alloc_fd(unsigned start, unsigned end, unsigned flags)
500  {
501  	struct files_struct *files = current->files;
502  	unsigned int fd;
503  	int error;
504  	struct fdtable *fdt;
505  
506  	spin_lock(&files->file_lock);
507  repeat:
508  	fdt = files_fdtable(files);
509  	fd = start;
510  	if (fd < files->next_fd)
511  		fd = files->next_fd;
512  
513  	if (fd < fdt->max_fds)
514  		fd = find_next_fd(fdt, fd);
515  
516  	/*
517  	 * N.B. For clone tasks sharing a files structure, this test
518  	 * will limit the total number of files that can be opened.
519  	 */
520  	error = -EMFILE;
521  	if (fd >= end)
522  		goto out;
523  
524  	error = expand_files(files, fd);
525  	if (error < 0)
526  		goto out;
527  
528  	/*
529  	 * If we needed to expand the fd array we
530  	 * might have blocked - try again.
531  	 */
532  	if (error)
533  		goto repeat;
534  
535  	if (start <= files->next_fd)
536  		files->next_fd = fd + 1;
537  
538  	__set_open_fd(fd, fdt);
539  	if (flags & O_CLOEXEC)
540  		__set_close_on_exec(fd, fdt);
541  	else
542  		__clear_close_on_exec(fd, fdt);
543  	error = fd;
544  #if 1
545  	/* Sanity check */
546  	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
547  		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
548  		rcu_assign_pointer(fdt->fd[fd], NULL);
549  	}
550  #endif
551  
552  out:
553  	spin_unlock(&files->file_lock);
554  	return error;
555  }
556  
557  int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
558  {
559  	return alloc_fd(0, nofile, flags);
560  }
561  
562  int get_unused_fd_flags(unsigned flags)
563  {
564  	return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
565  }
566  EXPORT_SYMBOL(get_unused_fd_flags);
567  
568  static void __put_unused_fd(struct files_struct *files, unsigned int fd)
569  {
570  	struct fdtable *fdt = files_fdtable(files);
571  	__clear_open_fd(fd, fdt);
572  	if (fd < files->next_fd)
573  		files->next_fd = fd;
574  }
575  
576  void put_unused_fd(unsigned int fd)
577  {
578  	struct files_struct *files = current->files;
579  	spin_lock(&files->file_lock);
580  	__put_unused_fd(files, fd);
581  	spin_unlock(&files->file_lock);
582  }
583  
584  EXPORT_SYMBOL(put_unused_fd);
585  
586  /*
587   * Install a file pointer in the fd array.
588   *
589   * The VFS is full of places where we drop the files lock between
590   * setting the open_fds bitmap and installing the file in the file
591   * array.  At any such point, we are vulnerable to a dup2() race
592   * installing a file in the array before us.  We need to detect this and
593   * fput() the struct file we are about to overwrite in this case.
594   *
595   * It should never happen - if we allow dup2() do it, _really_ bad things
596   * will follow.
597   *
598   * This consumes the "file" refcount, so callers should treat it
599   * as if they had called fput(file).
600   */
601  
602  void fd_install(unsigned int fd, struct file *file)
603  {
604  	struct files_struct *files = current->files;
605  	struct fdtable *fdt;
606  
607  	if (WARN_ON_ONCE(unlikely(file->f_mode & FMODE_BACKING)))
608  		return;
609  
610  	rcu_read_lock_sched();
611  
612  	if (unlikely(files->resize_in_progress)) {
613  		rcu_read_unlock_sched();
614  		spin_lock(&files->file_lock);
615  		fdt = files_fdtable(files);
616  		BUG_ON(fdt->fd[fd] != NULL);
617  		rcu_assign_pointer(fdt->fd[fd], file);
618  		spin_unlock(&files->file_lock);
619  		return;
620  	}
621  	/* coupled with smp_wmb() in expand_fdtable() */
622  	smp_rmb();
623  	fdt = rcu_dereference_sched(files->fdt);
624  	BUG_ON(fdt->fd[fd] != NULL);
625  	rcu_assign_pointer(fdt->fd[fd], file);
626  	rcu_read_unlock_sched();
627  }
628  
629  EXPORT_SYMBOL(fd_install);
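
/*
 * Illustrative sketch, not part of this file: how kernel code typically
 * pairs the helpers above when handing a new descriptor to userspace.  The
 * function name and the origin of @file are hypothetical; the ordering is
 * the real contract - reserve the fd first, publish the file last, and do
 * not touch the file again after fd_install() has consumed the reference.
 */
static int example_install_file(struct file *file)
{
	int fd = get_unused_fd_flags(O_CLOEXEC);

	if (fd < 0) {
		fput(file);	/* we still own the reference on failure */
		return fd;
	}

	/* fd_install() consumes our reference; no fput() after this point. */
	fd_install(fd, file);
	return fd;
}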
630  
631  /**
632   * pick_file - return file associated with fd
633   * @files: file struct to retrieve file from
634   * @fd: file descriptor to retrieve file for
635   *
636   * Context: files_lock must be held.
637   *
638   * Returns: The file associated with @fd (NULL if @fd is not open)
639   */
640  static struct file *pick_file(struct files_struct *files, unsigned fd)
641  {
642  	struct fdtable *fdt = files_fdtable(files);
643  	struct file *file;
644  
645  	if (fd >= fdt->max_fds)
646  		return NULL;
647  
648  	fd = array_index_nospec(fd, fdt->max_fds);
649  	file = fdt->fd[fd];
650  	if (file) {
651  		rcu_assign_pointer(fdt->fd[fd], NULL);
652  		__put_unused_fd(files, fd);
653  	}
654  	return file;
655  }
656  
657  int close_fd(unsigned fd)
658  {
659  	struct files_struct *files = current->files;
660  	struct file *file;
661  
662  	spin_lock(&files->file_lock);
663  	file = pick_file(files, fd);
664  	spin_unlock(&files->file_lock);
665  	if (!file)
666  		return -EBADF;
667  
668  	return filp_close(file, files);
669  }
670  EXPORT_SYMBOL(close_fd); /* for ksys_close() */
671  
672  /**
673   * last_fd - return last valid index into fd table
674   * @fdt: File descriptor table.
675   *
676   * Context: Either rcu read lock or files_lock must be held.
677   *
678   * Returns: Last valid index into fdtable.
679   */
680  static inline unsigned last_fd(struct fdtable *fdt)
681  {
682  	return fdt->max_fds - 1;
683  }
684  
685  static inline void __range_cloexec(struct files_struct *cur_fds,
686  				   unsigned int fd, unsigned int max_fd)
687  {
688  	struct fdtable *fdt;
689  
690  	/* make sure we're using the correct maximum value */
691  	spin_lock(&cur_fds->file_lock);
692  	fdt = files_fdtable(cur_fds);
693  	max_fd = min(last_fd(fdt), max_fd);
694  	if (fd <= max_fd)
695  		bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
696  	spin_unlock(&cur_fds->file_lock);
697  }
698  
699  static inline void __range_close(struct files_struct *files, unsigned int fd,
700  				 unsigned int max_fd)
701  {
702  	struct file *file;
703  	unsigned n;
704  
705  	spin_lock(&files->file_lock);
706  	n = last_fd(files_fdtable(files));
707  	max_fd = min(max_fd, n);
708  
709  	for (; fd <= max_fd; fd++) {
710  		file = pick_file(files, fd);
711  		if (file) {
712  			spin_unlock(&files->file_lock);
713  			filp_close(file, files);
714  			cond_resched();
715  			spin_lock(&files->file_lock);
716  		} else if (need_resched()) {
717  			spin_unlock(&files->file_lock);
718  			cond_resched();
719  			spin_lock(&files->file_lock);
720  		}
721  	}
722  	spin_unlock(&files->file_lock);
723  }
724  
725  /**
726   * __close_range() - Close all file descriptors in a given range.
727   *
728   * @fd:     starting file descriptor to close
729   * @max_fd: last file descriptor to close
730   * @flags:  CLOSE_RANGE flags.
731   *
732   * This closes a range of file descriptors. All file descriptors
733   * from @fd up to and including @max_fd are closed.
734   */
735  int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
736  {
737  	struct task_struct *me = current;
738  	struct files_struct *cur_fds = me->files, *fds = NULL;
739  
740  	if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
741  		return -EINVAL;
742  
743  	if (fd > max_fd)
744  		return -EINVAL;
745  
746  	if (flags & CLOSE_RANGE_UNSHARE) {
747  		int ret;
748  		unsigned int max_unshare_fds = NR_OPEN_MAX;
749  
750  		/*
751  		 * If the caller requested all fds to be made cloexec we always
752  		 * copy all of the file descriptors since they still want to
753  		 * use them.
754  		 */
755  		if (!(flags & CLOSE_RANGE_CLOEXEC)) {
756  			/*
757  			 * If the requested range is greater than the current
758  			 * maximum, we're closing everything, so only copy the
759  			 * file descriptors beneath the lowest one being closed.
760  			 */
761  			rcu_read_lock();
762  			if (max_fd >= last_fd(files_fdtable(cur_fds)))
763  				max_unshare_fds = fd;
764  			rcu_read_unlock();
765  		}
766  
767  		ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
768  		if (ret)
769  			return ret;
770  
771  		/*
772  		 * We used to share our file descriptor table, and have now
773  		 * created a private one; make sure we're using it below.
774  		 */
775  		if (fds)
776  			swap(cur_fds, fds);
777  	}
778  
779  	if (flags & CLOSE_RANGE_CLOEXEC)
780  		__range_cloexec(cur_fds, fd, max_fd);
781  	else
782  		__range_close(cur_fds, fd, max_fd);
783  
784  	if (fds) {
785  		/*
786  		 * We're done closing the files we were supposed to. Time to install
787  		 * the new file descriptor table and drop the old one.
788  		 */
789  		task_lock(me);
790  		me->files = cur_fds;
791  		task_unlock(me);
792  		put_files_struct(fds);
793  	}
794  
795  	return 0;
796  }
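
/*
 * Illustrative sketch, not part of this file: what the close_range(2)
 * syscall built on top of __close_range() looks like from userspace, as a
 * stand-alone snippet.  Marking everything above stderr close-on-exec is a
 * common pre-exec hygiene step; the glibc >= 2.34 close_range() wrapper is
 * assumed.
 */
#define _GNU_SOURCE
#include <unistd.h>
#include <linux/close_range.h>

static void example_scrub_fds(void)
{
	/* Flag every fd >= 3 close-on-exec instead of closing it outright. */
	close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
}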
797  
798  /*
799   * See close_fd_get_file() below; this variant assumes current->files->file_lock
800   * is held.
801   */
802  struct file *__close_fd_get_file(unsigned int fd)
803  {
804  	return pick_file(current->files, fd);
805  }
806  
807  /*
808   * variant of close_fd that gets a ref on the file for later fput.
809   * The caller must ensure that filp_close() called on the file.
810   */
811  struct file *close_fd_get_file(unsigned int fd)
812  {
813  	struct files_struct *files = current->files;
814  	struct file *file;
815  
816  	spin_lock(&files->file_lock);
817  	file = pick_file(files, fd);
818  	spin_unlock(&files->file_lock);
819  
820  	return file;
821  }
822  
823  void do_close_on_exec(struct files_struct *files)
824  {
825  	unsigned i;
826  	struct fdtable *fdt;
827  
828  	/* exec unshares first */
829  	spin_lock(&files->file_lock);
830  	for (i = 0; ; i++) {
831  		unsigned long set;
832  		unsigned fd = i * BITS_PER_LONG;
833  		fdt = files_fdtable(files);
834  		if (fd >= fdt->max_fds)
835  			break;
836  		set = fdt->close_on_exec[i];
837  		if (!set)
838  			continue;
839  		fdt->close_on_exec[i] = 0;
840  		for ( ; set ; fd++, set >>= 1) {
841  			struct file *file;
842  			if (!(set & 1))
843  				continue;
844  			file = fdt->fd[fd];
845  			if (!file)
846  				continue;
847  			rcu_assign_pointer(fdt->fd[fd], NULL);
848  			__put_unused_fd(files, fd);
849  			spin_unlock(&files->file_lock);
850  			filp_close(file, files);
851  			cond_resched();
852  			spin_lock(&files->file_lock);
853  		}
854  
855  	}
856  	spin_unlock(&files->file_lock);
857  }
858  
859  static struct file *__get_file_rcu(struct file __rcu **f)
860  {
861  	struct file __rcu *file;
862  	struct file __rcu *file_reloaded;
863  	struct file __rcu *file_reloaded_cmp;
864  
865  	file = rcu_dereference_raw(*f);
866  	if (!file)
867  		return NULL;
868  
869  	if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
870  		return ERR_PTR(-EAGAIN);
871  
872  	file_reloaded = rcu_dereference_raw(*f);
873  
874  	/*
875  	 * Ensure that all accesses have a dependency on the load from
876  	 * rcu_dereference_raw() above so we get correct ordering
877  	 * between reuse/allocation and the pointer check below.
878  	 */
879  	file_reloaded_cmp = file_reloaded;
880  	OPTIMIZER_HIDE_VAR(file_reloaded_cmp);
881  
882  	/*
883  	 * atomic_long_inc_not_zero() above provided a full memory
884  	 * barrier when we acquired a reference.
885  	 *
886  	 * This is paired with the write barrier from assigning to the
887  	 * __rcu protected file pointer so that if that pointer still
888  	 * matches the current file, we know we have successfully
889  	 * acquired a reference to the right file.
890  	 *
891  	 * If the pointers don't match the file has been reallocated by
892  	 * SLAB_TYPESAFE_BY_RCU.
893  	 */
894  	if (file == file_reloaded_cmp)
895  		return file_reloaded;
896  
897  	fput(file);
898  	return ERR_PTR(-EAGAIN);
899  }
900  
901  /**
902   * get_file_rcu - try to get a reference to a file under rcu
903   * @f: the file to get a reference on
904   *
905   * This function tries to get a reference on @f carefully verifying that
906   * @f hasn't been reused.
907   *
908   * This function should rarely have to be used and only by users who
909   * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it.
910   *
911   * Return: Returns @f with the reference count increased or NULL.
912   */
913  struct file *get_file_rcu(struct file __rcu **f)
914  {
915  	for (;;) {
916  		struct file __rcu *file;
917  
918  		file = __get_file_rcu(f);
919  		if (unlikely(!file))
920  			return NULL;
921  
922  		if (unlikely(IS_ERR(file)))
923  			continue;
924  
925  		return file;
926  	}
927  }
928  EXPORT_SYMBOL_GPL(get_file_rcu);
929  
930  /**
931   * get_file_active - try to get a reference to a file
932   * @f: the file to get a reference on
933   *
934   * In contrast to get_file_rcu(), the pointer itself isn't part of the
935   * reference counting.
936   *
937   * This function should rarely have to be used and only by users who
938   * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it.
939   *
940   * Return: Returns @f with the reference count increased or NULL.
941   */
942  struct file *get_file_active(struct file **f)
943  {
944  	struct file __rcu *file;
945  
946  	rcu_read_lock();
947  	file = __get_file_rcu(f);
948  	rcu_read_unlock();
949  	if (IS_ERR(file))
950  		file = NULL;
951  	return file;
952  }
953  EXPORT_SYMBOL_GPL(get_file_active);
954  
955  static inline struct file *__fget_files_rcu(struct files_struct *files,
956         unsigned int fd, fmode_t mask)
957  {
958  	for (;;) {
959  		struct file *file;
960  		struct fdtable *fdt = rcu_dereference_raw(files->fdt);
961  		struct file __rcu **fdentry;
962  
963  		if (unlikely(fd >= fdt->max_fds))
964  			return NULL;
965  
966  		fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
967  
968  		/*
969  		 * Ok, we have a file pointer. However, because we do
970  		 * this all locklessly under RCU, we may be racing with
971  		 * that file being closed.
972  		 *
973  		 * Such a race can take two forms:
974  		 *
975  		 *  (a) the file ref already went down to zero and the
976  		 *      file hasn't been reused yet or the file count
977  		 *      isn't zero but the file has already been reused.
978  		 */
979  		file = __get_file_rcu(fdentry);
980  		if (unlikely(!file))
981  			return NULL;
982  
983  		if (unlikely(IS_ERR(file)))
984  			continue;
985  
986  		/*
987  		 *  (b) the file table entry has changed under us.
988  		 *       Note that we don't need to re-check the 'fdt->fd'
989  		 *       pointer having changed, because it always goes
990  		 *       hand-in-hand with 'fdt'.
991  		 *
992  		 * If so, we need to put our ref and try again.
993  		 */
994  		if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
995  			fput(file);
996  			continue;
997  		}
998  
999  		/*
1000  		 * This isn't the file we're looking for or we're not
1001  		 * allowed to get a reference to it.
1002  		 */
1003  		if (unlikely(file->f_mode & mask)) {
1004  			fput(file);
1005  			return NULL;
1006  		}
1007  
1008  		/*
1009  		 * Ok, we have a ref to the file, and checked that it
1010  		 * still exists.
1011  		 */
1012  		return file;
1013  	}
1014  }
1015  
1016  static struct file *__fget_files(struct files_struct *files, unsigned int fd,
1017  				 fmode_t mask)
1018  {
1019  	struct file *file;
1020  
1021  	rcu_read_lock();
1022  	file = __fget_files_rcu(files, fd, mask);
1023  	rcu_read_unlock();
1024  
1025  	return file;
1026  }
1027  
1028  static inline struct file *__fget(unsigned int fd, fmode_t mask)
1029  {
1030  	return __fget_files(current->files, fd, mask);
1031  }
1032  
1033  struct file *fget(unsigned int fd)
1034  {
1035  	return __fget(fd, FMODE_PATH);
1036  }
1037  EXPORT_SYMBOL(fget);
1038  
1039  struct file *fget_raw(unsigned int fd)
1040  {
1041  	return __fget(fd, 0);
1042  }
1043  EXPORT_SYMBOL(fget_raw);
1044  
1045  struct file *fget_task(struct task_struct *task, unsigned int fd)
1046  {
1047  	struct file *file = NULL;
1048  
1049  	task_lock(task);
1050  	if (task->files)
1051  		file = __fget_files(task->files, fd, 0);
1052  	task_unlock(task);
1053  
1054  	return file;
1055  }
1056  
1057  struct file *lookup_fdget_rcu(unsigned int fd)
1058  {
1059  	return __fget_files_rcu(current->files, fd, 0);
1060  
1061  }
1062  EXPORT_SYMBOL_GPL(lookup_fdget_rcu);
1063  
1064  struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd)
1065  {
1066  	/* Must be called with rcu_read_lock held */
1067  	struct files_struct *files;
1068  	struct file *file = NULL;
1069  
1070  	task_lock(task);
1071  	files = task->files;
1072  	if (files)
1073  		file = __fget_files_rcu(files, fd, 0);
1074  	task_unlock(task);
1075  
1076  	return file;
1077  }
1078  
1079  struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *ret_fd)
1080  {
1081  	/* Must be called with rcu_read_lock held */
1082  	struct files_struct *files;
1083  	unsigned int fd = *ret_fd;
1084  	struct file *file = NULL;
1085  
1086  	task_lock(task);
1087  	files = task->files;
1088  	if (files) {
1089  		for (; fd < files_fdtable(files)->max_fds; fd++) {
1090  			file = __fget_files_rcu(files, fd, 0);
1091  			if (file)
1092  				break;
1093  		}
1094  	}
1095  	task_unlock(task);
1096  	*ret_fd = fd;
1097  	return file;
1098  }
1099  EXPORT_SYMBOL(task_lookup_next_fdget_rcu);
1100  
1101  /*
1102   * Lightweight file lookup - no refcnt increment if fd table isn't shared.
1103   *
1104   * You can use this instead of fget if you satisfy all of the following
1105   * conditions:
1106   * 1) You must call fput_light before exiting the syscall and returning control
1107   *    to userspace (i.e. you cannot remember the returned struct file * after
1108   *    returning to userspace).
1109   * 2) You must not call filp_close on the returned struct file * in between
1110   *    calls to fget_light and fput_light.
1111   * 3) You must not clone the current task in between the calls to fget_light
1112   *    and fput_light.
1113   *
1114   * The fput_needed flag returned by fget_light should be passed to the
1115   * corresponding fput_light.
1116   */
1117  static unsigned long __fget_light(unsigned int fd, fmode_t mask)
1118  {
1119  	struct files_struct *files = current->files;
1120  	struct file *file;
1121  
1122  	/*
1123  	 * If another thread is concurrently calling close_fd() followed
1124  	 * by put_files_struct(), we must not observe the old table
1125  	 * entry combined with the new refcount - otherwise we could
1126  	 * return a file that is concurrently being freed.
1127  	 *
1128  	 * atomic_read_acquire() pairs with atomic_dec_and_test() in
1129  	 * put_files_struct().
1130  	 */
1131  	if (atomic_read_acquire(&files->count) == 1) {
1132  		file = files_lookup_fd_raw(files, fd);
1133  		if (!file || unlikely(file->f_mode & mask))
1134  			return 0;
1135  		return (unsigned long)file;
1136  	} else {
1137  		file = __fget(fd, mask);
1138  		if (!file)
1139  			return 0;
1140  		return FDPUT_FPUT | (unsigned long)file;
1141  	}
1142  }
1143  unsigned long __fdget(unsigned int fd)
1144  {
1145  	return __fget_light(fd, FMODE_PATH);
1146  }
1147  EXPORT_SYMBOL(__fdget);
1148  
1149  unsigned long __fdget_raw(unsigned int fd)
1150  {
1151  	return __fget_light(fd, 0);
1152  }
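
/*
 * Illustrative sketch, not part of this file: the fdget()/fdput() helpers
 * from <linux/file.h> are the usual consumers of __fget_light() above.  The
 * syscall body is hypothetical; the points that matter are that the struct
 * fd never outlives the syscall and is always released with fdput().
 */
static long example_syscall_body(unsigned int fd)
{
	struct fd f = fdget(fd);
	long ret = -EBADF;

	if (!f.file)
		return ret;

	/* ... operate on f.file while it is pinned ... */
	ret = 0;

	fdput(f);
	return ret;
}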
1153  
1154  /*
1155   * Try to avoid f_pos locking. We only need it if the
1156   * file is marked for FMODE_ATOMIC_POS, and it can be
1157   * accessed multiple ways.
1158   *
1159   * Always do it for directories, because pidfd_getfd()
1160   * can make a file accessible even if it otherwise would
1161   * not be, and for directories this is a correctness
1162   * issue, not a "POSIX requirement".
1163   */
1164  static inline bool file_needs_f_pos_lock(struct file *file)
1165  {
1166  	return (file->f_mode & FMODE_ATOMIC_POS) &&
1167  		(file_count(file) > 1 || file->f_op->iterate_shared);
1168  }
1169  
1170  unsigned long __fdget_pos(unsigned int fd)
1171  {
1172  	unsigned long v = __fdget(fd);
1173  	struct file *file = (struct file *)(v & ~3);
1174  
1175  	if (file && file_needs_f_pos_lock(file)) {
1176  		v |= FDPUT_POS_UNLOCK;
1177  		mutex_lock(&file->f_pos_lock);
1178  	}
1179  	return v;
1180  }
1181  
1182  void __f_unlock_pos(struct file *f)
1183  {
1184  	mutex_unlock(&f->f_pos_lock);
1185  }
1186  
1187  /*
1188   * We only lock f_pos if we have threads or if the file might be
1189   * shared with another process. In both cases we'll have an elevated
1190   * file count (done either by fdget() or by fork()).
1191   */
1192  
1193  void set_close_on_exec(unsigned int fd, int flag)
1194  {
1195  	struct files_struct *files = current->files;
1196  	struct fdtable *fdt;
1197  	spin_lock(&files->file_lock);
1198  	fdt = files_fdtable(files);
1199  	if (flag)
1200  		__set_close_on_exec(fd, fdt);
1201  	else
1202  		__clear_close_on_exec(fd, fdt);
1203  	spin_unlock(&files->file_lock);
1204  }
1205  
1206  bool get_close_on_exec(unsigned int fd)
1207  {
1208  	struct files_struct *files = current->files;
1209  	struct fdtable *fdt;
1210  	bool res;
1211  	rcu_read_lock();
1212  	fdt = files_fdtable(files);
1213  	res = close_on_exec(fd, fdt);
1214  	rcu_read_unlock();
1215  	return res;
1216  }
1217  
1218  static int do_dup2(struct files_struct *files,
1219  	struct file *file, unsigned fd, unsigned flags)
1220  __releases(&files->file_lock)
1221  {
1222  	struct file *tofree;
1223  	struct fdtable *fdt;
1224  
1225  	/*
1226  	 * We need to detect attempts to do dup2() over allocated but still
1227  	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
1228  	 * extra work in their equivalent of fget() - they insert struct
1229  	 * file immediately after grabbing descriptor, mark it larval if
1230  	 * more work (e.g. actual opening) is needed and make sure that
1231  	 * fget() treats larval files as absent.  Potentially interesting,
1232  	 * but while extra work in fget() is trivial, locking implications
1233  	 * and amount of surgery on open()-related paths in VFS are not.
1234  	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
1235  	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
1236  	 * scope of POSIX or SUS, since neither considers shared descriptor
1237  	 * tables and this condition does not arise without those.
1238  	 */
1239  	fdt = files_fdtable(files);
1240  	tofree = fdt->fd[fd];
1241  	if (!tofree && fd_is_open(fd, fdt))
1242  		goto Ebusy;
1243  	get_file(file);
1244  	rcu_assign_pointer(fdt->fd[fd], file);
1245  	__set_open_fd(fd, fdt);
1246  	if (flags & O_CLOEXEC)
1247  		__set_close_on_exec(fd, fdt);
1248  	else
1249  		__clear_close_on_exec(fd, fdt);
1250  	spin_unlock(&files->file_lock);
1251  
1252  	if (tofree)
1253  		filp_close(tofree, files);
1254  
1255  	return fd;
1256  
1257  Ebusy:
1258  	spin_unlock(&files->file_lock);
1259  	return -EBUSY;
1260  }
1261  
1262  int replace_fd(unsigned fd, struct file *file, unsigned flags)
1263  {
1264  	int err;
1265  	struct files_struct *files = current->files;
1266  
1267  	if (!file)
1268  		return close_fd(fd);
1269  
1270  	if (fd >= rlimit(RLIMIT_NOFILE))
1271  		return -EBADF;
1272  
1273  	spin_lock(&files->file_lock);
1274  	err = expand_files(files, fd);
1275  	if (unlikely(err < 0))
1276  		goto out_unlock;
1277  	return do_dup2(files, file, fd, flags);
1278  
1279  out_unlock:
1280  	spin_unlock(&files->file_lock);
1281  	return err;
1282  }
1283  
1284  /**
1285   * __receive_fd() - Install received file into file descriptor table
1286   * @file: struct file that was received from another process
1287   * @ufd: __user pointer to write new fd number to
1288   * @o_flags: the O_* flags to apply to the new fd entry
1289   *
1290   * Installs a received file into the file descriptor table, with appropriate
1291   * checks and count updates. Optionally writes the fd number to userspace, if
1292   * @ufd is non-NULL.
1293   *
1294   * This helper handles its own reference counting of the incoming
1295   * struct file.
1296   *
1297   * Returns newly install fd or -ve on error.
1298   * Returns the newly installed fd or a negative error code on error.
1299  int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
1300  {
1301  	int new_fd;
1302  	int error;
1303  
1304  	error = security_file_receive(file);
1305  	if (error)
1306  		return error;
1307  
1308  	new_fd = get_unused_fd_flags(o_flags);
1309  	if (new_fd < 0)
1310  		return new_fd;
1311  
1312  	if (ufd) {
1313  		error = put_user(new_fd, ufd);
1314  		if (error) {
1315  			put_unused_fd(new_fd);
1316  			return error;
1317  		}
1318  	}
1319  
1320  	fd_install(new_fd, get_file(file));
1321  	__receive_sock(file);
1322  	return new_fd;
1323  }
1324  
1325  int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
1326  {
1327  	int error;
1328  
1329  	error = security_file_receive(file);
1330  	if (error)
1331  		return error;
1332  	error = replace_fd(new_fd, file, o_flags);
1333  	if (error)
1334  		return error;
1335  	__receive_sock(file);
1336  	return new_fd;
1337  }
1338  
1339  int receive_fd(struct file *file, unsigned int o_flags)
1340  {
1341  	return __receive_fd(file, NULL, o_flags);
1342  }
1343  EXPORT_SYMBOL_GPL(receive_fd);
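
/*
 * Illustrative sketch, not part of this file: typical use of receive_fd()
 * when a struct file arrives from another process (pidfd_getfd() follows
 * this pattern).  How @file was obtained is hypothetical; receive_fd()
 * takes its own reference, so the caller drops the one it already holds.
 */
static int example_adopt_file(struct file *file)
{
	int fd = receive_fd(file, O_CLOEXEC);

	fput(file);	/* drop our reference whether or not it worked */
	return fd;
}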
1344  
1345  static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
1346  {
1347  	int err = -EBADF;
1348  	struct file *file;
1349  	struct files_struct *files = current->files;
1350  
1351  	if ((flags & ~O_CLOEXEC) != 0)
1352  		return -EINVAL;
1353  
1354  	if (unlikely(oldfd == newfd))
1355  		return -EINVAL;
1356  
1357  	if (newfd >= rlimit(RLIMIT_NOFILE))
1358  		return -EBADF;
1359  
1360  	spin_lock(&files->file_lock);
1361  	err = expand_files(files, newfd);
1362  	file = files_lookup_fd_locked(files, oldfd);
1363  	if (unlikely(!file))
1364  		goto Ebadf;
1365  	if (unlikely(err < 0)) {
1366  		if (err == -EMFILE)
1367  			goto Ebadf;
1368  		goto out_unlock;
1369  	}
1370  	return do_dup2(files, file, newfd, flags);
1371  
1372  Ebadf:
1373  	err = -EBADF;
1374  out_unlock:
1375  	spin_unlock(&files->file_lock);
1376  	return err;
1377  }
1378  
1379  SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
1380  {
1381  	return ksys_dup3(oldfd, newfd, flags);
1382  }
1383  
1384  SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
1385  {
1386  	if (unlikely(newfd == oldfd)) { /* corner case */
1387  		struct files_struct *files = current->files;
1388  		struct file *f;
1389  		int retval = oldfd;
1390  
1391  		rcu_read_lock();
1392  		f = __fget_files_rcu(files, oldfd, 0);
1393  		if (!f)
1394  			retval = -EBADF;
1395  		rcu_read_unlock();
1396  		if (f)
1397  			fput(f);
1398  		return retval;
1399  	}
1400  	return ksys_dup3(oldfd, newfd, 0);
1401  }
1402  
1403  SYSCALL_DEFINE1(dup, unsigned int, fildes)
1404  {
1405  	int ret = -EBADF;
1406  	struct file *file = fget_raw(fildes);
1407  
1408  	if (file) {
1409  		ret = get_unused_fd_flags(0);
1410  		if (ret >= 0)
1411  			fd_install(ret, file);
1412  		else
1413  			fput(file);
1414  	}
1415  	return ret;
1416  }
1417  
1418  int f_dupfd(unsigned int from, struct file *file, unsigned flags)
1419  {
1420  	unsigned long nofile = rlimit(RLIMIT_NOFILE);
1421  	int err;
1422  	if (from >= nofile)
1423  		return -EINVAL;
1424  	err = alloc_fd(from, nofile, flags);
1425  	if (err >= 0) {
1426  		get_file(file);
1427  		fd_install(err, file);
1428  	}
1429  	return err;
1430  }
1431  
1432  int iterate_fd(struct files_struct *files, unsigned n,
1433  		int (*f)(const void *, struct file *, unsigned),
1434  		const void *p)
1435  {
1436  	struct fdtable *fdt;
1437  	int res = 0;
1438  	if (!files)
1439  		return 0;
1440  	spin_lock(&files->file_lock);
1441  	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
1442  		struct file *file;
1443  		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
1444  		if (!file)
1445  			continue;
1446  		res = f(p, file, n);
1447  		if (res)
1448  			break;
1449  	}
1450  	spin_unlock(&files->file_lock);
1451  	return res;
1452  }
1453  EXPORT_SYMBOL(iterate_fd);
1454