xref: /linux/fs/proc/base.c (revision 79d2e1919a2728ef49d938eb20ebd5903c14dfb0)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  linux/fs/proc/base.c
4   *
5   *  Copyright (C) 1991, 1992 Linus Torvalds
6   *
7   *  proc base directory handling functions
8   *
9   *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
10   *  Instead of using magical inumbers to determine the kind of object
11   *  we allocate and fill in-core inodes upon lookup. They don't even
12   *  go into icache. We cache the reference to task_struct upon lookup too.
13   *  Eventually it should become a filesystem in its own. We don't use the
14   *  rest of procfs anymore.
15   *
16   *
17   *  Changelog:
18   *  17-Jan-2005
19   *  Allan Bezerra
20   *  Bruna Moreira <bruna.moreira@indt.org.br>
21   *  Edjard Mota <edjard.mota@indt.org.br>
22   *  Ilias Biris <ilias.biris@indt.org.br>
23   *  Mauricio Lin <mauricio.lin@indt.org.br>
24   *
25   *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
26   *
27   *  A new process specific entry (smaps) included in /proc. It shows the
28   *  size of rss for each memory area. The maps entry lacks information
29   *  about physical memory size (rss) for each mapped file, i.e.,
30   *  rss information for executables and library files.
31   *  This additional information is useful for any tools that need to know
32   *  about physical memory consumption for a process specific library.
33   *
34   *  Changelog:
35   *  21-Feb-2005
36   *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
37   *  Pud inclusion in the page table walking.
38   *
39   *  ChangeLog:
40   *  10-Mar-2005
41   *  10LE Instituto Nokia de Tecnologia - INdT:
42   *  A better way to walk through the page table, as suggested by Hugh Dickins.
43   *
44   *  Simo Piiroinen <simo.piiroinen@nokia.com>:
45   *  Smaps information related to shared, private, clean and dirty pages.
46   *
47   *  Paul Mundt <paul.mundt@nokia.com>:
48   *  Overall revision about smaps.
49   */
50  
51  #include <linux/uaccess.h>
52  
53  #include <linux/errno.h>
54  #include <linux/time.h>
55  #include <linux/proc_fs.h>
56  #include <linux/stat.h>
57  #include <linux/task_io_accounting_ops.h>
58  #include <linux/init.h>
59  #include <linux/capability.h>
60  #include <linux/file.h>
61  #include <linux/generic-radix-tree.h>
62  #include <linux/string.h>
63  #include <linux/seq_file.h>
64  #include <linux/namei.h>
65  #include <linux/mnt_namespace.h>
66  #include <linux/mm.h>
67  #include <linux/swap.h>
68  #include <linux/rcupdate.h>
69  #include <linux/kallsyms.h>
70  #include <linux/stacktrace.h>
71  #include <linux/resource.h>
72  #include <linux/module.h>
73  #include <linux/mount.h>
74  #include <linux/security.h>
75  #include <linux/ptrace.h>
76  #include <linux/printk.h>
77  #include <linux/cache.h>
78  #include <linux/cgroup.h>
79  #include <linux/cpuset.h>
80  #include <linux/audit.h>
81  #include <linux/poll.h>
82  #include <linux/nsproxy.h>
83  #include <linux/oom.h>
84  #include <linux/elf.h>
85  #include <linux/pid_namespace.h>
86  #include <linux/user_namespace.h>
87  #include <linux/fs_parser.h>
88  #include <linux/fs_struct.h>
89  #include <linux/slab.h>
90  #include <linux/sched/autogroup.h>
91  #include <linux/sched/mm.h>
92  #include <linux/sched/coredump.h>
93  #include <linux/sched/debug.h>
94  #include <linux/sched/stat.h>
95  #include <linux/posix-timers.h>
96  #include <linux/time_namespace.h>
97  #include <linux/resctrl.h>
98  #include <linux/cn_proc.h>
99  #include <linux/ksm.h>
100  #include <uapi/linux/lsm.h>
101  #include <trace/events/oom.h>
102  #include "internal.h"
103  #include "fd.h"
104  
105  #include "../../lib/kstrtox.h"
106  
107  /* NOTE:
108   *	Implementing inode permission operations in /proc is almost
109   *	certainly an error.  Permission checks need to happen during
110   *	each system call not at open time.  The reason is that most of
111   *	what we wish to check for permissions in /proc varies at runtime.
112   *
113   *	The classic example of a problem is opening file descriptors
114   *	in /proc for a task before it execs a suid executable.
115   */
116  
117  static u8 nlink_tid __ro_after_init;
118  static u8 nlink_tgid __ro_after_init;
119  
/* Policy consulted by proc_mem_foll_force() when deciding on FOLL_FORCE. */
enum proc_mem_force {
	PROC_MEM_FORCE_ALWAYS,	/* always add FOLL_FORCE */
	PROC_MEM_FORCE_PTRACE,	/* only when the caller is the active ptracer */
	PROC_MEM_FORCE_NEVER,	/* never add FOLL_FORCE */
};
125  
126  static enum proc_mem_force proc_mem_force_override __ro_after_init =
127  	IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
128  	IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
129  	PROC_MEM_FORCE_ALWAYS;
130  
131  static const struct constant_table proc_mem_force_table[] __initconst = {
132  	{ "always", PROC_MEM_FORCE_ALWAYS },
133  	{ "ptrace", PROC_MEM_FORCE_PTRACE },
134  	{ "never", PROC_MEM_FORCE_NEVER },
135  	{ }
136  };
137  
138  static int __init early_proc_mem_force_override(char *buf)
139  {
140  	if (!buf)
141  		return -EINVAL;
142  
143  	/*
144  	 * lookup_constant() defaults to proc_mem_force_override to preseve
145  	 * the initial Kconfig choice in case an invalid param gets passed.
146  	 */
147  	proc_mem_force_override = lookup_constant(proc_mem_force_table,
148  						  buf, proc_mem_force_override);
149  
150  	return 0;
151  }
152  early_param("proc_mem.force_override", early_proc_mem_force_override);
153  
154  struct pid_entry {
155  	const char *name;
156  	unsigned int len;
157  	umode_t mode;
158  	const struct inode_operations *iop;
159  	const struct file_operations *fop;
160  	union proc_op op;
161  };
162  
/*
 * Build one struct pid_entry initializer.  The length is derived with
 * sizeof(NAME) - 1, so NAME is expected to be a string literal.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {		\
	.name = (NAME),				\
	.len  = sizeof(NAME) - 1,		\
	.mode = MODE,				\
	.iop  = IOP,				\
	.fop  = FOP,				\
	.op   = OP,				\
}

/* Directory entry with its own inode and file operations. */
#define DIR(NAME, MODE, IOPS, FOPS)				\
	NOD(NAME, (S_IFDIR | (MODE)), &IOPS, &FOPS, {})

/* Symlink entry resolved through GET_LINK. */
#define LNK(NAME, GET_LINK)					\
	NOD(NAME, (S_IFLNK | S_IRWXUGO),			\
	    &proc_pid_link_inode_operations, NULL,		\
	    { .proc_get_link = GET_LINK })

/* Regular file entry served by the given file operations. */
#define REG(NAME, MODE, FOPS)					\
	NOD(NAME, (S_IFREG | (MODE)), NULL, &FOPS, {})

/* Regular file entry whose content is produced by a single SHOW callback. */
#define ONE(NAME, MODE, SHOW)					\
	NOD(NAME, (S_IFREG | (MODE)),				\
	    NULL, &proc_single_file_operations,			\
	    { .proc_show = SHOW })

/* Regular file entry served by proc_pid_attr_operations, tagged with LSMID. */
#define ATTR(LSMID, NAME, MODE)					\
	NOD(NAME, (S_IFREG | (MODE)),				\
	    NULL, &proc_pid_attr_operations,			\
	    { .lsmid = LSMID })
188  
189  /*
190   * Count the number of hardlinks for the pid_entry table, excluding the .
191   * and .. links.
192   */
193  static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
194  	unsigned int n)
195  {
196  	unsigned int i;
197  	unsigned int count;
198  
199  	count = 2;
200  	for (i = 0; i < n; ++i) {
201  		if (S_ISDIR(entries[i].mode))
202  			++count;
203  	}
204  
205  	return count;
206  }
207  
208  static int get_task_root(struct task_struct *task, struct path *root)
209  {
210  	int result = -ENOENT;
211  
212  	task_lock(task);
213  	if (task->fs) {
214  		get_fs_root(task->fs, root);
215  		result = 0;
216  	}
217  	task_unlock(task);
218  	return result;
219  }
220  
221  static int proc_cwd_link(struct dentry *dentry, struct path *path)
222  {
223  	struct task_struct *task = get_proc_task(d_inode(dentry));
224  	int result = -ENOENT;
225  
226  	if (task) {
227  		task_lock(task);
228  		if (task->fs) {
229  			get_fs_pwd(task->fs, path);
230  			result = 0;
231  		}
232  		task_unlock(task);
233  		put_task_struct(task);
234  	}
235  	return result;
236  }
237  
238  static int proc_root_link(struct dentry *dentry, struct path *path)
239  {
240  	struct task_struct *task = get_proc_task(d_inode(dentry));
241  	int result = -ENOENT;
242  
243  	if (task) {
244  		result = get_task_root(task, path);
245  		put_task_struct(task);
246  	}
247  	return result;
248  }
249  
250  /*
251   * If the user used setproctitle(), we just get the string from
252   * user space at arg_start, and limit it to a maximum of one page.
253   */
254  static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
255  				size_t count, unsigned long pos,
256  				unsigned long arg_start)
257  {
258  	char *page;
259  	int ret, got;
260  
261  	if (pos >= PAGE_SIZE)
262  		return 0;
263  
264  	page = (char *)__get_free_page(GFP_KERNEL);
265  	if (!page)
266  		return -ENOMEM;
267  
268  	ret = 0;
269  	got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
270  	if (got > 0) {
271  		int len = strnlen(page, got);
272  
273  		/* Include the NUL character if it was found */
274  		if (len < got)
275  			len++;
276  
277  		if (len > pos) {
278  			len -= pos;
279  			if (len > count)
280  				len = count;
281  			len -= copy_to_user(buf, page+pos, len);
282  			if (!len)
283  				len = -EFAULT;
284  			ret = len;
285  		}
286  	}
287  	free_page((unsigned long)page);
288  	return ret;
289  }
290  
291  static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
292  			      size_t count, loff_t *ppos)
293  {
294  	unsigned long arg_start, arg_end, env_start, env_end;
295  	unsigned long pos, len;
296  	char *page, c;
297  
298  	/* Check if process spawned far enough to have cmdline. */
299  	if (!mm->env_end)
300  		return 0;
301  
302  	spin_lock(&mm->arg_lock);
303  	arg_start = mm->arg_start;
304  	arg_end = mm->arg_end;
305  	env_start = mm->env_start;
306  	env_end = mm->env_end;
307  	spin_unlock(&mm->arg_lock);
308  
309  	if (arg_start >= arg_end)
310  		return 0;
311  
312  	/*
313  	 * We allow setproctitle() to overwrite the argument
314  	 * strings, and overflow past the original end. But
315  	 * only when it overflows into the environment area.
316  	 */
317  	if (env_start != arg_end || env_end < env_start)
318  		env_start = env_end = arg_end;
319  	len = env_end - arg_start;
320  
321  	/* We're not going to care if "*ppos" has high bits set */
322  	pos = *ppos;
323  	if (pos >= len)
324  		return 0;
325  	if (count > len - pos)
326  		count = len - pos;
327  	if (!count)
328  		return 0;
329  
330  	/*
331  	 * Magical special case: if the argv[] end byte is not
332  	 * zero, the user has overwritten it with setproctitle(3).
333  	 *
334  	 * Possible future enhancement: do this only once when
335  	 * pos is 0, and set a flag in the 'struct file'.
336  	 */
337  	if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
338  		return get_mm_proctitle(mm, buf, count, pos, arg_start);
339  
340  	/*
341  	 * For the non-setproctitle() case we limit things strictly
342  	 * to the [arg_start, arg_end[ range.
343  	 */
344  	pos += arg_start;
345  	if (pos < arg_start || pos >= arg_end)
346  		return 0;
347  	if (count > arg_end - pos)
348  		count = arg_end - pos;
349  
350  	page = (char *)__get_free_page(GFP_KERNEL);
351  	if (!page)
352  		return -ENOMEM;
353  
354  	len = 0;
355  	while (count) {
356  		int got;
357  		size_t size = min_t(size_t, PAGE_SIZE, count);
358  
359  		got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
360  		if (got <= 0)
361  			break;
362  		got -= copy_to_user(buf, page, got);
363  		if (unlikely(!got)) {
364  			if (!len)
365  				len = -EFAULT;
366  			break;
367  		}
368  		pos += got;
369  		buf += got;
370  		len += got;
371  		count -= got;
372  	}
373  
374  	free_page((unsigned long)page);
375  	return len;
376  }
377  
378  static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,
379  				size_t count, loff_t *pos)
380  {
381  	struct mm_struct *mm;
382  	ssize_t ret;
383  
384  	mm = get_task_mm(tsk);
385  	if (!mm)
386  		return 0;
387  
388  	ret = get_mm_cmdline(mm, buf, count, pos);
389  	mmput(mm);
390  	return ret;
391  }
392  
393  static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
394  				     size_t count, loff_t *pos)
395  {
396  	struct task_struct *tsk;
397  	ssize_t ret;
398  
399  	BUG_ON(*pos < 0);
400  
401  	tsk = get_proc_task(file_inode(file));
402  	if (!tsk)
403  		return -ESRCH;
404  	ret = get_task_cmdline(tsk, buf, count, pos);
405  	put_task_struct(tsk);
406  	if (ret > 0)
407  		*pos += ret;
408  	return ret;
409  }
410  
411  static const struct file_operations proc_pid_cmdline_ops = {
412  	.read	= proc_pid_cmdline_read,
413  	.llseek	= generic_file_llseek,
414  };
415  
#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 *
 * Prints the resolved symbol name.  If the caller lacks ptrace read
 * access, the task has no wchan, or the symbol cannot be resolved,
 * prints "0" instead.  Always returns 0.
 */
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	char symname[KSYM_NAME_LEN];
	unsigned long wchan;

	if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		wchan = get_wchan(task);
		if (wchan && !lookup_symbol_name(wchan, symname)) {
			seq_puts(m, symname);
			return 0;
		}
	}

	seq_putc(m, '0');
	return 0;
}
#endif /* CONFIG_KALLSYMS */
441  
442  static int lock_trace(struct task_struct *task)
443  {
444  	int err = down_read_killable(&task->signal->exec_update_lock);
445  	if (err)
446  		return err;
447  	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
448  		up_read(&task->signal->exec_update_lock);
449  		return -EPERM;
450  	}
451  	return 0;
452  }
453  
454  static void unlock_trace(struct task_struct *task)
455  {
456  	up_read(&task->signal->exec_update_lock);
457  }
458  
#ifdef CONFIG_STACKTRACE

#define MAX_STACK_TRACE_DEPTH	64

/*
 * Print up to MAX_STACK_TRACE_DEPTH kernel stack entries of @task.
 *
 * Returns 0 on success, -EACCES without CAP_SYS_ADMIN in the initial
 * user namespace, -ENOMEM on allocation failure, or the error from
 * lock_trace().
 */
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned int i, nr_entries;
	unsigned long *entries;
	int err;

	/*
	 * The ability to racily run the kernel stack unwinder on a running task
	 * and then observe the unwinder output is scary; while it is useful for
	 * debugging kernel issues, it can also allow an attacker to leak kernel
	 * stack contents.
	 * Doing this in a manner that is at least safe from races would require
	 * some work to ensure that the remote task can not be scheduled; and
	 * even then, this would still expose the unwinder as local attack
	 * surface.
	 * Therefore, this interface is restricted to root.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	err = lock_trace(task);
	if (err)
		goto out_free;

	nr_entries = stack_trace_save_tsk(task, entries,
					  MAX_STACK_TRACE_DEPTH, 0);
	for (i = 0; i < nr_entries; i++)
		seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);

	unlock_trace(task);

out_free:
	kfree(entries);
	return err;
}
#endif
506  
#ifdef CONFIG_SCHED_INFO
/*
 * Provides /proc/PID/schedstat
 *
 * Prints three numbers: sum_exec_runtime, run_delay and pcount, or
 * "0 0 0" when sched_info_on() reports false.  Always returns 0.
 */
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
			      struct pid *pid, struct task_struct *task)
{
	if (unlikely(!sched_info_on())) {
		seq_puts(m, "0 0 0\n");
		return 0;
	}

	seq_printf(m, "%llu %llu %lu\n",
		   (unsigned long long)task->se.sum_exec_runtime,
		   (unsigned long long)task->sched_info.run_delay,
		   task->sched_info.pcount);
	return 0;
}
#endif
525  
526  #ifdef CONFIG_LATENCYTOP
527  static int lstats_show_proc(struct seq_file *m, void *v)
528  {
529  	int i;
530  	struct inode *inode = m->private;
531  	struct task_struct *task = get_proc_task(inode);
532  
533  	if (!task)
534  		return -ESRCH;
535  	seq_puts(m, "Latency Top version : v0.1\n");
536  	for (i = 0; i < LT_SAVECOUNT; i++) {
537  		struct latency_record *lr = &task->latency_record[i];
538  		if (lr->backtrace[0]) {
539  			int q;
540  			seq_printf(m, "%i %li %li",
541  				   lr->count, lr->time, lr->max);
542  			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
543  				unsigned long bt = lr->backtrace[q];
544  
545  				if (!bt)
546  					break;
547  				seq_printf(m, " %ps", (void *)bt);
548  			}
549  			seq_putc(m, '\n');
550  		}
551  
552  	}
553  	put_task_struct(task);
554  	return 0;
555  }
556  
557  static int lstats_open(struct inode *inode, struct file *file)
558  {
559  	return single_open(file, lstats_show_proc, inode);
560  }
561  
562  static ssize_t lstats_write(struct file *file, const char __user *buf,
563  			    size_t count, loff_t *offs)
564  {
565  	struct task_struct *task = get_proc_task(file_inode(file));
566  
567  	if (!task)
568  		return -ESRCH;
569  	clear_tsk_latency_tracing(task);
570  	put_task_struct(task);
571  
572  	return count;
573  }
574  
575  static const struct file_operations proc_lstats_operations = {
576  	.open		= lstats_open,
577  	.read		= seq_read,
578  	.write		= lstats_write,
579  	.llseek		= seq_lseek,
580  	.release	= single_release,
581  };
582  
583  #endif
584  
585  static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
586  			  struct pid *pid, struct task_struct *task)
587  {
588  	unsigned long totalpages = totalram_pages() + total_swap_pages;
589  	unsigned long points = 0;
590  	long badness;
591  
592  	badness = oom_badness(task, totalpages);
593  	/*
594  	 * Special case OOM_SCORE_ADJ_MIN for all others scale the
595  	 * badness value into [0, 2000] range which we have been
596  	 * exporting for a long time so userspace might depend on it.
597  	 */
598  	if (badness != LONG_MIN)
599  		points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
600  
601  	seq_printf(m, "%lu\n", points);
602  
603  	return 0;
604  }
605  
/* Printable description of one resource limit. */
struct limit_names {
	const char *name;	/* label shown in the "Limit" column */
	const char *unit;	/* label for the "Units" column, or NULL */
};
610  
611  static const struct limit_names lnames[RLIM_NLIMITS] = {
612  	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
613  	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
614  	[RLIMIT_DATA] = {"Max data size", "bytes"},
615  	[RLIMIT_STACK] = {"Max stack size", "bytes"},
616  	[RLIMIT_CORE] = {"Max core file size", "bytes"},
617  	[RLIMIT_RSS] = {"Max resident set", "bytes"},
618  	[RLIMIT_NPROC] = {"Max processes", "processes"},
619  	[RLIMIT_NOFILE] = {"Max open files", "files"},
620  	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
621  	[RLIMIT_AS] = {"Max address space", "bytes"},
622  	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
623  	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
624  	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
625  	[RLIMIT_NICE] = {"Max nice priority", NULL},
626  	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
627  	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
628  };
629  
630  /* Display limits for a process */
631  static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
632  			   struct pid *pid, struct task_struct *task)
633  {
634  	unsigned int i;
635  	unsigned long flags;
636  
637  	struct rlimit rlim[RLIM_NLIMITS];
638  
639  	if (!lock_task_sighand(task, &flags))
640  		return 0;
641  	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
642  	unlock_task_sighand(task, &flags);
643  
644  	/*
645  	 * print the file header
646  	 */
647  	seq_puts(m, "Limit                     "
648  		"Soft Limit           "
649  		"Hard Limit           "
650  		"Units     \n");
651  
652  	for (i = 0; i < RLIM_NLIMITS; i++) {
653  		if (rlim[i].rlim_cur == RLIM_INFINITY)
654  			seq_printf(m, "%-25s %-20s ",
655  				   lnames[i].name, "unlimited");
656  		else
657  			seq_printf(m, "%-25s %-20lu ",
658  				   lnames[i].name, rlim[i].rlim_cur);
659  
660  		if (rlim[i].rlim_max == RLIM_INFINITY)
661  			seq_printf(m, "%-20s ", "unlimited");
662  		else
663  			seq_printf(m, "%-20lu ", rlim[i].rlim_max);
664  
665  		if (lnames[i].unit)
666  			seq_printf(m, "%-10s\n", lnames[i].unit);
667  		else
668  			seq_putc(m, '\n');
669  	}
670  
671  	return 0;
672  }
673  
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
/*
 * Print what task_current_syscall() reports for @task.
 *
 * Output is "running" when task_current_syscall() fails; the number,
 * stack pointer and instruction pointer when the number is negative;
 * otherwise the number, six arguments, stack pointer and instruction
 * pointer.  Returns 0, or the error from lock_trace().
 */
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	struct syscall_info info;
	u64 *args = info.data.args;
	int res;

	res = lock_trace(task);
	if (res)
		return res;

	if (task_current_syscall(task, &info)) {
		seq_puts(m, "running\n");
	} else if (info.data.nr >= 0) {
		seq_printf(m,
		       "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
		       info.data.nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       info.sp, info.data.instruction_pointer);
	} else {
		seq_printf(m, "%d 0x%llx 0x%llx\n",
			   info.data.nr, info.sp, info.data.instruction_pointer);
	}
	unlock_trace(task);

	return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
702  
703  /************************************************************************/
704  /*                       Here the fs part begins                        */
705  /************************************************************************/
706  
707  /* permission checks */
708  static bool proc_fd_access_allowed(struct inode *inode)
709  {
710  	struct task_struct *task;
711  	bool allowed = false;
712  	/* Allow access to a task's file descriptors if it is us or we
713  	 * may use ptrace attach to the process and find out that
714  	 * information.
715  	 */
716  	task = get_proc_task(inode);
717  	if (task) {
718  		allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
719  		put_task_struct(task);
720  	}
721  	return allowed;
722  }
723  
724  int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
725  		 struct iattr *attr)
726  {
727  	int error;
728  	struct inode *inode = d_inode(dentry);
729  
730  	if (attr->ia_valid & ATTR_MODE)
731  		return -EPERM;
732  
733  	error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
734  	if (error)
735  		return error;
736  
737  	setattr_copy(&nop_mnt_idmap, inode, attr);
738  	return 0;
739  }
740  
741  /*
742   * May current process learn task's sched/cmdline info (for hide_pid_min=1)
743   * or euid/egid (for hide_pid_min=2)?
744   */
745  static bool has_pid_permissions(struct proc_fs_info *fs_info,
746  				 struct task_struct *task,
747  				 enum proc_hidepid hide_pid_min)
748  {
749  	/*
750  	 * If 'hidpid' mount option is set force a ptrace check,
751  	 * we indicate that we are using a filesystem syscall
752  	 * by passing PTRACE_MODE_READ_FSCREDS
753  	 */
754  	if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
755  		return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
756  
757  	if (fs_info->hide_pid < hide_pid_min)
758  		return true;
759  	if (in_group_p(fs_info->pid_gid))
760  		return true;
761  	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
762  }
763  
764  
765  static int proc_pid_permission(struct mnt_idmap *idmap,
766  			       struct inode *inode, int mask)
767  {
768  	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
769  	struct task_struct *task;
770  	bool has_perms;
771  
772  	task = get_proc_task(inode);
773  	if (!task)
774  		return -ESRCH;
775  	has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
776  	put_task_struct(task);
777  
778  	if (!has_perms) {
779  		if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
780  			/*
781  			 * Let's make getdents(), stat(), and open()
782  			 * consistent with each other.  If a process
783  			 * may not stat() a file, it shouldn't be seen
784  			 * in procfs at all.
785  			 */
786  			return -ENOENT;
787  		}
788  
789  		return -EPERM;
790  	}
791  	return generic_permission(&nop_mnt_idmap, inode, mask);
792  }
793  
794  
795  
796  static const struct inode_operations proc_def_inode_operations = {
797  	.setattr	= proc_setattr,
798  };
799  
800  static int proc_single_show(struct seq_file *m, void *v)
801  {
802  	struct inode *inode = m->private;
803  	struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
804  	struct pid *pid = proc_pid(inode);
805  	struct task_struct *task;
806  	int ret;
807  
808  	task = get_pid_task(pid, PIDTYPE_PID);
809  	if (!task)
810  		return -ESRCH;
811  
812  	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
813  
814  	put_task_struct(task);
815  	return ret;
816  }
817  
818  static int proc_single_open(struct inode *inode, struct file *filp)
819  {
820  	return single_open(filp, proc_single_show, inode);
821  }
822  
823  static const struct file_operations proc_single_file_operations = {
824  	.open		= proc_single_open,
825  	.read		= seq_read,
826  	.llseek		= seq_lseek,
827  	.release	= single_release,
828  };
829  
830  
831  struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
832  {
833  	struct task_struct *task = get_proc_task(inode);
834  	struct mm_struct *mm;
835  
836  	if (!task)
837  		return ERR_PTR(-ESRCH);
838  
839  	mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
840  	put_task_struct(task);
841  
842  	if (IS_ERR(mm))
843  		return mm == ERR_PTR(-ESRCH) ? NULL : mm;
844  
845  	/* ensure this mm_struct can't be freed */
846  	mmgrab(mm);
847  	/* but do not pin its memory */
848  	mmput(mm);
849  
850  	return mm;
851  }
852  
853  static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
854  {
855  	struct mm_struct *mm = proc_mem_open(inode, mode);
856  
857  	if (IS_ERR(mm))
858  		return PTR_ERR(mm);
859  
860  	file->private_data = mm;
861  	return 0;
862  }
863  
864  static int mem_open(struct inode *inode, struct file *file)
865  {
866  	if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET)))
867  		return -EINVAL;
868  	return __mem_open(inode, file, PTRACE_MODE_ATTACH);
869  }
870  
871  static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm)
872  {
873  	struct task_struct *task;
874  	bool ptrace_active = false;
875  
876  	switch (proc_mem_force_override) {
877  	case PROC_MEM_FORCE_NEVER:
878  		return false;
879  	case PROC_MEM_FORCE_PTRACE:
880  		task = get_proc_task(file_inode(file));
881  		if (task) {
882  			ptrace_active =	READ_ONCE(task->ptrace) &&
883  					READ_ONCE(task->mm) == mm &&
884  					READ_ONCE(task->parent) == current;
885  			put_task_struct(task);
886  		}
887  		return ptrace_active;
888  	default:
889  		return true;
890  	}
891  }
892  
893  static ssize_t mem_rw(struct file *file, char __user *buf,
894  			size_t count, loff_t *ppos, int write)
895  {
896  	struct mm_struct *mm = file->private_data;
897  	unsigned long addr = *ppos;
898  	ssize_t copied;
899  	char *page;
900  	unsigned int flags;
901  
902  	if (!mm)
903  		return 0;
904  
905  	page = (char *)__get_free_page(GFP_KERNEL);
906  	if (!page)
907  		return -ENOMEM;
908  
909  	copied = 0;
910  	if (!mmget_not_zero(mm))
911  		goto free;
912  
913  	flags = write ? FOLL_WRITE : 0;
914  	if (proc_mem_foll_force(file, mm))
915  		flags |= FOLL_FORCE;
916  
917  	while (count > 0) {
918  		size_t this_len = min_t(size_t, count, PAGE_SIZE);
919  
920  		if (write && copy_from_user(page, buf, this_len)) {
921  			copied = -EFAULT;
922  			break;
923  		}
924  
925  		this_len = access_remote_vm(mm, addr, page, this_len, flags);
926  		if (!this_len) {
927  			if (!copied)
928  				copied = -EIO;
929  			break;
930  		}
931  
932  		if (!write && copy_to_user(buf, page, this_len)) {
933  			copied = -EFAULT;
934  			break;
935  		}
936  
937  		buf += this_len;
938  		addr += this_len;
939  		copied += this_len;
940  		count -= this_len;
941  	}
942  	*ppos = addr;
943  
944  	mmput(mm);
945  free:
946  	free_page((unsigned long) page);
947  	return copied;
948  }
949  
950  static ssize_t mem_read(struct file *file, char __user *buf,
951  			size_t count, loff_t *ppos)
952  {
953  	return mem_rw(file, buf, count, ppos, 0);
954  }
955  
956  static ssize_t mem_write(struct file *file, const char __user *buf,
957  			 size_t count, loff_t *ppos)
958  {
959  	return mem_rw(file, (char __user*)buf, count, ppos, 1);
960  }
961  
962  loff_t mem_lseek(struct file *file, loff_t offset, int orig)
963  {
964  	switch (orig) {
965  	case 0:
966  		file->f_pos = offset;
967  		break;
968  	case 1:
969  		file->f_pos += offset;
970  		break;
971  	default:
972  		return -EINVAL;
973  	}
974  	force_successful_syscall_return();
975  	return file->f_pos;
976  }
977  
978  static int mem_release(struct inode *inode, struct file *file)
979  {
980  	struct mm_struct *mm = file->private_data;
981  	if (mm)
982  		mmdrop(mm);
983  	return 0;
984  }
985  
986  static const struct file_operations proc_mem_operations = {
987  	.llseek		= mem_lseek,
988  	.read		= mem_read,
989  	.write		= mem_write,
990  	.open		= mem_open,
991  	.release	= mem_release,
992  	.fop_flags	= FOP_UNSIGNED_OFFSET,
993  };
994  
995  static int environ_open(struct inode *inode, struct file *file)
996  {
997  	return __mem_open(inode, file, PTRACE_MODE_READ);
998  }
999  
1000  static ssize_t environ_read(struct file *file, char __user *buf,
1001  			size_t count, loff_t *ppos)
1002  {
1003  	char *page;
1004  	unsigned long src = *ppos;
1005  	int ret = 0;
1006  	struct mm_struct *mm = file->private_data;
1007  	unsigned long env_start, env_end;
1008  
1009  	/* Ensure the process spawned far enough to have an environment. */
1010  	if (!mm || !mm->env_end)
1011  		return 0;
1012  
1013  	page = (char *)__get_free_page(GFP_KERNEL);
1014  	if (!page)
1015  		return -ENOMEM;
1016  
1017  	ret = 0;
1018  	if (!mmget_not_zero(mm))
1019  		goto free;
1020  
1021  	spin_lock(&mm->arg_lock);
1022  	env_start = mm->env_start;
1023  	env_end = mm->env_end;
1024  	spin_unlock(&mm->arg_lock);
1025  
1026  	while (count > 0) {
1027  		size_t this_len, max_len;
1028  		int retval;
1029  
1030  		if (src >= (env_end - env_start))
1031  			break;
1032  
1033  		this_len = env_end - (env_start + src);
1034  
1035  		max_len = min_t(size_t, PAGE_SIZE, count);
1036  		this_len = min(max_len, this_len);
1037  
1038  		retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
1039  
1040  		if (retval <= 0) {
1041  			ret = retval;
1042  			break;
1043  		}
1044  
1045  		if (copy_to_user(buf, page, retval)) {
1046  			ret = -EFAULT;
1047  			break;
1048  		}
1049  
1050  		ret += retval;
1051  		src += retval;
1052  		buf += retval;
1053  		count -= retval;
1054  	}
1055  	*ppos = src;
1056  	mmput(mm);
1057  
1058  free:
1059  	free_page((unsigned long) page);
1060  	return ret;
1061  }
1062  
1063  static const struct file_operations proc_environ_operations = {
1064  	.open		= environ_open,
1065  	.read		= environ_read,
1066  	.llseek		= generic_file_llseek,
1067  	.release	= mem_release,
1068  };
1069  
1070  static int auxv_open(struct inode *inode, struct file *file)
1071  {
1072  	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
1073  }
1074  
1075  static ssize_t auxv_read(struct file *file, char __user *buf,
1076  			size_t count, loff_t *ppos)
1077  {
1078  	struct mm_struct *mm = file->private_data;
1079  	unsigned int nwords = 0;
1080  
1081  	if (!mm)
1082  		return 0;
1083  	do {
1084  		nwords += 2;
1085  	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
1086  	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
1087  				       nwords * sizeof(mm->saved_auxv[0]));
1088  }
1089  
1090  static const struct file_operations proc_auxv_operations = {
1091  	.open		= auxv_open,
1092  	.read		= auxv_read,
1093  	.llseek		= generic_file_llseek,
1094  	.release	= mem_release,
1095  };
1096  
1097  static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
1098  			    loff_t *ppos)
1099  {
1100  	struct task_struct *task = get_proc_task(file_inode(file));
1101  	char buffer[PROC_NUMBUF];
1102  	int oom_adj = OOM_ADJUST_MIN;
1103  	size_t len;
1104  
1105  	if (!task)
1106  		return -ESRCH;
1107  	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
1108  		oom_adj = OOM_ADJUST_MAX;
1109  	else
1110  		oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
1111  			  OOM_SCORE_ADJ_MAX;
1112  	put_task_struct(task);
1113  	if (oom_adj > OOM_ADJUST_MAX)
1114  		oom_adj = OOM_ADJUST_MAX;
1115  	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
1116  	return simple_read_from_buffer(buf, count, ppos, buffer, len);
1117  }
1118  
/*
 * __set_oom_adj - store a new oom_score_adj for the task behind @file
 * @file:    open proc file; the target task is looked up from its inode
 * @oom_adj: value to write into signal->oom_score_adj
 * @legacy:  true when called from oom_adj_write(), false when called from
 *           oom_score_adj_write()
 *
 * The whole update runs under oom_adj_mutex.  Lowering the value below the
 * current oom_score_adj (legacy) or below oom_score_adj_min (non-legacy) is
 * refused with -EACCES unless the caller has CAP_SYS_RESOURCE.  When the
 * task's mm has MMF_MULTIPROCESS set, the same value is also written to
 * every other process found to share that mm.
 *
 * Return: 0 on success, -ESRCH if the task cannot be found, -EACCES if the
 * capability check fails.
 */
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{
	struct mm_struct *mm = NULL;
	struct task_struct *task;
	int err = 0;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;

	mutex_lock(&oom_adj_mutex);
	if (legacy) {
		/* Going below the current value needs CAP_SYS_RESOURCE. */
		if (oom_adj < task->signal->oom_score_adj &&
				!capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
		/*
		 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
		 * /proc/pid/oom_score_adj instead.
		 */
		pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
			  current->comm, task_pid_nr(current), task_pid_nr(task),
			  task_pid_nr(task));
	} else {
		/* Going below the recorded minimum needs CAP_SYS_RESOURCE. */
		if ((short)oom_adj < task->signal->oom_score_adj_min &&
				!capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
	}

	/*
	 * Make sure we will check other processes sharing the mm if this is
	 * not vfork which wants its own oom_score_adj.
	 * pin the mm so it doesn't go away and get reused after task_unlock
	 */
	if (!task->vfork_done) {
		struct task_struct *p = find_lock_task_mm(task);

		if (p) {
			/* Only an mm flagged as multi-process needs the sweep below. */
			if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
				mm = p->mm;
				mmgrab(mm);
			}
			task_unlock(p);
		}
	}

	task->signal->oom_score_adj = oom_adj;
	/* Only a CAP_SYS_RESOURCE writer on the non-legacy path moves the minimum. */
	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
		task->signal->oom_score_adj_min = (short)oom_adj;
	trace_oom_score_adj_update(task);

	if (mm) {
		struct task_struct *p;

		/* Propagate the value to every other process using the pinned mm. */
		rcu_read_lock();
		for_each_process(p) {
			/* The target's own thread group was updated above. */
			if (same_thread_group(task, p))
				continue;

			/* do not touch kernel threads or the global init */
			if (p->flags & PF_KTHREAD || is_global_init(p))
				continue;

			task_lock(p);
			if (!p->vfork_done && process_shares_mm(p, mm)) {
				p->signal->oom_score_adj = oom_adj;
				if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
					p->signal->oom_score_adj_min = (short)oom_adj;
			}
			task_unlock(p);
		}
		rcu_read_unlock();
		/* Drop the reference taken by mmgrab() above. */
		mmdrop(mm);
	}
err_unlock:
	mutex_unlock(&oom_adj_mutex);
	put_task_struct(task);
	return err;
}
1201  
1202  /*
1203   * /proc/pid/oom_adj exists solely for backwards compatibility with previous
1204   * kernels.  The effective policy is defined by oom_score_adj, which has a
1205   * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
1206   * Values written to oom_adj are simply mapped linearly to oom_score_adj.
1207   * Processes that become oom disabled via oom_adj will still be oom disabled
1208   * with this implementation.
1209   *
1210   * oom_adj cannot be removed since existing userspace binaries use it.
1211   */
1212  static ssize_t oom_adj_write(struct file *file, const char __user *buf,
1213  			     size_t count, loff_t *ppos)
1214  {
1215  	char buffer[PROC_NUMBUF] = {};
1216  	int oom_adj;
1217  	int err;
1218  
1219  	if (count > sizeof(buffer) - 1)
1220  		count = sizeof(buffer) - 1;
1221  	if (copy_from_user(buffer, buf, count)) {
1222  		err = -EFAULT;
1223  		goto out;
1224  	}
1225  
1226  	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
1227  	if (err)
1228  		goto out;
1229  	if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
1230  	     oom_adj != OOM_DISABLE) {
1231  		err = -EINVAL;
1232  		goto out;
1233  	}
1234  
1235  	/*
1236  	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
1237  	 * value is always attainable.
1238  	 */
1239  	if (oom_adj == OOM_ADJUST_MAX)
1240  		oom_adj = OOM_SCORE_ADJ_MAX;
1241  	else
1242  		oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
1243  
1244  	err = __set_oom_adj(file, oom_adj, true);
1245  out:
1246  	return err < 0 ? err : count;
1247  }
1248  
/*
 * File operations table: reads go through oom_adj_read(), writes through
 * oom_adj_write(), and seeking through generic_file_llseek().
 */
static const struct file_operations proc_oom_adj_operations = {
	.read		= oom_adj_read,
	.write		= oom_adj_write,
	.llseek		= generic_file_llseek,
};
1254  
1255  static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
1256  					size_t count, loff_t *ppos)
1257  {
1258  	struct task_struct *task = get_proc_task(file_inode(file));
1259  	char buffer[PROC_NUMBUF];
1260  	short oom_score_adj = OOM_SCORE_ADJ_MIN;
1261  	size_t len;
1262  
1263  	if (!task)
1264  		return -ESRCH;
1265  	oom_score_adj = task->signal->oom_score_adj;
1266  	put_task_struct(task);
1267  	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
1268  	return simple_read_from_buffer(buf, count, ppos, buffer, len);
1269  }
1270  
1271  static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1272  					size_t count, loff_t *ppos)
1273  {
1274  	char buffer[PROC_NUMBUF] = {};
1275  	int oom_score_adj;
1276  	int err;
1277  
1278  	if (count > sizeof(buffer) - 1)
1279  		count = sizeof(buffer) - 1;
1280  	if (copy_from_user(buffer, buf, count)) {
1281  		err = -EFAULT;
1282  		goto out;
1283  	}
1284  
1285  	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1286  	if (err)
1287  		goto out;
1288  	if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1289  			oom_score_adj > OOM_SCORE_ADJ_MAX) {
1290  		err = -EINVAL;
1291  		goto out;
1292  	}
1293  
1294  	err = __set_oom_adj(file, oom_score_adj, false);
1295  out:
1296  	return err < 0 ? err : count;
1297  }
1298  
/*
 * File operations table: reads go through oom_score_adj_read(), writes
 * through oom_score_adj_write(), and seeking through default_llseek().
 */
static const struct file_operations proc_oom_score_adj_operations = {
	.read		= oom_score_adj_read,
	.write		= oom_score_adj_write,
	.llseek		= default_llseek,
};
1304  
1305  #ifdef CONFIG_AUDIT
1306  #define TMPBUFLEN 11
1307  static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1308  				  size_t count, loff_t *ppos)
1309  {
1310  	struct inode * inode = file_inode(file);
1311  	struct task_struct *task = get_proc_task(inode);
1312  	ssize_t length;
1313  	char tmpbuf[TMPBUFLEN];
1314  
1315  	if (!task)
1316  		return -ESRCH;
1317  	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1318  			   from_kuid(file->f_cred->user_ns,
1319  				     audit_get_loginuid(task)));
1320  	put_task_struct(task);
1321  	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1322  }
1323  
1324  static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1325  				   size_t count, loff_t *ppos)
1326  {
1327  	struct inode * inode = file_inode(file);
1328  	uid_t loginuid;
1329  	kuid_t kloginuid;
1330  	int rv;
1331  
1332  	/* Don't let kthreads write their own loginuid */
1333  	if (current->flags & PF_KTHREAD)
1334  		return -EPERM;
1335  
1336  	rcu_read_lock();
1337  	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
1338  		rcu_read_unlock();
1339  		return -EPERM;
1340  	}
1341  	rcu_read_unlock();
1342  
1343  	if (*ppos != 0) {
1344  		/* No partial writes. */
1345  		return -EINVAL;
1346  	}
1347  
1348  	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
1349  	if (rv < 0)
1350  		return rv;
1351  
1352  	/* is userspace tring to explicitly UNSET the loginuid? */
1353  	if (loginuid == AUDIT_UID_UNSET) {
1354  		kloginuid = INVALID_UID;
1355  	} else {
1356  		kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
1357  		if (!uid_valid(kloginuid))
1358  			return -EINVAL;
1359  	}
1360  
1361  	rv = audit_set_loginuid(kloginuid);
1362  	if (rv < 0)
1363  		return rv;
1364  	return count;
1365  }
1366  
/*
 * File operations table: reads go through proc_loginuid_read(), writes
 * through proc_loginuid_write(), and seeking through generic_file_llseek().
 */
static const struct file_operations proc_loginuid_operations = {
	.read		= proc_loginuid_read,
	.write		= proc_loginuid_write,
	.llseek		= generic_file_llseek,
};
1372  
1373  static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1374  				  size_t count, loff_t *ppos)
1375  {
1376  	struct inode * inode = file_inode(file);
1377  	struct task_struct *task = get_proc_task(inode);
1378  	ssize_t length;
1379  	char tmpbuf[TMPBUFLEN];
1380  
1381  	if (!task)
1382  		return -ESRCH;
1383  	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1384  				audit_get_sessionid(task));
1385  	put_task_struct(task);
1386  	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1387  }
1388  
/*
 * File operations table: reads go through proc_sessionid_read() and seeking
 * through generic_file_llseek(); no write handler is provided.
 */
static const struct file_operations proc_sessionid_operations = {
	.read		= proc_sessionid_read,
	.llseek		= generic_file_llseek,
};
1393  #endif
1394  
1395  #ifdef CONFIG_FAULT_INJECTION
1396  static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1397  				      size_t count, loff_t *ppos)
1398  {
1399  	struct task_struct *task = get_proc_task(file_inode(file));
1400  	char buffer[PROC_NUMBUF];
1401  	size_t len;
1402  	int make_it_fail;
1403  
1404  	if (!task)
1405  		return -ESRCH;
1406  	make_it_fail = task->make_it_fail;
1407  	put_task_struct(task);
1408  
1409  	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1410  
1411  	return simple_read_from_buffer(buf, count, ppos, buffer, len);
1412  }
1413  
1414  static ssize_t proc_fault_inject_write(struct file * file,
1415  			const char __user * buf, size_t count, loff_t *ppos)
1416  {
1417  	struct task_struct *task;
1418  	char buffer[PROC_NUMBUF] = {};
1419  	int make_it_fail;
1420  	int rv;
1421  
1422  	if (!capable(CAP_SYS_RESOURCE))
1423  		return -EPERM;
1424  
1425  	if (count > sizeof(buffer) - 1)
1426  		count = sizeof(buffer) - 1;
1427  	if (copy_from_user(buffer, buf, count))
1428  		return -EFAULT;
1429  	rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
1430  	if (rv < 0)
1431  		return rv;
1432  	if (make_it_fail < 0 || make_it_fail > 1)
1433  		return -EINVAL;
1434  
1435  	task = get_proc_task(file_inode(file));
1436  	if (!task)
1437  		return -ESRCH;
1438  	task->make_it_fail = make_it_fail;
1439  	put_task_struct(task);
1440  
1441  	return count;
1442  }
1443  
/*
 * File operations table: reads go through proc_fault_inject_read(), writes
 * through proc_fault_inject_write(), and seeking through
 * generic_file_llseek().
 */
static const struct file_operations proc_fault_inject_operations = {
	.read		= proc_fault_inject_read,
	.write		= proc_fault_inject_write,
	.llseek		= generic_file_llseek,
};
1449  
1450  static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
1451  				   size_t count, loff_t *ppos)
1452  {
1453  	struct task_struct *task;
1454  	int err;
1455  	unsigned int n;
1456  
1457  	err = kstrtouint_from_user(buf, count, 0, &n);
1458  	if (err)
1459  		return err;
1460  
1461  	task = get_proc_task(file_inode(file));
1462  	if (!task)
1463  		return -ESRCH;
1464  	task->fail_nth = n;
1465  	put_task_struct(task);
1466  
1467  	return count;
1468  }
1469  
1470  static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
1471  				  size_t count, loff_t *ppos)
1472  {
1473  	struct task_struct *task;
1474  	char numbuf[PROC_NUMBUF];
1475  	ssize_t len;
1476  
1477  	task = get_proc_task(file_inode(file));
1478  	if (!task)
1479  		return -ESRCH;
1480  	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
1481  	put_task_struct(task);
1482  	return simple_read_from_buffer(buf, count, ppos, numbuf, len);
1483  }
1484  
/*
 * File operations table: reads go through proc_fail_nth_read() and writes
 * through proc_fail_nth_write(); no llseek handler is set here.
 */
static const struct file_operations proc_fail_nth_operations = {
	.read		= proc_fail_nth_read,
	.write		= proc_fail_nth_write,
};
1489  #endif
1490  
1491  
1492  #ifdef CONFIG_SCHED_DEBUG
1493  /*
1494   * Print out various scheduling related per-task fields:
1495   */
1496  static int sched_show(struct seq_file *m, void *v)
1497  {
1498  	struct inode *inode = m->private;
1499  	struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
1500  	struct task_struct *p;
1501  
1502  	p = get_proc_task(inode);
1503  	if (!p)
1504  		return -ESRCH;
1505  	proc_sched_show_task(p, ns, m);
1506  
1507  	put_task_struct(p);
1508  
1509  	return 0;
1510  }
1511  
1512  static ssize_t
1513  sched_write(struct file *file, const char __user *buf,
1514  	    size_t count, loff_t *offset)
1515  {
1516  	struct inode *inode = file_inode(file);
1517  	struct task_struct *p;
1518  
1519  	p = get_proc_task(inode);
1520  	if (!p)
1521  		return -ESRCH;
1522  	proc_sched_set_task(p);
1523  
1524  	put_task_struct(p);
1525  
1526  	return count;
1527  }
1528  
1529  static int sched_open(struct inode *inode, struct file *filp)
1530  {
1531  	return single_open(filp, sched_show, inode);
1532  }
1533  
/*
 * File operations table: a single_open()-based seq_file (sched_open(),
 * seq_read(), seq_lseek(), single_release()) with sched_write() as the
 * write handler.
 */
static const struct file_operations proc_pid_sched_operations = {
	.open		= sched_open,
	.read		= seq_read,
	.write		= sched_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
1541  
1542  #endif
1543  
1544  #ifdef CONFIG_SCHED_AUTOGROUP
1545  /*
1546   * Print out autogroup related information:
1547   */
1548  static int sched_autogroup_show(struct seq_file *m, void *v)
1549  {
1550  	struct inode *inode = m->private;
1551  	struct task_struct *p;
1552  
1553  	p = get_proc_task(inode);
1554  	if (!p)
1555  		return -ESRCH;
1556  	proc_sched_autogroup_show_task(p, m);
1557  
1558  	put_task_struct(p);
1559  
1560  	return 0;
1561  }
1562  
1563  static ssize_t
1564  sched_autogroup_write(struct file *file, const char __user *buf,
1565  	    size_t count, loff_t *offset)
1566  {
1567  	struct inode *inode = file_inode(file);
1568  	struct task_struct *p;
1569  	char buffer[PROC_NUMBUF] = {};
1570  	int nice;
1571  	int err;
1572  
1573  	if (count > sizeof(buffer) - 1)
1574  		count = sizeof(buffer) - 1;
1575  	if (copy_from_user(buffer, buf, count))
1576  		return -EFAULT;
1577  
1578  	err = kstrtoint(strstrip(buffer), 0, &nice);
1579  	if (err < 0)
1580  		return err;
1581  
1582  	p = get_proc_task(inode);
1583  	if (!p)
1584  		return -ESRCH;
1585  
1586  	err = proc_sched_autogroup_set_nice(p, nice);
1587  	if (err)
1588  		count = err;
1589  
1590  	put_task_struct(p);
1591  
1592  	return count;
1593  }
1594  
1595  static int sched_autogroup_open(struct inode *inode, struct file *filp)
1596  {
1597  	int ret;
1598  
1599  	ret = single_open(filp, sched_autogroup_show, NULL);
1600  	if (!ret) {
1601  		struct seq_file *m = filp->private_data;
1602  
1603  		m->private = inode;
1604  	}
1605  	return ret;
1606  }
1607  
/*
 * File operations table: a single_open()-based seq_file
 * (sched_autogroup_open(), seq_read(), seq_lseek(), single_release()) with
 * sched_autogroup_write() as the write handler.
 */
static const struct file_operations proc_pid_sched_autogroup_operations = {
	.open		= sched_autogroup_open,
	.read		= seq_read,
	.write		= sched_autogroup_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
1615  
1616  #endif /* CONFIG_SCHED_AUTOGROUP */
1617  
1618  #ifdef CONFIG_TIME_NS
1619  static int timens_offsets_show(struct seq_file *m, void *v)
1620  {
1621  	struct task_struct *p;
1622  
1623  	p = get_proc_task(file_inode(m->file));
1624  	if (!p)
1625  		return -ESRCH;
1626  	proc_timens_show_offsets(p, m);
1627  
1628  	put_task_struct(p);
1629  
1630  	return 0;
1631  }
1632  
/*
 * Write handler that parses up to ARRAY_SIZE(offsets) lines of the form
 * "<clock> <seconds> <nanoseconds>" and passes them to
 * proc_timens_set_offset() for the task behind the inode.
 *
 * <clock> must be "monotonic" or "boottime", or the stringified value of
 * CLOCK_MONOTONIC or CLOCK_BOOTTIME.  <nanoseconds> must be below
 * NSEC_PER_SEC.
 *
 * Returns the number of bytes consumed on success; -EINVAL for a non-zero
 * file position, a write of PAGE_SIZE or more, or a line that fails to
 * parse; -ESRCH when the task is gone; or the error reported by
 * memdup_user_nul() or proc_timens_set_offset().
 */
static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
				    size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(file);
	struct proc_timens_offset offsets[2];
	char *kbuf = NULL, *pos, *next_line;
	struct task_struct *p;
	int ret, noffsets;

	/* Only allow < page size writes at the beginning of the file */
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		return -EINVAL;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	/* Parse the user data */
	ret = -EINVAL;
	noffsets = 0;
	for (pos = kbuf; pos; pos = next_line) {
		struct proc_timens_offset *off = &offsets[noffsets];
		char clock[10];
		int err;

		/* Find the end of line and ensure we don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			/* Nothing after the newline: this was the last line. */
			if (*next_line == '\0')
				next_line = NULL;
		}

		/* All three fields must be present and the nanoseconds sane. */
		err = sscanf(pos, "%9s %lld %lu", clock,
				&off->val.tv_sec, &off->val.tv_nsec);
		if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
			goto out;

		clock[sizeof(clock) - 1] = 0;
		/* Accept either the symbolic name or the numeric clock id. */
		if (strcmp(clock, "monotonic") == 0 ||
		    strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
			off->clockid = CLOCK_MONOTONIC;
		else if (strcmp(clock, "boottime") == 0 ||
			 strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
			off->clockid = CLOCK_BOOTTIME;
		else
			goto out;

		noffsets++;
		if (noffsets == ARRAY_SIZE(offsets)) {
			/*
			 * The array is full: report only the bytes parsed so
			 * far as consumed when more input follows.
			 */
			if (next_line)
				count = next_line - kbuf;
			break;
		}
	}

	ret = -ESRCH;
	p = get_proc_task(inode);
	if (!p)
		goto out;
	ret = proc_timens_set_offset(file, p, offsets, noffsets);
	put_task_struct(p);
	if (ret)
		goto out;

	ret = count;
out:
	kfree(kbuf);
	return ret;
}
1705  
1706  static int timens_offsets_open(struct inode *inode, struct file *filp)
1707  {
1708  	return single_open(filp, timens_offsets_show, inode);
1709  }
1710  
/*
 * File operations table: a single_open()-based seq_file
 * (timens_offsets_open(), seq_read(), seq_lseek(), single_release()) with
 * timens_offsets_write() as the write handler.
 */
static const struct file_operations proc_timens_offsets_operations = {
	.open		= timens_offsets_open,
	.read		= seq_read,
	.write		= timens_offsets_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
1718  #endif /* CONFIG_TIME_NS */
1719  
1720  static ssize_t comm_write(struct file *file, const char __user *buf,
1721  				size_t count, loff_t *offset)
1722  {
1723  	struct inode *inode = file_inode(file);
1724  	struct task_struct *p;
1725  	char buffer[TASK_COMM_LEN] = {};
1726  	const size_t maxlen = sizeof(buffer) - 1;
1727  
1728  	if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
1729  		return -EFAULT;
1730  
1731  	p = get_proc_task(inode);
1732  	if (!p)
1733  		return -ESRCH;
1734  
1735  	if (same_thread_group(current, p)) {
1736  		set_task_comm(p, buffer);
1737  		proc_comm_connector(p);
1738  	}
1739  	else
1740  		count = -EINVAL;
1741  
1742  	put_task_struct(p);
1743  
1744  	return count;
1745  }
1746  
1747  static int comm_show(struct seq_file *m, void *v)
1748  {
1749  	struct inode *inode = m->private;
1750  	struct task_struct *p;
1751  
1752  	p = get_proc_task(inode);
1753  	if (!p)
1754  		return -ESRCH;
1755  
1756  	proc_task_name(m, p, false);
1757  	seq_putc(m, '\n');
1758  
1759  	put_task_struct(p);
1760  
1761  	return 0;
1762  }
1763  
1764  static int comm_open(struct inode *inode, struct file *filp)
1765  {
1766  	return single_open(filp, comm_show, inode);
1767  }
1768  
/*
 * File operations table: a single_open()-based seq_file (comm_open(),
 * seq_read(), seq_lseek(), single_release()) with comm_write() as the write
 * handler.
 */
static const struct file_operations proc_pid_set_comm_operations = {
	.open		= comm_open,
	.read		= seq_read,
	.write		= comm_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
1776  
1777  static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1778  {
1779  	struct task_struct *task;
1780  	struct file *exe_file;
1781  
1782  	task = get_proc_task(d_inode(dentry));
1783  	if (!task)
1784  		return -ENOENT;
1785  	exe_file = get_task_exe_file(task);
1786  	put_task_struct(task);
1787  	if (exe_file) {
1788  		*exe_path = exe_file->f_path;
1789  		path_get(&exe_file->f_path);
1790  		fput(exe_file);
1791  		return 0;
1792  	} else
1793  		return -ENOENT;
1794  }
1795  
1796  static const char *proc_pid_get_link(struct dentry *dentry,
1797  				     struct inode *inode,
1798  				     struct delayed_call *done)
1799  {
1800  	struct path path;
1801  	int error = -EACCES;
1802  
1803  	if (!dentry)
1804  		return ERR_PTR(-ECHILD);
1805  
1806  	/* Are we allowed to snoop on the tasks file descriptors? */
1807  	if (!proc_fd_access_allowed(inode))
1808  		goto out;
1809  
1810  	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1811  	if (error)
1812  		goto out;
1813  
1814  	error = nd_jump_link(&path);
1815  out:
1816  	return ERR_PTR(error);
1817  }
1818  
1819  static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen)
1820  {
1821  	char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
1822  	char *pathname;
1823  	int len;
1824  
1825  	if (!tmp)
1826  		return -ENOMEM;
1827  
1828  	pathname = d_path(path, tmp, PATH_MAX);
1829  	len = PTR_ERR(pathname);
1830  	if (IS_ERR(pathname))
1831  		goto out;
1832  	len = tmp + PATH_MAX - 1 - pathname;
1833  
1834  	if (len > buflen)
1835  		len = buflen;
1836  	if (copy_to_user(buffer, pathname, len))
1837  		len = -EFAULT;
1838   out:
1839  	kfree(tmp);
1840  	return len;
1841  }
1842  
1843  static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1844  {
1845  	int error = -EACCES;
1846  	struct inode *inode = d_inode(dentry);
1847  	struct path path;
1848  
1849  	/* Are we allowed to snoop on the tasks file descriptors? */
1850  	if (!proc_fd_access_allowed(inode))
1851  		goto out;
1852  
1853  	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1854  	if (error)
1855  		goto out;
1856  
1857  	error = do_proc_readlink(&path, buffer, buflen);
1858  	path_put(&path);
1859  out:
1860  	return error;
1861  }
1862  
/*
 * Inode operations table for proc pid links: readlink goes through
 * proc_pid_readlink(), link following through proc_pid_get_link(), and
 * attribute changes through proc_setattr().
 */
const struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.get_link	= proc_pid_get_link,
	.setattr	= proc_setattr,
};
1868  
1869  
1870  /* building an inode */
1871  
1872  void task_dump_owner(struct task_struct *task, umode_t mode,
1873  		     kuid_t *ruid, kgid_t *rgid)
1874  {
1875  	/* Depending on the state of dumpable compute who should own a
1876  	 * proc file for a task.
1877  	 */
1878  	const struct cred *cred;
1879  	kuid_t uid;
1880  	kgid_t gid;
1881  
1882  	if (unlikely(task->flags & PF_KTHREAD)) {
1883  		*ruid = GLOBAL_ROOT_UID;
1884  		*rgid = GLOBAL_ROOT_GID;
1885  		return;
1886  	}
1887  
1888  	/* Default to the tasks effective ownership */
1889  	rcu_read_lock();
1890  	cred = __task_cred(task);
1891  	uid = cred->euid;
1892  	gid = cred->egid;
1893  	rcu_read_unlock();
1894  
1895  	/*
1896  	 * Before the /proc/pid/status file was created the only way to read
1897  	 * the effective uid of a /process was to stat /proc/pid.  Reading
1898  	 * /proc/pid/status is slow enough that procps and other packages
1899  	 * kept stating /proc/pid.  To keep the rules in /proc simple I have
1900  	 * made this apply to all per process world readable and executable
1901  	 * directories.
1902  	 */
1903  	if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
1904  		struct mm_struct *mm;
1905  		task_lock(task);
1906  		mm = task->mm;
1907  		/* Make non-dumpable tasks owned by some root */
1908  		if (mm) {
1909  			if (get_dumpable(mm) != SUID_DUMP_USER) {
1910  				struct user_namespace *user_ns = mm->user_ns;
1911  
1912  				uid = make_kuid(user_ns, 0);
1913  				if (!uid_valid(uid))
1914  					uid = GLOBAL_ROOT_UID;
1915  
1916  				gid = make_kgid(user_ns, 0);
1917  				if (!gid_valid(gid))
1918  					gid = GLOBAL_ROOT_GID;
1919  			}
1920  		} else {
1921  			uid = GLOBAL_ROOT_UID;
1922  			gid = GLOBAL_ROOT_GID;
1923  		}
1924  		task_unlock(task);
1925  	}
1926  	*ruid = uid;
1927  	*rgid = gid;
1928  }
1929  
1930  void proc_pid_evict_inode(struct proc_inode *ei)
1931  {
1932  	struct pid *pid = ei->pid;
1933  
1934  	if (S_ISDIR(ei->vfs_inode.i_mode)) {
1935  		spin_lock(&pid->lock);
1936  		hlist_del_init_rcu(&ei->sibling_inodes);
1937  		spin_unlock(&pid->lock);
1938  	}
1939  }
1940  
1941  struct inode *proc_pid_make_inode(struct super_block *sb,
1942  				  struct task_struct *task, umode_t mode)
1943  {
1944  	struct inode * inode;
1945  	struct proc_inode *ei;
1946  	struct pid *pid;
1947  
1948  	/* We need a new inode */
1949  
1950  	inode = new_inode(sb);
1951  	if (!inode)
1952  		goto out;
1953  
1954  	/* Common stuff */
1955  	ei = PROC_I(inode);
1956  	inode->i_mode = mode;
1957  	inode->i_ino = get_next_ino();
1958  	simple_inode_init_ts(inode);
1959  	inode->i_op = &proc_def_inode_operations;
1960  
1961  	/*
1962  	 * grab the reference to task.
1963  	 */
1964  	pid = get_task_pid(task, PIDTYPE_PID);
1965  	if (!pid)
1966  		goto out_unlock;
1967  
1968  	/* Let the pid remember us for quick removal */
1969  	ei->pid = pid;
1970  
1971  	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
1972  	security_task_to_inode(task, inode);
1973  
1974  out:
1975  	return inode;
1976  
1977  out_unlock:
1978  	iput(inode);
1979  	return NULL;
1980  }
1981  
1982  /*
1983   * Generating an inode and adding it into @pid->inodes, so that task will
1984   * invalidate inode's dentry before being released.
1985   *
1986   * This helper is used for creating dir-type entries under '/proc' and
1987   * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>'
1988   * can be released by invalidating '/proc/<tgid>' dentry.
1989   * In theory, dentries under '/proc/<tgid>/task' can also be released by
1990   * invalidating '/proc/<tgid>' dentry, we reserve it to handle single
1991   * thread exiting situation: Any one of threads should invalidate its
1992   * '/proc/<tgid>/task/<pid>' dentry before released.
1993   */
1994  static struct inode *proc_pid_make_base_inode(struct super_block *sb,
1995  				struct task_struct *task, umode_t mode)
1996  {
1997  	struct inode *inode;
1998  	struct proc_inode *ei;
1999  	struct pid *pid;
2000  
2001  	inode = proc_pid_make_inode(sb, task, mode);
2002  	if (!inode)
2003  		return NULL;
2004  
2005  	/* Let proc_flush_pid find this directory inode */
2006  	ei = PROC_I(inode);
2007  	pid = ei->pid;
2008  	spin_lock(&pid->lock);
2009  	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
2010  	spin_unlock(&pid->lock);
2011  
2012  	return inode;
2013  }
2014  
2015  int pid_getattr(struct mnt_idmap *idmap, const struct path *path,
2016  		struct kstat *stat, u32 request_mask, unsigned int query_flags)
2017  {
2018  	struct inode *inode = d_inode(path->dentry);
2019  	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
2020  	struct task_struct *task;
2021  
2022  	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
2023  
2024  	stat->uid = GLOBAL_ROOT_UID;
2025  	stat->gid = GLOBAL_ROOT_GID;
2026  	rcu_read_lock();
2027  	task = pid_task(proc_pid(inode), PIDTYPE_PID);
2028  	if (task) {
2029  		if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
2030  			rcu_read_unlock();
2031  			/*
2032  			 * This doesn't prevent learning whether PID exists,
2033  			 * it only makes getattr() consistent with readdir().
2034  			 */
2035  			return -ENOENT;
2036  		}
2037  		task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
2038  	}
2039  	rcu_read_unlock();
2040  	return 0;
2041  }
2042  
2043  /* dentry stuff */
2044  
2045  /*
2046   * Set <pid>/... inode ownership (can change due to setuid(), etc.)
2047   */
2048  void pid_update_inode(struct task_struct *task, struct inode *inode)
2049  {
2050  	task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
2051  
2052  	inode->i_mode &= ~(S_ISUID | S_ISGID);
2053  	security_task_to_inode(task, inode);
2054  }
2055  
2056  /*
2057   * Rewrite the inode's ownerships here because the owning task may have
2058   * performed a setuid(), etc.
2059   *
2060   */
2061  static int pid_revalidate(struct dentry *dentry, unsigned int flags)
2062  {
2063  	struct inode *inode;
2064  	struct task_struct *task;
2065  	int ret = 0;
2066  
2067  	rcu_read_lock();
2068  	inode = d_inode_rcu(dentry);
2069  	if (!inode)
2070  		goto out;
2071  	task = pid_task(proc_pid(inode), PIDTYPE_PID);
2072  
2073  	if (task) {
2074  		pid_update_inode(task, inode);
2075  		ret = 1;
2076  	}
2077  out:
2078  	rcu_read_unlock();
2079  	return ret;
2080  }
2081  
2082  static inline bool proc_inode_is_dead(struct inode *inode)
2083  {
2084  	return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
2085  }
2086  
/*
 * ->d_delete() for proc pid dentries.
 *
 * If the task this dentry represents is dead, report that so the dentry is
 * killed immediately instead of being put on the lru list.
 */
int pid_delete_dentry(const struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	return proc_inode_is_dead(inode);
}
2095  
/*
 * Dentry operations table: revalidation goes through pid_revalidate() and
 * the delete decision through pid_delete_dentry().
 */
const struct dentry_operations pid_dentry_operations =
{
	.d_revalidate	= pid_revalidate,
	.d_delete	= pid_delete_dentry,
};
2101  
2102  /* Lookups */
2103  
/*
 * Fill a directory entry.
 *
 * If possible create the dcache entry and derive our inode number and
 * file type from dcache entry.
 *
 * Since all of the proc inode numbers are dynamically generated, the inode
 * numbers do not exist until the inode is cache.  This means creating
 * the dcache entry in readdir is necessary to keep the inode numbers
 * reported by readdir in sync with the inode numbers reported
 * by stat.
 *
 * @file:        directory being read; its dentry is the parent of the entry
 * @ctx:         readdir context the entry is emitted into
 * @name, @len:  name of the entry
 * @instantiate: callback used to populate a freshly allocated dentry
 * @task, @ptr:  passed through unchanged to @instantiate
 *
 * When the dentry cannot be allocated or instantiated, the entry is still
 * emitted, with inode number 1 and type DT_UNKNOWN.
 *
 * Returns the result of dir_emit().
 */
bool proc_fill_cache(struct file *file, struct dir_context *ctx,
	const char *name, unsigned int len,
	instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
	struct dentry *child, *dir = file->f_path.dentry;
	struct qstr qname = QSTR_INIT(name, len);
	struct inode *inode;
	/* Fallback values used when no dentry/inode can be set up. */
	unsigned type = DT_UNKNOWN;
	ino_t ino = 1;

	child = d_hash_and_lookup(dir, &qname);
	if (!child) {
		/* Not cached yet: allocate a dentry for the lookup. */
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
		child = d_alloc_parallel(dir, &qname, &wq);
		if (IS_ERR(child))
			goto end_instantiate;
		/* Only instantiate when the dentry is still marked in-lookup. */
		if (d_in_lookup(child)) {
			struct dentry *res;
			res = instantiate(child, task, ptr);
			d_lookup_done(child);
			/* A non-NULL result replaces the dentry we allocated. */
			if (unlikely(res)) {
				dput(child);
				child = res;
				if (IS_ERR(child))
					goto end_instantiate;
			}
		}
	}
	inode = d_inode(child);
	ino = inode->i_ino;
	/* The directory entry type is taken from the mode bits above bit 12. */
	type = inode->i_mode >> 12;
	dput(child);
end_instantiate:
	return dir_emit(ctx, name, len, ino, type);
}
2151  
2152  /*
2153   * dname_to_vma_addr - maps a dentry name into two unsigned longs
2154   * which represent vma start and end addresses.
2155   */
2156  static int dname_to_vma_addr(struct dentry *dentry,
2157  			     unsigned long *start, unsigned long *end)
2158  {
2159  	const char *str = dentry->d_name.name;
2160  	unsigned long long sval, eval;
2161  	unsigned int len;
2162  
2163  	if (str[0] == '0' && str[1] != '-')
2164  		return -EINVAL;
2165  	len = _parse_integer(str, 16, &sval);
2166  	if (len & KSTRTOX_OVERFLOW)
2167  		return -EINVAL;
2168  	if (sval != (unsigned long)sval)
2169  		return -EINVAL;
2170  	str += len;
2171  
2172  	if (*str != '-')
2173  		return -EINVAL;
2174  	str++;
2175  
2176  	if (str[0] == '0' && str[1])
2177  		return -EINVAL;
2178  	len = _parse_integer(str, 16, &eval);
2179  	if (len & KSTRTOX_OVERFLOW)
2180  		return -EINVAL;
2181  	if (eval != (unsigned long)eval)
2182  		return -EINVAL;
2183  	str += len;
2184  
2185  	if (*str != '\0')
2186  		return -EINVAL;
2187  
2188  	*start = sval;
2189  	*end = eval;
2190  
2191  	return 0;
2192  }
2193  
2194  static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
2195  {
2196  	unsigned long vm_start, vm_end;
2197  	bool exact_vma_exists = false;
2198  	struct mm_struct *mm = NULL;
2199  	struct task_struct *task;
2200  	struct inode *inode;
2201  	int status = 0;
2202  
2203  	if (flags & LOOKUP_RCU)
2204  		return -ECHILD;
2205  
2206  	inode = d_inode(dentry);
2207  	task = get_proc_task(inode);
2208  	if (!task)
2209  		goto out_notask;
2210  
2211  	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
2212  	if (IS_ERR(mm))
2213  		goto out;
2214  
2215  	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
2216  		status = mmap_read_lock_killable(mm);
2217  		if (!status) {
2218  			exact_vma_exists = !!find_exact_vma(mm, vm_start,
2219  							    vm_end);
2220  			mmap_read_unlock(mm);
2221  		}
2222  	}
2223  
2224  	mmput(mm);
2225  
2226  	if (exact_vma_exists) {
2227  		task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
2228  
2229  		security_task_to_inode(task, inode);
2230  		status = 1;
2231  	}
2232  
2233  out:
2234  	put_task_struct(task);
2235  
2236  out_notask:
2237  	return status;
2238  }
2239  
2240  static const struct dentry_operations tid_map_files_dentry_operations = {
2241  	.d_revalidate	= map_files_d_revalidate,
2242  	.d_delete	= pid_delete_dentry,
2243  };
2244  
2245  static int map_files_get_link(struct dentry *dentry, struct path *path)
2246  {
2247  	unsigned long vm_start, vm_end;
2248  	struct vm_area_struct *vma;
2249  	struct task_struct *task;
2250  	struct mm_struct *mm;
2251  	int rc;
2252  
2253  	rc = -ENOENT;
2254  	task = get_proc_task(d_inode(dentry));
2255  	if (!task)
2256  		goto out;
2257  
2258  	mm = get_task_mm(task);
2259  	put_task_struct(task);
2260  	if (!mm)
2261  		goto out;
2262  
2263  	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
2264  	if (rc)
2265  		goto out_mmput;
2266  
2267  	rc = mmap_read_lock_killable(mm);
2268  	if (rc)
2269  		goto out_mmput;
2270  
2271  	rc = -ENOENT;
2272  	vma = find_exact_vma(mm, vm_start, vm_end);
2273  	if (vma && vma->vm_file) {
2274  		*path = *file_user_path(vma->vm_file);
2275  		path_get(path);
2276  		rc = 0;
2277  	}
2278  	mmap_read_unlock(mm);
2279  
2280  out_mmput:
2281  	mmput(mm);
2282  out:
2283  	return rc;
2284  }
2285  
/*
 * Snapshot of one file-backed vma, collected by proc_map_files_readdir()
 * while mmap_lock is held and consumed after the lock has been dropped.
 */
struct map_files_info {
	unsigned long	start;	/* copy of vma->vm_start */
	unsigned long	end;	/* copy of vma->vm_end */
	fmode_t		mode;	/* copy of vma->vm_file->f_mode */
};
2291  
2292  /*
2293   * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
2294   * to concerns about how the symlinks may be used to bypass permissions on
2295   * ancestor directories in the path to the file in question.
2296   */
2297  static const char *
2298  proc_map_files_get_link(struct dentry *dentry,
2299  			struct inode *inode,
2300  		        struct delayed_call *done)
2301  {
2302  	if (!checkpoint_restore_ns_capable(&init_user_ns))
2303  		return ERR_PTR(-EPERM);
2304  
2305  	return proc_pid_get_link(dentry, inode, done);
2306  }
2307  
2308  /*
2309   * Identical to proc_pid_link_inode_operations except for get_link()
2310   */
2311  static const struct inode_operations proc_map_files_link_inode_operations = {
2312  	.readlink	= proc_pid_readlink,
2313  	.get_link	= proc_map_files_get_link,
2314  	.setattr	= proc_setattr,
2315  };
2316  
2317  static struct dentry *
2318  proc_map_files_instantiate(struct dentry *dentry,
2319  			   struct task_struct *task, const void *ptr)
2320  {
2321  	fmode_t mode = (fmode_t)(unsigned long)ptr;
2322  	struct proc_inode *ei;
2323  	struct inode *inode;
2324  
2325  	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
2326  				    ((mode & FMODE_READ ) ? S_IRUSR : 0) |
2327  				    ((mode & FMODE_WRITE) ? S_IWUSR : 0));
2328  	if (!inode)
2329  		return ERR_PTR(-ENOENT);
2330  
2331  	ei = PROC_I(inode);
2332  	ei->op.proc_get_link = map_files_get_link;
2333  
2334  	inode->i_op = &proc_map_files_link_inode_operations;
2335  	inode->i_size = 64;
2336  
2337  	return proc_splice_unmountable(inode, dentry,
2338  				       &tid_map_files_dentry_operations);
2339  }
2340  
2341  static struct dentry *proc_map_files_lookup(struct inode *dir,
2342  		struct dentry *dentry, unsigned int flags)
2343  {
2344  	unsigned long vm_start, vm_end;
2345  	struct vm_area_struct *vma;
2346  	struct task_struct *task;
2347  	struct dentry *result;
2348  	struct mm_struct *mm;
2349  
2350  	result = ERR_PTR(-ENOENT);
2351  	task = get_proc_task(dir);
2352  	if (!task)
2353  		goto out;
2354  
2355  	result = ERR_PTR(-EACCES);
2356  	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
2357  		goto out_put_task;
2358  
2359  	result = ERR_PTR(-ENOENT);
2360  	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
2361  		goto out_put_task;
2362  
2363  	mm = get_task_mm(task);
2364  	if (!mm)
2365  		goto out_put_task;
2366  
2367  	result = ERR_PTR(-EINTR);
2368  	if (mmap_read_lock_killable(mm))
2369  		goto out_put_mm;
2370  
2371  	result = ERR_PTR(-ENOENT);
2372  	vma = find_exact_vma(mm, vm_start, vm_end);
2373  	if (!vma)
2374  		goto out_no_vma;
2375  
2376  	if (vma->vm_file)
2377  		result = proc_map_files_instantiate(dentry, task,
2378  				(void *)(unsigned long)vma->vm_file->f_mode);
2379  
2380  out_no_vma:
2381  	mmap_read_unlock(mm);
2382  out_put_mm:
2383  	mmput(mm);
2384  out_put_task:
2385  	put_task_struct(task);
2386  out:
2387  	return result;
2388  }
2389  
2390  static const struct inode_operations proc_map_files_inode_operations = {
2391  	.lookup		= proc_map_files_lookup,
2392  	.permission	= proc_fd_permission,
2393  	.setattr	= proc_setattr,
2394  };
2395  
2396  static int
2397  proc_map_files_readdir(struct file *file, struct dir_context *ctx)
2398  {
2399  	struct vm_area_struct *vma;
2400  	struct task_struct *task;
2401  	struct mm_struct *mm;
2402  	unsigned long nr_files, pos, i;
2403  	GENRADIX(struct map_files_info) fa;
2404  	struct map_files_info *p;
2405  	int ret;
2406  	struct vma_iterator vmi;
2407  
2408  	genradix_init(&fa);
2409  
2410  	ret = -ENOENT;
2411  	task = get_proc_task(file_inode(file));
2412  	if (!task)
2413  		goto out;
2414  
2415  	ret = -EACCES;
2416  	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
2417  		goto out_put_task;
2418  
2419  	ret = 0;
2420  	if (!dir_emit_dots(file, ctx))
2421  		goto out_put_task;
2422  
2423  	mm = get_task_mm(task);
2424  	if (!mm)
2425  		goto out_put_task;
2426  
2427  	ret = mmap_read_lock_killable(mm);
2428  	if (ret) {
2429  		mmput(mm);
2430  		goto out_put_task;
2431  	}
2432  
2433  	nr_files = 0;
2434  
2435  	/*
2436  	 * We need two passes here:
2437  	 *
2438  	 *  1) Collect vmas of mapped files with mmap_lock taken
2439  	 *  2) Release mmap_lock and instantiate entries
2440  	 *
2441  	 * otherwise we get lockdep complained, since filldir()
2442  	 * routine might require mmap_lock taken in might_fault().
2443  	 */
2444  
2445  	pos = 2;
2446  	vma_iter_init(&vmi, mm, 0);
2447  	for_each_vma(vmi, vma) {
2448  		if (!vma->vm_file)
2449  			continue;
2450  		if (++pos <= ctx->pos)
2451  			continue;
2452  
2453  		p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
2454  		if (!p) {
2455  			ret = -ENOMEM;
2456  			mmap_read_unlock(mm);
2457  			mmput(mm);
2458  			goto out_put_task;
2459  		}
2460  
2461  		p->start = vma->vm_start;
2462  		p->end = vma->vm_end;
2463  		p->mode = vma->vm_file->f_mode;
2464  	}
2465  	mmap_read_unlock(mm);
2466  	mmput(mm);
2467  
2468  	for (i = 0; i < nr_files; i++) {
2469  		char buf[4 * sizeof(long) + 2];	/* max: %lx-%lx\0 */
2470  		unsigned int len;
2471  
2472  		p = genradix_ptr(&fa, i);
2473  		len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
2474  		if (!proc_fill_cache(file, ctx,
2475  				      buf, len,
2476  				      proc_map_files_instantiate,
2477  				      task,
2478  				      (void *)(unsigned long)p->mode))
2479  			break;
2480  		ctx->pos++;
2481  	}
2482  
2483  out_put_task:
2484  	put_task_struct(task);
2485  out:
2486  	genradix_free(&fa);
2487  	return ret;
2488  }
2489  
2490  static const struct file_operations proc_map_files_operations = {
2491  	.read		= generic_read_dir,
2492  	.iterate_shared	= proc_map_files_readdir,
2493  	.llseek		= generic_file_llseek,
2494  };
2495  
2496  #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
/* Per-open state of the timers seq_file. */
struct timers_private {
	struct pid *pid;		/* set from proc_pid() in proc_timers_open() */
	struct task_struct *task;	/* taken in timers_start(), put in timers_stop() */
	struct sighand_struct *sighand;	/* result of lock_task_sighand(); NULL when not locked */
	struct pid_namespace *ns;	/* set from proc_pid_ns() in proc_timers_open() */
	unsigned long flags;		/* flags handed to lock/unlock_task_sighand() */
};
2504  
2505  static void *timers_start(struct seq_file *m, loff_t *pos)
2506  {
2507  	struct timers_private *tp = m->private;
2508  
2509  	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
2510  	if (!tp->task)
2511  		return ERR_PTR(-ESRCH);
2512  
2513  	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
2514  	if (!tp->sighand)
2515  		return ERR_PTR(-ESRCH);
2516  
2517  	return seq_hlist_start(&tp->task->signal->posix_timers, *pos);
2518  }
2519  
2520  static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
2521  {
2522  	struct timers_private *tp = m->private;
2523  	return seq_hlist_next(v, &tp->task->signal->posix_timers, pos);
2524  }
2525  
2526  static void timers_stop(struct seq_file *m, void *v)
2527  {
2528  	struct timers_private *tp = m->private;
2529  
2530  	if (tp->sighand) {
2531  		unlock_task_sighand(tp->task, &tp->flags);
2532  		tp->sighand = NULL;
2533  	}
2534  
2535  	if (tp->task) {
2536  		put_task_struct(tp->task);
2537  		tp->task = NULL;
2538  	}
2539  }
2540  
2541  static int show_timer(struct seq_file *m, void *v)
2542  {
2543  	struct k_itimer *timer;
2544  	struct timers_private *tp = m->private;
2545  	int notify;
2546  	static const char * const nstr[] = {
2547  		[SIGEV_SIGNAL] = "signal",
2548  		[SIGEV_NONE] = "none",
2549  		[SIGEV_THREAD] = "thread",
2550  	};
2551  
2552  	timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
2553  	notify = timer->it_sigev_notify;
2554  
2555  	seq_printf(m, "ID: %d\n", timer->it_id);
2556  	seq_printf(m, "signal: %d/%px\n",
2557  		   timer->sigq.info.si_signo,
2558  		   timer->sigq.info.si_value.sival_ptr);
2559  	seq_printf(m, "notify: %s/%s.%d\n",
2560  		   nstr[notify & ~SIGEV_THREAD_ID],
2561  		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
2562  		   pid_nr_ns(timer->it_pid, tp->ns));
2563  	seq_printf(m, "ClockID: %d\n", timer->it_clock);
2564  
2565  	return 0;
2566  }
2567  
2568  static const struct seq_operations proc_timers_seq_ops = {
2569  	.start	= timers_start,
2570  	.next	= timers_next,
2571  	.stop	= timers_stop,
2572  	.show	= show_timer,
2573  };
2574  
2575  static int proc_timers_open(struct inode *inode, struct file *file)
2576  {
2577  	struct timers_private *tp;
2578  
2579  	tp = __seq_open_private(file, &proc_timers_seq_ops,
2580  			sizeof(struct timers_private));
2581  	if (!tp)
2582  		return -ENOMEM;
2583  
2584  	tp->pid = proc_pid(inode);
2585  	tp->ns = proc_pid_ns(inode->i_sb);
2586  	return 0;
2587  }
2588  
2589  static const struct file_operations proc_timers_operations = {
2590  	.open		= proc_timers_open,
2591  	.read		= seq_read,
2592  	.llseek		= seq_lseek,
2593  	.release	= seq_release_private,
2594  };
2595  #endif
2596  
2597  static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
2598  					size_t count, loff_t *offset)
2599  {
2600  	struct inode *inode = file_inode(file);
2601  	struct task_struct *p;
2602  	u64 slack_ns;
2603  	int err;
2604  
2605  	err = kstrtoull_from_user(buf, count, 10, &slack_ns);
2606  	if (err < 0)
2607  		return err;
2608  
2609  	p = get_proc_task(inode);
2610  	if (!p)
2611  		return -ESRCH;
2612  
2613  	if (p != current) {
2614  		rcu_read_lock();
2615  		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2616  			rcu_read_unlock();
2617  			count = -EPERM;
2618  			goto out;
2619  		}
2620  		rcu_read_unlock();
2621  
2622  		err = security_task_setscheduler(p);
2623  		if (err) {
2624  			count = err;
2625  			goto out;
2626  		}
2627  	}
2628  
2629  	task_lock(p);
2630  	if (rt_or_dl_task_policy(p))
2631  		slack_ns = 0;
2632  	else if (slack_ns == 0)
2633  		slack_ns = p->default_timer_slack_ns;
2634  	p->timer_slack_ns = slack_ns;
2635  	task_unlock(p);
2636  
2637  out:
2638  	put_task_struct(p);
2639  
2640  	return count;
2641  }
2642  
2643  static int timerslack_ns_show(struct seq_file *m, void *v)
2644  {
2645  	struct inode *inode = m->private;
2646  	struct task_struct *p;
2647  	int err = 0;
2648  
2649  	p = get_proc_task(inode);
2650  	if (!p)
2651  		return -ESRCH;
2652  
2653  	if (p != current) {
2654  		rcu_read_lock();
2655  		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2656  			rcu_read_unlock();
2657  			err = -EPERM;
2658  			goto out;
2659  		}
2660  		rcu_read_unlock();
2661  
2662  		err = security_task_getscheduler(p);
2663  		if (err)
2664  			goto out;
2665  	}
2666  
2667  	task_lock(p);
2668  	seq_printf(m, "%llu\n", p->timer_slack_ns);
2669  	task_unlock(p);
2670  
2671  out:
2672  	put_task_struct(p);
2673  
2674  	return err;
2675  }
2676  
2677  static int timerslack_ns_open(struct inode *inode, struct file *filp)
2678  {
2679  	return single_open(filp, timerslack_ns_show, inode);
2680  }
2681  
2682  static const struct file_operations proc_pid_set_timerslack_ns_operations = {
2683  	.open		= timerslack_ns_open,
2684  	.read		= seq_read,
2685  	.write		= timerslack_ns_write,
2686  	.llseek		= seq_lseek,
2687  	.release	= single_release,
2688  };
2689  
2690  static struct dentry *proc_pident_instantiate(struct dentry *dentry,
2691  	struct task_struct *task, const void *ptr)
2692  {
2693  	const struct pid_entry *p = ptr;
2694  	struct inode *inode;
2695  	struct proc_inode *ei;
2696  
2697  	inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
2698  	if (!inode)
2699  		return ERR_PTR(-ENOENT);
2700  
2701  	ei = PROC_I(inode);
2702  	if (S_ISDIR(inode->i_mode))
2703  		set_nlink(inode, 2);	/* Use getattr to fix if necessary */
2704  	if (p->iop)
2705  		inode->i_op = p->iop;
2706  	if (p->fop)
2707  		inode->i_fop = p->fop;
2708  	ei->op = p->op;
2709  	pid_update_inode(task, inode);
2710  	d_set_d_op(dentry, &pid_dentry_operations);
2711  	return d_splice_alias(inode, dentry);
2712  }
2713  
2714  static struct dentry *proc_pident_lookup(struct inode *dir,
2715  					 struct dentry *dentry,
2716  					 const struct pid_entry *p,
2717  					 const struct pid_entry *end)
2718  {
2719  	struct task_struct *task = get_proc_task(dir);
2720  	struct dentry *res = ERR_PTR(-ENOENT);
2721  
2722  	if (!task)
2723  		goto out_no_task;
2724  
2725  	/*
2726  	 * Yes, it does not scale. And it should not. Don't add
2727  	 * new entries into /proc/<tgid>/ without very good reasons.
2728  	 */
2729  	for (; p < end; p++) {
2730  		if (p->len != dentry->d_name.len)
2731  			continue;
2732  		if (!memcmp(dentry->d_name.name, p->name, p->len)) {
2733  			res = proc_pident_instantiate(dentry, task, p);
2734  			break;
2735  		}
2736  	}
2737  	put_task_struct(task);
2738  out_no_task:
2739  	return res;
2740  }
2741  
2742  static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
2743  		const struct pid_entry *ents, unsigned int nents)
2744  {
2745  	struct task_struct *task = get_proc_task(file_inode(file));
2746  	const struct pid_entry *p;
2747  
2748  	if (!task)
2749  		return -ENOENT;
2750  
2751  	if (!dir_emit_dots(file, ctx))
2752  		goto out;
2753  
2754  	if (ctx->pos >= nents + 2)
2755  		goto out;
2756  
2757  	for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
2758  		if (!proc_fill_cache(file, ctx, p->name, p->len,
2759  				proc_pident_instantiate, task, p))
2760  			break;
2761  		ctx->pos++;
2762  	}
2763  out:
2764  	put_task_struct(task);
2765  	return 0;
2766  }
2767  
2768  #ifdef CONFIG_SECURITY
/*
 * Open handler for the attr files.  The result of __mem_open() is not
 * propagated: this function always returns 0.
 * NOTE(review): __mem_open() presumably stores the opener's mm in
 * file->private_data, which proc_pid_attr_write() later compares with
 * current->mm - confirm against its definition.
 */
static int proc_pid_attr_open(struct inode *inode, struct file *file)
{
	/* Start from a known value in case __mem_open() stores nothing. */
	file->private_data = NULL;
	__mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
	return 0;
}
2775  
2776  static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2777  				  size_t count, loff_t *ppos)
2778  {
2779  	struct inode * inode = file_inode(file);
2780  	char *p = NULL;
2781  	ssize_t length;
2782  	struct task_struct *task = get_proc_task(inode);
2783  
2784  	if (!task)
2785  		return -ESRCH;
2786  
2787  	length = security_getprocattr(task, PROC_I(inode)->op.lsmid,
2788  				      file->f_path.dentry->d_name.name,
2789  				      &p);
2790  	put_task_struct(task);
2791  	if (length > 0)
2792  		length = simple_read_from_buffer(buf, count, ppos, p, length);
2793  	kfree(p);
2794  	return length;
2795  }
2796  
2797  static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2798  				   size_t count, loff_t *ppos)
2799  {
2800  	struct inode * inode = file_inode(file);
2801  	struct task_struct *task;
2802  	void *page;
2803  	int rv;
2804  
2805  	/* A task may only write when it was the opener. */
2806  	if (file->private_data != current->mm)
2807  		return -EPERM;
2808  
2809  	rcu_read_lock();
2810  	task = pid_task(proc_pid(inode), PIDTYPE_PID);
2811  	if (!task) {
2812  		rcu_read_unlock();
2813  		return -ESRCH;
2814  	}
2815  	/* A task may only write its own attributes. */
2816  	if (current != task) {
2817  		rcu_read_unlock();
2818  		return -EACCES;
2819  	}
2820  	/* Prevent changes to overridden credentials. */
2821  	if (current_cred() != current_real_cred()) {
2822  		rcu_read_unlock();
2823  		return -EBUSY;
2824  	}
2825  	rcu_read_unlock();
2826  
2827  	if (count > PAGE_SIZE)
2828  		count = PAGE_SIZE;
2829  
2830  	/* No partial writes. */
2831  	if (*ppos != 0)
2832  		return -EINVAL;
2833  
2834  	page = memdup_user(buf, count);
2835  	if (IS_ERR(page)) {
2836  		rv = PTR_ERR(page);
2837  		goto out;
2838  	}
2839  
2840  	/* Guard against adverse ptrace interaction */
2841  	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
2842  	if (rv < 0)
2843  		goto out_free;
2844  
2845  	rv = security_setprocattr(PROC_I(inode)->op.lsmid,
2846  				  file->f_path.dentry->d_name.name, page,
2847  				  count);
2848  	mutex_unlock(&current->signal->cred_guard_mutex);
2849  out_free:
2850  	kfree(page);
2851  out:
2852  	return rv;
2853  }
2854  
2855  static const struct file_operations proc_pid_attr_operations = {
2856  	.open		= proc_pid_attr_open,
2857  	.read		= proc_pid_attr_read,
2858  	.write		= proc_pid_attr_write,
2859  	.llseek		= generic_file_llseek,
2860  	.release	= mem_release,
2861  };
2862  
/*
 * LSM_DIR_OPS(LSM) - generate the directory plumbing for one per-LSM
 * attribute subdirectory.
 *
 * Expects an array named <LSM>_attr_dir_stuff to be visible at the point
 * of expansion and emits, all keyed on the LSM name:
 *   - proc_<LSM>_attr_dir_iterate():  readdir via proc_pident_readdir()
 *   - proc_<LSM>_attr_dir_ops:        file_operations using that iterator
 *   - proc_<LSM>_attr_dir_lookup():   lookup via proc_pident_lookup()
 *   - proc_<LSM>_attr_dir_inode_ops:  inode_operations using that lookup
 *
 * The expansion ends without a semicolon, so the invocation supplies it.
 */
#define LSM_DIR_OPS(LSM) \
static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
			     struct dir_context *ctx) \
{ \
	return proc_pident_readdir(filp, ctx, \
				   LSM##_attr_dir_stuff, \
				   ARRAY_SIZE(LSM##_attr_dir_stuff)); \
} \
\
static const struct file_operations proc_##LSM##_attr_dir_ops = { \
	.read		= generic_read_dir, \
	.iterate_shared	= proc_##LSM##_attr_dir_iterate, \
	.llseek		= default_llseek, \
}; \
\
static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
				struct dentry *dentry, unsigned int flags) \
{ \
	return proc_pident_lookup(dir, dentry, \
				  LSM##_attr_dir_stuff, \
				  LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
} \
\
static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
	.lookup		= proc_##LSM##_attr_dir_lookup, \
	.getattr	= pid_getattr, \
	.setattr	= proc_setattr, \
}
2891  
2892  #ifdef CONFIG_SECURITY_SMACK
/*
 * Contents of the "smack" attribute subdirectory, followed by the
 * directory operations generated for it by LSM_DIR_OPS().
 */
static const struct pid_entry smack_attr_dir_stuff[] = {
	ATTR(LSM_ID_SMACK, "current",	0666),
};
LSM_DIR_OPS(smack);
2897  #endif
2898  
2899  #ifdef CONFIG_SECURITY_APPARMOR
/*
 * Contents of the "apparmor" attribute subdirectory, followed by the
 * directory operations generated for it by LSM_DIR_OPS().
 */
static const struct pid_entry apparmor_attr_dir_stuff[] = {
	ATTR(LSM_ID_APPARMOR, "current",	0666),
	ATTR(LSM_ID_APPARMOR, "prev",		0444),
	ATTR(LSM_ID_APPARMOR, "exec",		0666),
};
LSM_DIR_OPS(apparmor);
2906  #endif
2907  
/*
 * Entries of the attr directory, served by proc_attr_dir_readdir() and
 * proc_attr_dir_lookup().  The plain attribute files use LSM_ID_UNDEF;
 * the per-LSM subdirectories are present only when the corresponding
 * LSM is configured.  Entries are listed by readdir in table order.
 */
static const struct pid_entry attr_dir_stuff[] = {
	ATTR(LSM_ID_UNDEF, "current",	0666),
	ATTR(LSM_ID_UNDEF, "prev",		0444),
	ATTR(LSM_ID_UNDEF, "exec",		0666),
	ATTR(LSM_ID_UNDEF, "fscreate",	0666),
	ATTR(LSM_ID_UNDEF, "keycreate",	0666),
	ATTR(LSM_ID_UNDEF, "sockcreate",	0666),
#ifdef CONFIG_SECURITY_SMACK
	DIR("smack",			0555,
	    proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
#endif
#ifdef CONFIG_SECURITY_APPARMOR
	DIR("apparmor",			0555,
	    proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
#endif
};
2924  
2925  static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
2926  {
2927  	return proc_pident_readdir(file, ctx,
2928  				   attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2929  }
2930  
2931  static const struct file_operations proc_attr_dir_operations = {
2932  	.read		= generic_read_dir,
2933  	.iterate_shared	= proc_attr_dir_readdir,
2934  	.llseek		= generic_file_llseek,
2935  };
2936  
2937  static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2938  				struct dentry *dentry, unsigned int flags)
2939  {
2940  	return proc_pident_lookup(dir, dentry,
2941  				  attr_dir_stuff,
2942  				  attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
2943  }
2944  
2945  static const struct inode_operations proc_attr_dir_inode_operations = {
2946  	.lookup		= proc_attr_dir_lookup,
2947  	.getattr	= pid_getattr,
2948  	.setattr	= proc_setattr,
2949  };
2950  
2951  #endif
2952  
2953  #ifdef CONFIG_ELF_CORE
2954  static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2955  					 size_t count, loff_t *ppos)
2956  {
2957  	struct task_struct *task = get_proc_task(file_inode(file));
2958  	struct mm_struct *mm;
2959  	char buffer[PROC_NUMBUF];
2960  	size_t len;
2961  	int ret;
2962  
2963  	if (!task)
2964  		return -ESRCH;
2965  
2966  	ret = 0;
2967  	mm = get_task_mm(task);
2968  	if (mm) {
2969  		len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2970  			       ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2971  				MMF_DUMP_FILTER_SHIFT));
2972  		mmput(mm);
2973  		ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2974  	}
2975  
2976  	put_task_struct(task);
2977  
2978  	return ret;
2979  }
2980  
2981  static ssize_t proc_coredump_filter_write(struct file *file,
2982  					  const char __user *buf,
2983  					  size_t count,
2984  					  loff_t *ppos)
2985  {
2986  	struct task_struct *task;
2987  	struct mm_struct *mm;
2988  	unsigned int val;
2989  	int ret;
2990  	int i;
2991  	unsigned long mask;
2992  
2993  	ret = kstrtouint_from_user(buf, count, 0, &val);
2994  	if (ret < 0)
2995  		return ret;
2996  
2997  	ret = -ESRCH;
2998  	task = get_proc_task(file_inode(file));
2999  	if (!task)
3000  		goto out_no_task;
3001  
3002  	mm = get_task_mm(task);
3003  	if (!mm)
3004  		goto out_no_mm;
3005  	ret = 0;
3006  
3007  	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
3008  		if (val & mask)
3009  			set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
3010  		else
3011  			clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
3012  	}
3013  
3014  	mmput(mm);
3015   out_no_mm:
3016  	put_task_struct(task);
3017   out_no_task:
3018  	if (ret < 0)
3019  		return ret;
3020  	return count;
3021  }
3022  
3023  static const struct file_operations proc_coredump_filter_operations = {
3024  	.read		= proc_coredump_filter_read,
3025  	.write		= proc_coredump_filter_write,
3026  	.llseek		= generic_file_llseek,
3027  };
3028  #endif
3029  
3030  #ifdef CONFIG_TASK_IO_ACCOUNTING
/*
 * Print the I/O accounting counters of @task to @m.
 *
 * With @whole set, the counters are the sum of the signal struct's own
 * totals and those of every thread in the group; otherwise only @task's
 * counters are reported.
 *
 * Returns 0 on success, the error from down_read_killable() if the wait
 * for exec_update_lock was interrupted, or -EACCES if the caller may not
 * ptrace-read @task.
 */
static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
{
	struct task_io_accounting acct;
	int result;

	/* The access check below is made with exec_update_lock held. */
	result = down_read_killable(&task->signal->exec_update_lock);
	if (result)
		return result;

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		result = -EACCES;
		goto out_unlock;
	}

	if (whole) {
		struct signal_struct *sig = task->signal;
		struct task_struct *t;
		unsigned int seq = 1;
		unsigned long flags;

		rcu_read_lock();
		/*
		 * Sum under sig->stats_lock; the loop repeats when
		 * need_seqretry() asks for another pass.
		 */
		do {
			seq++; /* 2 on the 1st/lockless path, otherwise odd */
			flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

			/* Restart from the group totals on every pass. */
			acct = sig->ioac;
			__for_each_thread(sig, t)
				task_io_accounting_add(&acct, &t->ioac);

		} while (need_seqretry(&sig->stats_lock, seq));
		done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
		rcu_read_unlock();
	} else {
		acct = task->ioac;
	}

	seq_printf(m,
		   "rchar: %llu\n"
		   "wchar: %llu\n"
		   "syscr: %llu\n"
		   "syscw: %llu\n"
		   "read_bytes: %llu\n"
		   "write_bytes: %llu\n"
		   "cancelled_write_bytes: %llu\n",
		   (unsigned long long)acct.rchar,
		   (unsigned long long)acct.wchar,
		   (unsigned long long)acct.syscr,
		   (unsigned long long)acct.syscw,
		   (unsigned long long)acct.read_bytes,
		   (unsigned long long)acct.write_bytes,
		   (unsigned long long)acct.cancelled_write_bytes);
	result = 0;

out_unlock:
	up_read(&task->signal->exec_update_lock);
	return result;
}
3088  
/* Per-thread I/O accounting: report the counters of @task only. */
static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				  struct pid *pid, struct task_struct *task)
{
	const int whole_group = 0;

	return do_io_accounting(task, m, whole_group);
}
3094  
/* Per-process I/O accounting: report the whole thread group's counters. */
static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				   struct pid *pid, struct task_struct *task)
{
	const int whole_group = 1;

	return do_io_accounting(task, m, whole_group);
}
3100  #endif /* CONFIG_TASK_IO_ACCOUNTING */
3101  
3102  #ifdef CONFIG_USER_NS
3103  static int proc_id_map_open(struct inode *inode, struct file *file,
3104  	const struct seq_operations *seq_ops)
3105  {
3106  	struct user_namespace *ns = NULL;
3107  	struct task_struct *task;
3108  	struct seq_file *seq;
3109  	int ret = -EINVAL;
3110  
3111  	task = get_proc_task(inode);
3112  	if (task) {
3113  		rcu_read_lock();
3114  		ns = get_user_ns(task_cred_xxx(task, user_ns));
3115  		rcu_read_unlock();
3116  		put_task_struct(task);
3117  	}
3118  	if (!ns)
3119  		goto err;
3120  
3121  	ret = seq_open(file, seq_ops);
3122  	if (ret)
3123  		goto err_put_ns;
3124  
3125  	seq = file->private_data;
3126  	seq->private = ns;
3127  
3128  	return 0;
3129  err_put_ns:
3130  	put_user_ns(ns);
3131  err:
3132  	return ret;
3133  }
3134  
3135  static int proc_id_map_release(struct inode *inode, struct file *file)
3136  {
3137  	struct seq_file *seq = file->private_data;
3138  	struct user_namespace *ns = seq->private;
3139  	put_user_ns(ns);
3140  	return seq_release(inode, file);
3141  }
3142  
3143  static int proc_uid_map_open(struct inode *inode, struct file *file)
3144  {
3145  	return proc_id_map_open(inode, file, &proc_uid_seq_operations);
3146  }
3147  
3148  static int proc_gid_map_open(struct inode *inode, struct file *file)
3149  {
3150  	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
3151  }
3152  
3153  static int proc_projid_map_open(struct inode *inode, struct file *file)
3154  {
3155  	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
3156  }
3157  
3158  static const struct file_operations proc_uid_map_operations = {
3159  	.open		= proc_uid_map_open,
3160  	.write		= proc_uid_map_write,
3161  	.read		= seq_read,
3162  	.llseek		= seq_lseek,
3163  	.release	= proc_id_map_release,
3164  };
3165  
3166  static const struct file_operations proc_gid_map_operations = {
3167  	.open		= proc_gid_map_open,
3168  	.write		= proc_gid_map_write,
3169  	.read		= seq_read,
3170  	.llseek		= seq_lseek,
3171  	.release	= proc_id_map_release,
3172  };
3173  
3174  static const struct file_operations proc_projid_map_operations = {
3175  	.open		= proc_projid_map_open,
3176  	.write		= proc_projid_map_write,
3177  	.read		= seq_read,
3178  	.llseek		= seq_lseek,
3179  	.release	= proc_id_map_release,
3180  };
3181  
3182  static int proc_setgroups_open(struct inode *inode, struct file *file)
3183  {
3184  	struct user_namespace *ns = NULL;
3185  	struct task_struct *task;
3186  	int ret;
3187  
3188  	ret = -ESRCH;
3189  	task = get_proc_task(inode);
3190  	if (task) {
3191  		rcu_read_lock();
3192  		ns = get_user_ns(task_cred_xxx(task, user_ns));
3193  		rcu_read_unlock();
3194  		put_task_struct(task);
3195  	}
3196  	if (!ns)
3197  		goto err;
3198  
3199  	if (file->f_mode & FMODE_WRITE) {
3200  		ret = -EACCES;
3201  		if (!ns_capable(ns, CAP_SYS_ADMIN))
3202  			goto err_put_ns;
3203  	}
3204  
3205  	ret = single_open(file, &proc_setgroups_show, ns);
3206  	if (ret)
3207  		goto err_put_ns;
3208  
3209  	return 0;
3210  err_put_ns:
3211  	put_user_ns(ns);
3212  err:
3213  	return ret;
3214  }
3215  
3216  static int proc_setgroups_release(struct inode *inode, struct file *file)
3217  {
3218  	struct seq_file *seq = file->private_data;
3219  	struct user_namespace *ns = seq->private;
3220  	int ret = single_release(inode, file);
3221  	put_user_ns(ns);
3222  	return ret;
3223  }
3224  
3225  static const struct file_operations proc_setgroups_operations = {
3226  	.open		= proc_setgroups_open,
3227  	.write		= proc_setgroups_write,
3228  	.read		= seq_read,
3229  	.llseek		= seq_lseek,
3230  	.release	= proc_setgroups_release,
3231  };
3232  #endif /* CONFIG_USER_NS */
3233  
3234  static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
3235  				struct pid *pid, struct task_struct *task)
3236  {
3237  	int err = lock_trace(task);
3238  	if (!err) {
3239  		seq_printf(m, "%08x\n", task->personality);
3240  		unlock_trace(task);
3241  	}
3242  	return err;
3243  }
3244  
3245  #ifdef CONFIG_LIVEPATCH
3246  static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
3247  				struct pid *pid, struct task_struct *task)
3248  {
3249  	seq_printf(m, "%d\n", task->patch_state);
3250  	return 0;
3251  }
3252  #endif /* CONFIG_LIVEPATCH */
3253  
3254  #ifdef CONFIG_KSM
3255  static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns,
3256  				struct pid *pid, struct task_struct *task)
3257  {
3258  	struct mm_struct *mm;
3259  
3260  	mm = get_task_mm(task);
3261  	if (mm) {
3262  		seq_printf(m, "%lu\n", mm->ksm_merging_pages);
3263  		mmput(mm);
3264  	}
3265  
3266  	return 0;
3267  }
3268  static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
3269  				struct pid *pid, struct task_struct *task)
3270  {
3271  	struct mm_struct *mm;
3272  
3273  	mm = get_task_mm(task);
3274  	if (mm) {
3275  		seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
3276  		seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm));
3277  		seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
3278  		seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
3279  		mmput(mm);
3280  	}
3281  
3282  	return 0;
3283  }
3284  #endif /* CONFIG_KSM */
3285  
3286  #ifdef CONFIG_STACKLEAK_METRICS
3287  static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
3288  				struct pid *pid, struct task_struct *task)
3289  {
3290  	unsigned long prev_depth = THREAD_SIZE -
3291  				(task->prev_lowest_stack & (THREAD_SIZE - 1));
3292  	unsigned long depth = THREAD_SIZE -
3293  				(task->lowest_stack & (THREAD_SIZE - 1));
3294  
3295  	seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
3296  							prev_depth, depth);
3297  	return 0;
3298  }
3299  #endif /* CONFIG_STACKLEAK_METRICS */
3300  
3301  /*
3302   * Thread groups
3303   */
/*
 * Declared ahead of use: the "task" entry of tgid_base_stuff[] below
 * refers to both objects.
 * NOTE(review): the definitions are presumably further down in this
 * file - confirm.
 */
static const struct file_operations proc_task_operations;
static const struct inode_operations proc_task_inode_operations;
3306  
3307  static const struct pid_entry tgid_base_stuff[] = {
3308  	DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
3309  	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3310  	DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
3311  	DIR("fdinfo",     S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3312  	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3313  #ifdef CONFIG_NET
3314  	DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
3315  #endif
3316  	REG("environ",    S_IRUSR, proc_environ_operations),
3317  	REG("auxv",       S_IRUSR, proc_auxv_operations),
3318  	ONE("status",     S_IRUGO, proc_pid_status),
3319  	ONE("personality", S_IRUSR, proc_pid_personality),
3320  	ONE("limits",	  S_IRUGO, proc_pid_limits),
3321  #ifdef CONFIG_SCHED_DEBUG
3322  	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3323  #endif
3324  #ifdef CONFIG_SCHED_AUTOGROUP
3325  	REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
3326  #endif
3327  #ifdef CONFIG_TIME_NS
3328  	REG("timens_offsets",  S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
3329  #endif
3330  	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3331  #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3332  	ONE("syscall",    S_IRUSR, proc_pid_syscall),
3333  #endif
3334  	REG("cmdline",    S_IRUGO, proc_pid_cmdline_ops),
3335  	ONE("stat",       S_IRUGO, proc_tgid_stat),
3336  	ONE("statm",      S_IRUGO, proc_pid_statm),
3337  	REG("maps",       S_IRUGO, proc_pid_maps_operations),
3338  #ifdef CONFIG_NUMA
3339  	REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
3340  #endif
3341  	REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
3342  	LNK("cwd",        proc_cwd_link),
3343  	LNK("root",       proc_root_link),
3344  	LNK("exe",        proc_exe_link),
3345  	REG("mounts",     S_IRUGO, proc_mounts_operations),
3346  	REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
3347  	REG("mountstats", S_IRUSR, proc_mountstats_operations),
3348  #ifdef CONFIG_PROC_PAGE_MONITOR
3349  	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3350  	REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
3351  	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
3352  	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
3353  #endif
3354  #ifdef CONFIG_SECURITY
3355  	DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
3356  #endif
3357  #ifdef CONFIG_KALLSYMS
3358  	ONE("wchan",      S_IRUGO, proc_pid_wchan),
3359  #endif
3360  #ifdef CONFIG_STACKTRACE
3361  	ONE("stack",      S_IRUSR, proc_pid_stack),
3362  #endif
3363  #ifdef CONFIG_SCHED_INFO
3364  	ONE("schedstat",  S_IRUGO, proc_pid_schedstat),
3365  #endif
3366  #ifdef CONFIG_LATENCYTOP
3367  	REG("latency",  S_IRUGO, proc_lstats_operations),
3368  #endif
3369  #ifdef CONFIG_PROC_PID_CPUSET
3370  	ONE("cpuset",     S_IRUGO, proc_cpuset_show),
3371  #endif
3372  #ifdef CONFIG_CGROUPS
3373  	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
3374  #endif
3375  #ifdef CONFIG_PROC_CPU_RESCTRL
3376  	ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3377  #endif
3378  	ONE("oom_score",  S_IRUGO, proc_oom_score),
3379  	REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
3380  	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3381  #ifdef CONFIG_AUDIT
3382  	REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
3383  	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
3384  #endif
3385  #ifdef CONFIG_FAULT_INJECTION
3386  	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
3387  	REG("fail-nth", 0644, proc_fail_nth_operations),
3388  #endif
3389  #ifdef CONFIG_ELF_CORE
3390  	REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
3391  #endif
3392  #ifdef CONFIG_TASK_IO_ACCOUNTING
3393  	ONE("io",	S_IRUSR, proc_tgid_io_accounting),
3394  #endif
3395  #ifdef CONFIG_USER_NS
3396  	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
3397  	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
3398  	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
3399  	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
3400  #endif
3401  #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
3402  	REG("timers",	  S_IRUGO, proc_timers_operations),
3403  #endif
3404  	REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
3405  #ifdef CONFIG_LIVEPATCH
3406  	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
3407  #endif
3408  #ifdef CONFIG_STACKLEAK_METRICS
3409  	ONE("stack_depth", S_IRUGO, proc_stack_depth),
3410  #endif
3411  #ifdef CONFIG_PROC_PID_ARCH_STATUS
3412  	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3413  #endif
3414  #ifdef CONFIG_SECCOMP_CACHE_DEBUG
3415  	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
3416  #endif
3417  #ifdef CONFIG_KSM
3418  	ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
3419  	ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
3420  #endif
3421  };
3422  
3423  static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
3424  {
3425  	return proc_pident_readdir(file, ctx,
3426  				   tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3427  }
3428  
/*
 * File operations installed on thread-group directory inodes by
 * proc_pid_instantiate().  tgid_pidfd_to_pid() also compares a file's
 * f_op against this table to recognise such directories.
 */
static const struct file_operations proc_tgid_base_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_tgid_base_readdir,
	.llseek		= generic_file_llseek,
};
3434  
3435  struct pid *tgid_pidfd_to_pid(const struct file *file)
3436  {
3437  	if (file->f_op != &proc_tgid_base_operations)
3438  		return ERR_PTR(-EBADF);
3439  
3440  	return proc_pid(file_inode(file));
3441  }
3442  
3443  static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
3444  {
3445  	return proc_pident_lookup(dir, dentry,
3446  				  tgid_base_stuff,
3447  				  tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
3448  }
3449  
/*
 * Inode operations installed on thread-group directory inodes by
 * proc_pid_instantiate().
 */
static const struct inode_operations proc_tgid_base_inode_operations = {
	.lookup		= proc_tgid_base_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
	.permission	= proc_pid_permission,
};
3456  
/**
 * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
 * @pid: pid that should be flushed.
 *
 * This function walks the list of inodes (belonging to any proc
 * filesystem) that are attached to the pid and flushes them from
 * the dentry cache.  The walk itself is delegated to
 * proc_invalidate_siblings_dcache(), which is given @pid's inode list
 * and the lock member of @pid.
 *
 * It is safe and reasonable to cache /proc entries for a task until
 * that task exits.  After that they just clog up the dcache with
 * useless entries, possibly causing useful dcache entries to be
 * flushed instead.  This routine is provided to flush those useless
 * dcache entries when a process is reaped.
 *
 * NOTE: This routine is just an optimization, so it does not guarantee
 *       that no dcache entries will exist after a process is reaped;
 *       it just makes it very unlikely that any will persist.
 */
void proc_flush_pid(struct pid *pid)
{
	proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
}
3480  
3481  static struct dentry *proc_pid_instantiate(struct dentry * dentry,
3482  				   struct task_struct *task, const void *ptr)
3483  {
3484  	struct inode *inode;
3485  
3486  	inode = proc_pid_make_base_inode(dentry->d_sb, task,
3487  					 S_IFDIR | S_IRUGO | S_IXUGO);
3488  	if (!inode)
3489  		return ERR_PTR(-ENOENT);
3490  
3491  	inode->i_op = &proc_tgid_base_inode_operations;
3492  	inode->i_fop = &proc_tgid_base_operations;
3493  	inode->i_flags|=S_IMMUTABLE;
3494  
3495  	set_nlink(inode, nlink_tgid);
3496  	pid_update_inode(task, inode);
3497  
3498  	d_set_d_op(dentry, &pid_dentry_operations);
3499  	return d_splice_alias(inode, dentry);
3500  }
3501  
3502  struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
3503  {
3504  	struct task_struct *task;
3505  	unsigned tgid;
3506  	struct proc_fs_info *fs_info;
3507  	struct pid_namespace *ns;
3508  	struct dentry *result = ERR_PTR(-ENOENT);
3509  
3510  	tgid = name_to_int(&dentry->d_name);
3511  	if (tgid == ~0U)
3512  		goto out;
3513  
3514  	fs_info = proc_sb_info(dentry->d_sb);
3515  	ns = fs_info->pid_ns;
3516  	rcu_read_lock();
3517  	task = find_task_by_pid_ns(tgid, ns);
3518  	if (task)
3519  		get_task_struct(task);
3520  	rcu_read_unlock();
3521  	if (!task)
3522  		goto out;
3523  
3524  	/* Limit procfs to only ptraceable tasks */
3525  	if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
3526  		if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
3527  			goto out_put_task;
3528  	}
3529  
3530  	result = proc_pid_instantiate(dentry, task, NULL);
3531  out_put_task:
3532  	put_task_struct(task);
3533  out:
3534  	return result;
3535  }
3536  
/*
 * Cursor used to walk thread groups; it is passed to and returned by
 * next_tgid(), which finds the first task with tgid >= the given tgid.
 *
 * @tgid: on input the lowest tgid to consider, on output the tgid found
 * @task: the task found (with a reference held), or NULL when the walk
 *        is finished
 */
struct tgid_iter {
	unsigned int tgid;
	struct task_struct *task;
};
3545  static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
3546  {
3547  	struct pid *pid;
3548  
3549  	if (iter.task)
3550  		put_task_struct(iter.task);
3551  	rcu_read_lock();
3552  retry:
3553  	iter.task = NULL;
3554  	pid = find_ge_pid(iter.tgid, ns);
3555  	if (pid) {
3556  		iter.tgid = pid_nr_ns(pid, ns);
3557  		iter.task = pid_task(pid, PIDTYPE_TGID);
3558  		if (!iter.task) {
3559  			iter.tgid += 1;
3560  			goto retry;
3561  		}
3562  		get_task_struct(iter.task);
3563  	}
3564  	rcu_read_unlock();
3565  	return iter;
3566  }
3567  
3568  #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
3569  
3570  /* for the /proc/ directory itself, after non-process stuff has been done */
3571  int proc_pid_readdir(struct file *file, struct dir_context *ctx)
3572  {
3573  	struct tgid_iter iter;
3574  	struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
3575  	struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
3576  	loff_t pos = ctx->pos;
3577  
3578  	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
3579  		return 0;
3580  
3581  	if (pos == TGID_OFFSET - 2) {
3582  		struct inode *inode = d_inode(fs_info->proc_self);
3583  		if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
3584  			return 0;
3585  		ctx->pos = pos = pos + 1;
3586  	}
3587  	if (pos == TGID_OFFSET - 1) {
3588  		struct inode *inode = d_inode(fs_info->proc_thread_self);
3589  		if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
3590  			return 0;
3591  		ctx->pos = pos = pos + 1;
3592  	}
3593  	iter.tgid = pos - TGID_OFFSET;
3594  	iter.task = NULL;
3595  	for (iter = next_tgid(ns, iter);
3596  	     iter.task;
3597  	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
3598  		char name[10 + 1];
3599  		unsigned int len;
3600  
3601  		cond_resched();
3602  		if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
3603  			continue;
3604  
3605  		len = snprintf(name, sizeof(name), "%u", iter.tgid);
3606  		ctx->pos = iter.tgid + TGID_OFFSET;
3607  		if (!proc_fill_cache(file, ctx, name, len,
3608  				     proc_pid_instantiate, iter.task, NULL)) {
3609  			put_task_struct(iter.task);
3610  			return 0;
3611  		}
3612  	}
3613  	ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
3614  	return 0;
3615  }
3616  
3617  /*
3618   * proc_tid_comm_permission is a special permission function exclusively
3619   * used for the node /proc/<pid>/task/<tid>/comm.
3620   * It bypasses generic permission checks in the case where a task of the same
3621   * task group attempts to access the node.
3622   * The rationale behind this is that glibc and bionic access this node for
3623   * cross thread naming (pthread_set/getname_np(!self)). However, if
3624   * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
3625   * which locks out the cross thread naming implementation.
3626   * This function makes sure that the node is always accessible for members of
3627   * same thread group.
3628   */
3629  static int proc_tid_comm_permission(struct mnt_idmap *idmap,
3630  				    struct inode *inode, int mask)
3631  {
3632  	bool is_same_tgroup;
3633  	struct task_struct *task;
3634  
3635  	task = get_proc_task(inode);
3636  	if (!task)
3637  		return -ESRCH;
3638  	is_same_tgroup = same_thread_group(current, task);
3639  	put_task_struct(task);
3640  
3641  	if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
3642  		/* This file (/proc/<pid>/task/<tid>/comm) can always be
3643  		 * read or written by the members of the corresponding
3644  		 * thread group.
3645  		 */
3646  		return 0;
3647  	}
3648  
3649  	return generic_permission(&nop_mnt_idmap, inode, mask);
3650  }
3651  
/*
 * Inode operations for the per-thread "comm" node; referenced by the
 * "comm" entry of tid_base_stuff so that proc_tid_comm_permission() is
 * used as its permission check.
 */
static const struct inode_operations proc_tid_comm_inode_operations = {
		.setattr	= proc_setattr,
		.permission	= proc_tid_comm_permission,
};
3656  
/*
 * Tasks
 *
 * Entries of a per-thread directory.  The table is consumed by
 * proc_tid_base_readdir() and proc_tid_base_lookup(), and
 * set_proc_pid_nlink() derives nlink_tid from it.
 *
 * Each entry is built with one of the DIR/REG/ONE/LNK/NOD helper macros
 * (defined outside this section).  "comm" uses NOD so that it can carry
 * its own inode operations (proc_tid_comm_inode_operations).  Entries
 * guarded by #ifdef only exist when the corresponding config option is
 * enabled.
 */
static const struct pid_entry tid_base_stuff[] = {
	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
	DIR("fdinfo",    S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
	DIR("ns",	 S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
	DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
	REG("environ",   S_IRUSR, proc_environ_operations),
	REG("auxv",      S_IRUSR, proc_auxv_operations),
	ONE("status",    S_IRUGO, proc_pid_status),
	ONE("personality", S_IRUSR, proc_pid_personality),
	ONE("limits",	 S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
	NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
			 &proc_tid_comm_inode_operations,
			 &proc_pid_set_comm_operations, {}),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	ONE("syscall",   S_IRUSR, proc_pid_syscall),
#endif
	REG("cmdline",   S_IRUGO, proc_pid_cmdline_ops),
	ONE("stat",      S_IRUGO, proc_tid_stat),
	ONE("statm",     S_IRUGO, proc_pid_statm),
	REG("maps",      S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
	REG("children",  S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
	REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
	LNK("cwd",       proc_cwd_link),
	LNK("root",      proc_root_link),
	LNK("exe",       proc_exe_link),
	REG("mounts",    S_IRUGO, proc_mounts_operations),
	REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
	REG("smaps",     S_IRUGO, proc_pid_smaps_operations),
	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
	REG("pagemap",    S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
	DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
	ONE("wchan",     S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
	ONE("stack",      S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHED_INFO
	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
	REG("latency",  S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
	ONE("cpuset",    S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
#endif
#ifdef CONFIG_PROC_CPU_RESCTRL
	ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
#endif
	ONE("oom_score", S_IRUGO, proc_oom_score),
	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDIT
	REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
	REG("fail-nth", 0644, proc_fail_nth_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
	ONE("io",	S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_USER_NS
	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_LIVEPATCH
	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
#endif
#ifdef CONFIG_PROC_PID_ARCH_STATUS
	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
#ifdef CONFIG_SECCOMP_CACHE_DEBUG
	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
#endif
#ifdef CONFIG_KSM
	ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
	ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
#endif
};
3761  
3762  static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
3763  {
3764  	return proc_pident_readdir(file, ctx,
3765  				   tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3766  }
3767  
3768  static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
3769  {
3770  	return proc_pident_lookup(dir, dentry,
3771  				  tid_base_stuff,
3772  				  tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
3773  }
3774  
/*
 * File operations installed on per-thread directory inodes by
 * proc_task_instantiate().
 */
static const struct file_operations proc_tid_base_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_tid_base_readdir,
	.llseek		= generic_file_llseek,
};
3780  
/*
 * Inode operations installed on per-thread directory inodes by
 * proc_task_instantiate().  Unlike the thread-group variant, no
 * .permission hook is set here.
 */
static const struct inode_operations proc_tid_base_inode_operations = {
	.lookup		= proc_tid_base_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
3786  
3787  static struct dentry *proc_task_instantiate(struct dentry *dentry,
3788  	struct task_struct *task, const void *ptr)
3789  {
3790  	struct inode *inode;
3791  	inode = proc_pid_make_base_inode(dentry->d_sb, task,
3792  					 S_IFDIR | S_IRUGO | S_IXUGO);
3793  	if (!inode)
3794  		return ERR_PTR(-ENOENT);
3795  
3796  	inode->i_op = &proc_tid_base_inode_operations;
3797  	inode->i_fop = &proc_tid_base_operations;
3798  	inode->i_flags |= S_IMMUTABLE;
3799  
3800  	set_nlink(inode, nlink_tid);
3801  	pid_update_inode(task, inode);
3802  
3803  	d_set_d_op(dentry, &pid_dentry_operations);
3804  	return d_splice_alias(inode, dentry);
3805  }
3806  
3807  static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
3808  {
3809  	struct task_struct *task;
3810  	struct task_struct *leader = get_proc_task(dir);
3811  	unsigned tid;
3812  	struct proc_fs_info *fs_info;
3813  	struct pid_namespace *ns;
3814  	struct dentry *result = ERR_PTR(-ENOENT);
3815  
3816  	if (!leader)
3817  		goto out_no_task;
3818  
3819  	tid = name_to_int(&dentry->d_name);
3820  	if (tid == ~0U)
3821  		goto out;
3822  
3823  	fs_info = proc_sb_info(dentry->d_sb);
3824  	ns = fs_info->pid_ns;
3825  	rcu_read_lock();
3826  	task = find_task_by_pid_ns(tid, ns);
3827  	if (task)
3828  		get_task_struct(task);
3829  	rcu_read_unlock();
3830  	if (!task)
3831  		goto out;
3832  	if (!same_thread_group(leader, task))
3833  		goto out_drop_task;
3834  
3835  	result = proc_task_instantiate(dentry, task, NULL);
3836  out_drop_task:
3837  	put_task_struct(task);
3838  out:
3839  	put_task_struct(leader);
3840  out_no_task:
3841  	return result;
3842  }
3843  
3844  /*
3845   * Find the first tid of a thread group to return to user space.
3846   *
3847   * Usually this is just the thread group leader, but if the users
3848   * buffer was too small or there was a seek into the middle of the
3849   * directory we have more work todo.
3850   *
3851   * In the case of a short read we start with find_task_by_pid.
3852   *
3853   * In the case of a seek we start with the leader and walk nr
3854   * threads past it.
3855   */
3856  static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
3857  					struct pid_namespace *ns)
3858  {
3859  	struct task_struct *pos, *task;
3860  	unsigned long nr = f_pos;
3861  
3862  	if (nr != f_pos)	/* 32bit overflow? */
3863  		return NULL;
3864  
3865  	rcu_read_lock();
3866  	task = pid_task(pid, PIDTYPE_PID);
3867  	if (!task)
3868  		goto fail;
3869  
3870  	/* Attempt to start with the tid of a thread */
3871  	if (tid && nr) {
3872  		pos = find_task_by_pid_ns(tid, ns);
3873  		if (pos && same_thread_group(pos, task))
3874  			goto found;
3875  	}
3876  
3877  	/* If nr exceeds the number of threads there is nothing todo */
3878  	if (nr >= get_nr_threads(task))
3879  		goto fail;
3880  
3881  	/* If we haven't found our starting place yet start
3882  	 * with the leader and walk nr threads forward.
3883  	 */
3884  	for_each_thread(task, pos) {
3885  		if (!nr--)
3886  			goto found;
3887  	}
3888  fail:
3889  	pos = NULL;
3890  	goto out;
3891  found:
3892  	get_task_struct(pos);
3893  out:
3894  	rcu_read_unlock();
3895  	return pos;
3896  }
3897  
3898  /*
3899   * Find the next thread in the thread list.
3900   * Return NULL if there is an error or no next thread.
3901   *
3902   * The reference to the input task_struct is released.
3903   */
3904  static struct task_struct *next_tid(struct task_struct *start)
3905  {
3906  	struct task_struct *pos = NULL;
3907  	rcu_read_lock();
3908  	if (pid_alive(start)) {
3909  		pos = __next_thread(start);
3910  		if (pos)
3911  			get_task_struct(pos);
3912  	}
3913  	rcu_read_unlock();
3914  	put_task_struct(start);
3915  	return pos;
3916  }
3917  
3918  /* for the /proc/TGID/task/ directories */
3919  static int proc_task_readdir(struct file *file, struct dir_context *ctx)
3920  {
3921  	struct inode *inode = file_inode(file);
3922  	struct task_struct *task;
3923  	struct pid_namespace *ns;
3924  	int tid;
3925  
3926  	if (proc_inode_is_dead(inode))
3927  		return -ENOENT;
3928  
3929  	if (!dir_emit_dots(file, ctx))
3930  		return 0;
3931  
3932  	/* We cache the tgid value that the last readdir call couldn't
3933  	 * return and lseek resets it to 0.
3934  	 */
3935  	ns = proc_pid_ns(inode->i_sb);
3936  	tid = (int)(intptr_t)file->private_data;
3937  	file->private_data = NULL;
3938  	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
3939  	     task;
3940  	     task = next_tid(task), ctx->pos++) {
3941  		char name[10 + 1];
3942  		unsigned int len;
3943  
3944  		tid = task_pid_nr_ns(task, ns);
3945  		if (!tid)
3946  			continue;	/* The task has just exited. */
3947  		len = snprintf(name, sizeof(name), "%u", tid);
3948  		if (!proc_fill_cache(file, ctx, name, len,
3949  				proc_task_instantiate, task, NULL)) {
3950  			/* returning this tgid failed, save it as the first
3951  			 * pid for the next readir call */
3952  			file->private_data = (void *)(intptr_t)tid;
3953  			put_task_struct(task);
3954  			break;
3955  		}
3956  	}
3957  
3958  	return 0;
3959  }
3960  
3961  static int proc_task_getattr(struct mnt_idmap *idmap,
3962  			     const struct path *path, struct kstat *stat,
3963  			     u32 request_mask, unsigned int query_flags)
3964  {
3965  	struct inode *inode = d_inode(path->dentry);
3966  	struct task_struct *p = get_proc_task(inode);
3967  	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
3968  
3969  	if (p) {
3970  		stat->nlink += get_nr_threads(p);
3971  		put_task_struct(p);
3972  	}
3973  
3974  	return 0;
3975  }
3976  
3977  /*
3978   * proc_task_readdir() set @file->private_data to a positive integer
3979   * value, so casting that to u64 is safe. generic_llseek_cookie() will
3980   * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is
3981   * here to catch any unexpected change in behavior either in
3982   * proc_task_readdir() or generic_llseek_cookie().
3983   */
3984  static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence)
3985  {
3986  	u64 cookie = (u64)(intptr_t)file->private_data;
3987  	loff_t off;
3988  
3989  	off = generic_llseek_cookie(file, offset, whence, &cookie);
3990  	WARN_ON_ONCE(cookie > INT_MAX);
3991  	file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */
3992  	return off;
3993  }
3994  
/*
 * Inode operations of the "task" directory; referenced by the "task"
 * entry of tgid_base_stuff.
 */
static const struct inode_operations proc_task_inode_operations = {
	.lookup		= proc_task_lookup,
	.getattr	= proc_task_getattr,
	.setattr	= proc_setattr,
	.permission	= proc_pid_permission,
};
4001  
/*
 * File operations of the "task" directory; referenced by the "task"
 * entry of tgid_base_stuff.  Seeking goes through proc_dir_llseek() so
 * that the tid cached in file->private_data is kept in step.
 */
static const struct file_operations proc_task_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_task_readdir,
	.llseek		= proc_dir_llseek,
};
4007  
4008  void __init set_proc_pid_nlink(void)
4009  {
4010  	nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
4011  	nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
4012  }
4013