1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/anon_inodes.h> 3 #include <linux/file.h> 4 #include <linux/fs.h> 5 #include <linux/magic.h> 6 #include <linux/mount.h> 7 #include <linux/pid.h> 8 #include <linux/pidfs.h> 9 #include <linux/pid_namespace.h> 10 #include <linux/poll.h> 11 #include <linux/proc_fs.h> 12 #include <linux/proc_ns.h> 13 #include <linux/pseudo_fs.h> 14 #include <linux/seq_file.h> 15 #include <uapi/linux/pidfd.h> 16 17 #include "internal.h" 18 19 static int pidfd_release(struct inode *inode, struct file *file) 20 { 21 #ifndef CONFIG_FS_PID 22 struct pid *pid = file->private_data; 23 24 file->private_data = NULL; 25 put_pid(pid); 26 #endif 27 return 0; 28 } 29 30 #ifdef CONFIG_PROC_FS 31 /** 32 * pidfd_show_fdinfo - print information about a pidfd 33 * @m: proc fdinfo file 34 * @f: file referencing a pidfd 35 * 36 * Pid: 37 * This function will print the pid that a given pidfd refers to in the 38 * pid namespace of the procfs instance. 39 * If the pid namespace of the process is not a descendant of the pid 40 * namespace of the procfs instance 0 will be shown as its pid. This is 41 * similar to calling getppid() on a process whose parent is outside of 42 * its pid namespace. 43 * 44 * NSpid: 45 * If pid namespaces are supported then this function will also print 46 * the pid of a given pidfd refers to for all descendant pid namespaces 47 * starting from the current pid namespace of the instance, i.e. the 48 * Pid field and the first entry in the NSpid field will be identical. 49 * If the pid namespace of the process is not a descendant of the pid 50 * namespace of the procfs instance 0 will be shown as its first NSpid 51 * entry and no others will be shown. 52 * Note that this differs from the Pid and NSpid fields in 53 * /proc/<pid>/status where Pid and NSpid are always shown relative to 54 * the pid namespace of the procfs instance. The difference becomes 55 * obvious when sending around a pidfd between pid namespaces from a 56 * different branch of the tree, i.e. where no ancestral relation is 57 * present between the pid namespaces: 58 * - create two new pid namespaces ns1 and ns2 in the initial pid 59 * namespace (also take care to create new mount namespaces in the 60 * new pid namespace and mount procfs) 61 * - create a process with a pidfd in ns1 62 * - send pidfd from ns1 to ns2 63 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid 64 * have exactly one entry, which is 0 65 */ 66 static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) 67 { 68 struct pid *pid = pidfd_pid(f); 69 struct pid_namespace *ns; 70 pid_t nr = -1; 71 72 if (likely(pid_has_task(pid, PIDTYPE_PID))) { 73 ns = proc_pid_ns(file_inode(m->file)->i_sb); 74 nr = pid_nr_ns(pid, ns); 75 } 76 77 seq_put_decimal_ll(m, "Pid:\t", nr); 78 79 #ifdef CONFIG_PID_NS 80 seq_put_decimal_ll(m, "\nNSpid:\t", nr); 81 if (nr > 0) { 82 int i; 83 84 /* If nr is non-zero it means that 'pid' is valid and that 85 * ns, i.e. the pid namespace associated with the procfs 86 * instance, is in the pid namespace hierarchy of pid. 87 * Start at one below the already printed level. 88 */ 89 for (i = ns->level + 1; i <= pid->level; i++) 90 seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); 91 } 92 #endif 93 seq_putc(m, '\n'); 94 } 95 #endif 96 97 /* 98 * Poll support for process exit notification. 99 */ 100 static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) 101 { 102 struct pid *pid = pidfd_pid(file); 103 bool thread = file->f_flags & PIDFD_THREAD; 104 struct task_struct *task; 105 __poll_t poll_flags = 0; 106 107 poll_wait(file, &pid->wait_pidfd, pts); 108 /* 109 * Depending on PIDFD_THREAD, inform pollers when the thread 110 * or the whole thread-group exits. 111 */ 112 guard(rcu)(); 113 task = pid_task(pid, PIDTYPE_PID); 114 if (!task) 115 poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; 116 else if (task->exit_state && (thread || thread_group_empty(task))) 117 poll_flags = EPOLLIN | EPOLLRDNORM; 118 119 return poll_flags; 120 } 121 122 static const struct file_operations pidfs_file_operations = { 123 .release = pidfd_release, 124 .poll = pidfd_poll, 125 #ifdef CONFIG_PROC_FS 126 .show_fdinfo = pidfd_show_fdinfo, 127 #endif 128 }; 129 130 struct pid *pidfd_pid(const struct file *file) 131 { 132 if (file->f_op != &pidfs_file_operations) 133 return ERR_PTR(-EBADF); 134 #ifdef CONFIG_FS_PID 135 return file_inode(file)->i_private; 136 #else 137 return file->private_data; 138 #endif 139 } 140 141 #ifdef CONFIG_FS_PID 142 static struct vfsmount *pidfs_mnt __ro_after_init; 143 144 /* 145 * The vfs falls back to simple_setattr() if i_op->setattr() isn't 146 * implemented. Let's reject it completely until we have a clean 147 * permission concept for pidfds. 148 */ 149 static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 150 struct iattr *attr) 151 { 152 return -EOPNOTSUPP; 153 } 154 155 static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, 156 struct kstat *stat, u32 request_mask, 157 unsigned int query_flags) 158 { 159 struct inode *inode = d_inode(path->dentry); 160 161 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 162 return 0; 163 } 164 165 static const struct inode_operations pidfs_inode_operations = { 166 .getattr = pidfs_getattr, 167 .setattr = pidfs_setattr, 168 }; 169 170 static void pidfs_evict_inode(struct inode *inode) 171 { 172 struct pid *pid = inode->i_private; 173 174 clear_inode(inode); 175 put_pid(pid); 176 } 177 178 static const struct super_operations pidfs_sops = { 179 .drop_inode = generic_delete_inode, 180 .evict_inode = pidfs_evict_inode, 181 .statfs = simple_statfs, 182 }; 183 184 static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) 185 { 186 return dynamic_dname(buffer, buflen, "pidfd:[%lu]", 187 d_inode(dentry)->i_ino); 188 } 189 190 static const struct dentry_operations pidfs_dentry_operations = { 191 .d_delete = always_delete_dentry, 192 .d_dname = pidfs_dname, 193 .d_prune = stashed_dentry_prune, 194 }; 195 196 static void pidfs_init_inode(struct inode *inode, void *data) 197 { 198 inode->i_private = data; 199 inode->i_flags |= S_PRIVATE; 200 inode->i_mode |= S_IRWXU; 201 inode->i_op = &pidfs_inode_operations; 202 inode->i_fop = &pidfs_file_operations; 203 } 204 205 static void pidfs_put_data(void *data) 206 { 207 struct pid *pid = data; 208 put_pid(pid); 209 } 210 211 static const struct stashed_operations pidfs_stashed_ops = { 212 .init_inode = pidfs_init_inode, 213 .put_data = pidfs_put_data, 214 }; 215 216 static int pidfs_init_fs_context(struct fs_context *fc) 217 { 218 struct pseudo_fs_context *ctx; 219 220 ctx = init_pseudo(fc, PID_FS_MAGIC); 221 if (!ctx) 222 return -ENOMEM; 223 224 ctx->ops = &pidfs_sops; 225 ctx->dops = &pidfs_dentry_operations; 226 fc->s_fs_info = (void *)&pidfs_stashed_ops; 227 return 0; 228 } 229 230 static struct file_system_type pidfs_type = { 231 .name = "pidfs", 232 .init_fs_context = pidfs_init_fs_context, 233 .kill_sb = kill_anon_super, 234 }; 235 236 struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) 237 { 238 239 struct file *pidfd_file; 240 struct path path; 241 int ret; 242 243 /* 244 * Inode numbering for pidfs start at RESERVED_PIDS + 1. 245 * This avoids collisions with the root inode which is 1 246 * for pseudo filesystems. 247 */ 248 ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt, 249 get_pid(pid), &path); 250 if (ret < 0) 251 return ERR_PTR(ret); 252 253 pidfd_file = dentry_open(&path, flags, current_cred()); 254 path_put(&path); 255 return pidfd_file; 256 } 257 258 void __init pidfs_init(void) 259 { 260 pidfs_mnt = kern_mount(&pidfs_type); 261 if (IS_ERR(pidfs_mnt)) 262 panic("Failed to mount pidfs pseudo filesystem"); 263 } 264 265 bool is_pidfs_sb(const struct super_block *sb) 266 { 267 return sb == pidfs_mnt->mnt_sb; 268 } 269 270 #else /* !CONFIG_FS_PID */ 271 272 struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) 273 { 274 struct file *pidfd_file; 275 276 pidfd_file = anon_inode_getfile("[pidfd]", &pidfs_file_operations, pid, 277 flags | O_RDWR); 278 if (IS_ERR(pidfd_file)) 279 return pidfd_file; 280 281 get_pid(pid); 282 return pidfd_file; 283 } 284 285 void __init pidfs_init(void) { } 286 bool is_pidfs_sb(const struct super_block *sb) 287 { 288 return false; 289 } 290 #endif 291