1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/anon_inodes.h> 3 #include <linux/exportfs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/cgroup.h> 7 #include <linux/magic.h> 8 #include <linux/mount.h> 9 #include <linux/pid.h> 10 #include <linux/pidfs.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/poll.h> 13 #include <linux/proc_fs.h> 14 #include <linux/proc_ns.h> 15 #include <linux/pseudo_fs.h> 16 #include <linux/ptrace.h> 17 #include <linux/seq_file.h> 18 #include <uapi/linux/pidfd.h> 19 #include <linux/ipc_namespace.h> 20 #include <linux/time_namespace.h> 21 #include <linux/utsname.h> 22 #include <net/net_namespace.h> 23 24 #include "internal.h" 25 #include "mount.h" 26 27 static struct rb_root pidfs_ino_tree = RB_ROOT; 28 29 #if BITS_PER_LONG == 32 30 static inline unsigned long pidfs_ino(u64 ino) 31 { 32 return lower_32_bits(ino); 33 } 34 35 /* On 32 bit the generation number are the upper 32 bits. */ 36 static inline u32 pidfs_gen(u64 ino) 37 { 38 return upper_32_bits(ino); 39 } 40 41 #else 42 43 /* On 64 bit simply return ino. */ 44 static inline unsigned long pidfs_ino(u64 ino) 45 { 46 return ino; 47 } 48 49 /* On 64 bit the generation number is 0. */ 50 static inline u32 pidfs_gen(u64 ino) 51 { 52 return 0; 53 } 54 #endif 55 56 static int pidfs_ino_cmp(struct rb_node *a, const struct rb_node *b) 57 { 58 struct pid *pid_a = rb_entry(a, struct pid, pidfs_node); 59 struct pid *pid_b = rb_entry(b, struct pid, pidfs_node); 60 u64 pid_ino_a = pid_a->ino; 61 u64 pid_ino_b = pid_b->ino; 62 63 if (pid_ino_a < pid_ino_b) 64 return -1; 65 if (pid_ino_a > pid_ino_b) 66 return 1; 67 return 0; 68 } 69 70 void pidfs_add_pid(struct pid *pid) 71 { 72 static u64 pidfs_ino_nr = 2; 73 74 /* 75 * On 64 bit nothing special happens. The 64bit number assigned 76 * to struct pid is the inode number. 77 * 78 * On 32 bit the 64 bit number assigned to struct pid is split 79 * into two 32 bit numbers. The lower 32 bits are used as the 80 * inode number and the upper 32 bits are used as the inode 81 * generation number. 82 * 83 * On 32 bit pidfs_ino() will return the lower 32 bit. When 84 * pidfs_ino() returns zero a wrap around happened. When a 85 * wraparound happens the 64 bit number will be incremented by 2 86 * so inode numbering starts at 2 again. 87 * 88 * On 64 bit comparing two pidfds is as simple as comparing 89 * inode numbers. 90 * 91 * When a wraparound happens on 32 bit multiple pidfds with the 92 * same inode number are likely to exist (This isn't a problem 93 * since before pidfs pidfds used the anonymous inode meaning 94 * all pidfds had the same inode number.). Userspace can 95 * reconstruct the 64 bit identifier by retrieving both the 96 * inode number and the inode generation number to compare or 97 * use file handles. 98 */ 99 if (pidfs_ino(pidfs_ino_nr) == 0) 100 pidfs_ino_nr += 2; 101 102 pid->ino = pidfs_ino_nr; 103 pid->stashed = NULL; 104 pidfs_ino_nr++; 105 106 write_seqcount_begin(&pidmap_lock_seq); 107 rb_find_add_rcu(&pid->pidfs_node, &pidfs_ino_tree, pidfs_ino_cmp); 108 write_seqcount_end(&pidmap_lock_seq); 109 } 110 111 void pidfs_remove_pid(struct pid *pid) 112 { 113 write_seqcount_begin(&pidmap_lock_seq); 114 rb_erase(&pid->pidfs_node, &pidfs_ino_tree); 115 write_seqcount_end(&pidmap_lock_seq); 116 } 117 118 #ifdef CONFIG_PROC_FS 119 /** 120 * pidfd_show_fdinfo - print information about a pidfd 121 * @m: proc fdinfo file 122 * @f: file referencing a pidfd 123 * 124 * Pid: 125 * This function will print the pid that a given pidfd refers to in the 126 * pid namespace of the procfs instance. 127 * If the pid namespace of the process is not a descendant of the pid 128 * namespace of the procfs instance 0 will be shown as its pid. This is 129 * similar to calling getppid() on a process whose parent is outside of 130 * its pid namespace. 131 * 132 * NSpid: 133 * If pid namespaces are supported then this function will also print 134 * the pid of a given pidfd refers to for all descendant pid namespaces 135 * starting from the current pid namespace of the instance, i.e. the 136 * Pid field and the first entry in the NSpid field will be identical. 137 * If the pid namespace of the process is not a descendant of the pid 138 * namespace of the procfs instance 0 will be shown as its first NSpid 139 * entry and no others will be shown. 140 * Note that this differs from the Pid and NSpid fields in 141 * /proc/<pid>/status where Pid and NSpid are always shown relative to 142 * the pid namespace of the procfs instance. The difference becomes 143 * obvious when sending around a pidfd between pid namespaces from a 144 * different branch of the tree, i.e. where no ancestral relation is 145 * present between the pid namespaces: 146 * - create two new pid namespaces ns1 and ns2 in the initial pid 147 * namespace (also take care to create new mount namespaces in the 148 * new pid namespace and mount procfs) 149 * - create a process with a pidfd in ns1 150 * - send pidfd from ns1 to ns2 151 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid 152 * have exactly one entry, which is 0 153 */ 154 static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) 155 { 156 struct pid *pid = pidfd_pid(f); 157 struct pid_namespace *ns; 158 pid_t nr = -1; 159 160 if (likely(pid_has_task(pid, PIDTYPE_PID))) { 161 ns = proc_pid_ns(file_inode(m->file)->i_sb); 162 nr = pid_nr_ns(pid, ns); 163 } 164 165 seq_put_decimal_ll(m, "Pid:\t", nr); 166 167 #ifdef CONFIG_PID_NS 168 seq_put_decimal_ll(m, "\nNSpid:\t", nr); 169 if (nr > 0) { 170 int i; 171 172 /* If nr is non-zero it means that 'pid' is valid and that 173 * ns, i.e. the pid namespace associated with the procfs 174 * instance, is in the pid namespace hierarchy of pid. 175 * Start at one below the already printed level. 176 */ 177 for (i = ns->level + 1; i <= pid->level; i++) 178 seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); 179 } 180 #endif 181 seq_putc(m, '\n'); 182 } 183 #endif 184 185 /* 186 * Poll support for process exit notification. 187 */ 188 static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) 189 { 190 struct pid *pid = pidfd_pid(file); 191 bool thread = file->f_flags & PIDFD_THREAD; 192 struct task_struct *task; 193 __poll_t poll_flags = 0; 194 195 poll_wait(file, &pid->wait_pidfd, pts); 196 /* 197 * Depending on PIDFD_THREAD, inform pollers when the thread 198 * or the whole thread-group exits. 199 */ 200 guard(rcu)(); 201 task = pid_task(pid, PIDTYPE_PID); 202 if (!task) 203 poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; 204 else if (task->exit_state && (thread || thread_group_empty(task))) 205 poll_flags = EPOLLIN | EPOLLRDNORM; 206 207 return poll_flags; 208 } 209 210 static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) 211 { 212 struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; 213 size_t usize = _IOC_SIZE(cmd); 214 struct pidfd_info kinfo = {}; 215 struct user_namespace *user_ns; 216 const struct cred *c; 217 __u64 mask; 218 #ifdef CONFIG_CGROUPS 219 struct cgroup *cgrp; 220 #endif 221 222 if (!uinfo) 223 return -EINVAL; 224 if (usize < PIDFD_INFO_SIZE_VER0) 225 return -EINVAL; /* First version, no smaller struct possible */ 226 227 if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) 228 return -EFAULT; 229 230 c = get_task_cred(task); 231 if (!c) 232 return -ESRCH; 233 234 /* Unconditionally return identifiers and credentials, the rest only on request */ 235 236 user_ns = current_user_ns(); 237 kinfo.ruid = from_kuid_munged(user_ns, c->uid); 238 kinfo.rgid = from_kgid_munged(user_ns, c->gid); 239 kinfo.euid = from_kuid_munged(user_ns, c->euid); 240 kinfo.egid = from_kgid_munged(user_ns, c->egid); 241 kinfo.suid = from_kuid_munged(user_ns, c->suid); 242 kinfo.sgid = from_kgid_munged(user_ns, c->sgid); 243 kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); 244 kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); 245 kinfo.mask |= PIDFD_INFO_CREDS; 246 put_cred(c); 247 248 #ifdef CONFIG_CGROUPS 249 rcu_read_lock(); 250 cgrp = task_dfl_cgroup(task); 251 kinfo.cgroupid = cgroup_id(cgrp); 252 kinfo.mask |= PIDFD_INFO_CGROUPID; 253 rcu_read_unlock(); 254 #endif 255 256 /* 257 * Copy pid/tgid last, to reduce the chances the information might be 258 * stale. Note that it is not possible to ensure it will be valid as the 259 * task might return as soon as the copy_to_user finishes, but that's ok 260 * and userspace expects that might happen and can act accordingly, so 261 * this is just best-effort. What we can do however is checking that all 262 * the fields are set correctly, or return ESRCH to avoid providing 263 * incomplete information. */ 264 265 kinfo.ppid = task_ppid_nr_ns(task, NULL); 266 kinfo.tgid = task_tgid_vnr(task); 267 kinfo.pid = task_pid_vnr(task); 268 kinfo.mask |= PIDFD_INFO_PID; 269 270 if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) 271 return -ESRCH; 272 273 /* 274 * If userspace and the kernel have the same struct size it can just 275 * be copied. If userspace provides an older struct, only the bits that 276 * userspace knows about will be copied. If userspace provides a new 277 * struct, only the bits that the kernel knows about will be copied. 278 */ 279 if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo)))) 280 return -EFAULT; 281 282 return 0; 283 } 284 285 static bool pidfs_ioctl_valid(unsigned int cmd) 286 { 287 switch (cmd) { 288 case FS_IOC_GETVERSION: 289 case PIDFD_GET_CGROUP_NAMESPACE: 290 case PIDFD_GET_IPC_NAMESPACE: 291 case PIDFD_GET_MNT_NAMESPACE: 292 case PIDFD_GET_NET_NAMESPACE: 293 case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: 294 case PIDFD_GET_TIME_NAMESPACE: 295 case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: 296 case PIDFD_GET_UTS_NAMESPACE: 297 case PIDFD_GET_USER_NAMESPACE: 298 case PIDFD_GET_PID_NAMESPACE: 299 return true; 300 } 301 302 /* Extensible ioctls require some more careful checks. */ 303 switch (_IOC_NR(cmd)) { 304 case _IOC_NR(PIDFD_GET_INFO): 305 /* 306 * Try to prevent performing a pidfd ioctl when someone 307 * erronously mistook the file descriptor for a pidfd. 308 * This is not perfect but will catch most cases. 309 */ 310 return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); 311 } 312 313 return false; 314 } 315 316 static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 317 { 318 struct task_struct *task __free(put_task) = NULL; 319 struct nsproxy *nsp __free(put_nsproxy) = NULL; 320 struct pid *pid = pidfd_pid(file); 321 struct ns_common *ns_common = NULL; 322 struct pid_namespace *pid_ns; 323 324 if (!pidfs_ioctl_valid(cmd)) 325 return -ENOIOCTLCMD; 326 327 if (cmd == FS_IOC_GETVERSION) { 328 if (!arg) 329 return -EINVAL; 330 331 __u32 __user *argp = (__u32 __user *)arg; 332 return put_user(file_inode(file)->i_generation, argp); 333 } 334 335 task = get_pid_task(pid, PIDTYPE_PID); 336 if (!task) 337 return -ESRCH; 338 339 /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ 340 if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) 341 return pidfd_info(task, cmd, arg); 342 343 if (arg) 344 return -EINVAL; 345 346 scoped_guard(task_lock, task) { 347 nsp = task->nsproxy; 348 if (nsp) 349 get_nsproxy(nsp); 350 } 351 if (!nsp) 352 return -ESRCH; /* just pretend it didn't exist */ 353 354 /* 355 * We're trying to open a file descriptor to the namespace so perform a 356 * filesystem cred ptrace check. Also, we mirror nsfs behavior. 357 */ 358 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 359 return -EACCES; 360 361 switch (cmd) { 362 /* Namespaces that hang of nsproxy. */ 363 case PIDFD_GET_CGROUP_NAMESPACE: 364 if (IS_ENABLED(CONFIG_CGROUPS)) { 365 get_cgroup_ns(nsp->cgroup_ns); 366 ns_common = to_ns_common(nsp->cgroup_ns); 367 } 368 break; 369 case PIDFD_GET_IPC_NAMESPACE: 370 if (IS_ENABLED(CONFIG_IPC_NS)) { 371 get_ipc_ns(nsp->ipc_ns); 372 ns_common = to_ns_common(nsp->ipc_ns); 373 } 374 break; 375 case PIDFD_GET_MNT_NAMESPACE: 376 get_mnt_ns(nsp->mnt_ns); 377 ns_common = to_ns_common(nsp->mnt_ns); 378 break; 379 case PIDFD_GET_NET_NAMESPACE: 380 if (IS_ENABLED(CONFIG_NET_NS)) { 381 ns_common = to_ns_common(nsp->net_ns); 382 get_net_ns(ns_common); 383 } 384 break; 385 case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: 386 if (IS_ENABLED(CONFIG_PID_NS)) { 387 get_pid_ns(nsp->pid_ns_for_children); 388 ns_common = to_ns_common(nsp->pid_ns_for_children); 389 } 390 break; 391 case PIDFD_GET_TIME_NAMESPACE: 392 if (IS_ENABLED(CONFIG_TIME_NS)) { 393 get_time_ns(nsp->time_ns); 394 ns_common = to_ns_common(nsp->time_ns); 395 } 396 break; 397 case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: 398 if (IS_ENABLED(CONFIG_TIME_NS)) { 399 get_time_ns(nsp->time_ns_for_children); 400 ns_common = to_ns_common(nsp->time_ns_for_children); 401 } 402 break; 403 case PIDFD_GET_UTS_NAMESPACE: 404 if (IS_ENABLED(CONFIG_UTS_NS)) { 405 get_uts_ns(nsp->uts_ns); 406 ns_common = to_ns_common(nsp->uts_ns); 407 } 408 break; 409 /* Namespaces that don't hang of nsproxy. */ 410 case PIDFD_GET_USER_NAMESPACE: 411 if (IS_ENABLED(CONFIG_USER_NS)) { 412 rcu_read_lock(); 413 ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); 414 rcu_read_unlock(); 415 } 416 break; 417 case PIDFD_GET_PID_NAMESPACE: 418 if (IS_ENABLED(CONFIG_PID_NS)) { 419 rcu_read_lock(); 420 pid_ns = task_active_pid_ns(task); 421 if (pid_ns) 422 ns_common = to_ns_common(get_pid_ns(pid_ns)); 423 rcu_read_unlock(); 424 } 425 break; 426 default: 427 return -ENOIOCTLCMD; 428 } 429 430 if (!ns_common) 431 return -EOPNOTSUPP; 432 433 /* open_namespace() unconditionally consumes the reference */ 434 return open_namespace(ns_common); 435 } 436 437 static const struct file_operations pidfs_file_operations = { 438 .poll = pidfd_poll, 439 #ifdef CONFIG_PROC_FS 440 .show_fdinfo = pidfd_show_fdinfo, 441 #endif 442 .unlocked_ioctl = pidfd_ioctl, 443 .compat_ioctl = compat_ptr_ioctl, 444 }; 445 446 struct pid *pidfd_pid(const struct file *file) 447 { 448 if (file->f_op != &pidfs_file_operations) 449 return ERR_PTR(-EBADF); 450 return file_inode(file)->i_private; 451 } 452 453 static struct vfsmount *pidfs_mnt __ro_after_init; 454 455 /* 456 * The vfs falls back to simple_setattr() if i_op->setattr() isn't 457 * implemented. Let's reject it completely until we have a clean 458 * permission concept for pidfds. 459 */ 460 static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 461 struct iattr *attr) 462 { 463 return -EOPNOTSUPP; 464 } 465 466 467 /* 468 * User space expects pidfs inodes to have no file type in st_mode. 469 * 470 * In particular, 'lsof' has this legacy logic: 471 * 472 * type = s->st_mode & S_IFMT; 473 * switch (type) { 474 * ... 475 * case 0: 476 * if (!strcmp(p, "anon_inode")) 477 * Lf->ntype = Ntype = N_ANON_INODE; 478 * 479 * to detect our old anon_inode logic. 480 * 481 * Rather than mess with our internal sane inode data, just fix it 482 * up here in getattr() by masking off the format bits. 483 */ 484 static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, 485 struct kstat *stat, u32 request_mask, 486 unsigned int query_flags) 487 { 488 struct inode *inode = d_inode(path->dentry); 489 490 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 491 stat->mode &= ~S_IFMT; 492 return 0; 493 } 494 495 static const struct inode_operations pidfs_inode_operations = { 496 .getattr = pidfs_getattr, 497 .setattr = pidfs_setattr, 498 }; 499 500 static void pidfs_evict_inode(struct inode *inode) 501 { 502 struct pid *pid = inode->i_private; 503 504 clear_inode(inode); 505 put_pid(pid); 506 } 507 508 static const struct super_operations pidfs_sops = { 509 .drop_inode = generic_delete_inode, 510 .evict_inode = pidfs_evict_inode, 511 .statfs = simple_statfs, 512 }; 513 514 /* 515 * 'lsof' has knowledge of out historical anon_inode use, and expects 516 * the pidfs dentry name to start with 'anon_inode'. 517 */ 518 static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) 519 { 520 return dynamic_dname(buffer, buflen, "anon_inode:[pidfd]"); 521 } 522 523 const struct dentry_operations pidfs_dentry_operations = { 524 .d_dname = pidfs_dname, 525 .d_prune = stashed_dentry_prune, 526 }; 527 528 static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, 529 struct inode *parent) 530 { 531 const struct pid *pid = inode->i_private; 532 533 if (*max_len < 2) { 534 *max_len = 2; 535 return FILEID_INVALID; 536 } 537 538 *max_len = 2; 539 *(u64 *)fh = pid->ino; 540 return FILEID_KERNFS; 541 } 542 543 static int pidfs_ino_find(const void *key, const struct rb_node *node) 544 { 545 const u64 pid_ino = *(u64 *)key; 546 const struct pid *pid = rb_entry(node, struct pid, pidfs_node); 547 548 if (pid_ino < pid->ino) 549 return -1; 550 if (pid_ino > pid->ino) 551 return 1; 552 return 0; 553 } 554 555 /* Find a struct pid based on the inode number. */ 556 static struct pid *pidfs_ino_get_pid(u64 ino) 557 { 558 struct pid *pid; 559 struct rb_node *node; 560 unsigned int seq; 561 562 guard(rcu)(); 563 do { 564 seq = read_seqcount_begin(&pidmap_lock_seq); 565 node = rb_find_rcu(&ino, &pidfs_ino_tree, pidfs_ino_find); 566 if (node) 567 break; 568 } while (read_seqcount_retry(&pidmap_lock_seq, seq)); 569 570 if (!node) 571 return NULL; 572 573 pid = rb_entry(node, struct pid, pidfs_node); 574 575 /* Within our pid namespace hierarchy? */ 576 if (pid_vnr(pid) == 0) 577 return NULL; 578 579 return get_pid(pid); 580 } 581 582 static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, 583 struct fid *fid, int fh_len, 584 int fh_type) 585 { 586 int ret; 587 u64 pid_ino; 588 struct path path; 589 struct pid *pid; 590 591 if (fh_len < 2) 592 return NULL; 593 594 switch (fh_type) { 595 case FILEID_KERNFS: 596 pid_ino = *(u64 *)fid; 597 break; 598 default: 599 return NULL; 600 } 601 602 pid = pidfs_ino_get_pid(pid_ino); 603 if (!pid) 604 return NULL; 605 606 ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); 607 if (ret < 0) 608 return ERR_PTR(ret); 609 610 mntput(path.mnt); 611 return path.dentry; 612 } 613 614 /* 615 * Make sure that we reject any nonsensical flags that users pass via 616 * open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and 617 * PIDFD_NONBLOCK as O_NONBLOCK. 618 */ 619 #define VALID_FILE_HANDLE_OPEN_FLAGS \ 620 (O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL) 621 622 static int pidfs_export_permission(struct handle_to_path_ctx *ctx, 623 unsigned int oflags) 624 { 625 if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE)) 626 return -EINVAL; 627 628 /* 629 * pidfd_ino_get_pid() will verify that the struct pid is part 630 * of the caller's pid namespace hierarchy. No further 631 * permission checks are needed. 632 */ 633 return 0; 634 } 635 636 static struct file *pidfs_export_open(struct path *path, unsigned int oflags) 637 { 638 /* 639 * Clear O_LARGEFILE as open_by_handle_at() forces it and raise 640 * O_RDWR as pidfds always are. 641 */ 642 oflags &= ~O_LARGEFILE; 643 return dentry_open(path, oflags | O_RDWR, current_cred()); 644 } 645 646 static const struct export_operations pidfs_export_operations = { 647 .encode_fh = pidfs_encode_fh, 648 .fh_to_dentry = pidfs_fh_to_dentry, 649 .open = pidfs_export_open, 650 .permission = pidfs_export_permission, 651 }; 652 653 static int pidfs_init_inode(struct inode *inode, void *data) 654 { 655 const struct pid *pid = data; 656 657 inode->i_private = data; 658 inode->i_flags |= S_PRIVATE; 659 inode->i_mode |= S_IRWXU; 660 inode->i_op = &pidfs_inode_operations; 661 inode->i_fop = &pidfs_file_operations; 662 inode->i_ino = pidfs_ino(pid->ino); 663 inode->i_generation = pidfs_gen(pid->ino); 664 return 0; 665 } 666 667 static void pidfs_put_data(void *data) 668 { 669 struct pid *pid = data; 670 put_pid(pid); 671 } 672 673 static const struct stashed_operations pidfs_stashed_ops = { 674 .init_inode = pidfs_init_inode, 675 .put_data = pidfs_put_data, 676 }; 677 678 static int pidfs_init_fs_context(struct fs_context *fc) 679 { 680 struct pseudo_fs_context *ctx; 681 682 ctx = init_pseudo(fc, PID_FS_MAGIC); 683 if (!ctx) 684 return -ENOMEM; 685 686 ctx->ops = &pidfs_sops; 687 ctx->eops = &pidfs_export_operations; 688 ctx->dops = &pidfs_dentry_operations; 689 fc->s_fs_info = (void *)&pidfs_stashed_ops; 690 return 0; 691 } 692 693 static struct file_system_type pidfs_type = { 694 .name = "pidfs", 695 .init_fs_context = pidfs_init_fs_context, 696 .kill_sb = kill_anon_super, 697 }; 698 699 struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) 700 { 701 702 struct file *pidfd_file; 703 struct path path; 704 int ret; 705 706 ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); 707 if (ret < 0) 708 return ERR_PTR(ret); 709 710 pidfd_file = dentry_open(&path, flags, current_cred()); 711 path_put(&path); 712 return pidfd_file; 713 } 714 715 void __init pidfs_init(void) 716 { 717 pidfs_mnt = kern_mount(&pidfs_type); 718 if (IS_ERR(pidfs_mnt)) 719 panic("Failed to mount pidfs pseudo filesystem"); 720 } 721