1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/anon_inodes.h> 3 #include <linux/exportfs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/cgroup.h> 7 #include <linux/magic.h> 8 #include <linux/mount.h> 9 #include <linux/pid.h> 10 #include <linux/pidfs.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/poll.h> 13 #include <linux/proc_fs.h> 14 #include <linux/proc_ns.h> 15 #include <linux/pseudo_fs.h> 16 #include <linux/ptrace.h> 17 #include <linux/seq_file.h> 18 #include <uapi/linux/pidfd.h> 19 #include <linux/ipc_namespace.h> 20 #include <linux/time_namespace.h> 21 #include <linux/utsname.h> 22 #include <net/net_namespace.h> 23 24 #include "internal.h" 25 #include "mount.h" 26 27 static struct rb_root pidfs_ino_tree = RB_ROOT; 28 29 #if BITS_PER_LONG == 32 30 static inline unsigned long pidfs_ino(u64 ino) 31 { 32 return lower_32_bits(ino); 33 } 34 35 /* On 32 bit the generation number are the upper 32 bits. */ 36 static inline u32 pidfs_gen(u64 ino) 37 { 38 return upper_32_bits(ino); 39 } 40 41 #else 42 43 /* On 64 bit simply return ino. */ 44 static inline unsigned long pidfs_ino(u64 ino) 45 { 46 return ino; 47 } 48 49 /* On 64 bit the generation number is 0. */ 50 static inline u32 pidfs_gen(u64 ino) 51 { 52 return 0; 53 } 54 #endif 55 56 static int pidfs_ino_cmp(struct rb_node *a, const struct rb_node *b) 57 { 58 struct pid *pid_a = rb_entry(a, struct pid, pidfs_node); 59 struct pid *pid_b = rb_entry(b, struct pid, pidfs_node); 60 u64 pid_ino_a = pid_a->ino; 61 u64 pid_ino_b = pid_b->ino; 62 63 if (pid_ino_a < pid_ino_b) 64 return -1; 65 if (pid_ino_a > pid_ino_b) 66 return 1; 67 return 0; 68 } 69 70 void pidfs_add_pid(struct pid *pid) 71 { 72 static u64 pidfs_ino_nr = 2; 73 74 /* 75 * On 64 bit nothing special happens. The 64bit number assigned 76 * to struct pid is the inode number. 77 * 78 * On 32 bit the 64 bit number assigned to struct pid is split 79 * into two 32 bit numbers. The lower 32 bits are used as the 80 * inode number and the upper 32 bits are used as the inode 81 * generation number. 82 * 83 * On 32 bit pidfs_ino() will return the lower 32 bit. When 84 * pidfs_ino() returns zero a wrap around happened. When a 85 * wraparound happens the 64 bit number will be incremented by 2 86 * so inode numbering starts at 2 again. 87 * 88 * On 64 bit comparing two pidfds is as simple as comparing 89 * inode numbers. 90 * 91 * When a wraparound happens on 32 bit multiple pidfds with the 92 * same inode number are likely to exist (This isn't a problem 93 * since before pidfs pidfds used the anonymous inode meaning 94 * all pidfds had the same inode number.). Userspace can 95 * reconstruct the 64 bit identifier by retrieving both the 96 * inode number and the inode generation number to compare or 97 * use file handles. 98 */ 99 if (pidfs_ino(pidfs_ino_nr) == 0) 100 pidfs_ino_nr += 2; 101 102 pid->ino = pidfs_ino_nr; 103 pid->stashed = NULL; 104 pidfs_ino_nr++; 105 106 write_seqcount_begin(&pidmap_lock_seq); 107 rb_find_add_rcu(&pid->pidfs_node, &pidfs_ino_tree, pidfs_ino_cmp); 108 write_seqcount_end(&pidmap_lock_seq); 109 } 110 111 void pidfs_remove_pid(struct pid *pid) 112 { 113 write_seqcount_begin(&pidmap_lock_seq); 114 rb_erase(&pid->pidfs_node, &pidfs_ino_tree); 115 write_seqcount_end(&pidmap_lock_seq); 116 } 117 118 #ifdef CONFIG_PROC_FS 119 /** 120 * pidfd_show_fdinfo - print information about a pidfd 121 * @m: proc fdinfo file 122 * @f: file referencing a pidfd 123 * 124 * Pid: 125 * This function will print the pid that a given pidfd refers to in the 126 * pid namespace of the procfs instance. 127 * If the pid namespace of the process is not a descendant of the pid 128 * namespace of the procfs instance 0 will be shown as its pid. This is 129 * similar to calling getppid() on a process whose parent is outside of 130 * its pid namespace. 131 * 132 * NSpid: 133 * If pid namespaces are supported then this function will also print 134 * the pid of a given pidfd refers to for all descendant pid namespaces 135 * starting from the current pid namespace of the instance, i.e. the 136 * Pid field and the first entry in the NSpid field will be identical. 137 * If the pid namespace of the process is not a descendant of the pid 138 * namespace of the procfs instance 0 will be shown as its first NSpid 139 * entry and no others will be shown. 140 * Note that this differs from the Pid and NSpid fields in 141 * /proc/<pid>/status where Pid and NSpid are always shown relative to 142 * the pid namespace of the procfs instance. The difference becomes 143 * obvious when sending around a pidfd between pid namespaces from a 144 * different branch of the tree, i.e. where no ancestral relation is 145 * present between the pid namespaces: 146 * - create two new pid namespaces ns1 and ns2 in the initial pid 147 * namespace (also take care to create new mount namespaces in the 148 * new pid namespace and mount procfs) 149 * - create a process with a pidfd in ns1 150 * - send pidfd from ns1 to ns2 151 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid 152 * have exactly one entry, which is 0 153 */ 154 static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) 155 { 156 struct pid *pid = pidfd_pid(f); 157 struct pid_namespace *ns; 158 pid_t nr = -1; 159 160 if (likely(pid_has_task(pid, PIDTYPE_PID))) { 161 ns = proc_pid_ns(file_inode(m->file)->i_sb); 162 nr = pid_nr_ns(pid, ns); 163 } 164 165 seq_put_decimal_ll(m, "Pid:\t", nr); 166 167 #ifdef CONFIG_PID_NS 168 seq_put_decimal_ll(m, "\nNSpid:\t", nr); 169 if (nr > 0) { 170 int i; 171 172 /* If nr is non-zero it means that 'pid' is valid and that 173 * ns, i.e. the pid namespace associated with the procfs 174 * instance, is in the pid namespace hierarchy of pid. 175 * Start at one below the already printed level. 176 */ 177 for (i = ns->level + 1; i <= pid->level; i++) 178 seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); 179 } 180 #endif 181 seq_putc(m, '\n'); 182 } 183 #endif 184 185 /* 186 * Poll support for process exit notification. 187 */ 188 static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) 189 { 190 struct pid *pid = pidfd_pid(file); 191 bool thread = file->f_flags & PIDFD_THREAD; 192 struct task_struct *task; 193 __poll_t poll_flags = 0; 194 195 poll_wait(file, &pid->wait_pidfd, pts); 196 /* 197 * Depending on PIDFD_THREAD, inform pollers when the thread 198 * or the whole thread-group exits. 199 */ 200 guard(rcu)(); 201 task = pid_task(pid, PIDTYPE_PID); 202 if (!task) 203 poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; 204 else if (task->exit_state && (thread || thread_group_empty(task))) 205 poll_flags = EPOLLIN | EPOLLRDNORM; 206 207 return poll_flags; 208 } 209 210 static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) 211 { 212 struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; 213 size_t usize = _IOC_SIZE(cmd); 214 struct pidfd_info kinfo = {}; 215 struct user_namespace *user_ns; 216 const struct cred *c; 217 __u64 mask; 218 #ifdef CONFIG_CGROUPS 219 struct cgroup *cgrp; 220 #endif 221 222 if (!uinfo) 223 return -EINVAL; 224 if (usize < PIDFD_INFO_SIZE_VER0) 225 return -EINVAL; /* First version, no smaller struct possible */ 226 227 if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) 228 return -EFAULT; 229 230 c = get_task_cred(task); 231 if (!c) 232 return -ESRCH; 233 234 /* Unconditionally return identifiers and credentials, the rest only on request */ 235 236 user_ns = current_user_ns(); 237 kinfo.ruid = from_kuid_munged(user_ns, c->uid); 238 kinfo.rgid = from_kgid_munged(user_ns, c->gid); 239 kinfo.euid = from_kuid_munged(user_ns, c->euid); 240 kinfo.egid = from_kgid_munged(user_ns, c->egid); 241 kinfo.suid = from_kuid_munged(user_ns, c->suid); 242 kinfo.sgid = from_kgid_munged(user_ns, c->sgid); 243 kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); 244 kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); 245 kinfo.mask |= PIDFD_INFO_CREDS; 246 put_cred(c); 247 248 #ifdef CONFIG_CGROUPS 249 rcu_read_lock(); 250 cgrp = task_dfl_cgroup(task); 251 kinfo.cgroupid = cgroup_id(cgrp); 252 kinfo.mask |= PIDFD_INFO_CGROUPID; 253 rcu_read_unlock(); 254 #endif 255 256 /* 257 * Copy pid/tgid last, to reduce the chances the information might be 258 * stale. Note that it is not possible to ensure it will be valid as the 259 * task might return as soon as the copy_to_user finishes, but that's ok 260 * and userspace expects that might happen and can act accordingly, so 261 * this is just best-effort. What we can do however is checking that all 262 * the fields are set correctly, or return ESRCH to avoid providing 263 * incomplete information. */ 264 265 kinfo.ppid = task_ppid_nr_ns(task, NULL); 266 kinfo.tgid = task_tgid_vnr(task); 267 kinfo.pid = task_pid_vnr(task); 268 kinfo.mask |= PIDFD_INFO_PID; 269 270 if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) 271 return -ESRCH; 272 273 /* 274 * If userspace and the kernel have the same struct size it can just 275 * be copied. If userspace provides an older struct, only the bits that 276 * userspace knows about will be copied. If userspace provides a new 277 * struct, only the bits that the kernel knows about will be copied. 278 */ 279 if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo)))) 280 return -EFAULT; 281 282 return 0; 283 } 284 285 static bool pidfs_ioctl_valid(unsigned int cmd) 286 { 287 switch (cmd) { 288 case FS_IOC_GETVERSION: 289 case PIDFD_GET_CGROUP_NAMESPACE: 290 case PIDFD_GET_INFO: 291 case PIDFD_GET_IPC_NAMESPACE: 292 case PIDFD_GET_MNT_NAMESPACE: 293 case PIDFD_GET_NET_NAMESPACE: 294 case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: 295 case PIDFD_GET_TIME_NAMESPACE: 296 case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: 297 case PIDFD_GET_UTS_NAMESPACE: 298 case PIDFD_GET_USER_NAMESPACE: 299 case PIDFD_GET_PID_NAMESPACE: 300 return true; 301 } 302 303 return false; 304 } 305 306 static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 307 { 308 struct task_struct *task __free(put_task) = NULL; 309 struct nsproxy *nsp __free(put_nsproxy) = NULL; 310 struct pid *pid = pidfd_pid(file); 311 struct ns_common *ns_common = NULL; 312 struct pid_namespace *pid_ns; 313 314 if (!pidfs_ioctl_valid(cmd)) 315 return -ENOIOCTLCMD; 316 317 if (cmd == FS_IOC_GETVERSION) { 318 if (!arg) 319 return -EINVAL; 320 321 __u32 __user *argp = (__u32 __user *)arg; 322 return put_user(file_inode(file)->i_generation, argp); 323 } 324 325 task = get_pid_task(pid, PIDTYPE_PID); 326 if (!task) 327 return -ESRCH; 328 329 /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ 330 if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) 331 return pidfd_info(task, cmd, arg); 332 333 if (arg) 334 return -EINVAL; 335 336 scoped_guard(task_lock, task) { 337 nsp = task->nsproxy; 338 if (nsp) 339 get_nsproxy(nsp); 340 } 341 if (!nsp) 342 return -ESRCH; /* just pretend it didn't exist */ 343 344 /* 345 * We're trying to open a file descriptor to the namespace so perform a 346 * filesystem cred ptrace check. Also, we mirror nsfs behavior. 347 */ 348 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 349 return -EACCES; 350 351 switch (cmd) { 352 /* Namespaces that hang of nsproxy. */ 353 case PIDFD_GET_CGROUP_NAMESPACE: 354 if (IS_ENABLED(CONFIG_CGROUPS)) { 355 get_cgroup_ns(nsp->cgroup_ns); 356 ns_common = to_ns_common(nsp->cgroup_ns); 357 } 358 break; 359 case PIDFD_GET_IPC_NAMESPACE: 360 if (IS_ENABLED(CONFIG_IPC_NS)) { 361 get_ipc_ns(nsp->ipc_ns); 362 ns_common = to_ns_common(nsp->ipc_ns); 363 } 364 break; 365 case PIDFD_GET_MNT_NAMESPACE: 366 get_mnt_ns(nsp->mnt_ns); 367 ns_common = to_ns_common(nsp->mnt_ns); 368 break; 369 case PIDFD_GET_NET_NAMESPACE: 370 if (IS_ENABLED(CONFIG_NET_NS)) { 371 ns_common = to_ns_common(nsp->net_ns); 372 get_net_ns(ns_common); 373 } 374 break; 375 case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: 376 if (IS_ENABLED(CONFIG_PID_NS)) { 377 get_pid_ns(nsp->pid_ns_for_children); 378 ns_common = to_ns_common(nsp->pid_ns_for_children); 379 } 380 break; 381 case PIDFD_GET_TIME_NAMESPACE: 382 if (IS_ENABLED(CONFIG_TIME_NS)) { 383 get_time_ns(nsp->time_ns); 384 ns_common = to_ns_common(nsp->time_ns); 385 } 386 break; 387 case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: 388 if (IS_ENABLED(CONFIG_TIME_NS)) { 389 get_time_ns(nsp->time_ns_for_children); 390 ns_common = to_ns_common(nsp->time_ns_for_children); 391 } 392 break; 393 case PIDFD_GET_UTS_NAMESPACE: 394 if (IS_ENABLED(CONFIG_UTS_NS)) { 395 get_uts_ns(nsp->uts_ns); 396 ns_common = to_ns_common(nsp->uts_ns); 397 } 398 break; 399 /* Namespaces that don't hang of nsproxy. */ 400 case PIDFD_GET_USER_NAMESPACE: 401 if (IS_ENABLED(CONFIG_USER_NS)) { 402 rcu_read_lock(); 403 ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); 404 rcu_read_unlock(); 405 } 406 break; 407 case PIDFD_GET_PID_NAMESPACE: 408 if (IS_ENABLED(CONFIG_PID_NS)) { 409 rcu_read_lock(); 410 pid_ns = task_active_pid_ns(task); 411 if (pid_ns) 412 ns_common = to_ns_common(get_pid_ns(pid_ns)); 413 rcu_read_unlock(); 414 } 415 break; 416 default: 417 return -ENOIOCTLCMD; 418 } 419 420 if (!ns_common) 421 return -EOPNOTSUPP; 422 423 /* open_namespace() unconditionally consumes the reference */ 424 return open_namespace(ns_common); 425 } 426 427 static const struct file_operations pidfs_file_operations = { 428 .poll = pidfd_poll, 429 #ifdef CONFIG_PROC_FS 430 .show_fdinfo = pidfd_show_fdinfo, 431 #endif 432 .unlocked_ioctl = pidfd_ioctl, 433 .compat_ioctl = compat_ptr_ioctl, 434 }; 435 436 struct pid *pidfd_pid(const struct file *file) 437 { 438 if (file->f_op != &pidfs_file_operations) 439 return ERR_PTR(-EBADF); 440 return file_inode(file)->i_private; 441 } 442 443 static struct vfsmount *pidfs_mnt __ro_after_init; 444 445 /* 446 * The vfs falls back to simple_setattr() if i_op->setattr() isn't 447 * implemented. Let's reject it completely until we have a clean 448 * permission concept for pidfds. 449 */ 450 static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 451 struct iattr *attr) 452 { 453 return -EOPNOTSUPP; 454 } 455 456 457 /* 458 * User space expects pidfs inodes to have no file type in st_mode. 459 * 460 * In particular, 'lsof' has this legacy logic: 461 * 462 * type = s->st_mode & S_IFMT; 463 * switch (type) { 464 * ... 465 * case 0: 466 * if (!strcmp(p, "anon_inode")) 467 * Lf->ntype = Ntype = N_ANON_INODE; 468 * 469 * to detect our old anon_inode logic. 470 * 471 * Rather than mess with our internal sane inode data, just fix it 472 * up here in getattr() by masking off the format bits. 473 */ 474 static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, 475 struct kstat *stat, u32 request_mask, 476 unsigned int query_flags) 477 { 478 struct inode *inode = d_inode(path->dentry); 479 480 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 481 stat->mode &= ~S_IFMT; 482 return 0; 483 } 484 485 static const struct inode_operations pidfs_inode_operations = { 486 .getattr = pidfs_getattr, 487 .setattr = pidfs_setattr, 488 }; 489 490 static void pidfs_evict_inode(struct inode *inode) 491 { 492 struct pid *pid = inode->i_private; 493 494 clear_inode(inode); 495 put_pid(pid); 496 } 497 498 static const struct super_operations pidfs_sops = { 499 .drop_inode = generic_delete_inode, 500 .evict_inode = pidfs_evict_inode, 501 .statfs = simple_statfs, 502 }; 503 504 /* 505 * 'lsof' has knowledge of out historical anon_inode use, and expects 506 * the pidfs dentry name to start with 'anon_inode'. 507 */ 508 static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) 509 { 510 return dynamic_dname(buffer, buflen, "anon_inode:[pidfd]"); 511 } 512 513 const struct dentry_operations pidfs_dentry_operations = { 514 .d_delete = always_delete_dentry, 515 .d_dname = pidfs_dname, 516 .d_prune = stashed_dentry_prune, 517 }; 518 519 static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, 520 struct inode *parent) 521 { 522 const struct pid *pid = inode->i_private; 523 524 if (*max_len < 2) { 525 *max_len = 2; 526 return FILEID_INVALID; 527 } 528 529 *max_len = 2; 530 *(u64 *)fh = pid->ino; 531 return FILEID_KERNFS; 532 } 533 534 static int pidfs_ino_find(const void *key, const struct rb_node *node) 535 { 536 const u64 pid_ino = *(u64 *)key; 537 const struct pid *pid = rb_entry(node, struct pid, pidfs_node); 538 539 if (pid_ino < pid->ino) 540 return -1; 541 if (pid_ino > pid->ino) 542 return 1; 543 return 0; 544 } 545 546 /* Find a struct pid based on the inode number. */ 547 static struct pid *pidfs_ino_get_pid(u64 ino) 548 { 549 struct pid *pid; 550 struct rb_node *node; 551 unsigned int seq; 552 553 guard(rcu)(); 554 do { 555 seq = read_seqcount_begin(&pidmap_lock_seq); 556 node = rb_find_rcu(&ino, &pidfs_ino_tree, pidfs_ino_find); 557 if (node) 558 break; 559 } while (read_seqcount_retry(&pidmap_lock_seq, seq)); 560 561 if (!node) 562 return NULL; 563 564 pid = rb_entry(node, struct pid, pidfs_node); 565 566 /* Within our pid namespace hierarchy? */ 567 if (pid_vnr(pid) == 0) 568 return NULL; 569 570 return get_pid(pid); 571 } 572 573 static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, 574 struct fid *fid, int fh_len, 575 int fh_type) 576 { 577 int ret; 578 u64 pid_ino; 579 struct path path; 580 struct pid *pid; 581 582 if (fh_len < 2) 583 return NULL; 584 585 switch (fh_type) { 586 case FILEID_KERNFS: 587 pid_ino = *(u64 *)fid; 588 break; 589 default: 590 return NULL; 591 } 592 593 pid = pidfs_ino_get_pid(pid_ino); 594 if (!pid) 595 return NULL; 596 597 ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); 598 if (ret < 0) 599 return ERR_PTR(ret); 600 601 mntput(path.mnt); 602 return path.dentry; 603 } 604 605 /* 606 * Make sure that we reject any nonsensical flags that users pass via 607 * open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and 608 * PIDFD_NONBLOCK as O_NONBLOCK. 609 */ 610 #define VALID_FILE_HANDLE_OPEN_FLAGS \ 611 (O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL) 612 613 static int pidfs_export_permission(struct handle_to_path_ctx *ctx, 614 unsigned int oflags) 615 { 616 if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE)) 617 return -EINVAL; 618 619 /* 620 * pidfd_ino_get_pid() will verify that the struct pid is part 621 * of the caller's pid namespace hierarchy. No further 622 * permission checks are needed. 623 */ 624 return 0; 625 } 626 627 static struct file *pidfs_export_open(struct path *path, unsigned int oflags) 628 { 629 /* 630 * Clear O_LARGEFILE as open_by_handle_at() forces it and raise 631 * O_RDWR as pidfds always are. 632 */ 633 oflags &= ~O_LARGEFILE; 634 return dentry_open(path, oflags | O_RDWR, current_cred()); 635 } 636 637 static const struct export_operations pidfs_export_operations = { 638 .encode_fh = pidfs_encode_fh, 639 .fh_to_dentry = pidfs_fh_to_dentry, 640 .open = pidfs_export_open, 641 .permission = pidfs_export_permission, 642 }; 643 644 static int pidfs_init_inode(struct inode *inode, void *data) 645 { 646 const struct pid *pid = data; 647 648 inode->i_private = data; 649 inode->i_flags |= S_PRIVATE; 650 inode->i_mode |= S_IRWXU; 651 inode->i_op = &pidfs_inode_operations; 652 inode->i_fop = &pidfs_file_operations; 653 inode->i_ino = pidfs_ino(pid->ino); 654 inode->i_generation = pidfs_gen(pid->ino); 655 return 0; 656 } 657 658 static void pidfs_put_data(void *data) 659 { 660 struct pid *pid = data; 661 put_pid(pid); 662 } 663 664 static const struct stashed_operations pidfs_stashed_ops = { 665 .init_inode = pidfs_init_inode, 666 .put_data = pidfs_put_data, 667 }; 668 669 static int pidfs_init_fs_context(struct fs_context *fc) 670 { 671 struct pseudo_fs_context *ctx; 672 673 ctx = init_pseudo(fc, PID_FS_MAGIC); 674 if (!ctx) 675 return -ENOMEM; 676 677 ctx->ops = &pidfs_sops; 678 ctx->eops = &pidfs_export_operations; 679 ctx->dops = &pidfs_dentry_operations; 680 fc->s_fs_info = (void *)&pidfs_stashed_ops; 681 return 0; 682 } 683 684 static struct file_system_type pidfs_type = { 685 .name = "pidfs", 686 .init_fs_context = pidfs_init_fs_context, 687 .kill_sb = kill_anon_super, 688 }; 689 690 struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) 691 { 692 693 struct file *pidfd_file; 694 struct path path; 695 int ret; 696 697 ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); 698 if (ret < 0) 699 return ERR_PTR(ret); 700 701 pidfd_file = dentry_open(&path, flags, current_cred()); 702 path_put(&path); 703 return pidfd_file; 704 } 705 706 void __init pidfs_init(void) 707 { 708 pidfs_mnt = kern_mount(&pidfs_type); 709 if (IS_ERR(pidfs_mnt)) 710 panic("Failed to mount pidfs pseudo filesystem"); 711 } 712