1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * binfmt_misc.c
4 *
5 * Copyright (C) 1997 Richard Günther
6 *
7 * binfmt_misc detects binaries via a magic or filename extension and invokes
8 * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details.
9 */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/sched/mm.h>
17 #include <linux/magic.h>
18 #include <linux/binfmts.h>
19 #include <linux/slab.h>
20 #include <linux/ctype.h>
21 #include <linux/string_helpers.h>
22 #include <linux/file.h>
23 #include <linux/pagemap.h>
24 #include <linux/namei.h>
25 #include <linux/mount.h>
26 #include <linux/fs_context.h>
27 #include <linux/syscalls.h>
28 #include <linux/fs.h>
29 #include <linux/uaccess.h>
30
31 #include "internal.h"
32
/*
 * USE_DEBUG is 1 when DEBUG is defined and 0 otherwise, so that debug-only
 * code (the hex dumps in create_entry()) can be guarded with a plain
 * "if (USE_DEBUG)" instead of preprocessor conditionals.
 */
#ifdef DEBUG
# define USE_DEBUG 1
#else
# define USE_DEBUG 0
#endif
38
/*
 * VERBOSE_STATUS controls how much entry_status() prints: when it is zero
 * only the "enabled"/"disabled" line is produced, otherwise the interpreter,
 * flags and match data follow.
 */
enum {
	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
};
42
/* Bit numbers in Node.flags, used with test_bit()/set_bit()/clear_bit(). */
enum {Enabled, Magic};
/* Bit masks in Node.flags, set by check_special_flags() from the flag letters. */
#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)	/* 'P' */
#define MISC_FMT_OPEN_BINARY (1UL << 30)	/* 'O', also implied by 'C' */
#define MISC_FMT_CREDENTIALS (1UL << 29)	/* 'C' */
#define MISC_FMT_OPEN_FILE (1UL << 28)		/* 'F' */
48
/*
 * One registered binary type handler.
 *
 * create_entry() allocates a Node together with a copy of the register
 * string placed directly behind it; the name, magic, mask and interpreter
 * pointers below point into that trailing copy, so a single kfree() of the
 * Node releases everything.
 */
typedef struct {
	struct list_head list;		/* link in binfmt_misc->entries */
	unsigned long flags;		/* type, status, etc. */
	int offset;			/* offset of magic */
	int size;			/* size of magic/mask */
	char *magic;			/* magic or filename extension */
	char *mask;			/* mask, NULL for exact match */
	const char *interpreter;	/* filename of interpreter */
	char *name;			/* entry name, used as the file name */
	struct dentry *dentry;		/* entry file, set in bm_register_write() */
	struct file *interp_file;	/* pre-opened interpreter ('F' flag only) */
	refcount_t users;		/* sync removal with load_misc_binary() */
} Node;
62
/* Forward declaration; the definition is near the end of this file. */
static struct file_system_type bm_fs_type;
64
/*
 * Max length of the register string; create_entry() rejects anything longer.
 * Determined by:
 *  - 7 delimiters
 *  - name:   ~50 bytes
 *  - type:   1 byte
 *  - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
 *  - magic:  128 bytes (512 in escaped form)
 *  - mask:   128 bytes (512 in escaped form)
 *  - interp: ~50 bytes
 *  - flags:  5 bytes
 * Round that up a bit, and then back off to hold the internal data
 * (like struct Node).
 */
#define MAX_REGISTER_LENGTH 1920
79
80 /**
81 * search_binfmt_handler - search for a binary handler for @bprm
82 * @misc: handle to binfmt_misc instance
83 * @bprm: binary for which we are looking for a handler
84 *
85 * Search for a binary type handler for @bprm in the list of registered binary
86 * type handlers.
87 *
88 * Return: binary type list entry on success, NULL on failure
89 */
search_binfmt_handler(struct binfmt_misc * misc,struct linux_binprm * bprm)90 static Node *search_binfmt_handler(struct binfmt_misc *misc,
91 struct linux_binprm *bprm)
92 {
93 char *p = strrchr(bprm->interp, '.');
94 Node *e;
95
96 /* Walk all the registered handlers. */
97 list_for_each_entry(e, &misc->entries, list) {
98 char *s;
99 int j;
100
101 /* Make sure this one is currently enabled. */
102 if (!test_bit(Enabled, &e->flags))
103 continue;
104
105 /* Do matching based on extension if applicable. */
106 if (!test_bit(Magic, &e->flags)) {
107 if (p && !strcmp(e->magic, p + 1))
108 return e;
109 continue;
110 }
111
112 /* Do matching based on magic & mask. */
113 s = bprm->buf + e->offset;
114 if (e->mask) {
115 for (j = 0; j < e->size; j++)
116 if ((*s++ ^ e->magic[j]) & e->mask[j])
117 break;
118 } else {
119 for (j = 0; j < e->size; j++)
120 if ((*s++ ^ e->magic[j]))
121 break;
122 }
123 if (j == e->size)
124 return e;
125 }
126
127 return NULL;
128 }
129
130 /**
131 * get_binfmt_handler - try to find a binary type handler
132 * @misc: handle to binfmt_misc instance
133 * @bprm: binary for which we are looking for a handler
134 *
135 * Try to find a binfmt handler for the binary type. If one is found take a
136 * reference to protect against removal via bm_{entry,status}_write().
137 *
138 * Return: binary type list entry on success, NULL on failure
139 */
get_binfmt_handler(struct binfmt_misc * misc,struct linux_binprm * bprm)140 static Node *get_binfmt_handler(struct binfmt_misc *misc,
141 struct linux_binprm *bprm)
142 {
143 Node *e;
144
145 read_lock(&misc->entries_lock);
146 e = search_binfmt_handler(misc, bprm);
147 if (e)
148 refcount_inc(&e->users);
149 read_unlock(&misc->entries_lock);
150 return e;
151 }
152
153 /**
154 * put_binfmt_handler - put binary handler node
155 * @e: node to put
156 *
157 * Free node syncing with load_misc_binary() and defer final free to
158 * load_misc_binary() in case it is using the binary type handler we were
159 * requested to remove.
160 */
put_binfmt_handler(Node * e)161 static void put_binfmt_handler(Node *e)
162 {
163 if (refcount_dec_and_test(&e->users)) {
164 if (e->flags & MISC_FMT_OPEN_FILE)
165 filp_close(e->interp_file, NULL);
166 kfree(e);
167 }
168 }
169
170 /**
171 * load_binfmt_misc - load the binfmt_misc of the caller's user namespace
172 *
173 * To be called in load_misc_binary() to load the relevant struct binfmt_misc.
174 * If a user namespace doesn't have its own binfmt_misc mount it can make use
175 * of its ancestor's binfmt_misc handlers. This mimicks the behavior of
176 * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where
177 * available to all user and user namespaces on the system.
178 *
179 * Return: the binfmt_misc instance of the caller's user namespace
180 */
load_binfmt_misc(void)181 static struct binfmt_misc *load_binfmt_misc(void)
182 {
183 const struct user_namespace *user_ns;
184 struct binfmt_misc *misc;
185
186 user_ns = current_user_ns();
187 while (user_ns) {
188 /* Pairs with smp_store_release() in bm_fill_super(). */
189 misc = smp_load_acquire(&user_ns->binfmt_misc);
190 if (misc)
191 return misc;
192
193 user_ns = user_ns->parent;
194 }
195
196 return &init_binfmt_misc;
197 }
198
199 /*
200 * the loader itself
201 */
load_misc_binary(struct linux_binprm * bprm)202 static int load_misc_binary(struct linux_binprm *bprm)
203 {
204 Node *fmt;
205 struct file *interp_file = NULL;
206 int retval = -ENOEXEC;
207 struct binfmt_misc *misc;
208
209 misc = load_binfmt_misc();
210 if (!misc->enabled)
211 return retval;
212
213 fmt = get_binfmt_handler(misc, bprm);
214 if (!fmt)
215 return retval;
216
217 /* Need to be able to load the file after exec */
218 retval = -ENOENT;
219 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
220 goto ret;
221
222 if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) {
223 bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0;
224 } else {
225 retval = remove_arg_zero(bprm);
226 if (retval)
227 goto ret;
228 }
229
230 if (fmt->flags & MISC_FMT_OPEN_BINARY)
231 bprm->have_execfd = 1;
232
233 /* make argv[1] be the path to the binary */
234 retval = copy_string_kernel(bprm->interp, bprm);
235 if (retval < 0)
236 goto ret;
237 bprm->argc++;
238
239 /* add the interp as argv[0] */
240 retval = copy_string_kernel(fmt->interpreter, bprm);
241 if (retval < 0)
242 goto ret;
243 bprm->argc++;
244
245 /* Update interp in case binfmt_script needs it. */
246 retval = bprm_change_interp(fmt->interpreter, bprm);
247 if (retval < 0)
248 goto ret;
249
250 if (fmt->flags & MISC_FMT_OPEN_FILE)
251 interp_file = file_clone_open(fmt->interp_file);
252 else
253 interp_file = open_exec(fmt->interpreter);
254 retval = PTR_ERR(interp_file);
255 if (IS_ERR(interp_file))
256 goto ret;
257
258 bprm->interpreter = interp_file;
259 if (fmt->flags & MISC_FMT_CREDENTIALS)
260 bprm->execfd_creds = 1;
261
262 retval = 0;
263 ret:
264
265 /*
266 * If we actually put the node here all concurrent calls to
267 * load_misc_binary() will have finished. We also know
268 * that for the refcount to be zero someone must have concurently
269 * removed the binary type handler from the list and it's our job to
270 * free it.
271 */
272 put_binfmt_handler(fmt);
273
274 return retval;
275 }
276
277 /* Command parsers */
278
/*
 * Scans one argument in place, starting at @s and ending at the first
 * occurrence of the delimiter @del. A "\x" sequence must be followed by two
 * hex digits; those two digits are skipped, so a digit that happens to equal
 * @del does not end the argument.
 *
 * On success the delimiter is overwritten with a NUL byte and a pointer to
 * the byte following it is returned. Returns NULL if a "\x" sequence is not
 * followed by two hex digits.
 *
 * There is no end-of-buffer check: the caller must guarantee that a
 * delimiter follows.
 */
static char *scanarg(char *s, char del)
{
	for (;;) {
		char c = *s++;

		if (c == del)
			break;

		/* Only a backslash directly followed by 'x' is special. */
		if (c != '\\' || *s != 'x')
			continue;

		/* s points at the 'x'; validate and skip the two digits. */
		if (!isxdigit(s[1]) || !isxdigit(s[2]))
			return NULL;
		s += 3;
	}

	/* Replace the delimiter that ended the scan with a terminator. */
	s[-1] = '\0';
	return s;
}
301
/*
 * Consumes the run of flag letters at the start of @sfs and records each one
 * in @e->flags:
 *   'P' - MISC_FMT_PRESERVE_ARGV0
 *   'O' - MISC_FMT_OPEN_BINARY
 *   'C' - MISC_FMT_CREDENTIALS (also sets MISC_FMT_OPEN_BINARY)
 *   'F' - MISC_FMT_OPEN_FILE
 *
 * Returns a pointer to the first character that is not a flag letter.
 */
static char *check_special_flags(char *sfs, Node *e)
{
	char *cur;

	for (cur = sfs; ; cur++) {
		switch (*cur) {
		case 'P':
			pr_debug("register: flag: P (preserve argv0)\n");
			e->flags |= MISC_FMT_PRESERVE_ARGV0;
			break;
		case 'O':
			pr_debug("register: flag: O (open binary)\n");
			e->flags |= MISC_FMT_OPEN_BINARY;
			break;
		case 'C':
			pr_debug("register: flag: C (preserve creds)\n");
			/* this flag also implies the open-binary flag */
			e->flags |= MISC_FMT_CREDENTIALS | MISC_FMT_OPEN_BINARY;
			break;
		case 'F':
			pr_debug("register: flag: F: open interpreter file now\n");
			e->flags |= MISC_FMT_OPEN_FILE;
			break;
		default:
			/* First non-flag character ends the run. */
			return cur;
		}
	}
}
340
341 /*
342 * This registers a new binary format, it recognises the syntax
343 * ':name:type:offset:magic:mask:interpreter:flags'
344 * where the ':' is the IFS, that can be chosen with the first char
345 */
create_entry(const char __user * buffer,size_t count)346 static Node *create_entry(const char __user *buffer, size_t count)
347 {
348 Node *e;
349 int memsize, err;
350 char *buf, *p;
351 char del;
352
353 pr_debug("register: received %zu bytes\n", count);
354
355 /* some sanity checks */
356 err = -EINVAL;
357 if ((count < 11) || (count > MAX_REGISTER_LENGTH))
358 goto out;
359
360 err = -ENOMEM;
361 memsize = sizeof(Node) + count + 8;
362 e = kmalloc(memsize, GFP_KERNEL_ACCOUNT);
363 if (!e)
364 goto out;
365
366 p = buf = (char *)e + sizeof(Node);
367
368 memset(e, 0, sizeof(Node));
369 if (copy_from_user(buf, buffer, count))
370 goto efault;
371
372 del = *p++; /* delimeter */
373
374 pr_debug("register: delim: %#x {%c}\n", del, del);
375
376 /* Pad the buffer with the delim to simplify parsing below. */
377 memset(buf + count, del, 8);
378
379 /* Parse the 'name' field. */
380 e->name = p;
381 p = strchr(p, del);
382 if (!p)
383 goto einval;
384 *p++ = '\0';
385 if (!e->name[0] ||
386 !strcmp(e->name, ".") ||
387 !strcmp(e->name, "..") ||
388 strchr(e->name, '/'))
389 goto einval;
390
391 pr_debug("register: name: {%s}\n", e->name);
392
393 /* Parse the 'type' field. */
394 switch (*p++) {
395 case 'E':
396 pr_debug("register: type: E (extension)\n");
397 e->flags = 1 << Enabled;
398 break;
399 case 'M':
400 pr_debug("register: type: M (magic)\n");
401 e->flags = (1 << Enabled) | (1 << Magic);
402 break;
403 default:
404 goto einval;
405 }
406 if (*p++ != del)
407 goto einval;
408
409 if (test_bit(Magic, &e->flags)) {
410 /* Handle the 'M' (magic) format. */
411 char *s;
412
413 /* Parse the 'offset' field. */
414 s = strchr(p, del);
415 if (!s)
416 goto einval;
417 *s = '\0';
418 if (p != s) {
419 int r = kstrtoint(p, 10, &e->offset);
420 if (r != 0 || e->offset < 0)
421 goto einval;
422 }
423 p = s;
424 if (*p++)
425 goto einval;
426 pr_debug("register: offset: %#x\n", e->offset);
427
428 /* Parse the 'magic' field. */
429 e->magic = p;
430 p = scanarg(p, del);
431 if (!p)
432 goto einval;
433 if (!e->magic[0])
434 goto einval;
435 if (USE_DEBUG)
436 print_hex_dump_bytes(
437 KBUILD_MODNAME ": register: magic[raw]: ",
438 DUMP_PREFIX_NONE, e->magic, p - e->magic);
439
440 /* Parse the 'mask' field. */
441 e->mask = p;
442 p = scanarg(p, del);
443 if (!p)
444 goto einval;
445 if (!e->mask[0]) {
446 e->mask = NULL;
447 pr_debug("register: mask[raw]: none\n");
448 } else if (USE_DEBUG)
449 print_hex_dump_bytes(
450 KBUILD_MODNAME ": register: mask[raw]: ",
451 DUMP_PREFIX_NONE, e->mask, p - e->mask);
452
453 /*
454 * Decode the magic & mask fields.
455 * Note: while we might have accepted embedded NUL bytes from
456 * above, the unescape helpers here will stop at the first one
457 * it encounters.
458 */
459 e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
460 if (e->mask &&
461 string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
462 goto einval;
463 if (e->size > BINPRM_BUF_SIZE ||
464 BINPRM_BUF_SIZE - e->size < e->offset)
465 goto einval;
466 pr_debug("register: magic/mask length: %i\n", e->size);
467 if (USE_DEBUG) {
468 print_hex_dump_bytes(
469 KBUILD_MODNAME ": register: magic[decoded]: ",
470 DUMP_PREFIX_NONE, e->magic, e->size);
471
472 if (e->mask) {
473 int i;
474 char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT);
475
476 print_hex_dump_bytes(
477 KBUILD_MODNAME ": register: mask[decoded]: ",
478 DUMP_PREFIX_NONE, e->mask, e->size);
479
480 if (masked) {
481 for (i = 0; i < e->size; ++i)
482 masked[i] = e->magic[i] & e->mask[i];
483 print_hex_dump_bytes(
484 KBUILD_MODNAME ": register: magic[masked]: ",
485 DUMP_PREFIX_NONE, masked, e->size);
486
487 kfree(masked);
488 }
489 }
490 }
491 } else {
492 /* Handle the 'E' (extension) format. */
493
494 /* Skip the 'offset' field. */
495 p = strchr(p, del);
496 if (!p)
497 goto einval;
498 *p++ = '\0';
499
500 /* Parse the 'magic' field. */
501 e->magic = p;
502 p = strchr(p, del);
503 if (!p)
504 goto einval;
505 *p++ = '\0';
506 if (!e->magic[0] || strchr(e->magic, '/'))
507 goto einval;
508 pr_debug("register: extension: {%s}\n", e->magic);
509
510 /* Skip the 'mask' field. */
511 p = strchr(p, del);
512 if (!p)
513 goto einval;
514 *p++ = '\0';
515 }
516
517 /* Parse the 'interpreter' field. */
518 e->interpreter = p;
519 p = strchr(p, del);
520 if (!p)
521 goto einval;
522 *p++ = '\0';
523 if (!e->interpreter[0])
524 goto einval;
525 pr_debug("register: interpreter: {%s}\n", e->interpreter);
526
527 /* Parse the 'flags' field. */
528 p = check_special_flags(p, e);
529 if (*p == '\n')
530 p++;
531 if (p != buf + count)
532 goto einval;
533
534 return e;
535
536 out:
537 return ERR_PTR(err);
538
539 efault:
540 kfree(e);
541 return ERR_PTR(-EFAULT);
542 einval:
543 kfree(e);
544 return ERR_PTR(-EINVAL);
545 }
546
547 /*
548 * Set status of entry/binfmt_misc:
549 * '1' enables, '0' disables and '-1' clears entry/binfmt_misc
550 */
parse_command(const char __user * buffer,size_t count)551 static int parse_command(const char __user *buffer, size_t count)
552 {
553 char s[4];
554
555 if (count > 3)
556 return -EINVAL;
557 if (copy_from_user(s, buffer, count))
558 return -EFAULT;
559 if (!count)
560 return 0;
561 if (s[count - 1] == '\n')
562 count--;
563 if (count == 1 && s[0] == '0')
564 return 1;
565 if (count == 1 && s[0] == '1')
566 return 2;
567 if (count == 2 && s[0] == '-' && s[1] == '1')
568 return 3;
569 return -EINVAL;
570 }
571
572 /* generic stuff */
573
/*
 * Formats the textual status of @e into @page as a NUL-terminated string.
 *
 * The first line is "enabled" or "disabled". Unless VERBOSE_STATUS is zero
 * it is followed by the interpreter, the flag letters and either the
 * extension or the offset, magic and (if set) mask in hex.
 *
 * No bounds checking is done here; bm_entry_read() passes a whole page.
 */
static void entry_status(Node *e, char *page)
{
	static const struct {
		unsigned long bit;
		char letter;
	} special[] = {
		{ MISC_FMT_PRESERVE_ARGV0, 'P' },
		{ MISC_FMT_OPEN_BINARY,    'O' },
		{ MISC_FMT_CREDENTIALS,    'C' },
		{ MISC_FMT_OPEN_FILE,      'F' },
	};
	const char *status;
	char *out = page;
	int i;

	status = test_bit(Enabled, &e->flags) ? "enabled" : "disabled";

	if (!VERBOSE_STATUS) {
		sprintf(page, "%s\n", status);
		return;
	}

	out += sprintf(out, "%s\ninterpreter %s\n", status, e->interpreter);

	/* print the special flags */
	out += sprintf(out, "flags: ");
	for (i = 0; i < ARRAY_SIZE(special); i++) {
		if (e->flags & special[i].bit)
			*out++ = special[i].letter;
	}
	*out++ = '\n';

	if (!test_bit(Magic, &e->flags)) {
		/* sprintf() terminates the string for us here. */
		sprintf(out, "extension .%s\n", e->magic);
		return;
	}

	out += sprintf(out, "offset %i\nmagic ", e->offset);
	out = bin2hex(out, e->magic, e->size);
	if (e->mask) {
		out += sprintf(out, "\nmask ");
		out = bin2hex(out, e->mask, e->size);
	}
	/* bin2hex() does not terminate, so finish the line by hand. */
	*out++ = '\n';
	*out = '\0';
}
614
bm_get_inode(struct super_block * sb,int mode)615 static struct inode *bm_get_inode(struct super_block *sb, int mode)
616 {
617 struct inode *inode = new_inode(sb);
618
619 if (inode) {
620 inode->i_ino = get_next_ino();
621 inode->i_mode = mode;
622 simple_inode_init_ts(inode);
623 }
624 return inode;
625 }
626
627 /**
628 * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode
629 * @inode: inode of the relevant binfmt_misc instance
630 *
631 * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can
632 * be done without any memory barriers because we are guaranteed that
633 * user_ns->binfmt_misc is fully initialized. It was fully initialized when the
634 * binfmt_misc mount was first created.
635 *
636 * Return: struct binfmt_misc of the relevant binfmt_misc instance
637 */
i_binfmt_misc(struct inode * inode)638 static struct binfmt_misc *i_binfmt_misc(struct inode *inode)
639 {
640 return inode->i_sb->s_user_ns->binfmt_misc;
641 }
642
643 /**
644 * bm_evict_inode - cleanup data associated with @inode
645 * @inode: inode to which the data is attached
646 *
647 * Cleanup the binary type handler data associated with @inode if a binary type
648 * entry is removed or the filesystem is unmounted and the super block is
649 * shutdown.
650 *
651 * If the ->evict call was not caused by a super block shutdown but by a write
652 * to remove the entry or all entries via bm_{entry,status}_write() the entry
653 * will have already been removed from the list. We keep the list_empty() check
654 * to make that explicit.
655 */
bm_evict_inode(struct inode * inode)656 static void bm_evict_inode(struct inode *inode)
657 {
658 Node *e = inode->i_private;
659
660 clear_inode(inode);
661
662 if (e) {
663 struct binfmt_misc *misc;
664
665 misc = i_binfmt_misc(inode);
666 write_lock(&misc->entries_lock);
667 if (!list_empty(&e->list))
668 list_del_init(&e->list);
669 write_unlock(&misc->entries_lock);
670 put_binfmt_handler(e);
671 }
672 }
673
674 /**
675 * unlink_binfmt_dentry - remove the dentry for the binary type handler
676 * @dentry: dentry associated with the binary type handler
677 *
678 * Do the actual filesystem work to remove a dentry for a registered binary
679 * type handler. Since binfmt_misc only allows simple files to be created
680 * directly under the root dentry of the filesystem we ensure that we are
681 * indeed passed a dentry directly beneath the root dentry, that the inode
682 * associated with the root dentry is locked, and that it is a regular file we
683 * are asked to remove.
684 */
unlink_binfmt_dentry(struct dentry * dentry)685 static void unlink_binfmt_dentry(struct dentry *dentry)
686 {
687 struct dentry *parent = dentry->d_parent;
688 struct inode *inode, *parent_inode;
689
690 /* All entries are immediate descendants of the root dentry. */
691 if (WARN_ON_ONCE(dentry->d_sb->s_root != parent))
692 return;
693
694 /* We only expect to be called on regular files. */
695 inode = d_inode(dentry);
696 if (WARN_ON_ONCE(!S_ISREG(inode->i_mode)))
697 return;
698
699 /* The parent inode must be locked. */
700 parent_inode = d_inode(parent);
701 if (WARN_ON_ONCE(!inode_is_locked(parent_inode)))
702 return;
703
704 if (simple_positive(dentry)) {
705 dget(dentry);
706 simple_unlink(parent_inode, dentry);
707 d_delete(dentry);
708 dput(dentry);
709 }
710 }
711
712 /**
713 * remove_binfmt_handler - remove a binary type handler
714 * @misc: handle to binfmt_misc instance
715 * @e: binary type handler to remove
716 *
717 * Remove a binary type handler from the list of binary type handlers and
718 * remove its associated dentry. This is called from
719 * binfmt_{entry,status}_write(). In the future, we might want to think about
720 * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's
721 * to use writes to files in order to delete binary type handlers. But it has
722 * worked for so long that it's not a pressing issue.
723 */
remove_binfmt_handler(struct binfmt_misc * misc,Node * e)724 static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e)
725 {
726 write_lock(&misc->entries_lock);
727 list_del_init(&e->list);
728 write_unlock(&misc->entries_lock);
729 unlink_binfmt_dentry(e->dentry);
730 }
731
732 /* /<entry> */
733
734 static ssize_t
bm_entry_read(struct file * file,char __user * buf,size_t nbytes,loff_t * ppos)735 bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
736 {
737 Node *e = file_inode(file)->i_private;
738 ssize_t res;
739 char *page;
740
741 page = (char *) __get_free_page(GFP_KERNEL);
742 if (!page)
743 return -ENOMEM;
744
745 entry_status(e, page);
746
747 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
748
749 free_page((unsigned long) page);
750 return res;
751 }
752
bm_entry_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)753 static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
754 size_t count, loff_t *ppos)
755 {
756 struct inode *inode = file_inode(file);
757 Node *e = inode->i_private;
758 int res = parse_command(buffer, count);
759
760 switch (res) {
761 case 1:
762 /* Disable this handler. */
763 clear_bit(Enabled, &e->flags);
764 break;
765 case 2:
766 /* Enable this handler. */
767 set_bit(Enabled, &e->flags);
768 break;
769 case 3:
770 /* Delete this handler. */
771 inode = d_inode(inode->i_sb->s_root);
772 inode_lock(inode);
773
774 /*
775 * In order to add new element or remove elements from the list
776 * via bm_{entry,register,status}_write() inode_lock() on the
777 * root inode must be held.
778 * The lock is exclusive ensuring that the list can't be
779 * modified. Only load_misc_binary() can access but does so
780 * read-only. So we only need to take the write lock when we
781 * actually remove the entry from the list.
782 */
783 if (!list_empty(&e->list))
784 remove_binfmt_handler(i_binfmt_misc(inode), e);
785
786 inode_unlock(inode);
787 break;
788 default:
789 return res;
790 }
791
792 return count;
793 }
794
/* File operations for the per-handler entry files; set in bm_register_write(). */
static const struct file_operations bm_entry_operations = {
	.read = bm_entry_read,
	.write = bm_entry_write,
	.llseek = default_llseek,
};
800
801 /* /register */
802
bm_register_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)803 static ssize_t bm_register_write(struct file *file, const char __user *buffer,
804 size_t count, loff_t *ppos)
805 {
806 Node *e;
807 struct inode *inode;
808 struct super_block *sb = file_inode(file)->i_sb;
809 struct dentry *root = sb->s_root, *dentry;
810 struct binfmt_misc *misc;
811 int err = 0;
812 struct file *f = NULL;
813
814 e = create_entry(buffer, count);
815
816 if (IS_ERR(e))
817 return PTR_ERR(e);
818
819 if (e->flags & MISC_FMT_OPEN_FILE) {
820 const struct cred *old_cred;
821
822 /*
823 * Now that we support unprivileged binfmt_misc mounts make
824 * sure we use the credentials that the register @file was
825 * opened with to also open the interpreter. Before that this
826 * didn't matter much as only a privileged process could open
827 * the register file.
828 */
829 old_cred = override_creds(file->f_cred);
830 f = open_exec(e->interpreter);
831 revert_creds(old_cred);
832 if (IS_ERR(f)) {
833 pr_notice("register: failed to install interpreter file %s\n",
834 e->interpreter);
835 kfree(e);
836 return PTR_ERR(f);
837 }
838 e->interp_file = f;
839 }
840
841 inode_lock(d_inode(root));
842 dentry = lookup_one_len(e->name, root, strlen(e->name));
843 err = PTR_ERR(dentry);
844 if (IS_ERR(dentry))
845 goto out;
846
847 err = -EEXIST;
848 if (d_really_is_positive(dentry))
849 goto out2;
850
851 inode = bm_get_inode(sb, S_IFREG | 0644);
852
853 err = -ENOMEM;
854 if (!inode)
855 goto out2;
856
857 refcount_set(&e->users, 1);
858 e->dentry = dget(dentry);
859 inode->i_private = e;
860 inode->i_fop = &bm_entry_operations;
861
862 d_instantiate(dentry, inode);
863 misc = i_binfmt_misc(inode);
864 write_lock(&misc->entries_lock);
865 list_add(&e->list, &misc->entries);
866 write_unlock(&misc->entries_lock);
867
868 err = 0;
869 out2:
870 dput(dentry);
871 out:
872 inode_unlock(d_inode(root));
873
874 if (err) {
875 if (f)
876 filp_close(f, NULL);
877 kfree(e);
878 return err;
879 }
880 return count;
881 }
882
/* File operations for the write-only "register" file (see bm_fill_super()). */
static const struct file_operations bm_register_operations = {
	.write = bm_register_write,
	.llseek = noop_llseek,
};
887
888 /* /status */
889
890 static ssize_t
bm_status_read(struct file * file,char __user * buf,size_t nbytes,loff_t * ppos)891 bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
892 {
893 struct binfmt_misc *misc;
894 char *s;
895
896 misc = i_binfmt_misc(file_inode(file));
897 s = misc->enabled ? "enabled\n" : "disabled\n";
898 return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
899 }
900
bm_status_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)901 static ssize_t bm_status_write(struct file *file, const char __user *buffer,
902 size_t count, loff_t *ppos)
903 {
904 struct binfmt_misc *misc;
905 int res = parse_command(buffer, count);
906 Node *e, *next;
907 struct inode *inode;
908
909 misc = i_binfmt_misc(file_inode(file));
910 switch (res) {
911 case 1:
912 /* Disable all handlers. */
913 misc->enabled = false;
914 break;
915 case 2:
916 /* Enable all handlers. */
917 misc->enabled = true;
918 break;
919 case 3:
920 /* Delete all handlers. */
921 inode = d_inode(file_inode(file)->i_sb->s_root);
922 inode_lock(inode);
923
924 /*
925 * In order to add new element or remove elements from the list
926 * via bm_{entry,register,status}_write() inode_lock() on the
927 * root inode must be held.
928 * The lock is exclusive ensuring that the list can't be
929 * modified. Only load_misc_binary() can access but does so
930 * read-only. So we only need to take the write lock when we
931 * actually remove the entry from the list.
932 */
933 list_for_each_entry_safe(e, next, &misc->entries, list)
934 remove_binfmt_handler(misc, e);
935
936 inode_unlock(inode);
937 break;
938 default:
939 return res;
940 }
941
942 return count;
943 }
944
/* File operations for the "status" file (see bm_fill_super()). */
static const struct file_operations bm_status_operations = {
	.read = bm_status_read,
	.write = bm_status_write,
	.llseek = default_llseek,
};
950
951 /* Superblock handling */
952
bm_put_super(struct super_block * sb)953 static void bm_put_super(struct super_block *sb)
954 {
955 struct user_namespace *user_ns = sb->s_fs_info;
956
957 sb->s_fs_info = NULL;
958 put_user_ns(user_ns);
959 }
960
/* Super block operations, installed by bm_fill_super(). */
static const struct super_operations s_ops = {
	.statfs = simple_statfs,
	.evict_inode = bm_evict_inode,	/* drops the handler tied to an entry inode */
	.put_super = bm_put_super,	/* drops the user namespace reference */
};
966
bm_fill_super(struct super_block * sb,struct fs_context * fc)967 static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
968 {
969 int err;
970 struct user_namespace *user_ns = sb->s_user_ns;
971 struct binfmt_misc *misc;
972 static const struct tree_descr bm_files[] = {
973 [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
974 [3] = {"register", &bm_register_operations, S_IWUSR},
975 /* last one */ {""}
976 };
977
978 if (WARN_ON(user_ns != current_user_ns()))
979 return -EINVAL;
980
981 /*
982 * Lazily allocate a new binfmt_misc instance for this namespace, i.e.
983 * do it here during the first mount of binfmt_misc. We don't need to
984 * waste memory for every user namespace allocation. It's likely much
985 * more common to not mount a separate binfmt_misc instance than it is
986 * to mount one.
987 *
988 * While multiple superblocks can exist they are keyed by userns in
989 * s_fs_info for binfmt_misc. Hence, the vfs guarantees that
990 * bm_fill_super() is called exactly once whenever a binfmt_misc
991 * superblock for a userns is created. This in turn lets us conclude
992 * that when a binfmt_misc superblock is created for the first time for
993 * a userns there's no one racing us. Therefore we don't need any
994 * barriers when we dereference binfmt_misc.
995 */
996 misc = user_ns->binfmt_misc;
997 if (!misc) {
998 /*
999 * If it turns out that most user namespaces actually want to
1000 * register their own binary type handler and therefore all
1001 * create their own separate binfm_misc mounts we should
1002 * consider turning this into a kmem cache.
1003 */
1004 misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL);
1005 if (!misc)
1006 return -ENOMEM;
1007
1008 INIT_LIST_HEAD(&misc->entries);
1009 rwlock_init(&misc->entries_lock);
1010
1011 /* Pairs with smp_load_acquire() in load_binfmt_misc(). */
1012 smp_store_release(&user_ns->binfmt_misc, misc);
1013 }
1014
1015 /*
1016 * When the binfmt_misc superblock for this userns is shutdown
1017 * ->enabled might have been set to false and we don't reinitialize
1018 * ->enabled again in put_super() as someone might already be mounting
1019 * binfmt_misc again. It also would be pointless since by the time
1020 * ->put_super() is called we know that the binary type list for this
1021 * bintfmt_misc mount is empty making load_misc_binary() return
1022 * -ENOEXEC independent of whether ->enabled is true. Instead, if
1023 * someone mounts binfmt_misc for the first time or again we simply
1024 * reset ->enabled to true.
1025 */
1026 misc->enabled = true;
1027
1028 err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
1029 if (!err)
1030 sb->s_op = &s_ops;
1031 return err;
1032 }
1033
bm_free(struct fs_context * fc)1034 static void bm_free(struct fs_context *fc)
1035 {
1036 if (fc->s_fs_info)
1037 put_user_ns(fc->s_fs_info);
1038 }
1039
bm_get_tree(struct fs_context * fc)1040 static int bm_get_tree(struct fs_context *fc)
1041 {
1042 return get_tree_keyed(fc, bm_fill_super, get_user_ns(fc->user_ns));
1043 }
1044
/* fs_context operations, installed by bm_init_fs_context(). */
static const struct fs_context_operations bm_context_ops = {
	.free = bm_free,
	.get_tree = bm_get_tree,
};
1049
bm_init_fs_context(struct fs_context * fc)1050 static int bm_init_fs_context(struct fs_context *fc)
1051 {
1052 fc->ops = &bm_context_ops;
1053 return 0;
1054 }
1055
/*
 * The binary format handler; inserted by init_misc_binfmt() and removed by
 * exit_misc_binfmt(). load_misc_binary() is its load_binary callback.
 */
static struct linux_binfmt misc_format = {
	.module = THIS_MODULE,
	.load_binary = load_misc_binary,
};
1060
/*
 * The "binfmt_misc" filesystem type; registered by init_misc_binfmt() and
 * unregistered by exit_misc_binfmt().
 */
static struct file_system_type bm_fs_type = {
	.owner = THIS_MODULE,
	.name = "binfmt_misc",
	.init_fs_context = bm_init_fs_context,
	.fs_flags = FS_USERNS_MOUNT,
	.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("binfmt_misc");
1069
init_misc_binfmt(void)1070 static int __init init_misc_binfmt(void)
1071 {
1072 int err = register_filesystem(&bm_fs_type);
1073 if (!err)
1074 insert_binfmt(&misc_format);
1075 return err;
1076 }
1077
exit_misc_binfmt(void)1078 static void __exit exit_misc_binfmt(void)
1079 {
1080 unregister_binfmt(&misc_format);
1081 unregister_filesystem(&bm_fs_type);
1082 }
1083
/* Hook up the init and exit functions and describe the module. */
core_initcall(init_misc_binfmt);
module_exit(exit_misc_binfmt);
MODULE_DESCRIPTION("Kernel support for miscellaneous binaries");
MODULE_LICENSE("GPL");
1088