// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/ksm.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
#include <linux/syscall_user_dispatch.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>
#include <linux/futex.h>

#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/stat.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/sched/cputime.h>
#include <linux/rcupdate.h>
#include <linux/uidgid.h>
#include <linux/cred.h>

#include <linux/nospec.h>

#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#include <trace/events/task.h>

#include "uid16.h"

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a, b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a, b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a, b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a, b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef GET_FP_MODE
# define GET_FP_MODE(a)		(-EINVAL)
#endif
#ifndef SET_FP_MODE
# define SET_FP_MODE(a, b)	(-EINVAL)
#endif
#ifndef SVE_SET_VL
# define SVE_SET_VL(a)		(-EINVAL)
#endif
#ifndef SVE_GET_VL
# define SVE_GET_VL()		(-EINVAL)
#endif
#ifndef SME_SET_VL
# define SME_SET_VL(a)		(-EINVAL)
#endif
#ifndef SME_GET_VL
# define SME_GET_VL()		(-EINVAL)
#endif
#ifndef PAC_RESET_KEYS
# define PAC_RESET_KEYS(a, b)	(-EINVAL)
#endif
#ifndef PAC_SET_ENABLED_KEYS
# define PAC_SET_ENABLED_KEYS(a, b, c)	(-EINVAL)
#endif
#ifndef PAC_GET_ENABLED_KEYS
# define PAC_GET_ENABLED_KEYS(a)	(-EINVAL)
#endif
#ifndef SET_TAGGED_ADDR_CTRL
# define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
#endif
#ifndef GET_TAGGED_ADDR_CTRL
# define GET_TAGGED_ADDR_CTRL()		(-EINVAL)
#endif
#ifndef RISCV_V_SET_CONTROL
# define RISCV_V_SET_CONTROL(a)		(-EINVAL)
#endif
#ifndef RISCV_V_GET_CONTROL
# define RISCV_V_GET_CONTROL()		(-EINVAL)
#endif
#ifndef RISCV_SET_ICACHE_FLUSH_CTX
# define RISCV_SET_ICACHE_FLUSH_CTX(a, b)	(-EINVAL)
#endif
#ifndef PPC_GET_DEXCR_ASPECT
# define PPC_GET_DEXCR_ASPECT(a, b)	(-EINVAL)
#endif
#ifndef PPC_SET_DEXCR_ASPECT
# define PPC_SET_DEXCR_ASPECT(a, b, c)	(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */
int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

static const struct ctl_table overflow_sysctl_table[] = {
	{
		.procname	= "overflowuid",
		.data		= &overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_MAXOLDUID,
	},
	{
		.procname	= "overflowgid",
		.data		= &overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_MAXOLDUID,
	},
};

static int __init init_overflow_sysctl(void)
{
	register_sysctl_init("kernel", overflow_sysctl_table);
	return 0;
}

postcore_initcall(init_overflow_sysctl);

/*
 * Returns true if current's euid is same as p's uid or euid,
 * or has CAP_SYS_NICE to p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);

	if (uid_eq(pcred->uid, cred->euid) ||
	    uid_eq(pcred->euid, cred->euid))
		return true;
	if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
		return true;
	return false;
}

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < MIN_NICE)
		niceval = MIN_NICE;
	if (niceval > MAX_NICE)
		niceval = MAX_NICE;

	rcu_read_lock();
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		read_lock(&tasklist_lock);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		read_unlock(&tasklist_lock);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		for_each_process_thread(g, p) {
			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
				error = set_one_prio(p, niceval, error);
		}
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	rcu_read_unlock();
out:
	return error;
}
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		read_lock(&tasklist_lock);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		read_unlock(&tasklist_lock);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		for_each_process_thread(g, p) {
			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
				niceval = nice_to_rlimit(task_nice(p));
				if (niceval > retval)
					retval = niceval;
			}
		}
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	rcu_read_unlock();

	return retval;
}
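/*
 * Illustrative sketch (not kernel code): with the +20 offset above,
 * nice_to_rlimit() maps nice -20..19 to 40..1, so a task at nice -5
 * makes this syscall return 25.  Assuming a raw syscall(2) wrapper in
 * userspace, the caller undoes the offset itself:
 *
 *	long v = syscall(SYS_getpriority, PRIO_PROCESS, 0);
 *	if (v > 0)
 *		printf("nice = %ld\n", 20 - v);
 *
 * (The glibc getpriority() wrapper performs this translation and
 * reports errors through errno, so it returns the plain -20..19 range.)
 */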
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa. (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
#ifdef CONFIG_MULTIUSER
long __sys_setregid(gid_t rgid, gid_t egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    ns_capable_setid(old->user_ns, CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    ns_capable_setid(old->user_ns, CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	retval = security_task_fix_setgid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	return __sys_setregid(rgid, egid);
}
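/*
 * Illustrative sketch (not kernel code): per the comment above, a setgid
 * program can shed its group privilege irrevocably by forcing the real,
 * effective and (implicitly) saved gid all to the real gid:
 *
 *	gid_t rgid = getgid();
 *	if (setregid(rgid, rgid) != 0)
 *		abort();
 *
 * Because the real gid is being set, the saved gid is replaced by the
 * new effective gid, so the old group can no longer be regained.
 */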
/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
long __sys_setgid(gid_t gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	kgid = make_kgid(ns, gid);
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable_setid(old->user_ns, CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		new->egid = new->fsgid = kgid;
	else
		goto error;

	retval = security_task_fix_setgid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	return __sys_setgid(gid);
}

/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	free_uid(new->user);
	new->user = new_user;
	return 0;
}

static void flag_nproc_exceeded(struct cred *new)
{
	if (new->ucounts == current_ucounts())
		return;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (is_rlimit_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
			new->user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa. (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
long __sys_setreuid(uid_t ruid, uid_t euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !ns_capable_setid(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !ns_capable_setid(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	retval = set_cred_ucounts(new);
	if (retval < 0)
		goto error;

	flag_nproc_exceeded(new);
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	return __sys_setreuid(ruid, euid);
}
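/*
 * Illustrative sketch (not kernel code): the BSD-style swap described
 * above.  A setuid-root program (real uid = user, effective uid = 0)
 * can temporarily drop privilege and later regain it by swapping the
 * real and effective uids:
 *
 *	setreuid(geteuid(), getuid());	// now real=0, effective=user
 *	// ... do unprivileged work ...
 *	setreuid(geteuid(), getuid());	// swap back: effective=0
 *
 * The first call parks the root id in the real uid, which is what
 * makes the swap back permissible.
 */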
/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_IDS is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
long __sys_setuid(uid_t uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable_setid(old->user_ns, CAP_SETUID)) {
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	retval = set_cred_ucounts(new);
	if (retval < 0)
		goto error;

	flag_nproc_exceeded(new);
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	return __sys_setuid(uid);
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;
	bool ruid_new, euid_new, suid_new;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;

	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	old = current_cred();

	/* check for no-op */
	if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
	    (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
				    uid_eq(keuid, old->fsuid))) &&
	    (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
		return 0;

	ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
		   !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
	euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
		   !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
	suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
		   !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
	if ((ruid_new || euid_new || suid_new) &&
	    !ns_capable_setid(old->user_ns, CAP_SETUID))
		return -EPERM;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(kruid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	retval = set_cred_ucounts(new);
	if (retval < 0)
		goto error;

	flag_nproc_exceeded(new);
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	return __sys_setresuid(ruid, euid, suid);
}
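/*
 * Illustrative sketch (not kernel code): as the comment above notes,
 * setresuid() is general enough to express the other calls.  A 4.4BSD
 * compatible seteuid(e), for instance, is just:
 *
 *	int seteuid(uid_t e)
 *	{
 *		return setresuid((uid_t)-1, e, (uid_t)-1);
 *	}
 *
 * where -1 means "leave this id unchanged", exactly as in the validity
 * and no-op checks at the top of __sys_setresuid().
 */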
SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{
	const struct cred *cred = current_cred();
	int retval;
	uid_t ruid, euid, suid;

	ruid = from_kuid_munged(cred->user_ns, cred->uid);
	euid = from_kuid_munged(cred->user_ns, cred->euid);
	suid = from_kuid_munged(cred->user_ns, cred->suid);

	retval = put_user(ruid, ruidp);
	if (!retval) {
		retval = put_user(euid, euidp);
		if (!retval)
			return put_user(suid, suidp);
	}
	return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;
	bool rgid_new, egid_new, sgid_new;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	old = current_cred();

	/* check for no-op */
	if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
	    (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
				    gid_eq(kegid, old->fsgid))) &&
	    (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
		return 0;

	rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
		   !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
	egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
		   !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
	sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
		   !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
	if ((rgid_new || egid_new || sgid_new) &&
	    !ns_capable_setid(old->user_ns, CAP_SETGID))
		return -EPERM;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	retval = security_task_fix_setgid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	return __sys_setresgid(rgid, egid, sgid);
}

SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{
	const struct cred *cred = current_cred();
	int retval;
	gid_t rgid, egid, sgid;

	rgid = from_kgid_munged(cred->user_ns, cred->gid);
	egid = from_kgid_munged(cred->user_ns, cred->egid);
	sgid = from_kgid_munged(cred->user_ns, cred->sgid);

	retval = put_user(rgid, rgidp);
	if (!retval) {
		retval = put_user(egid, egidp);
		if (!retval)
			retval = put_user(sgid, sgidp);
	}

	return retval;
}
/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
long __sys_setfsuid(uid_t uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    ns_capable_setid(old->user_ns, CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}

SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	return __sys_setfsuid(uid);
}

/*
 * Samma på svenska.. ("the same in Swedish": the fsgid counterpart of
 * setfsuid() above.)
 */
long __sys_setfsgid(gid_t gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    ns_capable_setid(old->user_ns, CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			if (security_task_fix_setgid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}

SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	return __sys_setfsgid(gid);
}
#endif /* CONFIG_MULTIUSER */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}
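/*
 * Illustrative sketch (not kernel code): setfsuid() reports the
 * *previous* fsuid on every path above, even when the change is
 * refused, so there is no error return.  Assuming a raw wrapper, the
 * usual way to check whether it worked is to call it twice:
 *
 *	setfsuid(fsuid);			// attempt the switch
 *	if ((uid_t)setfsuid(-1) != fsuid)	// read back
 *		... the switch was refused ...
 *
 * Passing an invalid uid such as -1 leaves the fsuid untouched and just
 * returns the current value, which is what makes the read-back work.
 */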
/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us.  However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_uid());
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_euid());
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_gid());
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_egid());
}

static void do_sys_times(struct tms *tms)
{
	u64 tgutime, tgstime, cutime, cstime;

	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	tms->tms_utime = nsec_to_clock_t(tgutime);
	tms->tms_stime = nsec_to_clock_t(tgstime);
	tms->tms_cutime = nsec_to_clock_t(cutime);
	tms->tms_cstime = nsec_to_clock_t(cstime);
}

SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}

#ifdef CONFIG_COMPAT
static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
{
	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
}

COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tms;
		struct compat_tms tmp;

		do_sys_times(&tms);
		/* Convert our struct tms to the compat version. */
		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return compat_jiffies_to_clock_t(jiffies);
}
#endif
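/*
 * Illustrative note (not kernel code): times() reports everything in
 * clock ticks, and the return value is the tick count since boot, which
 * can legitimately look like a negative errno once jiffies grows large -
 * hence the force_successful_syscall_return() above.  Assuming a
 * glibc-style wrapper, elapsed wall time between two samples is:
 *
 *	struct tms t1, t2;
 *	clock_t start = times(&t1);
 *	// ... work ...
 *	clock_t end = times(&t2);
 *	double secs = (double)(end - start) / sysconf(_SC_CLK_TCK);
 */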
/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it.  I also don't fully
 * understand sessions/pgrp etc.  Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * !PF_FORKNOEXEC check to conform completely to POSIX.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pids[PIDTYPE_MAX] = { 0 };
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (!(p->flags & PF_FORKNOEXEC))
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(pids, p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	free_pids(pids);
	return err;
}

static int do_getpgid(pid_t pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	return do_getpgid(pid);
}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0(getpgrp)
{
	return do_getpgid(0);
}

#endif

SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}

static void set_special_pids(struct pid **pids, struct pid *pid)
{
	struct task_struct *curr = current->group_leader;

	if (task_session(curr) != pid)
		change_pid(pids, curr, PIDTYPE_SID, pid);

	if (task_pgrp(curr) != pid)
		change_pid(pids, curr, PIDTYPE_PGID, pid);
}

int ksys_setsid(void)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	struct pid *pids[PIDTYPE_MAX] = { 0 };
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	set_special_pids(pids, sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	free_pids(pids);
	if (err > 0) {
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}

SYSCALL_DEFINE0(setsid)
{
	return ksys_setsid();
}

DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be
 * 2.6.60.
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		v = LINUX_VERSION_PATCHLEVEL + 60;
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}

SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	struct new_utsname tmp;

	down_read(&uts_sem);
	memcpy(&tmp, utsname(), sizeof(tmp));
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	return 0;
}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	struct old_utsname tmp;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	memcpy(&tmp, utsname(), sizeof(tmp));
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	return 0;
}

SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	struct oldold_utsname tmp;

	if (!name)
		return -EFAULT;

	memset(&tmp, 0, sizeof(tmp));

	down_read(&uts_sem);
	memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
	memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
	memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN);
	memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN);
	memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN);
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

	if (override_architecture(name))
		return -EFAULT;
	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	return 0;
}
#endif
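/*
 * Illustrative example (no extra code) for override_release() above:
 * for a kernel whose UTS_RELEASE is "6.9.0-rc1", a task running with
 * the UNAME26 personality scans past "6.9.0", stops at '-', and gets
 * "2.6.69-rc1" in the release field reported by uname(2), since
 * LINUX_VERSION_PATCHLEVEL (9) + 60 = 69.
 */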
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u;

		add_device_randomness(tmp, len);
		down_write(&uts_sem);
		u = utsname();
		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_HOSTNAME);
		up_write(&uts_sem);
	}
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i;
	struct new_utsname *u;
	char tmp[__NEW_UTS_LEN + 1];

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	memcpy(tmp, u->nodename, i);
	up_read(&uts_sem);
	if (copy_to_user(name, tmp, i))
		return -EFAULT;
	return 0;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u;

		add_device_randomness(tmp, len);
		down_write(&uts_sem);
		u = utsname();
		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_DOMAINNAME);
		up_write(&uts_sem);
	}
	return errno;
}

/* make sure you are allowed to change @tsk limits before calling this */
static int do_prlimit(struct task_struct *tsk, unsigned int resource,
		      struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	resource = array_index_nospec(resource, RLIM_NLIMITS);

	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* Holding a refcount on tsk protects tsk->signal from disappearing. */
	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/*
		 * Keep the capable check against init_user_ns until cgroups can
		 * contain all limits.
		 */
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk, resource, new_rlim);
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling.  Arm the posix CPU timer if the limit is not
	 * infinite.  In case of RLIM_INFINITY the posix CPU timer code
	 * ignores the rlimit.
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
	    new_rlim->rlim_cur != RLIM_INFINITY &&
	    IS_ENABLED(CONFIG_POSIX_TIMERS)) {
		/*
		 * update_rlimit_cpu can fail if the task is exiting, but there
		 * may be other tasks in the thread group that are not exiting,
		 * and they need their cpu timers adjusted.
		 *
		 * The group_leader is the last task to be released, so if we
		 * cannot update_rlimit_cpu on it, then the entire process is
		 * exiting and we do not need to update at all.
		 */
		update_rlimit_cpu(tsk->group_leader, new_rlim->rlim_cur);
	}

	return retval;
}

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit value;
	int ret;

	ret = do_prlimit(current, resource, NULL, &value);
	if (!ret)
		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;

	return ret;
}

#ifdef CONFIG_COMPAT

COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{
	struct rlimit r;
	struct compat_rlimit r32;

	if (copy_from_user(&r32, rlim, sizeof(struct compat_rlimit)))
		return -EFAULT;

	if (r32.rlim_cur == COMPAT_RLIM_INFINITY)
		r.rlim_cur = RLIM_INFINITY;
	else
		r.rlim_cur = r32.rlim_cur;
	if (r32.rlim_max == COMPAT_RLIM_INFINITY)
		r.rlim_max = RLIM_INFINITY;
	else
		r.rlim_max = r32.rlim_max;
	return do_prlimit(current, resource, &r, NULL);
}

COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{
	struct rlimit r;
	int ret;

	ret = do_prlimit(current, resource, NULL, &r);
	if (!ret) {
		struct compat_rlimit r32;

		if (r.rlim_cur > COMPAT_RLIM_INFINITY)
			r32.rlim_cur = COMPAT_RLIM_INFINITY;
		else
			r32.rlim_cur = r.rlim_cur;
		if (r.rlim_max > COMPAT_RLIM_INFINITY)
			r32.rlim_max = COMPAT_RLIM_INFINITY;
		else
			r32.rlim_max = r.rlim_max;

		if (copy_to_user(rlim, &r32, sizeof(struct compat_rlimit)))
			return -EFAULT;
	}
	return ret;
}

#endif
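/*
 * Illustrative sketch (not kernel code): both getrlimit() and
 * setrlimit() above funnel into do_prlimit() on the current task.
 * Raising the soft limit up to the hard limit needs no capability:
 *
 *	struct rlimit rl;
 *	getrlimit(RLIMIT_NOFILE, &rl);
 *	rl.rlim_cur = rl.rlim_max;	// soft up to hard: always allowed
 *	setrlimit(RLIMIT_NOFILE, &rl);
 *
 * Raising rlim_max itself is what requires CAP_SYS_RESOURCE (and for
 * RLIMIT_NOFILE the hard limit is additionally capped by sysctl_nr_open).
 */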
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit.  Needed for some apps.
 */
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	resource = array_index_nospec(resource, RLIM_NLIMITS);
	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{
	struct rlimit r;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	resource = array_index_nospec(resource, RLIM_NLIMITS);
	task_lock(current->group_leader);
	r = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (r.rlim_cur > 0x7FFFFFFF)
		r.rlim_cur = 0x7FFFFFFF;
	if (r.rlim_max > 0x7FFFFFFF)
		r.rlim_max = 0x7FFFFFFF;

	if (put_user(r.rlim_cur, &rlim->rlim_cur) ||
	    put_user(r.rlim_max, &rlim->rlim_max))
		return -EFAULT;
	return 0;
}
#endif

#endif

static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}

static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	if (rlim->rlim_cur == RLIM_INFINITY)
		rlim64->rlim_cur = RLIM64_INFINITY;
	else
		rlim64->rlim_cur = rlim->rlim_cur;
	if (rlim->rlim_max == RLIM_INFINITY)
		rlim64->rlim_max = RLIM64_INFINITY;
	else
		rlim64->rlim_max = rlim->rlim_max;
}

static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	if (rlim64_is_infinity(rlim64->rlim_cur))
		rlim->rlim_cur = RLIM_INFINITY;
	else
		rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
	if (rlim64_is_infinity(rlim64->rlim_max))
		rlim->rlim_max = RLIM_INFINITY;
	else
		rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}

/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task,
				    unsigned int flags)
{
	const struct cred *cred = current_cred(), *tcred;
	bool id_match;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	id_match = (uid_eq(cred->uid, tcred->euid) &&
		    uid_eq(cred->uid, tcred->suid) &&
		    uid_eq(cred->uid, tcred->uid) &&
		    gid_eq(cred->gid, tcred->egid) &&
		    gid_eq(cred->gid, tcred->sgid) &&
		    gid_eq(cred->gid, tcred->gid));
	if (!id_match && !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return -EPERM;

	return security_task_prlimit(cred, tcred, flags);
}

SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	unsigned int checkflags = 0;
	int ret;

	if (old_rlim)
		checkflags |= LSM_PRLIMIT_READ;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
		checkflags |= LSM_PRLIMIT_WRITE;
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk, checkflags);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);
	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}

SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	return do_prlimit(current, resource, &new_rlim, NULL);
}
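/*
 * Illustrative sketch (not kernel code): prlimit64() subsumes both
 * getrlimit() and setrlimit() and can target another task.  Passing a
 * NULL new limit makes it a pure read, which only needs the uid/gid
 * match (or CAP_SYS_RESOURCE) checked in check_prlimit_permission():
 *
 *	struct rlimit64 old;
 *	syscall(SYS_prlimit64, pid, RLIMIT_NOFILE, NULL, &old);
 *
 * glibc exposes this as prlimit(2), using its ordinary struct rlimit.
 */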
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases: current multithreaded, non-current single threaded, and
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields.  If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting.  So we should place a read memory barrier when we avoid the lock.
 * On the writer side, write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields.  But we don't do this yet to keep things simple.
 */

static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

void getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	u64 tgutime, tgstime, utime, stime;
	unsigned long maxrss;
	struct mm_struct *mm;
	struct signal_struct *sig = p->signal;
	unsigned int seq = 0;

retry:
	memset(r, 0, sizeof(*r));
	utime = stime = 0;
	maxrss = 0;

	if (who == RUSAGE_THREAD) {
		task_cputime_adjusted(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = sig->maxrss;
		goto out_thread;
	}

	flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		utime = sig->cutime;
		stime = sig->cstime;
		r->ru_nvcsw = sig->cnvcsw;
		r->ru_nivcsw = sig->cnivcsw;
		r->ru_minflt = sig->cmin_flt;
		r->ru_majflt = sig->cmaj_flt;
		r->ru_inblock = sig->cinblock;
		r->ru_oublock = sig->coublock;
		maxrss = sig->cmaxrss;

		if (who == RUSAGE_CHILDREN)
			break;
		fallthrough;

	case RUSAGE_SELF:
		r->ru_nvcsw += sig->nvcsw;
		r->ru_nivcsw += sig->nivcsw;
		r->ru_minflt += sig->min_flt;
		r->ru_majflt += sig->maj_flt;
		r->ru_inblock += sig->inblock;
		r->ru_oublock += sig->oublock;
		if (maxrss < sig->maxrss)
			maxrss = sig->maxrss;

		rcu_read_lock();
		__for_each_thread(sig, t)
			accumulate_thread_rusage(t, r);
		rcu_read_unlock();

		break;

	default:
		BUG();
	}

	if (need_seqretry(&sig->stats_lock, seq)) {
		seq = 1;
		goto retry;
	}
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);

	if (who == RUSAGE_CHILDREN)
		goto out_children;

	thread_group_cputime_adjusted(p, &tgutime, &tgstime);
	utime += tgutime;
	stime += tgstime;

out_thread:
	mm = get_task_mm(p);
	if (mm) {
		setmax_mm_hiwater_rss(&maxrss, mm);
		mmput(mm);
	}

out_children:
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
	r->ru_utime = ns_to_kernel_old_timeval(utime);
	r->ru_stime = ns_to_kernel_old_timeval(stime);
}

SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	getrusage(current, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	getrusage(current, who, &r);
	return put_compat_rusage(&r, ru);
}
#endif

SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}

static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	CLASS(fd, exe)(fd);
	struct inode *inode;
	int err;

	if (fd_empty(exe))
		return -EBADF;

	inode = file_inode(fd_file(exe));

	/*
	 * Because the original mm->exe_file points to executable file, make
	 * sure that this one is executable as well, to avoid breaking an
	 * overall picture.
	 */
	if (!S_ISREG(inode->i_mode) || path_noexec(&fd_file(exe)->f_path))
		return -EACCES;

	err = file_permission(fd_file(exe), MAY_EXEC);
	if (err)
		return err;

	return replace_mm_exe_file(mm, fd_file(exe));
}

/*
 * Check arithmetic relations of passed addresses.
 *
 * WARNING: we don't require any capability here so be very careful
 * in what is allowed for modification from userspace.
 */
static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map)
{
	unsigned long mmap_max_addr = TASK_SIZE;
	int error = -EINVAL, i;

	static const unsigned char offsets[] = {
		offsetof(struct prctl_mm_map, start_code),
		offsetof(struct prctl_mm_map, end_code),
		offsetof(struct prctl_mm_map, start_data),
		offsetof(struct prctl_mm_map, end_data),
		offsetof(struct prctl_mm_map, start_brk),
		offsetof(struct prctl_mm_map, brk),
		offsetof(struct prctl_mm_map, start_stack),
		offsetof(struct prctl_mm_map, arg_start),
		offsetof(struct prctl_mm_map, arg_end),
		offsetof(struct prctl_mm_map, env_start),
		offsetof(struct prctl_mm_map, env_end),
	};

	/*
	 * Make sure the members are not somewhere outside
	 * of allowed address space.
	 */
	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
		u64 val = *(u64 *)((char *)prctl_map + offsets[i]);

		if ((unsigned long)val >= mmap_max_addr ||
		    (unsigned long)val < mmap_min_addr)
			goto out;
	}

	/*
	 * Make sure the pairs are ordered.
	 */
#define __prctl_check_order(__m1, __op, __m2)				\
	((unsigned long)prctl_map->__m1 __op				\
	 (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
	error  = __prctl_check_order(start_code, <, end_code);
	error |= __prctl_check_order(start_data, <=, end_data);
	error |= __prctl_check_order(start_brk, <=, brk);
	error |= __prctl_check_order(arg_start, <=, arg_end);
	error |= __prctl_check_order(env_start, <=, env_end);
	if (error)
		goto out;
#undef __prctl_check_order

	error = -EINVAL;

	/*
	 * Nor should we allow overriding limits if they are set.
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
			      prctl_map->start_brk, prctl_map->end_data,
			      prctl_map->start_data))
		goto out;

	error = 0;
out:
	return error;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
	unsigned long user_auxv[AT_VECTOR_SIZE];
	struct mm_struct *mm = current->mm;
	int error;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
	BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);

	if (opt == PR_SET_MM_MAP_SIZE)
		return put_user((unsigned int)sizeof(prctl_map),
				(unsigned int __user *)addr);

	if (data_size != sizeof(prctl_map))
		return -EINVAL;

	if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
		return -EFAULT;

	error = validate_prctl_map_addr(&prctl_map);
	if (error)
		return error;

	if (prctl_map.auxv_size) {
		/*
		 * Someone is trying to cheat the auxv vector.
		 */
		if (!prctl_map.auxv ||
		    prctl_map.auxv_size > sizeof(mm->saved_auxv))
			return -EINVAL;

		memset(user_auxv, 0, sizeof(user_auxv));
		if (copy_from_user(user_auxv,
				   (const void __user *)prctl_map.auxv,
				   prctl_map.auxv_size))
			return -EFAULT;

		/* Last entry must be AT_NULL as specification requires */
		user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
		user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
	}

	if (prctl_map.exe_fd != (u32)-1) {
		/*
		 * Check if the current user is checkpoint/restore capable.
		 * At the time of this writing, it checks for CAP_SYS_ADMIN
		 * or CAP_CHECKPOINT_RESTORE.
		 * Note that a user with access to ptrace can masquerade an
		 * arbitrary program as any executable, even setuid ones.
		 * This may have implications in the tomoyo subsystem.
		 */
		if (!checkpoint_restore_ns_capable(current_user_ns()))
			return -EPERM;

		error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
		if (error)
			return error;
	}

	/*
	 * arg_lock protects concurrent updates but we still need mmap_lock for
	 * read to exclude races with sys_brk.
	 */
	mmap_read_lock(mm);

	/*
	 * We don't validate if these members are pointing to
	 * real present VMAs because the application may have the
	 * corresponding VMAs already unmapped, and the kernel uses these
	 * members mostly for statistics output in procfs, except
	 *
	 * - @start_brk/@brk which are used in do_brk_flags, but the kernel
	 *   looks up the VMAs when updating these members, so anything
	 *   wrong written here makes the kernel swear at the userspace
	 *   program but won't lead to any problem in the kernel itself
	 */

	spin_lock(&mm->arg_lock);
	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;
	spin_unlock(&mm->arg_lock);

	/*
	 * Note this update of @saved_auxv is lockless, thus if someone
	 * reads this member in procfs while we're updating, they may get
	 * partly updated results.  It's a known and acceptable trade-off:
	 * we leave it as is to avoid introducing additional locks here,
	 * which would make the kernel more complex.
	 */
	if (prctl_map.auxv_size)
		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

	mmap_read_unlock(mm);
	return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */
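/*
 * Illustrative sketch (not kernel code): the intended calling sequence
 * for the map interface above, as used by checkpoint/restore tools -
 * query the expected structure size first, then submit the whole map:
 *
 *	unsigned int size;
 *	struct prctl_mm_map map = { .exe_fd = -1, ... };
 *
 *	prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (long)&size, 0, 0);
 *	prctl(PR_SET_MM, PR_SET_MM_MAP, (long)&map, size, 0);
 *
 * Sizes other than sizeof(struct prctl_mm_map) are rejected with
 * -EINVAL, which lets old and new userspace detect each other.
 */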

static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
			  unsigned long len)
{
	/*
	 * This doesn't move the auxiliary vector itself since it's pinned to
	 * mm_struct, but it permits filling the vector with new values. It's
	 * up to the caller to provide sane values here, otherwise userspace
	 * tools which use this vector might be unhappy.
	 */
	unsigned long user_auxv[AT_VECTOR_SIZE] = {};

	if (len > sizeof(user_auxv))
		return -EINVAL;

	if (copy_from_user(user_auxv, (const void __user *)addr, len))
		return -EFAULT;

	/* Make sure the last entry is always AT_NULL */
	user_auxv[AT_VECTOR_SIZE - 2] = 0;
	user_auxv[AT_VECTOR_SIZE - 1] = 0;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

	task_lock(current);
	memcpy(mm->saved_auxv, user_auxv, len);
	task_unlock(current);

	return 0;
}

static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	struct mm_struct *mm = current->mm;
	struct prctl_mm_map prctl_map = {
		.auxv = NULL,
		.auxv_size = 0,
		.exe_fd = -1,
	};
	struct vm_area_struct *vma;
	int error;

	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
			      opt != PR_SET_MM_MAP &&
			      opt != PR_SET_MM_MAP_SIZE)))
		return -EINVAL;

#ifdef CONFIG_CHECKPOINT_RESTORE
	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (opt == PR_SET_MM_AUXV)
		return prctl_set_auxv(mm, addr, arg4);

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	/*
	 * arg_lock protects concurrent updates of the arg boundaries; we
	 * need mmap_lock for a) concurrent sys_brk and b) finding the VMA
	 * for addr validation.
	 */
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);

	spin_lock(&mm->arg_lock);
	prctl_map.start_code = mm->start_code;
	prctl_map.end_code = mm->end_code;
	prctl_map.start_data = mm->start_data;
	prctl_map.end_data = mm->end_data;
	prctl_map.start_brk = mm->start_brk;
	prctl_map.brk = mm->brk;
	prctl_map.start_stack = mm->start_stack;
	prctl_map.arg_start = mm->arg_start;
	prctl_map.arg_end = mm->arg_end;
	prctl_map.env_start = mm->env_start;
	prctl_map.env_end = mm->env_end;

	switch (opt) {
	case PR_SET_MM_START_CODE:
		prctl_map.start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		prctl_map.end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		prctl_map.start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		prctl_map.end_data = addr;
		break;
	case PR_SET_MM_START_STACK:
		prctl_map.start_stack = addr;
		break;
	case PR_SET_MM_START_BRK:
		prctl_map.start_brk = addr;
		break;
	case PR_SET_MM_BRK:
		prctl_map.brk = addr;
		break;
	case PR_SET_MM_ARG_START:
		prctl_map.arg_start = addr;
		break;
	case PR_SET_MM_ARG_END:
		prctl_map.arg_end = addr;
		break;
	case PR_SET_MM_ENV_START:
		prctl_map.env_start = addr;
		break;
	case PR_SET_MM_ENV_END:
		prctl_map.env_end = addr;
		break;
	default:
		goto out;
	}

	error = validate_prctl_map_addr(&prctl_map);
	if (error)
		goto out;

	switch (opt) {
	/*
	 * If the command line arguments and environment are placed
	 * somewhere else on the stack, we can set them up here:
	 * ARG_START/END for the command line arguments and
	 * ENV_START/END for the environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
	}

	mm->start_code = prctl_map.start_code;
	mm->end_code = prctl_map.end_code;
	mm->start_data = prctl_map.start_data;
	mm->end_data = prctl_map.end_data;
	mm->start_brk = prctl_map.start_brk;
	mm->brk = prctl_map.brk;
	mm->start_stack = prctl_map.start_stack;
	mm->arg_start = prctl_map.arg_start;
	mm->arg_end = prctl_map.arg_end;
	mm->env_start = prctl_map.env_start;
	mm->env_end = prctl_map.env_end;

	error = 0;
out:
	spin_unlock(&mm->arg_lock);
	mmap_read_unlock(mm);
	return error;
}
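
/*
 * Illustrative userspace usage of the single-field options above (a
 * sketch, not part of the kernel build): with CAP_SYS_RESOURCE, a process
 * that has relocated its argv strings can point the kernel at the new
 * range so /proc/self/cmdline stays meaningful; new_arg_start and
 * new_arg_end are hypothetical addresses inside an existing mapping,
 * since the VMA lookup above rejects unmapped values:
 *
 *	prctl(PR_SET_MM, PR_SET_MM_ARG_START, new_arg_start, 0, 0);
 *	prctl(PR_SET_MM, PR_SET_MM_ARG_END, new_arg_end, 0, 0);
 */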

#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}
#else
static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr)
{
	return -EINVAL;
}
#endif

static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{
	/*
	 * If the task already has has_child_subreaper set, all its
	 * descendants already have the flag too and new descendants will
	 * inherit it on fork, so skip them.
	 *
	 * If we've found the child_reaper, skip the descendants in its
	 * subtree as they will never leave this pidns.
	 */
	if (p->signal->has_child_subreaper ||
	    is_child_reaper(task_pid(p)))
		return 0;

	p->signal->has_child_subreaper = 1;
	return 1;
}
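
/*
 * Illustrative userspace usage (a sketch, not part of the kernel build):
 * a service manager typically marks itself as a child subreaper early so
 * that orphaned descendants are reparented to it rather than to init:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
 */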

int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
{
	return -EINVAL;
}

int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
				    unsigned long ctrl)
{
	return -EINVAL;
}

int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	return -EINVAL;
}

int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	return -EINVAL;
}

int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	return -EINVAL;
}

#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)

static int prctl_set_vma(unsigned long opt, unsigned long addr,
			 unsigned long size, unsigned long arg)
{
	int error;

	switch (opt) {
	case PR_SET_VMA_ANON_NAME:
		error = set_anon_vma_name(addr, size, (const char __user *)arg);
		break;
	default:
		error = -EINVAL;
	}

	return error;
}
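
/*
 * Illustrative userspace usage of PR_SET_VMA_ANON_NAME above (a sketch,
 * not part of the kernel build, and only available on kernels built with
 * anonymous VMA naming): an allocator can label an anonymous mapping so
 * it shows up named in /proc/self/maps; len is a hypothetical size:
 *
 *	#include <sys/mman.h>
 *	#include <sys/prctl.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (unsigned long)p, len,
 *	      (unsigned long)"my-heap");
 */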

static inline unsigned long get_current_mdwe(void)
{
	unsigned long ret = 0;

	if (test_bit(MMF_HAS_MDWE, &current->mm->flags))
		ret |= PR_MDWE_REFUSE_EXEC_GAIN;
	if (test_bit(MMF_HAS_MDWE_NO_INHERIT, &current->mm->flags))
		ret |= PR_MDWE_NO_INHERIT;

	return ret;
}

static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5)
{
	unsigned long current_bits;

	if (arg3 || arg4 || arg5)
		return -EINVAL;

	if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
		return -EINVAL;

	/* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
	if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
		return -EINVAL;

	/*
	 * EOPNOTSUPP might be more appropriate here in principle, but
	 * existing userspace depends on EINVAL specifically.
	 */
	if (!arch_memory_deny_write_exec_supported())
		return -EINVAL;

	current_bits = get_current_mdwe();
	if (current_bits && current_bits != bits)
		return -EPERM; /* Cannot unset the flags */

	if (bits & PR_MDWE_NO_INHERIT)
		set_bit(MMF_HAS_MDWE_NO_INHERIT, &current->mm->flags);
	if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
		set_bit(MMF_HAS_MDWE, &current->mm->flags);

	return 0;
}

static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5)
{
	if (arg2 || arg3 || arg4 || arg5)
		return -EINVAL;
	return get_current_mdwe();
}
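
/*
 * Illustrative userspace usage of the MDWE pair above (a sketch, not part
 * of the kernel build): a hardened process opts in once; as the -EPERM
 * check above shows, the flags cannot be cleared again afterwards:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0);
 *	// from here on, mappings can no longer gain execute permission
 */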

static int prctl_get_auxv(void __user *addr, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	unsigned long size = min_t(unsigned long, sizeof(mm->saved_auxv), len);

	if (size && copy_to_user(addr, mm->saved_auxv, size))
		return -EFAULT;
	return sizeof(mm->saved_auxv);
}

SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		if (current->timer_slack_ns > ULONG_MAX)
			error = ULONG_MAX;
		else
			error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		if (rt_or_dl_task_policy(current))
			break;
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user * __user *)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		if (!arg2)
			break;

		walk_process_tree(me, propagate_has_child_subreaper, NULL);
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		task_set_no_new_privs(current);
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return task_no_new_privs(current) ? 1 : 0;
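	/*
	 * Illustrative userspace usage of the pair above (a sketch, not
	 * part of the kernel build): setting no_new_privs is the usual
	 * prerequisite for installing a seccomp filter without
	 * CAP_SYS_ADMIN:
	 *
	 *	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	 */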
	case PR_GET_THP_DISABLE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
		break;
	case PR_SET_THP_DISABLE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		if (mmap_write_lock_killable(me->mm))
			return -EINTR;
		if (arg2)
			set_bit(MMF_DISABLE_THP, &me->mm->flags);
		else
			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
		mmap_write_unlock(me->mm);
		break;
	case PR_MPX_ENABLE_MANAGEMENT:
	case PR_MPX_DISABLE_MANAGEMENT:
		/* No longer implemented: */
		return -EINVAL;
	case PR_SET_FP_MODE:
		error = SET_FP_MODE(me, arg2);
		break;
	case PR_GET_FP_MODE:
		error = GET_FP_MODE(me);
		break;
	case PR_SVE_SET_VL:
		error = SVE_SET_VL(arg2);
		break;
	case PR_SVE_GET_VL:
		error = SVE_GET_VL();
		break;
	case PR_SME_SET_VL:
		error = SME_SET_VL(arg2);
		break;
	case PR_SME_GET_VL:
		error = SME_GET_VL();
		break;
	case PR_GET_SPECULATION_CTRL:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_prctl_spec_ctrl_get(me, arg2);
		break;
	case PR_SET_SPECULATION_CTRL:
		if (arg4 || arg5)
			return -EINVAL;
		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
		break;
	case PR_PAC_RESET_KEYS:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = PAC_RESET_KEYS(me, arg2);
		break;
	case PR_PAC_SET_ENABLED_KEYS:
		if (arg4 || arg5)
			return -EINVAL;
		error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
		break;
	case PR_PAC_GET_ENABLED_KEYS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = PAC_GET_ENABLED_KEYS(me);
		break;
	case PR_SET_TAGGED_ADDR_CTRL:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = SET_TAGGED_ADDR_CTRL(arg2);
		break;
	case PR_GET_TAGGED_ADDR_CTRL:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = GET_TAGGED_ADDR_CTRL();
		break;
	case PR_SET_IO_FLUSHER:
		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;

		if (arg3 || arg4 || arg5)
			return -EINVAL;

		if (arg2 == 1)
			current->flags |= PR_IO_FLUSHER;
		else if (!arg2)
			current->flags &= ~PR_IO_FLUSHER;
		else
			return -EINVAL;
		break;
	case PR_GET_IO_FLUSHER:
		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;

		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;

		error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
		break;
	case PR_SET_SYSCALL_USER_DISPATCH:
		error = set_syscall_user_dispatch(arg2, arg3, arg4,
						  (char __user *) arg5);
		break;
#ifdef CONFIG_SCHED_CORE
	case PR_SCHED_CORE:
		error = sched_core_share_pid(arg2, arg3, arg4, arg5);
		break;
#endif
	case PR_SET_MDWE:
		error = prctl_set_mdwe(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_MDWE:
		error = prctl_get_mdwe(arg2, arg3, arg4, arg5);
		break;
	case PR_PPC_GET_DEXCR:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = PPC_GET_DEXCR_ASPECT(me, arg2);
		break;
	case PR_PPC_SET_DEXCR:
		if (arg4 || arg5)
			return -EINVAL;
		error = PPC_SET_DEXCR_ASPECT(me, arg2, arg3);
		break;
	case PR_SET_VMA:
		error = prctl_set_vma(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_AUXV:
		if (arg4 || arg5)
			return -EINVAL;
		error = prctl_get_auxv((void __user *)arg2, arg3);
		break;
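	/*
	 * Illustrative userspace usage of PR_GET_AUXV above (a sketch, not
	 * part of the kernel build): copy out the saved auxiliary vector
	 * without opening /proc/self/auxv:
	 *
	 *	unsigned long auxv[64];
	 *
	 *	prctl(PR_GET_AUXV, (unsigned long)auxv, sizeof(auxv), 0, 0);
	 */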
#ifdef CONFIG_KSM
	case PR_SET_MEMORY_MERGE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		if (mmap_write_lock_killable(me->mm))
			return -EINTR;

		if (arg2)
			error = ksm_enable_merge_any(me->mm);
		else
			error = ksm_disable_merge_any(me->mm);
		mmap_write_unlock(me->mm);
		break;
	case PR_GET_MEMORY_MERGE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;

		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
		break;
#endif
	case PR_RISCV_V_SET_CONTROL:
		error = RISCV_V_SET_CONTROL(arg2);
		break;
	case PR_RISCV_V_GET_CONTROL:
		error = RISCV_V_GET_CONTROL();
		break;
	case PR_RISCV_SET_ICACHE_FLUSH_CTX:
		error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
		break;
	case PR_GET_SHADOW_STACK_STATUS:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2);
		break;
	case PR_SET_SHADOW_STACK_STATUS:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_set_shadow_stack_status(me, arg2);
		break;
	case PR_LOCK_SHADOW_STACK_STATUS:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_lock_shadow_stack_status(me, arg2);
		break;
	case PR_TIMER_CREATE_RESTORE_IDS:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = posixtimer_create_prctl(arg2);
		break;
	case PR_FUTEX_HASH:
		error = futex_hash_prctl(arg2, arg3, arg4);
		break;
	default:
		trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
		error = -EINVAL;
		break;
	}
	return error;
}

SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();

	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}
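
/*
 * Illustrative userspace usage (a sketch, not part of the kernel build):
 * either output pointer may be NULL, and the third argument is a
 * historical leftover that is ignored:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	unsigned int cpu, node;
 *
 *	syscall(SYS_getcpu, &cpu, &node, NULL);
 */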

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
static int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec64 tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_boottime_ts64(&tp);
	timens_add_boottime(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than what can be stored in a 32-bit unsigned long, then
	 * we can be binary compatible with 2.2.x kernels. If not,
	 * well, in that case 2.2.x was broken anyway...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1. This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

#ifdef CONFIG_COMPAT
struct compat_sysinfo {
	s32 uptime;
	u32 loads[3];
	u32 totalram;
	u32 freeram;
	u32 sharedram;
	u32 bufferram;
	u32 totalswap;
	u32 freeswap;
	u16 procs;
	u16 pad;
	u32 totalhigh;
	u32 freehigh;
	u32 mem_unit;
	char _f[20 - 2 * sizeof(u32) - sizeof(int)];
};

COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
	struct sysinfo s;
	struct compat_sysinfo s_32;

	do_sysinfo(&s);

	/*
	 * Check to see if any memory value is too large for 32-bit and
	 * scale down if needed.
	 */
	if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) {
		int bitcount = 0;

		while (s.mem_unit < PAGE_SIZE) {
			s.mem_unit <<= 1;
			bitcount++;
		}

		s.totalram >>= bitcount;
		s.freeram >>= bitcount;
		s.sharedram >>= bitcount;
		s.bufferram >>= bitcount;
		s.totalswap >>= bitcount;
		s.freeswap >>= bitcount;
		s.totalhigh >>= bitcount;
		s.freehigh >>= bitcount;
	}

	memset(&s_32, 0, sizeof(s_32));
	s_32.uptime = s.uptime;
	s_32.loads[0] = s.loads[0];
	s_32.loads[1] = s.loads[1];
	s_32.loads[2] = s.loads[2];
	s_32.totalram = s.totalram;
	s_32.freeram = s.freeram;
	s_32.sharedram = s.sharedram;
	s_32.bufferram = s.bufferram;
	s_32.totalswap = s.totalswap;
	s_32.freeswap = s.freeswap;
	s_32.procs = s.procs;
	s_32.totalhigh = s.totalhigh;
	s_32.freehigh = s.freehigh;
	s_32.mem_unit = s.mem_unit;
	if (copy_to_user(info, &s_32, sizeof(s_32)))
		return -EFAULT;
	return 0;
}
#endif /* CONFIG_COMPAT */
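
/*
 * Illustrative userspace usage of sysinfo(2) above (a sketch, not part of
 * the kernel build): note that mem_unit must be applied to every memory
 * field before interpreting it as bytes:
 *
 *	#include <sys/sysinfo.h>
 *
 *	struct sysinfo si;
 *
 *	if (sysinfo(&si) == 0) {
 *		unsigned long long bytes =
 *			(unsigned long long)si.totalram * si.mem_unit;
 *	}
 */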