1 /* 2 * linux/kernel/sys.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/export.h> 8 #include <linux/mm.h> 9 #include <linux/utsname.h> 10 #include <linux/mman.h> 11 #include <linux/reboot.h> 12 #include <linux/prctl.h> 13 #include <linux/highuid.h> 14 #include <linux/fs.h> 15 #include <linux/kmod.h> 16 #include <linux/perf_event.h> 17 #include <linux/resource.h> 18 #include <linux/kernel.h> 19 #include <linux/kexec.h> 20 #include <linux/workqueue.h> 21 #include <linux/capability.h> 22 #include <linux/device.h> 23 #include <linux/key.h> 24 #include <linux/times.h> 25 #include <linux/posix-timers.h> 26 #include <linux/security.h> 27 #include <linux/dcookies.h> 28 #include <linux/suspend.h> 29 #include <linux/tty.h> 30 #include <linux/signal.h> 31 #include <linux/cn_proc.h> 32 #include <linux/getcpu.h> 33 #include <linux/task_io_accounting_ops.h> 34 #include <linux/seccomp.h> 35 #include <linux/cpu.h> 36 #include <linux/personality.h> 37 #include <linux/ptrace.h> 38 #include <linux/fs_struct.h> 39 #include <linux/file.h> 40 #include <linux/mount.h> 41 #include <linux/gfp.h> 42 #include <linux/syscore_ops.h> 43 #include <linux/version.h> 44 #include <linux/ctype.h> 45 46 #include <linux/compat.h> 47 #include <linux/syscalls.h> 48 #include <linux/kprobes.h> 49 #include <linux/user_namespace.h> 50 #include <linux/binfmts.h> 51 52 #include <linux/kmsg_dump.h> 53 /* Move somewhere else to avoid recompiling? */ 54 #include <generated/utsrelease.h> 55 56 #include <asm/uaccess.h> 57 #include <asm/io.h> 58 #include <asm/unistd.h> 59 60 #ifndef SET_UNALIGN_CTL 61 # define SET_UNALIGN_CTL(a,b) (-EINVAL) 62 #endif 63 #ifndef GET_UNALIGN_CTL 64 # define GET_UNALIGN_CTL(a,b) (-EINVAL) 65 #endif 66 #ifndef SET_FPEMU_CTL 67 # define SET_FPEMU_CTL(a,b) (-EINVAL) 68 #endif 69 #ifndef GET_FPEMU_CTL 70 # define GET_FPEMU_CTL(a,b) (-EINVAL) 71 #endif 72 #ifndef SET_FPEXC_CTL 73 # define SET_FPEXC_CTL(a,b) (-EINVAL) 74 #endif 75 #ifndef GET_FPEXC_CTL 76 # define GET_FPEXC_CTL(a,b) (-EINVAL) 77 #endif 78 #ifndef GET_ENDIAN 79 # define GET_ENDIAN(a,b) (-EINVAL) 80 #endif 81 #ifndef SET_ENDIAN 82 # define SET_ENDIAN(a,b) (-EINVAL) 83 #endif 84 #ifndef GET_TSC_CTL 85 # define GET_TSC_CTL(a) (-EINVAL) 86 #endif 87 #ifndef SET_TSC_CTL 88 # define SET_TSC_CTL(a) (-EINVAL) 89 #endif 90 91 /* 92 * this is where the system-wide overflow UID and GID are defined, for 93 * architectures that now have 32-bit UID/GID but didn't in the past 94 */ 95 96 int overflowuid = DEFAULT_OVERFLOWUID; 97 int overflowgid = DEFAULT_OVERFLOWGID; 98 99 EXPORT_SYMBOL(overflowuid); 100 EXPORT_SYMBOL(overflowgid); 101 102 /* 103 * the same as above, but for filesystems which can only store a 16-bit 104 * UID and GID. as such, this is needed on all architectures 105 */ 106 107 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; 108 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; 109 110 EXPORT_SYMBOL(fs_overflowuid); 111 EXPORT_SYMBOL(fs_overflowgid); 112 113 /* 114 * this indicates whether you can reboot with ctrl-alt-del: the default is yes 115 */ 116 117 int C_A_D = 1; 118 struct pid *cad_pid; 119 EXPORT_SYMBOL(cad_pid); 120 121 /* 122 * If set, this is used for preparing the system to power off. 123 */ 124 125 void (*pm_power_off_prepare)(void); 126 127 /* 128 * Returns true if current's euid is same as p's uid or euid, 129 * or has CAP_SYS_NICE to p's user_ns. 130 * 131 * Called with rcu_read_lock, creds are safe 132 */ 133 static bool set_one_prio_perm(struct task_struct *p) 134 { 135 const struct cred *cred = current_cred(), *pcred = __task_cred(p); 136 137 if (uid_eq(pcred->uid, cred->euid) || 138 uid_eq(pcred->euid, cred->euid)) 139 return true; 140 if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) 141 return true; 142 return false; 143 } 144 145 /* 146 * set the priority of a task 147 * - the caller must hold the RCU read lock 148 */ 149 static int set_one_prio(struct task_struct *p, int niceval, int error) 150 { 151 int no_nice; 152 153 if (!set_one_prio_perm(p)) { 154 error = -EPERM; 155 goto out; 156 } 157 if (niceval < task_nice(p) && !can_nice(p, niceval)) { 158 error = -EACCES; 159 goto out; 160 } 161 no_nice = security_task_setnice(p, niceval); 162 if (no_nice) { 163 error = no_nice; 164 goto out; 165 } 166 if (error == -ESRCH) 167 error = 0; 168 set_user_nice(p, niceval); 169 out: 170 return error; 171 } 172 173 SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) 174 { 175 struct task_struct *g, *p; 176 struct user_struct *user; 177 const struct cred *cred = current_cred(); 178 int error = -EINVAL; 179 struct pid *pgrp; 180 kuid_t uid; 181 182 if (which > PRIO_USER || which < PRIO_PROCESS) 183 goto out; 184 185 /* normalize: avoid signed division (rounding problems) */ 186 error = -ESRCH; 187 if (niceval < -20) 188 niceval = -20; 189 if (niceval > 19) 190 niceval = 19; 191 192 rcu_read_lock(); 193 read_lock(&tasklist_lock); 194 switch (which) { 195 case PRIO_PROCESS: 196 if (who) 197 p = find_task_by_vpid(who); 198 else 199 p = current; 200 if (p) 201 error = set_one_prio(p, niceval, error); 202 break; 203 case PRIO_PGRP: 204 if (who) 205 pgrp = find_vpid(who); 206 else 207 pgrp = task_pgrp(current); 208 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 209 error = set_one_prio(p, niceval, error); 210 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 211 break; 212 case PRIO_USER: 213 uid = make_kuid(cred->user_ns, who); 214 user = cred->user; 215 if (!who) 216 uid = cred->uid; 217 else if (!uid_eq(uid, cred->uid) && 218 !(user = find_user(uid))) 219 goto out_unlock; /* No processes for this user */ 220 221 do_each_thread(g, p) { 222 if (uid_eq(task_uid(p), uid)) 223 error = set_one_prio(p, niceval, error); 224 } while_each_thread(g, p); 225 if (!uid_eq(uid, cred->uid)) 226 free_uid(user); /* For find_user() */ 227 break; 228 } 229 out_unlock: 230 read_unlock(&tasklist_lock); 231 rcu_read_unlock(); 232 out: 233 return error; 234 } 235 236 /* 237 * Ugh. To avoid negative return values, "getpriority()" will 238 * not return the normal nice-value, but a negated value that 239 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 240 * to stay compatible. 241 */ 242 SYSCALL_DEFINE2(getpriority, int, which, int, who) 243 { 244 struct task_struct *g, *p; 245 struct user_struct *user; 246 const struct cred *cred = current_cred(); 247 long niceval, retval = -ESRCH; 248 struct pid *pgrp; 249 kuid_t uid; 250 251 if (which > PRIO_USER || which < PRIO_PROCESS) 252 return -EINVAL; 253 254 rcu_read_lock(); 255 read_lock(&tasklist_lock); 256 switch (which) { 257 case PRIO_PROCESS: 258 if (who) 259 p = find_task_by_vpid(who); 260 else 261 p = current; 262 if (p) { 263 niceval = 20 - task_nice(p); 264 if (niceval > retval) 265 retval = niceval; 266 } 267 break; 268 case PRIO_PGRP: 269 if (who) 270 pgrp = find_vpid(who); 271 else 272 pgrp = task_pgrp(current); 273 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 274 niceval = 20 - task_nice(p); 275 if (niceval > retval) 276 retval = niceval; 277 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 278 break; 279 case PRIO_USER: 280 uid = make_kuid(cred->user_ns, who); 281 user = cred->user; 282 if (!who) 283 uid = cred->uid; 284 else if (!uid_eq(uid, cred->uid) && 285 !(user = find_user(uid))) 286 goto out_unlock; /* No processes for this user */ 287 288 do_each_thread(g, p) { 289 if (uid_eq(task_uid(p), uid)) { 290 niceval = 20 - task_nice(p); 291 if (niceval > retval) 292 retval = niceval; 293 } 294 } while_each_thread(g, p); 295 if (!uid_eq(uid, cred->uid)) 296 free_uid(user); /* for find_user() */ 297 break; 298 } 299 out_unlock: 300 read_unlock(&tasklist_lock); 301 rcu_read_unlock(); 302 303 return retval; 304 } 305 306 /** 307 * emergency_restart - reboot the system 308 * 309 * Without shutting down any hardware or taking any locks 310 * reboot the system. This is called when we know we are in 311 * trouble so this is our best effort to reboot. This is 312 * safe to call in interrupt context. 313 */ 314 void emergency_restart(void) 315 { 316 kmsg_dump(KMSG_DUMP_EMERG); 317 machine_emergency_restart(); 318 } 319 EXPORT_SYMBOL_GPL(emergency_restart); 320 321 void kernel_restart_prepare(char *cmd) 322 { 323 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 324 system_state = SYSTEM_RESTART; 325 usermodehelper_disable(); 326 device_shutdown(); 327 } 328 329 /** 330 * register_reboot_notifier - Register function to be called at reboot time 331 * @nb: Info about notifier function to be called 332 * 333 * Registers a function with the list of functions 334 * to be called at reboot time. 335 * 336 * Currently always returns zero, as blocking_notifier_chain_register() 337 * always returns zero. 338 */ 339 int register_reboot_notifier(struct notifier_block *nb) 340 { 341 return blocking_notifier_chain_register(&reboot_notifier_list, nb); 342 } 343 EXPORT_SYMBOL(register_reboot_notifier); 344 345 /** 346 * unregister_reboot_notifier - Unregister previously registered reboot notifier 347 * @nb: Hook to be unregistered 348 * 349 * Unregisters a previously registered reboot 350 * notifier function. 351 * 352 * Returns zero on success, or %-ENOENT on failure. 353 */ 354 int unregister_reboot_notifier(struct notifier_block *nb) 355 { 356 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); 357 } 358 EXPORT_SYMBOL(unregister_reboot_notifier); 359 360 /** 361 * kernel_restart - reboot the system 362 * @cmd: pointer to buffer containing command to execute for restart 363 * or %NULL 364 * 365 * Shutdown everything and perform a clean reboot. 366 * This is not safe to call in interrupt context. 367 */ 368 void kernel_restart(char *cmd) 369 { 370 kernel_restart_prepare(cmd); 371 disable_nonboot_cpus(); 372 syscore_shutdown(); 373 if (!cmd) 374 printk(KERN_EMERG "Restarting system.\n"); 375 else 376 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 377 kmsg_dump(KMSG_DUMP_RESTART); 378 machine_restart(cmd); 379 } 380 EXPORT_SYMBOL_GPL(kernel_restart); 381 382 static void kernel_shutdown_prepare(enum system_states state) 383 { 384 blocking_notifier_call_chain(&reboot_notifier_list, 385 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 386 system_state = state; 387 usermodehelper_disable(); 388 device_shutdown(); 389 } 390 /** 391 * kernel_halt - halt the system 392 * 393 * Shutdown everything and perform a clean system halt. 394 */ 395 void kernel_halt(void) 396 { 397 kernel_shutdown_prepare(SYSTEM_HALT); 398 disable_nonboot_cpus(); 399 syscore_shutdown(); 400 printk(KERN_EMERG "System halted.\n"); 401 kmsg_dump(KMSG_DUMP_HALT); 402 machine_halt(); 403 } 404 405 EXPORT_SYMBOL_GPL(kernel_halt); 406 407 /** 408 * kernel_power_off - power_off the system 409 * 410 * Shutdown everything and perform a clean system power_off. 411 */ 412 void kernel_power_off(void) 413 { 414 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 415 if (pm_power_off_prepare) 416 pm_power_off_prepare(); 417 disable_nonboot_cpus(); 418 syscore_shutdown(); 419 printk(KERN_EMERG "Power down.\n"); 420 kmsg_dump(KMSG_DUMP_POWEROFF); 421 machine_power_off(); 422 } 423 EXPORT_SYMBOL_GPL(kernel_power_off); 424 425 static DEFINE_MUTEX(reboot_mutex); 426 427 /* 428 * Reboot system call: for obvious reasons only root may call it, 429 * and even root needs to set up some magic numbers in the registers 430 * so that some mistake won't make this reboot the whole machine. 431 * You can also set the meaning of the ctrl-alt-del-key here. 432 * 433 * reboot doesn't sync: do that yourself before calling this. 434 */ 435 SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, 436 void __user *, arg) 437 { 438 struct pid_namespace *pid_ns = task_active_pid_ns(current); 439 char buffer[256]; 440 int ret = 0; 441 442 /* We only trust the superuser with rebooting the system. */ 443 if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) 444 return -EPERM; 445 446 /* For safety, we require "magic" arguments. */ 447 if (magic1 != LINUX_REBOOT_MAGIC1 || 448 (magic2 != LINUX_REBOOT_MAGIC2 && 449 magic2 != LINUX_REBOOT_MAGIC2A && 450 magic2 != LINUX_REBOOT_MAGIC2B && 451 magic2 != LINUX_REBOOT_MAGIC2C)) 452 return -EINVAL; 453 454 /* 455 * If pid namespaces are enabled and the current task is in a child 456 * pid_namespace, the command is handled by reboot_pid_ns() which will 457 * call do_exit(). 458 */ 459 ret = reboot_pid_ns(pid_ns, cmd); 460 if (ret) 461 return ret; 462 463 /* Instead of trying to make the power_off code look like 464 * halt when pm_power_off is not set do it the easy way. 465 */ 466 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) 467 cmd = LINUX_REBOOT_CMD_HALT; 468 469 mutex_lock(&reboot_mutex); 470 switch (cmd) { 471 case LINUX_REBOOT_CMD_RESTART: 472 kernel_restart(NULL); 473 break; 474 475 case LINUX_REBOOT_CMD_CAD_ON: 476 C_A_D = 1; 477 break; 478 479 case LINUX_REBOOT_CMD_CAD_OFF: 480 C_A_D = 0; 481 break; 482 483 case LINUX_REBOOT_CMD_HALT: 484 kernel_halt(); 485 do_exit(0); 486 panic("cannot halt"); 487 488 case LINUX_REBOOT_CMD_POWER_OFF: 489 kernel_power_off(); 490 do_exit(0); 491 break; 492 493 case LINUX_REBOOT_CMD_RESTART2: 494 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { 495 ret = -EFAULT; 496 break; 497 } 498 buffer[sizeof(buffer) - 1] = '\0'; 499 500 kernel_restart(buffer); 501 break; 502 503 #ifdef CONFIG_KEXEC 504 case LINUX_REBOOT_CMD_KEXEC: 505 ret = kernel_kexec(); 506 break; 507 #endif 508 509 #ifdef CONFIG_HIBERNATION 510 case LINUX_REBOOT_CMD_SW_SUSPEND: 511 ret = hibernate(); 512 break; 513 #endif 514 515 default: 516 ret = -EINVAL; 517 break; 518 } 519 mutex_unlock(&reboot_mutex); 520 return ret; 521 } 522 523 static void deferred_cad(struct work_struct *dummy) 524 { 525 kernel_restart(NULL); 526 } 527 528 /* 529 * This function gets called by ctrl-alt-del - ie the keyboard interrupt. 530 * As it's called within an interrupt, it may NOT sync: the only choice 531 * is whether to reboot at once, or just ignore the ctrl-alt-del. 532 */ 533 void ctrl_alt_del(void) 534 { 535 static DECLARE_WORK(cad_work, deferred_cad); 536 537 if (C_A_D) 538 schedule_work(&cad_work); 539 else 540 kill_cad_pid(SIGINT, 1); 541 } 542 543 /* 544 * Unprivileged users may change the real gid to the effective gid 545 * or vice versa. (BSD-style) 546 * 547 * If you set the real gid at all, or set the effective gid to a value not 548 * equal to the real gid, then the saved gid is set to the new effective gid. 549 * 550 * This makes it possible for a setgid program to completely drop its 551 * privileges, which is often a useful assertion to make when you are doing 552 * a security audit over a program. 553 * 554 * The general idea is that a program which uses just setregid() will be 555 * 100% compatible with BSD. A program which uses just setgid() will be 556 * 100% compatible with POSIX with saved IDs. 557 * 558 * SMP: There are not races, the GIDs are checked only by filesystem 559 * operations (as far as semantic preservation is concerned). 560 */ 561 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) 562 { 563 struct user_namespace *ns = current_user_ns(); 564 const struct cred *old; 565 struct cred *new; 566 int retval; 567 kgid_t krgid, kegid; 568 569 krgid = make_kgid(ns, rgid); 570 kegid = make_kgid(ns, egid); 571 572 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 573 return -EINVAL; 574 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 575 return -EINVAL; 576 577 new = prepare_creds(); 578 if (!new) 579 return -ENOMEM; 580 old = current_cred(); 581 582 retval = -EPERM; 583 if (rgid != (gid_t) -1) { 584 if (gid_eq(old->gid, krgid) || 585 gid_eq(old->egid, krgid) || 586 nsown_capable(CAP_SETGID)) 587 new->gid = krgid; 588 else 589 goto error; 590 } 591 if (egid != (gid_t) -1) { 592 if (gid_eq(old->gid, kegid) || 593 gid_eq(old->egid, kegid) || 594 gid_eq(old->sgid, kegid) || 595 nsown_capable(CAP_SETGID)) 596 new->egid = kegid; 597 else 598 goto error; 599 } 600 601 if (rgid != (gid_t) -1 || 602 (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) 603 new->sgid = new->egid; 604 new->fsgid = new->egid; 605 606 return commit_creds(new); 607 608 error: 609 abort_creds(new); 610 return retval; 611 } 612 613 /* 614 * setgid() is implemented like SysV w/ SAVED_IDS 615 * 616 * SMP: Same implicit races as above. 617 */ 618 SYSCALL_DEFINE1(setgid, gid_t, gid) 619 { 620 struct user_namespace *ns = current_user_ns(); 621 const struct cred *old; 622 struct cred *new; 623 int retval; 624 kgid_t kgid; 625 626 kgid = make_kgid(ns, gid); 627 if (!gid_valid(kgid)) 628 return -EINVAL; 629 630 new = prepare_creds(); 631 if (!new) 632 return -ENOMEM; 633 old = current_cred(); 634 635 retval = -EPERM; 636 if (nsown_capable(CAP_SETGID)) 637 new->gid = new->egid = new->sgid = new->fsgid = kgid; 638 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) 639 new->egid = new->fsgid = kgid; 640 else 641 goto error; 642 643 return commit_creds(new); 644 645 error: 646 abort_creds(new); 647 return retval; 648 } 649 650 /* 651 * change the user struct in a credentials set to match the new UID 652 */ 653 static int set_user(struct cred *new) 654 { 655 struct user_struct *new_user; 656 657 new_user = alloc_uid(new->uid); 658 if (!new_user) 659 return -EAGAIN; 660 661 /* 662 * We don't fail in case of NPROC limit excess here because too many 663 * poorly written programs don't check set*uid() return code, assuming 664 * it never fails if called by root. We may still enforce NPROC limit 665 * for programs doing set*uid()+execve() by harmlessly deferring the 666 * failure to the execve() stage. 667 */ 668 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && 669 new_user != INIT_USER) 670 current->flags |= PF_NPROC_EXCEEDED; 671 else 672 current->flags &= ~PF_NPROC_EXCEEDED; 673 674 free_uid(new->user); 675 new->user = new_user; 676 return 0; 677 } 678 679 /* 680 * Unprivileged users may change the real uid to the effective uid 681 * or vice versa. (BSD-style) 682 * 683 * If you set the real uid at all, or set the effective uid to a value not 684 * equal to the real uid, then the saved uid is set to the new effective uid. 685 * 686 * This makes it possible for a setuid program to completely drop its 687 * privileges, which is often a useful assertion to make when you are doing 688 * a security audit over a program. 689 * 690 * The general idea is that a program which uses just setreuid() will be 691 * 100% compatible with BSD. A program which uses just setuid() will be 692 * 100% compatible with POSIX with saved IDs. 693 */ 694 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) 695 { 696 struct user_namespace *ns = current_user_ns(); 697 const struct cred *old; 698 struct cred *new; 699 int retval; 700 kuid_t kruid, keuid; 701 702 kruid = make_kuid(ns, ruid); 703 keuid = make_kuid(ns, euid); 704 705 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 706 return -EINVAL; 707 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 708 return -EINVAL; 709 710 new = prepare_creds(); 711 if (!new) 712 return -ENOMEM; 713 old = current_cred(); 714 715 retval = -EPERM; 716 if (ruid != (uid_t) -1) { 717 new->uid = kruid; 718 if (!uid_eq(old->uid, kruid) && 719 !uid_eq(old->euid, kruid) && 720 !nsown_capable(CAP_SETUID)) 721 goto error; 722 } 723 724 if (euid != (uid_t) -1) { 725 new->euid = keuid; 726 if (!uid_eq(old->uid, keuid) && 727 !uid_eq(old->euid, keuid) && 728 !uid_eq(old->suid, keuid) && 729 !nsown_capable(CAP_SETUID)) 730 goto error; 731 } 732 733 if (!uid_eq(new->uid, old->uid)) { 734 retval = set_user(new); 735 if (retval < 0) 736 goto error; 737 } 738 if (ruid != (uid_t) -1 || 739 (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) 740 new->suid = new->euid; 741 new->fsuid = new->euid; 742 743 retval = security_task_fix_setuid(new, old, LSM_SETID_RE); 744 if (retval < 0) 745 goto error; 746 747 return commit_creds(new); 748 749 error: 750 abort_creds(new); 751 return retval; 752 } 753 754 /* 755 * setuid() is implemented like SysV with SAVED_IDS 756 * 757 * Note that SAVED_ID's is deficient in that a setuid root program 758 * like sendmail, for example, cannot set its uid to be a normal 759 * user and then switch back, because if you're root, setuid() sets 760 * the saved uid too. If you don't like this, blame the bright people 761 * in the POSIX committee and/or USG. Note that the BSD-style setreuid() 762 * will allow a root program to temporarily drop privileges and be able to 763 * regain them by swapping the real and effective uid. 764 */ 765 SYSCALL_DEFINE1(setuid, uid_t, uid) 766 { 767 struct user_namespace *ns = current_user_ns(); 768 const struct cred *old; 769 struct cred *new; 770 int retval; 771 kuid_t kuid; 772 773 kuid = make_kuid(ns, uid); 774 if (!uid_valid(kuid)) 775 return -EINVAL; 776 777 new = prepare_creds(); 778 if (!new) 779 return -ENOMEM; 780 old = current_cred(); 781 782 retval = -EPERM; 783 if (nsown_capable(CAP_SETUID)) { 784 new->suid = new->uid = kuid; 785 if (!uid_eq(kuid, old->uid)) { 786 retval = set_user(new); 787 if (retval < 0) 788 goto error; 789 } 790 } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { 791 goto error; 792 } 793 794 new->fsuid = new->euid = kuid; 795 796 retval = security_task_fix_setuid(new, old, LSM_SETID_ID); 797 if (retval < 0) 798 goto error; 799 800 return commit_creds(new); 801 802 error: 803 abort_creds(new); 804 return retval; 805 } 806 807 808 /* 809 * This function implements a generic ability to update ruid, euid, 810 * and suid. This allows you to implement the 4.4 compatible seteuid(). 811 */ 812 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) 813 { 814 struct user_namespace *ns = current_user_ns(); 815 const struct cred *old; 816 struct cred *new; 817 int retval; 818 kuid_t kruid, keuid, ksuid; 819 820 kruid = make_kuid(ns, ruid); 821 keuid = make_kuid(ns, euid); 822 ksuid = make_kuid(ns, suid); 823 824 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 825 return -EINVAL; 826 827 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 828 return -EINVAL; 829 830 if ((suid != (uid_t) -1) && !uid_valid(ksuid)) 831 return -EINVAL; 832 833 new = prepare_creds(); 834 if (!new) 835 return -ENOMEM; 836 837 old = current_cred(); 838 839 retval = -EPERM; 840 if (!nsown_capable(CAP_SETUID)) { 841 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && 842 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) 843 goto error; 844 if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && 845 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) 846 goto error; 847 if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && 848 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) 849 goto error; 850 } 851 852 if (ruid != (uid_t) -1) { 853 new->uid = kruid; 854 if (!uid_eq(kruid, old->uid)) { 855 retval = set_user(new); 856 if (retval < 0) 857 goto error; 858 } 859 } 860 if (euid != (uid_t) -1) 861 new->euid = keuid; 862 if (suid != (uid_t) -1) 863 new->suid = ksuid; 864 new->fsuid = new->euid; 865 866 retval = security_task_fix_setuid(new, old, LSM_SETID_RES); 867 if (retval < 0) 868 goto error; 869 870 return commit_creds(new); 871 872 error: 873 abort_creds(new); 874 return retval; 875 } 876 877 SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) 878 { 879 const struct cred *cred = current_cred(); 880 int retval; 881 uid_t ruid, euid, suid; 882 883 ruid = from_kuid_munged(cred->user_ns, cred->uid); 884 euid = from_kuid_munged(cred->user_ns, cred->euid); 885 suid = from_kuid_munged(cred->user_ns, cred->suid); 886 887 if (!(retval = put_user(ruid, ruidp)) && 888 !(retval = put_user(euid, euidp))) 889 retval = put_user(suid, suidp); 890 891 return retval; 892 } 893 894 /* 895 * Same as above, but for rgid, egid, sgid. 896 */ 897 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) 898 { 899 struct user_namespace *ns = current_user_ns(); 900 const struct cred *old; 901 struct cred *new; 902 int retval; 903 kgid_t krgid, kegid, ksgid; 904 905 krgid = make_kgid(ns, rgid); 906 kegid = make_kgid(ns, egid); 907 ksgid = make_kgid(ns, sgid); 908 909 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 910 return -EINVAL; 911 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 912 return -EINVAL; 913 if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) 914 return -EINVAL; 915 916 new = prepare_creds(); 917 if (!new) 918 return -ENOMEM; 919 old = current_cred(); 920 921 retval = -EPERM; 922 if (!nsown_capable(CAP_SETGID)) { 923 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && 924 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) 925 goto error; 926 if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && 927 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) 928 goto error; 929 if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && 930 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) 931 goto error; 932 } 933 934 if (rgid != (gid_t) -1) 935 new->gid = krgid; 936 if (egid != (gid_t) -1) 937 new->egid = kegid; 938 if (sgid != (gid_t) -1) 939 new->sgid = ksgid; 940 new->fsgid = new->egid; 941 942 return commit_creds(new); 943 944 error: 945 abort_creds(new); 946 return retval; 947 } 948 949 SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) 950 { 951 const struct cred *cred = current_cred(); 952 int retval; 953 gid_t rgid, egid, sgid; 954 955 rgid = from_kgid_munged(cred->user_ns, cred->gid); 956 egid = from_kgid_munged(cred->user_ns, cred->egid); 957 sgid = from_kgid_munged(cred->user_ns, cred->sgid); 958 959 if (!(retval = put_user(rgid, rgidp)) && 960 !(retval = put_user(egid, egidp))) 961 retval = put_user(sgid, sgidp); 962 963 return retval; 964 } 965 966 967 /* 968 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 969 * is used for "access()" and for the NFS daemon (letting nfsd stay at 970 * whatever uid it wants to). It normally shadows "euid", except when 971 * explicitly set by setfsuid() or for access.. 972 */ 973 SYSCALL_DEFINE1(setfsuid, uid_t, uid) 974 { 975 const struct cred *old; 976 struct cred *new; 977 uid_t old_fsuid; 978 kuid_t kuid; 979 980 old = current_cred(); 981 old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); 982 983 kuid = make_kuid(old->user_ns, uid); 984 if (!uid_valid(kuid)) 985 return old_fsuid; 986 987 new = prepare_creds(); 988 if (!new) 989 return old_fsuid; 990 991 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || 992 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || 993 nsown_capable(CAP_SETUID)) { 994 if (!uid_eq(kuid, old->fsuid)) { 995 new->fsuid = kuid; 996 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 997 goto change_okay; 998 } 999 } 1000 1001 abort_creds(new); 1002 return old_fsuid; 1003 1004 change_okay: 1005 commit_creds(new); 1006 return old_fsuid; 1007 } 1008 1009 /* 1010 * Samma på svenska.. 1011 */ 1012 SYSCALL_DEFINE1(setfsgid, gid_t, gid) 1013 { 1014 const struct cred *old; 1015 struct cred *new; 1016 gid_t old_fsgid; 1017 kgid_t kgid; 1018 1019 old = current_cred(); 1020 old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); 1021 1022 kgid = make_kgid(old->user_ns, gid); 1023 if (!gid_valid(kgid)) 1024 return old_fsgid; 1025 1026 new = prepare_creds(); 1027 if (!new) 1028 return old_fsgid; 1029 1030 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || 1031 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || 1032 nsown_capable(CAP_SETGID)) { 1033 if (!gid_eq(kgid, old->fsgid)) { 1034 new->fsgid = kgid; 1035 goto change_okay; 1036 } 1037 } 1038 1039 abort_creds(new); 1040 return old_fsgid; 1041 1042 change_okay: 1043 commit_creds(new); 1044 return old_fsgid; 1045 } 1046 1047 void do_sys_times(struct tms *tms) 1048 { 1049 cputime_t tgutime, tgstime, cutime, cstime; 1050 1051 spin_lock_irq(¤t->sighand->siglock); 1052 thread_group_cputime_adjusted(current, &tgutime, &tgstime); 1053 cutime = current->signal->cutime; 1054 cstime = current->signal->cstime; 1055 spin_unlock_irq(¤t->sighand->siglock); 1056 tms->tms_utime = cputime_to_clock_t(tgutime); 1057 tms->tms_stime = cputime_to_clock_t(tgstime); 1058 tms->tms_cutime = cputime_to_clock_t(cutime); 1059 tms->tms_cstime = cputime_to_clock_t(cstime); 1060 } 1061 1062 SYSCALL_DEFINE1(times, struct tms __user *, tbuf) 1063 { 1064 if (tbuf) { 1065 struct tms tmp; 1066 1067 do_sys_times(&tmp); 1068 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 1069 return -EFAULT; 1070 } 1071 force_successful_syscall_return(); 1072 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 1073 } 1074 1075 /* 1076 * This needs some heavy checking ... 1077 * I just haven't the stomach for it. I also don't fully 1078 * understand sessions/pgrp etc. Let somebody who does explain it. 1079 * 1080 * OK, I think I have the protection semantics right.... this is really 1081 * only important on a multi-user system anyway, to make sure one user 1082 * can't send a signal to a process owned by another. -TYT, 12/12/91 1083 * 1084 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 1085 * LBT 04.03.94 1086 */ 1087 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) 1088 { 1089 struct task_struct *p; 1090 struct task_struct *group_leader = current->group_leader; 1091 struct pid *pgrp; 1092 int err; 1093 1094 if (!pid) 1095 pid = task_pid_vnr(group_leader); 1096 if (!pgid) 1097 pgid = pid; 1098 if (pgid < 0) 1099 return -EINVAL; 1100 rcu_read_lock(); 1101 1102 /* From this point forward we keep holding onto the tasklist lock 1103 * so that our parent does not change from under us. -DaveM 1104 */ 1105 write_lock_irq(&tasklist_lock); 1106 1107 err = -ESRCH; 1108 p = find_task_by_vpid(pid); 1109 if (!p) 1110 goto out; 1111 1112 err = -EINVAL; 1113 if (!thread_group_leader(p)) 1114 goto out; 1115 1116 if (same_thread_group(p->real_parent, group_leader)) { 1117 err = -EPERM; 1118 if (task_session(p) != task_session(group_leader)) 1119 goto out; 1120 err = -EACCES; 1121 if (p->did_exec) 1122 goto out; 1123 } else { 1124 err = -ESRCH; 1125 if (p != group_leader) 1126 goto out; 1127 } 1128 1129 err = -EPERM; 1130 if (p->signal->leader) 1131 goto out; 1132 1133 pgrp = task_pid(p); 1134 if (pgid != pid) { 1135 struct task_struct *g; 1136 1137 pgrp = find_vpid(pgid); 1138 g = pid_task(pgrp, PIDTYPE_PGID); 1139 if (!g || task_session(g) != task_session(group_leader)) 1140 goto out; 1141 } 1142 1143 err = security_task_setpgid(p, pgid); 1144 if (err) 1145 goto out; 1146 1147 if (task_pgrp(p) != pgrp) 1148 change_pid(p, PIDTYPE_PGID, pgrp); 1149 1150 err = 0; 1151 out: 1152 /* All paths lead to here, thus we are safe. -DaveM */ 1153 write_unlock_irq(&tasklist_lock); 1154 rcu_read_unlock(); 1155 return err; 1156 } 1157 1158 SYSCALL_DEFINE1(getpgid, pid_t, pid) 1159 { 1160 struct task_struct *p; 1161 struct pid *grp; 1162 int retval; 1163 1164 rcu_read_lock(); 1165 if (!pid) 1166 grp = task_pgrp(current); 1167 else { 1168 retval = -ESRCH; 1169 p = find_task_by_vpid(pid); 1170 if (!p) 1171 goto out; 1172 grp = task_pgrp(p); 1173 if (!grp) 1174 goto out; 1175 1176 retval = security_task_getpgid(p); 1177 if (retval) 1178 goto out; 1179 } 1180 retval = pid_vnr(grp); 1181 out: 1182 rcu_read_unlock(); 1183 return retval; 1184 } 1185 1186 #ifdef __ARCH_WANT_SYS_GETPGRP 1187 1188 SYSCALL_DEFINE0(getpgrp) 1189 { 1190 return sys_getpgid(0); 1191 } 1192 1193 #endif 1194 1195 SYSCALL_DEFINE1(getsid, pid_t, pid) 1196 { 1197 struct task_struct *p; 1198 struct pid *sid; 1199 int retval; 1200 1201 rcu_read_lock(); 1202 if (!pid) 1203 sid = task_session(current); 1204 else { 1205 retval = -ESRCH; 1206 p = find_task_by_vpid(pid); 1207 if (!p) 1208 goto out; 1209 sid = task_session(p); 1210 if (!sid) 1211 goto out; 1212 1213 retval = security_task_getsid(p); 1214 if (retval) 1215 goto out; 1216 } 1217 retval = pid_vnr(sid); 1218 out: 1219 rcu_read_unlock(); 1220 return retval; 1221 } 1222 1223 SYSCALL_DEFINE0(setsid) 1224 { 1225 struct task_struct *group_leader = current->group_leader; 1226 struct pid *sid = task_pid(group_leader); 1227 pid_t session = pid_vnr(sid); 1228 int err = -EPERM; 1229 1230 write_lock_irq(&tasklist_lock); 1231 /* Fail if I am already a session leader */ 1232 if (group_leader->signal->leader) 1233 goto out; 1234 1235 /* Fail if a process group id already exists that equals the 1236 * proposed session id. 1237 */ 1238 if (pid_task(sid, PIDTYPE_PGID)) 1239 goto out; 1240 1241 group_leader->signal->leader = 1; 1242 __set_special_pids(sid); 1243 1244 proc_clear_tty(group_leader); 1245 1246 err = session; 1247 out: 1248 write_unlock_irq(&tasklist_lock); 1249 if (err > 0) { 1250 proc_sid_connector(group_leader); 1251 sched_autogroup_create_attach(group_leader); 1252 } 1253 return err; 1254 } 1255 1256 DECLARE_RWSEM(uts_sem); 1257 1258 #ifdef COMPAT_UTS_MACHINE 1259 #define override_architecture(name) \ 1260 (personality(current->personality) == PER_LINUX32 && \ 1261 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ 1262 sizeof(COMPAT_UTS_MACHINE))) 1263 #else 1264 #define override_architecture(name) 0 1265 #endif 1266 1267 /* 1268 * Work around broken programs that cannot handle "Linux 3.0". 1269 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 1270 */ 1271 static int override_release(char __user *release, size_t len) 1272 { 1273 int ret = 0; 1274 1275 if (current->personality & UNAME26) { 1276 const char *rest = UTS_RELEASE; 1277 char buf[65] = { 0 }; 1278 int ndots = 0; 1279 unsigned v; 1280 size_t copy; 1281 1282 while (*rest) { 1283 if (*rest == '.' && ++ndots >= 3) 1284 break; 1285 if (!isdigit(*rest) && *rest != '.') 1286 break; 1287 rest++; 1288 } 1289 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; 1290 copy = clamp_t(size_t, len, 1, sizeof(buf)); 1291 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); 1292 ret = copy_to_user(release, buf, copy + 1); 1293 } 1294 return ret; 1295 } 1296 1297 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1298 { 1299 int errno = 0; 1300 1301 down_read(&uts_sem); 1302 if (copy_to_user(name, utsname(), sizeof *name)) 1303 errno = -EFAULT; 1304 up_read(&uts_sem); 1305 1306 if (!errno && override_release(name->release, sizeof(name->release))) 1307 errno = -EFAULT; 1308 if (!errno && override_architecture(name)) 1309 errno = -EFAULT; 1310 return errno; 1311 } 1312 1313 #ifdef __ARCH_WANT_SYS_OLD_UNAME 1314 /* 1315 * Old cruft 1316 */ 1317 SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) 1318 { 1319 int error = 0; 1320 1321 if (!name) 1322 return -EFAULT; 1323 1324 down_read(&uts_sem); 1325 if (copy_to_user(name, utsname(), sizeof(*name))) 1326 error = -EFAULT; 1327 up_read(&uts_sem); 1328 1329 if (!error && override_release(name->release, sizeof(name->release))) 1330 error = -EFAULT; 1331 if (!error && override_architecture(name)) 1332 error = -EFAULT; 1333 return error; 1334 } 1335 1336 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) 1337 { 1338 int error; 1339 1340 if (!name) 1341 return -EFAULT; 1342 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) 1343 return -EFAULT; 1344 1345 down_read(&uts_sem); 1346 error = __copy_to_user(&name->sysname, &utsname()->sysname, 1347 __OLD_UTS_LEN); 1348 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 1349 error |= __copy_to_user(&name->nodename, &utsname()->nodename, 1350 __OLD_UTS_LEN); 1351 error |= __put_user(0, name->nodename + __OLD_UTS_LEN); 1352 error |= __copy_to_user(&name->release, &utsname()->release, 1353 __OLD_UTS_LEN); 1354 error |= __put_user(0, name->release + __OLD_UTS_LEN); 1355 error |= __copy_to_user(&name->version, &utsname()->version, 1356 __OLD_UTS_LEN); 1357 error |= __put_user(0, name->version + __OLD_UTS_LEN); 1358 error |= __copy_to_user(&name->machine, &utsname()->machine, 1359 __OLD_UTS_LEN); 1360 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 1361 up_read(&uts_sem); 1362 1363 if (!error && override_architecture(name)) 1364 error = -EFAULT; 1365 if (!error && override_release(name->release, sizeof(name->release))) 1366 error = -EFAULT; 1367 return error ? -EFAULT : 0; 1368 } 1369 #endif 1370 1371 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1372 { 1373 int errno; 1374 char tmp[__NEW_UTS_LEN]; 1375 1376 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1377 return -EPERM; 1378 1379 if (len < 0 || len > __NEW_UTS_LEN) 1380 return -EINVAL; 1381 down_write(&uts_sem); 1382 errno = -EFAULT; 1383 if (!copy_from_user(tmp, name, len)) { 1384 struct new_utsname *u = utsname(); 1385 1386 memcpy(u->nodename, tmp, len); 1387 memset(u->nodename + len, 0, sizeof(u->nodename) - len); 1388 errno = 0; 1389 uts_proc_notify(UTS_PROC_HOSTNAME); 1390 } 1391 up_write(&uts_sem); 1392 return errno; 1393 } 1394 1395 #ifdef __ARCH_WANT_SYS_GETHOSTNAME 1396 1397 SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) 1398 { 1399 int i, errno; 1400 struct new_utsname *u; 1401 1402 if (len < 0) 1403 return -EINVAL; 1404 down_read(&uts_sem); 1405 u = utsname(); 1406 i = 1 + strlen(u->nodename); 1407 if (i > len) 1408 i = len; 1409 errno = 0; 1410 if (copy_to_user(name, u->nodename, i)) 1411 errno = -EFAULT; 1412 up_read(&uts_sem); 1413 return errno; 1414 } 1415 1416 #endif 1417 1418 /* 1419 * Only setdomainname; getdomainname can be implemented by calling 1420 * uname() 1421 */ 1422 SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) 1423 { 1424 int errno; 1425 char tmp[__NEW_UTS_LEN]; 1426 1427 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1428 return -EPERM; 1429 if (len < 0 || len > __NEW_UTS_LEN) 1430 return -EINVAL; 1431 1432 down_write(&uts_sem); 1433 errno = -EFAULT; 1434 if (!copy_from_user(tmp, name, len)) { 1435 struct new_utsname *u = utsname(); 1436 1437 memcpy(u->domainname, tmp, len); 1438 memset(u->domainname + len, 0, sizeof(u->domainname) - len); 1439 errno = 0; 1440 uts_proc_notify(UTS_PROC_DOMAINNAME); 1441 } 1442 up_write(&uts_sem); 1443 return errno; 1444 } 1445 1446 SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1447 { 1448 struct rlimit value; 1449 int ret; 1450 1451 ret = do_prlimit(current, resource, NULL, &value); 1452 if (!ret) 1453 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; 1454 1455 return ret; 1456 } 1457 1458 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1459 1460 /* 1461 * Back compatibility for getrlimit. Needed for some apps. 1462 */ 1463 1464 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, 1465 struct rlimit __user *, rlim) 1466 { 1467 struct rlimit x; 1468 if (resource >= RLIM_NLIMITS) 1469 return -EINVAL; 1470 1471 task_lock(current->group_leader); 1472 x = current->signal->rlim[resource]; 1473 task_unlock(current->group_leader); 1474 if (x.rlim_cur > 0x7FFFFFFF) 1475 x.rlim_cur = 0x7FFFFFFF; 1476 if (x.rlim_max > 0x7FFFFFFF) 1477 x.rlim_max = 0x7FFFFFFF; 1478 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1479 } 1480 1481 #endif 1482 1483 static inline bool rlim64_is_infinity(__u64 rlim64) 1484 { 1485 #if BITS_PER_LONG < 64 1486 return rlim64 >= ULONG_MAX; 1487 #else 1488 return rlim64 == RLIM64_INFINITY; 1489 #endif 1490 } 1491 1492 static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) 1493 { 1494 if (rlim->rlim_cur == RLIM_INFINITY) 1495 rlim64->rlim_cur = RLIM64_INFINITY; 1496 else 1497 rlim64->rlim_cur = rlim->rlim_cur; 1498 if (rlim->rlim_max == RLIM_INFINITY) 1499 rlim64->rlim_max = RLIM64_INFINITY; 1500 else 1501 rlim64->rlim_max = rlim->rlim_max; 1502 } 1503 1504 static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) 1505 { 1506 if (rlim64_is_infinity(rlim64->rlim_cur)) 1507 rlim->rlim_cur = RLIM_INFINITY; 1508 else 1509 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; 1510 if (rlim64_is_infinity(rlim64->rlim_max)) 1511 rlim->rlim_max = RLIM_INFINITY; 1512 else 1513 rlim->rlim_max = (unsigned long)rlim64->rlim_max; 1514 } 1515 1516 /* make sure you are allowed to change @tsk limits before calling this */ 1517 int do_prlimit(struct task_struct *tsk, unsigned int resource, 1518 struct rlimit *new_rlim, struct rlimit *old_rlim) 1519 { 1520 struct rlimit *rlim; 1521 int retval = 0; 1522 1523 if (resource >= RLIM_NLIMITS) 1524 return -EINVAL; 1525 if (new_rlim) { 1526 if (new_rlim->rlim_cur > new_rlim->rlim_max) 1527 return -EINVAL; 1528 if (resource == RLIMIT_NOFILE && 1529 new_rlim->rlim_max > sysctl_nr_open) 1530 return -EPERM; 1531 } 1532 1533 /* protect tsk->signal and tsk->sighand from disappearing */ 1534 read_lock(&tasklist_lock); 1535 if (!tsk->sighand) { 1536 retval = -ESRCH; 1537 goto out; 1538 } 1539 1540 rlim = tsk->signal->rlim + resource; 1541 task_lock(tsk->group_leader); 1542 if (new_rlim) { 1543 /* Keep the capable check against init_user_ns until 1544 cgroups can contain all limits */ 1545 if (new_rlim->rlim_max > rlim->rlim_max && 1546 !capable(CAP_SYS_RESOURCE)) 1547 retval = -EPERM; 1548 if (!retval) 1549 retval = security_task_setrlimit(tsk->group_leader, 1550 resource, new_rlim); 1551 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { 1552 /* 1553 * The caller is asking for an immediate RLIMIT_CPU 1554 * expiry. But we use the zero value to mean "it was 1555 * never set". So let's cheat and make it one second 1556 * instead 1557 */ 1558 new_rlim->rlim_cur = 1; 1559 } 1560 } 1561 if (!retval) { 1562 if (old_rlim) 1563 *old_rlim = *rlim; 1564 if (new_rlim) 1565 *rlim = *new_rlim; 1566 } 1567 task_unlock(tsk->group_leader); 1568 1569 /* 1570 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1571 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a 1572 * very long-standing error, and fixing it now risks breakage of 1573 * applications, so we live with it 1574 */ 1575 if (!retval && new_rlim && resource == RLIMIT_CPU && 1576 new_rlim->rlim_cur != RLIM_INFINITY) 1577 update_rlimit_cpu(tsk, new_rlim->rlim_cur); 1578 out: 1579 read_unlock(&tasklist_lock); 1580 return retval; 1581 } 1582 1583 /* rcu lock must be held */ 1584 static int check_prlimit_permission(struct task_struct *task) 1585 { 1586 const struct cred *cred = current_cred(), *tcred; 1587 1588 if (current == task) 1589 return 0; 1590 1591 tcred = __task_cred(task); 1592 if (uid_eq(cred->uid, tcred->euid) && 1593 uid_eq(cred->uid, tcred->suid) && 1594 uid_eq(cred->uid, tcred->uid) && 1595 gid_eq(cred->gid, tcred->egid) && 1596 gid_eq(cred->gid, tcred->sgid) && 1597 gid_eq(cred->gid, tcred->gid)) 1598 return 0; 1599 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) 1600 return 0; 1601 1602 return -EPERM; 1603 } 1604 1605 SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1606 const struct rlimit64 __user *, new_rlim, 1607 struct rlimit64 __user *, old_rlim) 1608 { 1609 struct rlimit64 old64, new64; 1610 struct rlimit old, new; 1611 struct task_struct *tsk; 1612 int ret; 1613 1614 if (new_rlim) { 1615 if (copy_from_user(&new64, new_rlim, sizeof(new64))) 1616 return -EFAULT; 1617 rlim64_to_rlim(&new64, &new); 1618 } 1619 1620 rcu_read_lock(); 1621 tsk = pid ? find_task_by_vpid(pid) : current; 1622 if (!tsk) { 1623 rcu_read_unlock(); 1624 return -ESRCH; 1625 } 1626 ret = check_prlimit_permission(tsk); 1627 if (ret) { 1628 rcu_read_unlock(); 1629 return ret; 1630 } 1631 get_task_struct(tsk); 1632 rcu_read_unlock(); 1633 1634 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, 1635 old_rlim ? &old : NULL); 1636 1637 if (!ret && old_rlim) { 1638 rlim_to_rlim64(&old, &old64); 1639 if (copy_to_user(old_rlim, &old64, sizeof(old64))) 1640 ret = -EFAULT; 1641 } 1642 1643 put_task_struct(tsk); 1644 return ret; 1645 } 1646 1647 SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1648 { 1649 struct rlimit new_rlim; 1650 1651 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1652 return -EFAULT; 1653 return do_prlimit(current, resource, &new_rlim, NULL); 1654 } 1655 1656 /* 1657 * It would make sense to put struct rusage in the task_struct, 1658 * except that would make the task_struct be *really big*. After 1659 * task_struct gets moved into malloc'ed memory, it would 1660 * make sense to do this. It will make moving the rest of the information 1661 * a lot simpler! (Which we're not doing right now because we're not 1662 * measuring them yet). 1663 * 1664 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1665 * races with threads incrementing their own counters. But since word 1666 * reads are atomic, we either get new values or old values and we don't 1667 * care which for the sums. We always take the siglock to protect reading 1668 * the c* fields from p->signal from races with exit.c updating those 1669 * fields when reaping, so a sample either gets all the additions of a 1670 * given child after it's reaped, or none so this sample is before reaping. 1671 * 1672 * Locking: 1673 * We need to take the siglock for CHILDEREN, SELF and BOTH 1674 * for the cases current multithreaded, non-current single threaded 1675 * non-current multithreaded. Thread traversal is now safe with 1676 * the siglock held. 1677 * Strictly speaking, we donot need to take the siglock if we are current and 1678 * single threaded, as no one else can take our signal_struct away, no one 1679 * else can reap the children to update signal->c* counters, and no one else 1680 * can race with the signal-> fields. If we do not take any lock, the 1681 * signal-> fields could be read out of order while another thread was just 1682 * exiting. So we should place a read memory barrier when we avoid the lock. 1683 * On the writer side, write memory barrier is implied in __exit_signal 1684 * as __exit_signal releases the siglock spinlock after updating the signal-> 1685 * fields. But we don't do this yet to keep things simple. 1686 * 1687 */ 1688 1689 static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) 1690 { 1691 r->ru_nvcsw += t->nvcsw; 1692 r->ru_nivcsw += t->nivcsw; 1693 r->ru_minflt += t->min_flt; 1694 r->ru_majflt += t->maj_flt; 1695 r->ru_inblock += task_io_get_inblock(t); 1696 r->ru_oublock += task_io_get_oublock(t); 1697 } 1698 1699 static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 1700 { 1701 struct task_struct *t; 1702 unsigned long flags; 1703 cputime_t tgutime, tgstime, utime, stime; 1704 unsigned long maxrss = 0; 1705 1706 memset((char *) r, 0, sizeof *r); 1707 utime = stime = 0; 1708 1709 if (who == RUSAGE_THREAD) { 1710 task_cputime_adjusted(current, &utime, &stime); 1711 accumulate_thread_rusage(p, r); 1712 maxrss = p->signal->maxrss; 1713 goto out; 1714 } 1715 1716 if (!lock_task_sighand(p, &flags)) 1717 return; 1718 1719 switch (who) { 1720 case RUSAGE_BOTH: 1721 case RUSAGE_CHILDREN: 1722 utime = p->signal->cutime; 1723 stime = p->signal->cstime; 1724 r->ru_nvcsw = p->signal->cnvcsw; 1725 r->ru_nivcsw = p->signal->cnivcsw; 1726 r->ru_minflt = p->signal->cmin_flt; 1727 r->ru_majflt = p->signal->cmaj_flt; 1728 r->ru_inblock = p->signal->cinblock; 1729 r->ru_oublock = p->signal->coublock; 1730 maxrss = p->signal->cmaxrss; 1731 1732 if (who == RUSAGE_CHILDREN) 1733 break; 1734 1735 case RUSAGE_SELF: 1736 thread_group_cputime_adjusted(p, &tgutime, &tgstime); 1737 utime += tgutime; 1738 stime += tgstime; 1739 r->ru_nvcsw += p->signal->nvcsw; 1740 r->ru_nivcsw += p->signal->nivcsw; 1741 r->ru_minflt += p->signal->min_flt; 1742 r->ru_majflt += p->signal->maj_flt; 1743 r->ru_inblock += p->signal->inblock; 1744 r->ru_oublock += p->signal->oublock; 1745 if (maxrss < p->signal->maxrss) 1746 maxrss = p->signal->maxrss; 1747 t = p; 1748 do { 1749 accumulate_thread_rusage(t, r); 1750 t = next_thread(t); 1751 } while (t != p); 1752 break; 1753 1754 default: 1755 BUG(); 1756 } 1757 unlock_task_sighand(p, &flags); 1758 1759 out: 1760 cputime_to_timeval(utime, &r->ru_utime); 1761 cputime_to_timeval(stime, &r->ru_stime); 1762 1763 if (who != RUSAGE_CHILDREN) { 1764 struct mm_struct *mm = get_task_mm(p); 1765 if (mm) { 1766 setmax_mm_hiwater_rss(&maxrss, mm); 1767 mmput(mm); 1768 } 1769 } 1770 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ 1771 } 1772 1773 int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 1774 { 1775 struct rusage r; 1776 k_getrusage(p, who, &r); 1777 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 1778 } 1779 1780 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) 1781 { 1782 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1783 who != RUSAGE_THREAD) 1784 return -EINVAL; 1785 return getrusage(current, who, ru); 1786 } 1787 1788 SYSCALL_DEFINE1(umask, int, mask) 1789 { 1790 mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); 1791 return mask; 1792 } 1793 1794 #ifdef CONFIG_CHECKPOINT_RESTORE 1795 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1796 { 1797 struct fd exe; 1798 struct inode *inode; 1799 int err; 1800 1801 exe = fdget(fd); 1802 if (!exe.file) 1803 return -EBADF; 1804 1805 inode = file_inode(exe.file); 1806 1807 /* 1808 * Because the original mm->exe_file points to executable file, make 1809 * sure that this one is executable as well, to avoid breaking an 1810 * overall picture. 1811 */ 1812 err = -EACCES; 1813 if (!S_ISREG(inode->i_mode) || 1814 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1815 goto exit; 1816 1817 err = inode_permission(inode, MAY_EXEC); 1818 if (err) 1819 goto exit; 1820 1821 down_write(&mm->mmap_sem); 1822 1823 /* 1824 * Forbid mm->exe_file change if old file still mapped. 1825 */ 1826 err = -EBUSY; 1827 if (mm->exe_file) { 1828 struct vm_area_struct *vma; 1829 1830 for (vma = mm->mmap; vma; vma = vma->vm_next) 1831 if (vma->vm_file && 1832 path_equal(&vma->vm_file->f_path, 1833 &mm->exe_file->f_path)) 1834 goto exit_unlock; 1835 } 1836 1837 /* 1838 * The symlink can be changed only once, just to disallow arbitrary 1839 * transitions malicious software might bring in. This means one 1840 * could make a snapshot over all processes running and monitor 1841 * /proc/pid/exe changes to notice unusual activity if needed. 1842 */ 1843 err = -EPERM; 1844 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) 1845 goto exit_unlock; 1846 1847 err = 0; 1848 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ 1849 exit_unlock: 1850 up_write(&mm->mmap_sem); 1851 1852 exit: 1853 fdput(exe); 1854 return err; 1855 } 1856 1857 static int prctl_set_mm(int opt, unsigned long addr, 1858 unsigned long arg4, unsigned long arg5) 1859 { 1860 unsigned long rlim = rlimit(RLIMIT_DATA); 1861 struct mm_struct *mm = current->mm; 1862 struct vm_area_struct *vma; 1863 int error; 1864 1865 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) 1866 return -EINVAL; 1867 1868 if (!capable(CAP_SYS_RESOURCE)) 1869 return -EPERM; 1870 1871 if (opt == PR_SET_MM_EXE_FILE) 1872 return prctl_set_mm_exe_file(mm, (unsigned int)addr); 1873 1874 if (addr >= TASK_SIZE || addr < mmap_min_addr) 1875 return -EINVAL; 1876 1877 error = -EINVAL; 1878 1879 down_read(&mm->mmap_sem); 1880 vma = find_vma(mm, addr); 1881 1882 switch (opt) { 1883 case PR_SET_MM_START_CODE: 1884 mm->start_code = addr; 1885 break; 1886 case PR_SET_MM_END_CODE: 1887 mm->end_code = addr; 1888 break; 1889 case PR_SET_MM_START_DATA: 1890 mm->start_data = addr; 1891 break; 1892 case PR_SET_MM_END_DATA: 1893 mm->end_data = addr; 1894 break; 1895 1896 case PR_SET_MM_START_BRK: 1897 if (addr <= mm->end_data) 1898 goto out; 1899 1900 if (rlim < RLIM_INFINITY && 1901 (mm->brk - addr) + 1902 (mm->end_data - mm->start_data) > rlim) 1903 goto out; 1904 1905 mm->start_brk = addr; 1906 break; 1907 1908 case PR_SET_MM_BRK: 1909 if (addr <= mm->end_data) 1910 goto out; 1911 1912 if (rlim < RLIM_INFINITY && 1913 (addr - mm->start_brk) + 1914 (mm->end_data - mm->start_data) > rlim) 1915 goto out; 1916 1917 mm->brk = addr; 1918 break; 1919 1920 /* 1921 * If command line arguments and environment 1922 * are placed somewhere else on stack, we can 1923 * set them up here, ARG_START/END to setup 1924 * command line argumets and ENV_START/END 1925 * for environment. 1926 */ 1927 case PR_SET_MM_START_STACK: 1928 case PR_SET_MM_ARG_START: 1929 case PR_SET_MM_ARG_END: 1930 case PR_SET_MM_ENV_START: 1931 case PR_SET_MM_ENV_END: 1932 if (!vma) { 1933 error = -EFAULT; 1934 goto out; 1935 } 1936 if (opt == PR_SET_MM_START_STACK) 1937 mm->start_stack = addr; 1938 else if (opt == PR_SET_MM_ARG_START) 1939 mm->arg_start = addr; 1940 else if (opt == PR_SET_MM_ARG_END) 1941 mm->arg_end = addr; 1942 else if (opt == PR_SET_MM_ENV_START) 1943 mm->env_start = addr; 1944 else if (opt == PR_SET_MM_ENV_END) 1945 mm->env_end = addr; 1946 break; 1947 1948 /* 1949 * This doesn't move auxiliary vector itself 1950 * since it's pinned to mm_struct, but allow 1951 * to fill vector with new values. It's up 1952 * to a caller to provide sane values here 1953 * otherwise user space tools which use this 1954 * vector might be unhappy. 1955 */ 1956 case PR_SET_MM_AUXV: { 1957 unsigned long user_auxv[AT_VECTOR_SIZE]; 1958 1959 if (arg4 > sizeof(user_auxv)) 1960 goto out; 1961 up_read(&mm->mmap_sem); 1962 1963 if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) 1964 return -EFAULT; 1965 1966 /* Make sure the last entry is always AT_NULL */ 1967 user_auxv[AT_VECTOR_SIZE - 2] = 0; 1968 user_auxv[AT_VECTOR_SIZE - 1] = 0; 1969 1970 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); 1971 1972 task_lock(current); 1973 memcpy(mm->saved_auxv, user_auxv, arg4); 1974 task_unlock(current); 1975 1976 return 0; 1977 } 1978 default: 1979 goto out; 1980 } 1981 1982 error = 0; 1983 out: 1984 up_read(&mm->mmap_sem); 1985 return error; 1986 } 1987 1988 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 1989 { 1990 return put_user(me->clear_child_tid, tid_addr); 1991 } 1992 1993 #else /* CONFIG_CHECKPOINT_RESTORE */ 1994 static int prctl_set_mm(int opt, unsigned long addr, 1995 unsigned long arg4, unsigned long arg5) 1996 { 1997 return -EINVAL; 1998 } 1999 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 2000 { 2001 return -EINVAL; 2002 } 2003 #endif 2004 2005 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 2006 unsigned long, arg4, unsigned long, arg5) 2007 { 2008 struct task_struct *me = current; 2009 unsigned char comm[sizeof(me->comm)]; 2010 long error; 2011 2012 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 2013 if (error != -ENOSYS) 2014 return error; 2015 2016 error = 0; 2017 switch (option) { 2018 case PR_SET_PDEATHSIG: 2019 if (!valid_signal(arg2)) { 2020 error = -EINVAL; 2021 break; 2022 } 2023 me->pdeath_signal = arg2; 2024 break; 2025 case PR_GET_PDEATHSIG: 2026 error = put_user(me->pdeath_signal, (int __user *)arg2); 2027 break; 2028 case PR_GET_DUMPABLE: 2029 error = get_dumpable(me->mm); 2030 break; 2031 case PR_SET_DUMPABLE: 2032 if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) { 2033 error = -EINVAL; 2034 break; 2035 } 2036 set_dumpable(me->mm, arg2); 2037 break; 2038 2039 case PR_SET_UNALIGN: 2040 error = SET_UNALIGN_CTL(me, arg2); 2041 break; 2042 case PR_GET_UNALIGN: 2043 error = GET_UNALIGN_CTL(me, arg2); 2044 break; 2045 case PR_SET_FPEMU: 2046 error = SET_FPEMU_CTL(me, arg2); 2047 break; 2048 case PR_GET_FPEMU: 2049 error = GET_FPEMU_CTL(me, arg2); 2050 break; 2051 case PR_SET_FPEXC: 2052 error = SET_FPEXC_CTL(me, arg2); 2053 break; 2054 case PR_GET_FPEXC: 2055 error = GET_FPEXC_CTL(me, arg2); 2056 break; 2057 case PR_GET_TIMING: 2058 error = PR_TIMING_STATISTICAL; 2059 break; 2060 case PR_SET_TIMING: 2061 if (arg2 != PR_TIMING_STATISTICAL) 2062 error = -EINVAL; 2063 break; 2064 case PR_SET_NAME: 2065 comm[sizeof(me->comm) - 1] = 0; 2066 if (strncpy_from_user(comm, (char __user *)arg2, 2067 sizeof(me->comm) - 1) < 0) 2068 return -EFAULT; 2069 set_task_comm(me, comm); 2070 proc_comm_connector(me); 2071 break; 2072 case PR_GET_NAME: 2073 get_task_comm(comm, me); 2074 if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) 2075 return -EFAULT; 2076 break; 2077 case PR_GET_ENDIAN: 2078 error = GET_ENDIAN(me, arg2); 2079 break; 2080 case PR_SET_ENDIAN: 2081 error = SET_ENDIAN(me, arg2); 2082 break; 2083 case PR_GET_SECCOMP: 2084 error = prctl_get_seccomp(); 2085 break; 2086 case PR_SET_SECCOMP: 2087 error = prctl_set_seccomp(arg2, (char __user *)arg3); 2088 break; 2089 case PR_GET_TSC: 2090 error = GET_TSC_CTL(arg2); 2091 break; 2092 case PR_SET_TSC: 2093 error = SET_TSC_CTL(arg2); 2094 break; 2095 case PR_TASK_PERF_EVENTS_DISABLE: 2096 error = perf_event_task_disable(); 2097 break; 2098 case PR_TASK_PERF_EVENTS_ENABLE: 2099 error = perf_event_task_enable(); 2100 break; 2101 case PR_GET_TIMERSLACK: 2102 error = current->timer_slack_ns; 2103 break; 2104 case PR_SET_TIMERSLACK: 2105 if (arg2 <= 0) 2106 current->timer_slack_ns = 2107 current->default_timer_slack_ns; 2108 else 2109 current->timer_slack_ns = arg2; 2110 break; 2111 case PR_MCE_KILL: 2112 if (arg4 | arg5) 2113 return -EINVAL; 2114 switch (arg2) { 2115 case PR_MCE_KILL_CLEAR: 2116 if (arg3 != 0) 2117 return -EINVAL; 2118 current->flags &= ~PF_MCE_PROCESS; 2119 break; 2120 case PR_MCE_KILL_SET: 2121 current->flags |= PF_MCE_PROCESS; 2122 if (arg3 == PR_MCE_KILL_EARLY) 2123 current->flags |= PF_MCE_EARLY; 2124 else if (arg3 == PR_MCE_KILL_LATE) 2125 current->flags &= ~PF_MCE_EARLY; 2126 else if (arg3 == PR_MCE_KILL_DEFAULT) 2127 current->flags &= 2128 ~(PF_MCE_EARLY|PF_MCE_PROCESS); 2129 else 2130 return -EINVAL; 2131 break; 2132 default: 2133 return -EINVAL; 2134 } 2135 break; 2136 case PR_MCE_KILL_GET: 2137 if (arg2 | arg3 | arg4 | arg5) 2138 return -EINVAL; 2139 if (current->flags & PF_MCE_PROCESS) 2140 error = (current->flags & PF_MCE_EARLY) ? 2141 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; 2142 else 2143 error = PR_MCE_KILL_DEFAULT; 2144 break; 2145 case PR_SET_MM: 2146 error = prctl_set_mm(arg2, arg3, arg4, arg5); 2147 break; 2148 case PR_GET_TID_ADDRESS: 2149 error = prctl_get_tid_address(me, (int __user **)arg2); 2150 break; 2151 case PR_SET_CHILD_SUBREAPER: 2152 me->signal->is_child_subreaper = !!arg2; 2153 break; 2154 case PR_GET_CHILD_SUBREAPER: 2155 error = put_user(me->signal->is_child_subreaper, 2156 (int __user *)arg2); 2157 break; 2158 case PR_SET_NO_NEW_PRIVS: 2159 if (arg2 != 1 || arg3 || arg4 || arg5) 2160 return -EINVAL; 2161 2162 current->no_new_privs = 1; 2163 break; 2164 case PR_GET_NO_NEW_PRIVS: 2165 if (arg2 || arg3 || arg4 || arg5) 2166 return -EINVAL; 2167 return current->no_new_privs ? 1 : 0; 2168 default: 2169 error = -EINVAL; 2170 break; 2171 } 2172 return error; 2173 } 2174 2175 SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, 2176 struct getcpu_cache __user *, unused) 2177 { 2178 int err = 0; 2179 int cpu = raw_smp_processor_id(); 2180 if (cpup) 2181 err |= put_user(cpu, cpup); 2182 if (nodep) 2183 err |= put_user(cpu_to_node(cpu), nodep); 2184 return err ? -EFAULT : 0; 2185 } 2186 2187 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 2188 2189 static int __orderly_poweroff(bool force) 2190 { 2191 char **argv; 2192 static char *envp[] = { 2193 "HOME=/", 2194 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", 2195 NULL 2196 }; 2197 int ret; 2198 2199 argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); 2200 if (argv) { 2201 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); 2202 argv_free(argv); 2203 } else { 2204 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", 2205 __func__, poweroff_cmd); 2206 ret = -ENOMEM; 2207 } 2208 2209 if (ret && force) { 2210 printk(KERN_WARNING "Failed to start orderly shutdown: " 2211 "forcing the issue\n"); 2212 /* 2213 * I guess this should try to kick off some daemon to sync and 2214 * poweroff asap. Or not even bother syncing if we're doing an 2215 * emergency shutdown? 2216 */ 2217 emergency_sync(); 2218 kernel_power_off(); 2219 } 2220 2221 return ret; 2222 } 2223 2224 static bool poweroff_force; 2225 2226 static void poweroff_work_func(struct work_struct *work) 2227 { 2228 __orderly_poweroff(poweroff_force); 2229 } 2230 2231 static DECLARE_WORK(poweroff_work, poweroff_work_func); 2232 2233 /** 2234 * orderly_poweroff - Trigger an orderly system poweroff 2235 * @force: force poweroff if command execution fails 2236 * 2237 * This may be called from any context to trigger a system shutdown. 2238 * If the orderly shutdown fails, it will force an immediate shutdown. 2239 */ 2240 int orderly_poweroff(bool force) 2241 { 2242 if (force) /* do not override the pending "true" */ 2243 poweroff_force = true; 2244 schedule_work(&poweroff_work); 2245 return 0; 2246 } 2247 EXPORT_SYMBOL_GPL(orderly_poweroff); 2248