1 /* 2 * linux/kernel/sys.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/module.h> 8 #include <linux/mm.h> 9 #include <linux/utsname.h> 10 #include <linux/mman.h> 11 #include <linux/smp_lock.h> 12 #include <linux/notifier.h> 13 #include <linux/reboot.h> 14 #include <linux/prctl.h> 15 #include <linux/highuid.h> 16 #include <linux/fs.h> 17 #include <linux/kernel.h> 18 #include <linux/kexec.h> 19 #include <linux/workqueue.h> 20 #include <linux/capability.h> 21 #include <linux/device.h> 22 #include <linux/key.h> 23 #include <linux/times.h> 24 #include <linux/posix-timers.h> 25 #include <linux/security.h> 26 #include <linux/dcookies.h> 27 #include <linux/suspend.h> 28 #include <linux/tty.h> 29 #include <linux/signal.h> 30 #include <linux/cn_proc.h> 31 #include <linux/getcpu.h> 32 33 #include <linux/compat.h> 34 #include <linux/syscalls.h> 35 #include <linux/kprobes.h> 36 37 #include <asm/uaccess.h> 38 #include <asm/io.h> 39 #include <asm/unistd.h> 40 41 #ifndef SET_UNALIGN_CTL 42 # define SET_UNALIGN_CTL(a,b) (-EINVAL) 43 #endif 44 #ifndef GET_UNALIGN_CTL 45 # define GET_UNALIGN_CTL(a,b) (-EINVAL) 46 #endif 47 #ifndef SET_FPEMU_CTL 48 # define SET_FPEMU_CTL(a,b) (-EINVAL) 49 #endif 50 #ifndef GET_FPEMU_CTL 51 # define GET_FPEMU_CTL(a,b) (-EINVAL) 52 #endif 53 #ifndef SET_FPEXC_CTL 54 # define SET_FPEXC_CTL(a,b) (-EINVAL) 55 #endif 56 #ifndef GET_FPEXC_CTL 57 # define GET_FPEXC_CTL(a,b) (-EINVAL) 58 #endif 59 #ifndef GET_ENDIAN 60 # define GET_ENDIAN(a,b) (-EINVAL) 61 #endif 62 #ifndef SET_ENDIAN 63 # define SET_ENDIAN(a,b) (-EINVAL) 64 #endif 65 66 /* 67 * this is where the system-wide overflow UID and GID are defined, for 68 * architectures that now have 32-bit UID/GID but didn't in the past 69 */ 70 71 int overflowuid = DEFAULT_OVERFLOWUID; 72 int overflowgid = DEFAULT_OVERFLOWGID; 73 74 #ifdef CONFIG_UID16 75 EXPORT_SYMBOL(overflowuid); 76 EXPORT_SYMBOL(overflowgid); 77 #endif 78 79 /* 80 * the same as above, but for filesystems which can 
only store a 16-bit 81 * UID and GID. as such, this is needed on all architectures 82 */ 83 84 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; 85 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; 86 87 EXPORT_SYMBOL(fs_overflowuid); 88 EXPORT_SYMBOL(fs_overflowgid); 89 90 /* 91 * this indicates whether you can reboot with ctrl-alt-del: the default is yes 92 */ 93 94 int C_A_D = 1; 95 struct pid *cad_pid; 96 EXPORT_SYMBOL(cad_pid); 97 98 /* 99 * Notifier list for kernel code which wants to be called 100 * at shutdown. This is used to stop any idling DMA operations 101 * and the like. 102 */ 103 104 static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); 105 106 /* 107 * Notifier chain core routines. The exported routines below 108 * are layered on top of these, with appropriate locking added. 109 */ 110 111 static int notifier_chain_register(struct notifier_block **nl, 112 struct notifier_block *n) 113 { 114 while ((*nl) != NULL) { 115 if (n->priority > (*nl)->priority) 116 break; 117 nl = &((*nl)->next); 118 } 119 n->next = *nl; 120 rcu_assign_pointer(*nl, n); 121 return 0; 122 } 123 124 static int notifier_chain_unregister(struct notifier_block **nl, 125 struct notifier_block *n) 126 { 127 while ((*nl) != NULL) { 128 if ((*nl) == n) { 129 rcu_assign_pointer(*nl, n->next); 130 return 0; 131 } 132 nl = &((*nl)->next); 133 } 134 return -ENOENT; 135 } 136 137 static int __kprobes notifier_call_chain(struct notifier_block **nl, 138 unsigned long val, void *v) 139 { 140 int ret = NOTIFY_DONE; 141 struct notifier_block *nb, *next_nb; 142 143 nb = rcu_dereference(*nl); 144 while (nb) { 145 next_nb = rcu_dereference(nb->next); 146 ret = nb->notifier_call(nb, val, v); 147 if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) 148 break; 149 nb = next_nb; 150 } 151 return ret; 152 } 153 154 /* 155 * Atomic notifier chain routines. Registration and unregistration 156 * use a spinlock, and call_chain is synchronized by RCU (no locks). 
157 */ 158 159 /** 160 * atomic_notifier_chain_register - Add notifier to an atomic notifier chain 161 * @nh: Pointer to head of the atomic notifier chain 162 * @n: New entry in notifier chain 163 * 164 * Adds a notifier to an atomic notifier chain. 165 * 166 * Currently always returns zero. 167 */ 168 169 int atomic_notifier_chain_register(struct atomic_notifier_head *nh, 170 struct notifier_block *n) 171 { 172 unsigned long flags; 173 int ret; 174 175 spin_lock_irqsave(&nh->lock, flags); 176 ret = notifier_chain_register(&nh->head, n); 177 spin_unlock_irqrestore(&nh->lock, flags); 178 return ret; 179 } 180 181 EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); 182 183 /** 184 * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain 185 * @nh: Pointer to head of the atomic notifier chain 186 * @n: Entry to remove from notifier chain 187 * 188 * Removes a notifier from an atomic notifier chain. 189 * 190 * Returns zero on success or %-ENOENT on failure. 191 */ 192 int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, 193 struct notifier_block *n) 194 { 195 unsigned long flags; 196 int ret; 197 198 spin_lock_irqsave(&nh->lock, flags); 199 ret = notifier_chain_unregister(&nh->head, n); 200 spin_unlock_irqrestore(&nh->lock, flags); 201 synchronize_rcu(); 202 return ret; 203 } 204 205 EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); 206 207 /** 208 * atomic_notifier_call_chain - Call functions in an atomic notifier chain 209 * @nh: Pointer to head of the atomic notifier chain 210 * @val: Value passed unmodified to notifier function 211 * @v: Pointer passed unmodified to notifier function 212 * 213 * Calls each function in a notifier chain in turn. The functions 214 * run in an atomic context, so they must not block. 215 * This routine uses RCU to synchronize with changes to the chain. 
216 * 217 * If the return value of the notifier can be and'ed 218 * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain() 219 * will return immediately, with the return value of 220 * the notifier function which halted execution. 221 * Otherwise the return value is the return value 222 * of the last notifier function called. 223 */ 224 225 int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, 226 unsigned long val, void *v) 227 { 228 int ret; 229 230 rcu_read_lock(); 231 ret = notifier_call_chain(&nh->head, val, v); 232 rcu_read_unlock(); 233 return ret; 234 } 235 236 EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); 237 238 /* 239 * Blocking notifier chain routines. All access to the chain is 240 * synchronized by an rwsem. 241 */ 242 243 /** 244 * blocking_notifier_chain_register - Add notifier to a blocking notifier chain 245 * @nh: Pointer to head of the blocking notifier chain 246 * @n: New entry in notifier chain 247 * 248 * Adds a notifier to a blocking notifier chain. 249 * Must be called in process context. 250 * 251 * Currently always returns zero. 252 */ 253 254 int blocking_notifier_chain_register(struct blocking_notifier_head *nh, 255 struct notifier_block *n) 256 { 257 int ret; 258 259 /* 260 * This code gets used during boot-up, when task switching is 261 * not yet working and interrupts must remain disabled. At 262 * such times we must not call down_write(). 263 */ 264 if (unlikely(system_state == SYSTEM_BOOTING)) 265 return notifier_chain_register(&nh->head, n); 266 267 down_write(&nh->rwsem); 268 ret = notifier_chain_register(&nh->head, n); 269 up_write(&nh->rwsem); 270 return ret; 271 } 272 273 EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); 274 275 /** 276 * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain 277 * @nh: Pointer to head of the blocking notifier chain 278 * @n: Entry to remove from notifier chain 279 * 280 * Removes a notifier from a blocking notifier chain. 
281 * Must be called from process context. 282 * 283 * Returns zero on success or %-ENOENT on failure. 284 */ 285 int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, 286 struct notifier_block *n) 287 { 288 int ret; 289 290 /* 291 * This code gets used during boot-up, when task switching is 292 * not yet working and interrupts must remain disabled. At 293 * such times we must not call down_write(). 294 */ 295 if (unlikely(system_state == SYSTEM_BOOTING)) 296 return notifier_chain_unregister(&nh->head, n); 297 298 down_write(&nh->rwsem); 299 ret = notifier_chain_unregister(&nh->head, n); 300 up_write(&nh->rwsem); 301 return ret; 302 } 303 304 EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); 305 306 /** 307 * blocking_notifier_call_chain - Call functions in a blocking notifier chain 308 * @nh: Pointer to head of the blocking notifier chain 309 * @val: Value passed unmodified to notifier function 310 * @v: Pointer passed unmodified to notifier function 311 * 312 * Calls each function in a notifier chain in turn. The functions 313 * run in a process context, so they are allowed to block. 314 * 315 * If the return value of the notifier can be and'ed 316 * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain() 317 * will return immediately, with the return value of 318 * the notifier function which halted execution. 319 * Otherwise the return value is the return value 320 * of the last notifier function called. 
321 */ 322 323 int blocking_notifier_call_chain(struct blocking_notifier_head *nh, 324 unsigned long val, void *v) 325 { 326 int ret = NOTIFY_DONE; 327 328 /* 329 * We check the head outside the lock, but if this access is 330 * racy then it does not matter what the result of the test 331 * is, we re-check the list after having taken the lock anyway: 332 */ 333 if (rcu_dereference(nh->head)) { 334 down_read(&nh->rwsem); 335 ret = notifier_call_chain(&nh->head, val, v); 336 up_read(&nh->rwsem); 337 } 338 return ret; 339 } 340 341 EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); 342 343 /* 344 * Raw notifier chain routines. There is no protection; 345 * the caller must provide it. Use at your own risk! 346 */ 347 348 /** 349 * raw_notifier_chain_register - Add notifier to a raw notifier chain 350 * @nh: Pointer to head of the raw notifier chain 351 * @n: New entry in notifier chain 352 * 353 * Adds a notifier to a raw notifier chain. 354 * All locking must be provided by the caller. 355 * 356 * Currently always returns zero. 357 */ 358 359 int raw_notifier_chain_register(struct raw_notifier_head *nh, 360 struct notifier_block *n) 361 { 362 return notifier_chain_register(&nh->head, n); 363 } 364 365 EXPORT_SYMBOL_GPL(raw_notifier_chain_register); 366 367 /** 368 * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain 369 * @nh: Pointer to head of the raw notifier chain 370 * @n: Entry to remove from notifier chain 371 * 372 * Removes a notifier from a raw notifier chain. 373 * All locking must be provided by the caller. 374 * 375 * Returns zero on success or %-ENOENT on failure. 
376 */ 377 int raw_notifier_chain_unregister(struct raw_notifier_head *nh, 378 struct notifier_block *n) 379 { 380 return notifier_chain_unregister(&nh->head, n); 381 } 382 383 EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); 384 385 /** 386 * raw_notifier_call_chain - Call functions in a raw notifier chain 387 * @nh: Pointer to head of the raw notifier chain 388 * @val: Value passed unmodified to notifier function 389 * @v: Pointer passed unmodified to notifier function 390 * 391 * Calls each function in a notifier chain in turn. The functions 392 * run in an undefined context. 393 * All locking must be provided by the caller. 394 * 395 * If the return value of the notifier can be and'ed 396 * with %NOTIFY_STOP_MASK then raw_notifier_call_chain() 397 * will return immediately, with the return value of 398 * the notifier function which halted execution. 399 * Otherwise the return value is the return value 400 * of the last notifier function called. 401 */ 402 403 int raw_notifier_call_chain(struct raw_notifier_head *nh, 404 unsigned long val, void *v) 405 { 406 return notifier_call_chain(&nh->head, val, v); 407 } 408 409 EXPORT_SYMBOL_GPL(raw_notifier_call_chain); 410 411 /* 412 * SRCU notifier chain routines. Registration and unregistration 413 * use a mutex, and call_chain is synchronized by SRCU (no locks). 414 */ 415 416 /** 417 * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain 418 * @nh: Pointer to head of the SRCU notifier chain 419 * @n: New entry in notifier chain 420 * 421 * Adds a notifier to an SRCU notifier chain. 422 * Must be called in process context. 423 * 424 * Currently always returns zero. 425 */ 426 427 int srcu_notifier_chain_register(struct srcu_notifier_head *nh, 428 struct notifier_block *n) 429 { 430 int ret; 431 432 /* 433 * This code gets used during boot-up, when task switching is 434 * not yet working and interrupts must remain disabled. At 435 * such times we must not call mutex_lock(). 
436 */ 437 if (unlikely(system_state == SYSTEM_BOOTING)) 438 return notifier_chain_register(&nh->head, n); 439 440 mutex_lock(&nh->mutex); 441 ret = notifier_chain_register(&nh->head, n); 442 mutex_unlock(&nh->mutex); 443 return ret; 444 } 445 446 EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); 447 448 /** 449 * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain 450 * @nh: Pointer to head of the SRCU notifier chain 451 * @n: Entry to remove from notifier chain 452 * 453 * Removes a notifier from an SRCU notifier chain. 454 * Must be called from process context. 455 * 456 * Returns zero on success or %-ENOENT on failure. 457 */ 458 int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, 459 struct notifier_block *n) 460 { 461 int ret; 462 463 /* 464 * This code gets used during boot-up, when task switching is 465 * not yet working and interrupts must remain disabled. At 466 * such times we must not call mutex_lock(). 467 */ 468 if (unlikely(system_state == SYSTEM_BOOTING)) 469 return notifier_chain_unregister(&nh->head, n); 470 471 mutex_lock(&nh->mutex); 472 ret = notifier_chain_unregister(&nh->head, n); 473 mutex_unlock(&nh->mutex); 474 synchronize_srcu(&nh->srcu); 475 return ret; 476 } 477 478 EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); 479 480 /** 481 * srcu_notifier_call_chain - Call functions in an SRCU notifier chain 482 * @nh: Pointer to head of the SRCU notifier chain 483 * @val: Value passed unmodified to notifier function 484 * @v: Pointer passed unmodified to notifier function 485 * 486 * Calls each function in a notifier chain in turn. The functions 487 * run in a process context, so they are allowed to block. 488 * 489 * If the return value of the notifier can be and'ed 490 * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain() 491 * will return immediately, with the return value of 492 * the notifier function which halted execution. 
493 * Otherwise the return value is the return value 494 * of the last notifier function called. 495 */ 496 497 int srcu_notifier_call_chain(struct srcu_notifier_head *nh, 498 unsigned long val, void *v) 499 { 500 int ret; 501 int idx; 502 503 idx = srcu_read_lock(&nh->srcu); 504 ret = notifier_call_chain(&nh->head, val, v); 505 srcu_read_unlock(&nh->srcu, idx); 506 return ret; 507 } 508 509 EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); 510 511 /** 512 * srcu_init_notifier_head - Initialize an SRCU notifier head 513 * @nh: Pointer to head of the srcu notifier chain 514 * 515 * Unlike other sorts of notifier heads, SRCU notifier heads require 516 * dynamic initialization. Be sure to call this routine before 517 * calling any of the other SRCU notifier routines for this head. 518 * 519 * If an SRCU notifier head is deallocated, it must first be cleaned 520 * up by calling srcu_cleanup_notifier_head(). Otherwise the head's 521 * per-cpu data (used by the SRCU mechanism) will leak. 522 */ 523 524 void srcu_init_notifier_head(struct srcu_notifier_head *nh) 525 { 526 mutex_init(&nh->mutex); 527 if (init_srcu_struct(&nh->srcu) < 0) 528 BUG(); 529 nh->head = NULL; 530 } 531 532 EXPORT_SYMBOL_GPL(srcu_init_notifier_head); 533 534 /** 535 * register_reboot_notifier - Register function to be called at reboot time 536 * @nb: Info about notifier function to be called 537 * 538 * Registers a function with the list of functions 539 * to be called at reboot time. 540 * 541 * Currently always returns zero, as blocking_notifier_chain_register() 542 * always returns zero. 543 */ 544 545 int register_reboot_notifier(struct notifier_block * nb) 546 { 547 return blocking_notifier_chain_register(&reboot_notifier_list, nb); 548 } 549 550 EXPORT_SYMBOL(register_reboot_notifier); 551 552 /** 553 * unregister_reboot_notifier - Unregister previously registered reboot notifier 554 * @nb: Hook to be unregistered 555 * 556 * Unregisters a previously registered reboot 557 * notifier function. 
558 * 559 * Returns zero on success, or %-ENOENT on failure. 560 */ 561 562 int unregister_reboot_notifier(struct notifier_block * nb) 563 { 564 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); 565 } 566 567 EXPORT_SYMBOL(unregister_reboot_notifier); 568 569 static int set_one_prio(struct task_struct *p, int niceval, int error) 570 { 571 int no_nice; 572 573 if (p->uid != current->euid && 574 p->euid != current->euid && !capable(CAP_SYS_NICE)) { 575 error = -EPERM; 576 goto out; 577 } 578 if (niceval < task_nice(p) && !can_nice(p, niceval)) { 579 error = -EACCES; 580 goto out; 581 } 582 no_nice = security_task_setnice(p, niceval); 583 if (no_nice) { 584 error = no_nice; 585 goto out; 586 } 587 if (error == -ESRCH) 588 error = 0; 589 set_user_nice(p, niceval); 590 out: 591 return error; 592 } 593 594 asmlinkage long sys_setpriority(int which, int who, int niceval) 595 { 596 struct task_struct *g, *p; 597 struct user_struct *user; 598 int error = -EINVAL; 599 struct pid *pgrp; 600 601 if (which > 2 || which < 0) 602 goto out; 603 604 /* normalize: avoid signed division (rounding problems) */ 605 error = -ESRCH; 606 if (niceval < -20) 607 niceval = -20; 608 if (niceval > 19) 609 niceval = 19; 610 611 read_lock(&tasklist_lock); 612 switch (which) { 613 case PRIO_PROCESS: 614 if (who) 615 p = find_task_by_pid(who); 616 else 617 p = current; 618 if (p) 619 error = set_one_prio(p, niceval, error); 620 break; 621 case PRIO_PGRP: 622 if (who) 623 pgrp = find_pid(who); 624 else 625 pgrp = task_pgrp(current); 626 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 627 error = set_one_prio(p, niceval, error); 628 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 629 break; 630 case PRIO_USER: 631 user = current->user; 632 if (!who) 633 who = current->uid; 634 else 635 if ((who != current->uid) && !(user = find_user(who))) 636 goto out_unlock; /* No processes for this user */ 637 638 do_each_thread(g, p) 639 if (p->uid == who) 640 error = set_one_prio(p, niceval, error); 
641 while_each_thread(g, p); 642 if (who != current->uid) 643 free_uid(user); /* For find_user() */ 644 break; 645 } 646 out_unlock: 647 read_unlock(&tasklist_lock); 648 out: 649 return error; 650 } 651 652 /* 653 * Ugh. To avoid negative return values, "getpriority()" will 654 * not return the normal nice-value, but a negated value that 655 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 656 * to stay compatible. 657 */ 658 asmlinkage long sys_getpriority(int which, int who) 659 { 660 struct task_struct *g, *p; 661 struct user_struct *user; 662 long niceval, retval = -ESRCH; 663 struct pid *pgrp; 664 665 if (which > 2 || which < 0) 666 return -EINVAL; 667 668 read_lock(&tasklist_lock); 669 switch (which) { 670 case PRIO_PROCESS: 671 if (who) 672 p = find_task_by_pid(who); 673 else 674 p = current; 675 if (p) { 676 niceval = 20 - task_nice(p); 677 if (niceval > retval) 678 retval = niceval; 679 } 680 break; 681 case PRIO_PGRP: 682 if (who) 683 pgrp = find_pid(who); 684 else 685 pgrp = task_pgrp(current); 686 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 687 niceval = 20 - task_nice(p); 688 if (niceval > retval) 689 retval = niceval; 690 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 691 break; 692 case PRIO_USER: 693 user = current->user; 694 if (!who) 695 who = current->uid; 696 else 697 if ((who != current->uid) && !(user = find_user(who))) 698 goto out_unlock; /* No processes for this user */ 699 700 do_each_thread(g, p) 701 if (p->uid == who) { 702 niceval = 20 - task_nice(p); 703 if (niceval > retval) 704 retval = niceval; 705 } 706 while_each_thread(g, p); 707 if (who != current->uid) 708 free_uid(user); /* for find_user() */ 709 break; 710 } 711 out_unlock: 712 read_unlock(&tasklist_lock); 713 714 return retval; 715 } 716 717 /** 718 * emergency_restart - reboot the system 719 * 720 * Without shutting down any hardware or taking any locks 721 * reboot the system. 
This is called when we know we are in 722 * trouble so this is our best effort to reboot. This is 723 * safe to call in interrupt context. 724 */ 725 void emergency_restart(void) 726 { 727 machine_emergency_restart(); 728 } 729 EXPORT_SYMBOL_GPL(emergency_restart); 730 731 static void kernel_restart_prepare(char *cmd) 732 { 733 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 734 system_state = SYSTEM_RESTART; 735 device_shutdown(); 736 } 737 738 /** 739 * kernel_restart - reboot the system 740 * @cmd: pointer to buffer containing command to execute for restart 741 * or %NULL 742 * 743 * Shutdown everything and perform a clean reboot. 744 * This is not safe to call in interrupt context. 745 */ 746 void kernel_restart(char *cmd) 747 { 748 kernel_restart_prepare(cmd); 749 if (!cmd) 750 printk(KERN_EMERG "Restarting system.\n"); 751 else 752 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 753 machine_restart(cmd); 754 } 755 EXPORT_SYMBOL_GPL(kernel_restart); 756 757 /** 758 * kernel_kexec - reboot the system 759 * 760 * Move into place and start executing a preloaded standalone 761 * executable. If nothing was preloaded return an error. 762 */ 763 static void kernel_kexec(void) 764 { 765 #ifdef CONFIG_KEXEC 766 struct kimage *image; 767 image = xchg(&kexec_image, NULL); 768 if (!image) 769 return; 770 kernel_restart_prepare(NULL); 771 printk(KERN_EMERG "Starting new kernel\n"); 772 machine_shutdown(); 773 machine_kexec(image); 774 #endif 775 } 776 777 void kernel_shutdown_prepare(enum system_states state) 778 { 779 blocking_notifier_call_chain(&reboot_notifier_list, 780 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 781 system_state = state; 782 device_shutdown(); 783 } 784 /** 785 * kernel_halt - halt the system 786 * 787 * Shutdown everything and perform a clean system halt. 
788 */ 789 void kernel_halt(void) 790 { 791 kernel_shutdown_prepare(SYSTEM_HALT); 792 printk(KERN_EMERG "System halted.\n"); 793 machine_halt(); 794 } 795 796 EXPORT_SYMBOL_GPL(kernel_halt); 797 798 /** 799 * kernel_power_off - power_off the system 800 * 801 * Shutdown everything and perform a clean system power_off. 802 */ 803 void kernel_power_off(void) 804 { 805 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 806 printk(KERN_EMERG "Power down.\n"); 807 machine_power_off(); 808 } 809 EXPORT_SYMBOL_GPL(kernel_power_off); 810 /* 811 * Reboot system call: for obvious reasons only root may call it, 812 * and even root needs to set up some magic numbers in the registers 813 * so that some mistake won't make this reboot the whole machine. 814 * You can also set the meaning of the ctrl-alt-del-key here. 815 * 816 * reboot doesn't sync: do that yourself before calling this. 817 */ 818 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) 819 { 820 char buffer[256]; 821 822 /* We only trust the superuser with rebooting the system. */ 823 if (!capable(CAP_SYS_BOOT)) 824 return -EPERM; 825 826 /* For safety, we require "magic" arguments. */ 827 if (magic1 != LINUX_REBOOT_MAGIC1 || 828 (magic2 != LINUX_REBOOT_MAGIC2 && 829 magic2 != LINUX_REBOOT_MAGIC2A && 830 magic2 != LINUX_REBOOT_MAGIC2B && 831 magic2 != LINUX_REBOOT_MAGIC2C)) 832 return -EINVAL; 833 834 /* Instead of trying to make the power_off code look like 835 * halt when pm_power_off is not set do it the easy way. 
836 */ 837 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) 838 cmd = LINUX_REBOOT_CMD_HALT; 839 840 lock_kernel(); 841 switch (cmd) { 842 case LINUX_REBOOT_CMD_RESTART: 843 kernel_restart(NULL); 844 break; 845 846 case LINUX_REBOOT_CMD_CAD_ON: 847 C_A_D = 1; 848 break; 849 850 case LINUX_REBOOT_CMD_CAD_OFF: 851 C_A_D = 0; 852 break; 853 854 case LINUX_REBOOT_CMD_HALT: 855 kernel_halt(); 856 unlock_kernel(); 857 do_exit(0); 858 break; 859 860 case LINUX_REBOOT_CMD_POWER_OFF: 861 kernel_power_off(); 862 unlock_kernel(); 863 do_exit(0); 864 break; 865 866 case LINUX_REBOOT_CMD_RESTART2: 867 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { 868 unlock_kernel(); 869 return -EFAULT; 870 } 871 buffer[sizeof(buffer) - 1] = '\0'; 872 873 kernel_restart(buffer); 874 break; 875 876 case LINUX_REBOOT_CMD_KEXEC: 877 kernel_kexec(); 878 unlock_kernel(); 879 return -EINVAL; 880 881 #ifdef CONFIG_SOFTWARE_SUSPEND 882 case LINUX_REBOOT_CMD_SW_SUSPEND: 883 { 884 int ret = software_suspend(); 885 unlock_kernel(); 886 return ret; 887 } 888 #endif 889 890 default: 891 unlock_kernel(); 892 return -EINVAL; 893 } 894 unlock_kernel(); 895 return 0; 896 } 897 898 static void deferred_cad(struct work_struct *dummy) 899 { 900 kernel_restart(NULL); 901 } 902 903 /* 904 * This function gets called by ctrl-alt-del - ie the keyboard interrupt. 905 * As it's called within an interrupt, it may NOT sync: the only choice 906 * is whether to reboot at once, or just ignore the ctrl-alt-del. 907 */ 908 void ctrl_alt_del(void) 909 { 910 static DECLARE_WORK(cad_work, deferred_cad); 911 912 if (C_A_D) 913 schedule_work(&cad_work); 914 else 915 kill_cad_pid(SIGINT, 1); 916 } 917 918 /* 919 * Unprivileged users may change the real gid to the effective gid 920 * or vice versa. (BSD-style) 921 * 922 * If you set the real gid at all, or set the effective gid to a value not 923 * equal to the real gid, then the saved gid is set to the new effective gid. 
924 * 925 * This makes it possible for a setgid program to completely drop its 926 * privileges, which is often a useful assertion to make when you are doing 927 * a security audit over a program. 928 * 929 * The general idea is that a program which uses just setregid() will be 930 * 100% compatible with BSD. A program which uses just setgid() will be 931 * 100% compatible with POSIX with saved IDs. 932 * 933 * SMP: There are not races, the GIDs are checked only by filesystem 934 * operations (as far as semantic preservation is concerned). 935 */ 936 asmlinkage long sys_setregid(gid_t rgid, gid_t egid) 937 { 938 int old_rgid = current->gid; 939 int old_egid = current->egid; 940 int new_rgid = old_rgid; 941 int new_egid = old_egid; 942 int retval; 943 944 retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE); 945 if (retval) 946 return retval; 947 948 if (rgid != (gid_t) -1) { 949 if ((old_rgid == rgid) || 950 (current->egid==rgid) || 951 capable(CAP_SETGID)) 952 new_rgid = rgid; 953 else 954 return -EPERM; 955 } 956 if (egid != (gid_t) -1) { 957 if ((old_rgid == egid) || 958 (current->egid == egid) || 959 (current->sgid == egid) || 960 capable(CAP_SETGID)) 961 new_egid = egid; 962 else 963 return -EPERM; 964 } 965 if (new_egid != old_egid) { 966 current->mm->dumpable = suid_dumpable; 967 smp_wmb(); 968 } 969 if (rgid != (gid_t) -1 || 970 (egid != (gid_t) -1 && egid != old_rgid)) 971 current->sgid = new_egid; 972 current->fsgid = new_egid; 973 current->egid = new_egid; 974 current->gid = new_rgid; 975 key_fsgid_changed(current); 976 proc_id_connector(current, PROC_EVENT_GID); 977 return 0; 978 } 979 980 /* 981 * setgid() is implemented like SysV w/ SAVED_IDS 982 * 983 * SMP: Same implicit races as above. 
984 */ 985 asmlinkage long sys_setgid(gid_t gid) 986 { 987 int old_egid = current->egid; 988 int retval; 989 990 retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); 991 if (retval) 992 return retval; 993 994 if (capable(CAP_SETGID)) { 995 if (old_egid != gid) { 996 current->mm->dumpable = suid_dumpable; 997 smp_wmb(); 998 } 999 current->gid = current->egid = current->sgid = current->fsgid = gid; 1000 } else if ((gid == current->gid) || (gid == current->sgid)) { 1001 if (old_egid != gid) { 1002 current->mm->dumpable = suid_dumpable; 1003 smp_wmb(); 1004 } 1005 current->egid = current->fsgid = gid; 1006 } 1007 else 1008 return -EPERM; 1009 1010 key_fsgid_changed(current); 1011 proc_id_connector(current, PROC_EVENT_GID); 1012 return 0; 1013 } 1014 1015 static int set_user(uid_t new_ruid, int dumpclear) 1016 { 1017 struct user_struct *new_user; 1018 1019 new_user = alloc_uid(new_ruid); 1020 if (!new_user) 1021 return -EAGAIN; 1022 1023 if (atomic_read(&new_user->processes) >= 1024 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 1025 new_user != &root_user) { 1026 free_uid(new_user); 1027 return -EAGAIN; 1028 } 1029 1030 switch_uid(new_user); 1031 1032 if (dumpclear) { 1033 current->mm->dumpable = suid_dumpable; 1034 smp_wmb(); 1035 } 1036 current->uid = new_ruid; 1037 return 0; 1038 } 1039 1040 /* 1041 * Unprivileged users may change the real uid to the effective uid 1042 * or vice versa. (BSD-style) 1043 * 1044 * If you set the real uid at all, or set the effective uid to a value not 1045 * equal to the real uid, then the saved uid is set to the new effective uid. 1046 * 1047 * This makes it possible for a setuid program to completely drop its 1048 * privileges, which is often a useful assertion to make when you are doing 1049 * a security audit over a program. 1050 * 1051 * The general idea is that a program which uses just setreuid() will be 1052 * 100% compatible with BSD. 
A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
	int prev_ruid, prev_euid, prev_suid;
	int next_ruid, next_euid;
	int retval;

	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
	if (retval)
		return retval;

	/* Snapshot the ids; the post-setuid hook wants the old values. */
	prev_ruid = current->uid;
	prev_euid = current->euid;
	prev_suid = current->suid;
	next_ruid = prev_ruid;
	next_euid = prev_euid;

	/* (uid_t) -1 means "leave this id alone". */
	if (ruid != (uid_t) -1) {
		next_ruid = ruid;
		/* New real uid must be the old real or effective uid. */
		if (prev_ruid != ruid &&
		    current->euid != ruid &&
		    !capable(CAP_SETUID))
			return -EPERM;
	}

	if (euid != (uid_t) -1) {
		next_euid = euid;
		/* New effective uid must be the old real, effective or saved uid. */
		if (prev_ruid != euid &&
		    current->euid != euid &&
		    current->suid != euid &&
		    !capable(CAP_SETUID))
			return -EPERM;
	}

	/* set_user() is only needed when the real uid actually changes. */
	if (next_ruid != prev_ruid &&
	    set_user(next_ruid, next_euid != prev_euid) < 0)
		return -EAGAIN;

	/* A change of effective uid resets the dumpable setting. */
	if (next_euid != prev_euid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->euid = next_euid;
	current->fsuid = next_euid;

	/* Saved uid follows the rule stated in the comment above. */
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != prev_ruid))
		current->suid = current->euid;
	current->fsuid = current->euid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(prev_ruid, prev_euid, prev_suid,
					 LSM_SETID_RE);
}



/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.
Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
asmlinkage long sys_setuid(uid_t uid)
{
	int prev_euid = current->euid;
	int prev_ruid, prev_suid;
	int next_suid;
	int retval;

	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
	if (retval)
		return retval;

	prev_ruid = current->uid;
	prev_suid = current->suid;
	next_suid = prev_suid;

	if (capable(CAP_SETUID)) {
		/* Privileged: the real uid (when different) and saved uid change too. */
		if (uid != prev_ruid && set_user(uid, prev_euid != uid) < 0)
			return -EAGAIN;
		next_suid = uid;
	} else if (uid != current->uid && uid != next_suid) {
		/* Unprivileged: only the current real or saved uid is allowed. */
		return -EPERM;
	}

	/* A change of effective uid resets the dumpable setting. */
	if (prev_euid != uid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->euid = uid;
	current->fsuid = uid;
	current->suid = next_suid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(prev_ruid, prev_euid, prev_suid,
					 LSM_SETID_ID);
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
1155 */ 1156 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) 1157 { 1158 int old_ruid = current->uid; 1159 int old_euid = current->euid; 1160 int old_suid = current->suid; 1161 int retval; 1162 1163 retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); 1164 if (retval) 1165 return retval; 1166 1167 if (!capable(CAP_SETUID)) { 1168 if ((ruid != (uid_t) -1) && (ruid != current->uid) && 1169 (ruid != current->euid) && (ruid != current->suid)) 1170 return -EPERM; 1171 if ((euid != (uid_t) -1) && (euid != current->uid) && 1172 (euid != current->euid) && (euid != current->suid)) 1173 return -EPERM; 1174 if ((suid != (uid_t) -1) && (suid != current->uid) && 1175 (suid != current->euid) && (suid != current->suid)) 1176 return -EPERM; 1177 } 1178 if (ruid != (uid_t) -1) { 1179 if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) 1180 return -EAGAIN; 1181 } 1182 if (euid != (uid_t) -1) { 1183 if (euid != current->euid) { 1184 current->mm->dumpable = suid_dumpable; 1185 smp_wmb(); 1186 } 1187 current->euid = euid; 1188 } 1189 current->fsuid = current->euid; 1190 if (suid != (uid_t) -1) 1191 current->suid = suid; 1192 1193 key_fsuid_changed(current); 1194 proc_id_connector(current, PROC_EVENT_UID); 1195 1196 return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); 1197 } 1198 1199 asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) 1200 { 1201 int retval; 1202 1203 if (!(retval = put_user(current->uid, ruid)) && 1204 !(retval = put_user(current->euid, euid))) 1205 retval = put_user(current->suid, suid); 1206 1207 return retval; 1208 } 1209 1210 /* 1211 * Same as above, but for rgid, egid, sgid. 
1212 */ 1213 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) 1214 { 1215 int retval; 1216 1217 retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); 1218 if (retval) 1219 return retval; 1220 1221 if (!capable(CAP_SETGID)) { 1222 if ((rgid != (gid_t) -1) && (rgid != current->gid) && 1223 (rgid != current->egid) && (rgid != current->sgid)) 1224 return -EPERM; 1225 if ((egid != (gid_t) -1) && (egid != current->gid) && 1226 (egid != current->egid) && (egid != current->sgid)) 1227 return -EPERM; 1228 if ((sgid != (gid_t) -1) && (sgid != current->gid) && 1229 (sgid != current->egid) && (sgid != current->sgid)) 1230 return -EPERM; 1231 } 1232 if (egid != (gid_t) -1) { 1233 if (egid != current->egid) { 1234 current->mm->dumpable = suid_dumpable; 1235 smp_wmb(); 1236 } 1237 current->egid = egid; 1238 } 1239 current->fsgid = current->egid; 1240 if (rgid != (gid_t) -1) 1241 current->gid = rgid; 1242 if (sgid != (gid_t) -1) 1243 current->sgid = sgid; 1244 1245 key_fsgid_changed(current); 1246 proc_id_connector(current, PROC_EVENT_GID); 1247 return 0; 1248 } 1249 1250 asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) 1251 { 1252 int retval; 1253 1254 if (!(retval = put_user(current->gid, rgid)) && 1255 !(retval = put_user(current->egid, egid))) 1256 retval = put_user(current->sgid, sgid); 1257 1258 return retval; 1259 } 1260 1261 1262 /* 1263 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 1264 * is used for "access()" and for the NFS daemon (letting nfsd stay at 1265 * whatever uid it wants to). It normally shadows "euid", except when 1266 * explicitly set by setfsuid() or for access.. 
1267 */ 1268 asmlinkage long sys_setfsuid(uid_t uid) 1269 { 1270 int old_fsuid; 1271 1272 old_fsuid = current->fsuid; 1273 if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) 1274 return old_fsuid; 1275 1276 if (uid == current->uid || uid == current->euid || 1277 uid == current->suid || uid == current->fsuid || 1278 capable(CAP_SETUID)) { 1279 if (uid != old_fsuid) { 1280 current->mm->dumpable = suid_dumpable; 1281 smp_wmb(); 1282 } 1283 current->fsuid = uid; 1284 } 1285 1286 key_fsuid_changed(current); 1287 proc_id_connector(current, PROC_EVENT_UID); 1288 1289 security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); 1290 1291 return old_fsuid; 1292 } 1293 1294 /* 1295 * Samma p� svenska.. 1296 */ 1297 asmlinkage long sys_setfsgid(gid_t gid) 1298 { 1299 int old_fsgid; 1300 1301 old_fsgid = current->fsgid; 1302 if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) 1303 return old_fsgid; 1304 1305 if (gid == current->gid || gid == current->egid || 1306 gid == current->sgid || gid == current->fsgid || 1307 capable(CAP_SETGID)) { 1308 if (gid != old_fsgid) { 1309 current->mm->dumpable = suid_dumpable; 1310 smp_wmb(); 1311 } 1312 current->fsgid = gid; 1313 key_fsgid_changed(current); 1314 proc_id_connector(current, PROC_EVENT_GID); 1315 } 1316 return old_fsgid; 1317 } 1318 1319 asmlinkage long sys_times(struct tms __user * tbuf) 1320 { 1321 /* 1322 * In the SMP world we might just be unlucky and have one of 1323 * the times increment as we use it. Since the value is an 1324 * atomically safe type this is just fine. Conceptually its 1325 * as if the syscall took an instant longer to occur. 
1326 */ 1327 if (tbuf) { 1328 struct tms tmp; 1329 struct task_struct *tsk = current; 1330 struct task_struct *t; 1331 cputime_t utime, stime, cutime, cstime; 1332 1333 spin_lock_irq(&tsk->sighand->siglock); 1334 utime = tsk->signal->utime; 1335 stime = tsk->signal->stime; 1336 t = tsk; 1337 do { 1338 utime = cputime_add(utime, t->utime); 1339 stime = cputime_add(stime, t->stime); 1340 t = next_thread(t); 1341 } while (t != tsk); 1342 1343 cutime = tsk->signal->cutime; 1344 cstime = tsk->signal->cstime; 1345 spin_unlock_irq(&tsk->sighand->siglock); 1346 1347 tmp.tms_utime = cputime_to_clock_t(utime); 1348 tmp.tms_stime = cputime_to_clock_t(stime); 1349 tmp.tms_cutime = cputime_to_clock_t(cutime); 1350 tmp.tms_cstime = cputime_to_clock_t(cstime); 1351 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 1352 return -EFAULT; 1353 } 1354 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 1355 } 1356 1357 /* 1358 * This needs some heavy checking ... 1359 * I just haven't the stomach for it. I also don't fully 1360 * understand sessions/pgrp etc. Let somebody who does explain it. 1361 * 1362 * OK, I think I have the protection semantics right.... this is really 1363 * only important on a multi-user system anyway, to make sure one user 1364 * can't send a signal to a process owned by another. -TYT, 12/12/91 1365 * 1366 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 1367 * LBT 04.03.94 1368 */ 1369 1370 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) 1371 { 1372 struct task_struct *p; 1373 struct task_struct *group_leader = current->group_leader; 1374 int err = -EINVAL; 1375 1376 if (!pid) 1377 pid = group_leader->pid; 1378 if (!pgid) 1379 pgid = pid; 1380 if (pgid < 0) 1381 return -EINVAL; 1382 1383 /* From this point forward we keep holding onto the tasklist lock 1384 * so that our parent does not change from under us. 
-DaveM 1385 */ 1386 write_lock_irq(&tasklist_lock); 1387 1388 err = -ESRCH; 1389 p = find_task_by_pid(pid); 1390 if (!p) 1391 goto out; 1392 1393 err = -EINVAL; 1394 if (!thread_group_leader(p)) 1395 goto out; 1396 1397 if (p->real_parent == group_leader) { 1398 err = -EPERM; 1399 if (task_session(p) != task_session(group_leader)) 1400 goto out; 1401 err = -EACCES; 1402 if (p->did_exec) 1403 goto out; 1404 } else { 1405 err = -ESRCH; 1406 if (p != group_leader) 1407 goto out; 1408 } 1409 1410 err = -EPERM; 1411 if (p->signal->leader) 1412 goto out; 1413 1414 if (pgid != pid) { 1415 struct task_struct *g = 1416 find_task_by_pid_type(PIDTYPE_PGID, pgid); 1417 1418 if (!g || task_session(g) != task_session(group_leader)) 1419 goto out; 1420 } 1421 1422 err = security_task_setpgid(p, pgid); 1423 if (err) 1424 goto out; 1425 1426 if (process_group(p) != pgid) { 1427 detach_pid(p, PIDTYPE_PGID); 1428 p->signal->pgrp = pgid; 1429 attach_pid(p, PIDTYPE_PGID, pgid); 1430 } 1431 1432 err = 0; 1433 out: 1434 /* All paths lead to here, thus we are safe. 
-DaveM */ 1435 write_unlock_irq(&tasklist_lock); 1436 return err; 1437 } 1438 1439 asmlinkage long sys_getpgid(pid_t pid) 1440 { 1441 if (!pid) 1442 return process_group(current); 1443 else { 1444 int retval; 1445 struct task_struct *p; 1446 1447 read_lock(&tasklist_lock); 1448 p = find_task_by_pid(pid); 1449 1450 retval = -ESRCH; 1451 if (p) { 1452 retval = security_task_getpgid(p); 1453 if (!retval) 1454 retval = process_group(p); 1455 } 1456 read_unlock(&tasklist_lock); 1457 return retval; 1458 } 1459 } 1460 1461 #ifdef __ARCH_WANT_SYS_GETPGRP 1462 1463 asmlinkage long sys_getpgrp(void) 1464 { 1465 /* SMP - assuming writes are word atomic this is fine */ 1466 return process_group(current); 1467 } 1468 1469 #endif 1470 1471 asmlinkage long sys_getsid(pid_t pid) 1472 { 1473 if (!pid) 1474 return process_session(current); 1475 else { 1476 int retval; 1477 struct task_struct *p; 1478 1479 read_lock(&tasklist_lock); 1480 p = find_task_by_pid(pid); 1481 1482 retval = -ESRCH; 1483 if (p) { 1484 retval = security_task_getsid(p); 1485 if (!retval) 1486 retval = process_session(p); 1487 } 1488 read_unlock(&tasklist_lock); 1489 return retval; 1490 } 1491 } 1492 1493 asmlinkage long sys_setsid(void) 1494 { 1495 struct task_struct *group_leader = current->group_leader; 1496 pid_t session; 1497 int err = -EPERM; 1498 1499 write_lock_irq(&tasklist_lock); 1500 1501 /* Fail if I am already a session leader */ 1502 if (group_leader->signal->leader) 1503 goto out; 1504 1505 session = group_leader->pid; 1506 /* Fail if a process group id already exists that equals the 1507 * proposed session id. 1508 * 1509 * Don't check if session id == 1 because kernel threads use this 1510 * session id and so the check will always fail and make it so 1511 * init cannot successfully call setsid. 
1512 */ 1513 if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session)) 1514 goto out; 1515 1516 group_leader->signal->leader = 1; 1517 __set_special_pids(session, session); 1518 1519 spin_lock(&group_leader->sighand->siglock); 1520 group_leader->signal->tty = NULL; 1521 spin_unlock(&group_leader->sighand->siglock); 1522 1523 err = process_group(group_leader); 1524 out: 1525 write_unlock_irq(&tasklist_lock); 1526 return err; 1527 } 1528 1529 /* 1530 * Supplementary group IDs 1531 */ 1532 1533 /* init to 2 - one for init_task, one to ensure it is never freed */ 1534 struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; 1535 1536 struct group_info *groups_alloc(int gidsetsize) 1537 { 1538 struct group_info *group_info; 1539 int nblocks; 1540 int i; 1541 1542 nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK; 1543 /* Make sure we always allocate at least one indirect block pointer */ 1544 nblocks = nblocks ? : 1; 1545 group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER); 1546 if (!group_info) 1547 return NULL; 1548 group_info->ngroups = gidsetsize; 1549 group_info->nblocks = nblocks; 1550 atomic_set(&group_info->usage, 1); 1551 1552 if (gidsetsize <= NGROUPS_SMALL) 1553 group_info->blocks[0] = group_info->small_block; 1554 else { 1555 for (i = 0; i < nblocks; i++) { 1556 gid_t *b; 1557 b = (void *)__get_free_page(GFP_USER); 1558 if (!b) 1559 goto out_undo_partial_alloc; 1560 group_info->blocks[i] = b; 1561 } 1562 } 1563 return group_info; 1564 1565 out_undo_partial_alloc: 1566 while (--i >= 0) { 1567 free_page((unsigned long)group_info->blocks[i]); 1568 } 1569 kfree(group_info); 1570 return NULL; 1571 } 1572 1573 EXPORT_SYMBOL(groups_alloc); 1574 1575 void groups_free(struct group_info *group_info) 1576 { 1577 if (group_info->blocks[0] != group_info->small_block) { 1578 int i; 1579 for (i = 0; i < group_info->nblocks; i++) 1580 free_page((unsigned long)group_info->blocks[i]); 1581 } 1582 kfree(group_info); 1583 } 
1584 1585 EXPORT_SYMBOL(groups_free); 1586 1587 /* export the group_info to a user-space array */ 1588 static int groups_to_user(gid_t __user *grouplist, 1589 struct group_info *group_info) 1590 { 1591 int i; 1592 int count = group_info->ngroups; 1593 1594 for (i = 0; i < group_info->nblocks; i++) { 1595 int cp_count = min(NGROUPS_PER_BLOCK, count); 1596 int off = i * NGROUPS_PER_BLOCK; 1597 int len = cp_count * sizeof(*grouplist); 1598 1599 if (copy_to_user(grouplist+off, group_info->blocks[i], len)) 1600 return -EFAULT; 1601 1602 count -= cp_count; 1603 } 1604 return 0; 1605 } 1606 1607 /* fill a group_info from a user-space array - it must be allocated already */ 1608 static int groups_from_user(struct group_info *group_info, 1609 gid_t __user *grouplist) 1610 { 1611 int i; 1612 int count = group_info->ngroups; 1613 1614 for (i = 0; i < group_info->nblocks; i++) { 1615 int cp_count = min(NGROUPS_PER_BLOCK, count); 1616 int off = i * NGROUPS_PER_BLOCK; 1617 int len = cp_count * sizeof(*grouplist); 1618 1619 if (copy_from_user(group_info->blocks[i], grouplist+off, len)) 1620 return -EFAULT; 1621 1622 count -= cp_count; 1623 } 1624 return 0; 1625 } 1626 1627 /* a simple Shell sort */ 1628 static void groups_sort(struct group_info *group_info) 1629 { 1630 int base, max, stride; 1631 int gidsetsize = group_info->ngroups; 1632 1633 for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1) 1634 ; /* nothing */ 1635 stride /= 3; 1636 1637 while (stride) { 1638 max = gidsetsize - stride; 1639 for (base = 0; base < max; base++) { 1640 int left = base; 1641 int right = left + stride; 1642 gid_t tmp = GROUP_AT(group_info, right); 1643 1644 while (left >= 0 && GROUP_AT(group_info, left) > tmp) { 1645 GROUP_AT(group_info, right) = 1646 GROUP_AT(group_info, left); 1647 right = left; 1648 left -= stride; 1649 } 1650 GROUP_AT(group_info, right) = tmp; 1651 } 1652 stride /= 3; 1653 } 1654 } 1655 1656 /* a simple bsearch */ 1657 int groups_search(struct group_info 
*group_info, gid_t grp) 1658 { 1659 unsigned int left, right; 1660 1661 if (!group_info) 1662 return 0; 1663 1664 left = 0; 1665 right = group_info->ngroups; 1666 while (left < right) { 1667 unsigned int mid = (left+right)/2; 1668 int cmp = grp - GROUP_AT(group_info, mid); 1669 if (cmp > 0) 1670 left = mid + 1; 1671 else if (cmp < 0) 1672 right = mid; 1673 else 1674 return 1; 1675 } 1676 return 0; 1677 } 1678 1679 /* validate and set current->group_info */ 1680 int set_current_groups(struct group_info *group_info) 1681 { 1682 int retval; 1683 struct group_info *old_info; 1684 1685 retval = security_task_setgroups(group_info); 1686 if (retval) 1687 return retval; 1688 1689 groups_sort(group_info); 1690 get_group_info(group_info); 1691 1692 task_lock(current); 1693 old_info = current->group_info; 1694 current->group_info = group_info; 1695 task_unlock(current); 1696 1697 put_group_info(old_info); 1698 1699 return 0; 1700 } 1701 1702 EXPORT_SYMBOL(set_current_groups); 1703 1704 asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) 1705 { 1706 int i = 0; 1707 1708 /* 1709 * SMP: Nobody else can change our grouplist. Thus we are 1710 * safe. 1711 */ 1712 1713 if (gidsetsize < 0) 1714 return -EINVAL; 1715 1716 /* no need to grab task_lock here; it cannot change */ 1717 i = current->group_info->ngroups; 1718 if (gidsetsize) { 1719 if (i > gidsetsize) { 1720 i = -EINVAL; 1721 goto out; 1722 } 1723 if (groups_to_user(grouplist, current->group_info)) { 1724 i = -EFAULT; 1725 goto out; 1726 } 1727 } 1728 out: 1729 return i; 1730 } 1731 1732 /* 1733 * SMP: Our groups are copy-on-write. We can set them safely 1734 * without another task interfering. 
1735 */ 1736 1737 asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) 1738 { 1739 struct group_info *group_info; 1740 int retval; 1741 1742 if (!capable(CAP_SETGID)) 1743 return -EPERM; 1744 if ((unsigned)gidsetsize > NGROUPS_MAX) 1745 return -EINVAL; 1746 1747 group_info = groups_alloc(gidsetsize); 1748 if (!group_info) 1749 return -ENOMEM; 1750 retval = groups_from_user(group_info, grouplist); 1751 if (retval) { 1752 put_group_info(group_info); 1753 return retval; 1754 } 1755 1756 retval = set_current_groups(group_info); 1757 put_group_info(group_info); 1758 1759 return retval; 1760 } 1761 1762 /* 1763 * Check whether we're fsgid/egid or in the supplemental group.. 1764 */ 1765 int in_group_p(gid_t grp) 1766 { 1767 int retval = 1; 1768 if (grp != current->fsgid) 1769 retval = groups_search(current->group_info, grp); 1770 return retval; 1771 } 1772 1773 EXPORT_SYMBOL(in_group_p); 1774 1775 int in_egroup_p(gid_t grp) 1776 { 1777 int retval = 1; 1778 if (grp != current->egid) 1779 retval = groups_search(current->group_info, grp); 1780 return retval; 1781 } 1782 1783 EXPORT_SYMBOL(in_egroup_p); 1784 1785 DECLARE_RWSEM(uts_sem); 1786 1787 EXPORT_SYMBOL(uts_sem); 1788 1789 asmlinkage long sys_newuname(struct new_utsname __user * name) 1790 { 1791 int errno = 0; 1792 1793 down_read(&uts_sem); 1794 if (copy_to_user(name, utsname(), sizeof *name)) 1795 errno = -EFAULT; 1796 up_read(&uts_sem); 1797 return errno; 1798 } 1799 1800 asmlinkage long sys_sethostname(char __user *name, int len) 1801 { 1802 int errno; 1803 char tmp[__NEW_UTS_LEN]; 1804 1805 if (!capable(CAP_SYS_ADMIN)) 1806 return -EPERM; 1807 if (len < 0 || len > __NEW_UTS_LEN) 1808 return -EINVAL; 1809 down_write(&uts_sem); 1810 errno = -EFAULT; 1811 if (!copy_from_user(tmp, name, len)) { 1812 memcpy(utsname()->nodename, tmp, len); 1813 utsname()->nodename[len] = 0; 1814 errno = 0; 1815 } 1816 up_write(&uts_sem); 1817 return errno; 1818 } 1819 1820 #ifdef __ARCH_WANT_SYS_GETHOSTNAME 1821 1822 
asmlinkage long sys_gethostname(char __user *name, int len) 1823 { 1824 int i, errno; 1825 1826 if (len < 0) 1827 return -EINVAL; 1828 down_read(&uts_sem); 1829 i = 1 + strlen(utsname()->nodename); 1830 if (i > len) 1831 i = len; 1832 errno = 0; 1833 if (copy_to_user(name, utsname()->nodename, i)) 1834 errno = -EFAULT; 1835 up_read(&uts_sem); 1836 return errno; 1837 } 1838 1839 #endif 1840 1841 /* 1842 * Only setdomainname; getdomainname can be implemented by calling 1843 * uname() 1844 */ 1845 asmlinkage long sys_setdomainname(char __user *name, int len) 1846 { 1847 int errno; 1848 char tmp[__NEW_UTS_LEN]; 1849 1850 if (!capable(CAP_SYS_ADMIN)) 1851 return -EPERM; 1852 if (len < 0 || len > __NEW_UTS_LEN) 1853 return -EINVAL; 1854 1855 down_write(&uts_sem); 1856 errno = -EFAULT; 1857 if (!copy_from_user(tmp, name, len)) { 1858 memcpy(utsname()->domainname, tmp, len); 1859 utsname()->domainname[len] = 0; 1860 errno = 0; 1861 } 1862 up_write(&uts_sem); 1863 return errno; 1864 } 1865 1866 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) 1867 { 1868 if (resource >= RLIM_NLIMITS) 1869 return -EINVAL; 1870 else { 1871 struct rlimit value; 1872 task_lock(current->group_leader); 1873 value = current->signal->rlim[resource]; 1874 task_unlock(current->group_leader); 1875 return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; 1876 } 1877 } 1878 1879 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1880 1881 /* 1882 * Back compatibility for getrlimit. Needed for some apps. 
1883 */ 1884 1885 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim) 1886 { 1887 struct rlimit x; 1888 if (resource >= RLIM_NLIMITS) 1889 return -EINVAL; 1890 1891 task_lock(current->group_leader); 1892 x = current->signal->rlim[resource]; 1893 task_unlock(current->group_leader); 1894 if (x.rlim_cur > 0x7FFFFFFF) 1895 x.rlim_cur = 0x7FFFFFFF; 1896 if (x.rlim_max > 0x7FFFFFFF) 1897 x.rlim_max = 0x7FFFFFFF; 1898 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1899 } 1900 1901 #endif 1902 1903 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) 1904 { 1905 struct rlimit new_rlim, *old_rlim; 1906 unsigned long it_prof_secs; 1907 int retval; 1908 1909 if (resource >= RLIM_NLIMITS) 1910 return -EINVAL; 1911 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1912 return -EFAULT; 1913 if (new_rlim.rlim_cur > new_rlim.rlim_max) 1914 return -EINVAL; 1915 old_rlim = current->signal->rlim + resource; 1916 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1917 !capable(CAP_SYS_RESOURCE)) 1918 return -EPERM; 1919 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) 1920 return -EPERM; 1921 1922 retval = security_task_setrlimit(resource, &new_rlim); 1923 if (retval) 1924 return retval; 1925 1926 task_lock(current->group_leader); 1927 *old_rlim = new_rlim; 1928 task_unlock(current->group_leader); 1929 1930 if (resource != RLIMIT_CPU) 1931 goto out; 1932 1933 /* 1934 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1935 * code if it rejected the user's attempt to set RLIMIT_CPU. 
This is a 1936 * very long-standing error, and fixing it now risks breakage of 1937 * applications, so we live with it 1938 */ 1939 if (new_rlim.rlim_cur == RLIM_INFINITY) 1940 goto out; 1941 1942 it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); 1943 if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { 1944 unsigned long rlim_cur = new_rlim.rlim_cur; 1945 cputime_t cputime; 1946 1947 if (rlim_cur == 0) { 1948 /* 1949 * The caller is asking for an immediate RLIMIT_CPU 1950 * expiry. But we use the zero value to mean "it was 1951 * never set". So let's cheat and make it one second 1952 * instead 1953 */ 1954 rlim_cur = 1; 1955 } 1956 cputime = secs_to_cputime(rlim_cur); 1957 read_lock(&tasklist_lock); 1958 spin_lock_irq(¤t->sighand->siglock); 1959 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); 1960 spin_unlock_irq(¤t->sighand->siglock); 1961 read_unlock(&tasklist_lock); 1962 } 1963 out: 1964 return 0; 1965 } 1966 1967 /* 1968 * It would make sense to put struct rusage in the task_struct, 1969 * except that would make the task_struct be *really big*. After 1970 * task_struct gets moved into malloc'ed memory, it would 1971 * make sense to do this. It will make moving the rest of the information 1972 * a lot simpler! (Which we're not doing right now because we're not 1973 * measuring them yet). 1974 * 1975 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1976 * races with threads incrementing their own counters. But since word 1977 * reads are atomic, we either get new values or old values and we don't 1978 * care which for the sums. We always take the siglock to protect reading 1979 * the c* fields from p->signal from races with exit.c updating those 1980 * fields when reaping, so a sample either gets all the additions of a 1981 * given child after it's reaped, or none so this sample is before reaping. 
1982 * 1983 * Locking: 1984 * We need to take the siglock for CHILDEREN, SELF and BOTH 1985 * for the cases current multithreaded, non-current single threaded 1986 * non-current multithreaded. Thread traversal is now safe with 1987 * the siglock held. 1988 * Strictly speaking, we donot need to take the siglock if we are current and 1989 * single threaded, as no one else can take our signal_struct away, no one 1990 * else can reap the children to update signal->c* counters, and no one else 1991 * can race with the signal-> fields. If we do not take any lock, the 1992 * signal-> fields could be read out of order while another thread was just 1993 * exiting. So we should place a read memory barrier when we avoid the lock. 1994 * On the writer side, write memory barrier is implied in __exit_signal 1995 * as __exit_signal releases the siglock spinlock after updating the signal-> 1996 * fields. But we don't do this yet to keep things simple. 1997 * 1998 */ 1999 2000 static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 2001 { 2002 struct task_struct *t; 2003 unsigned long flags; 2004 cputime_t utime, stime; 2005 2006 memset((char *) r, 0, sizeof *r); 2007 utime = stime = cputime_zero; 2008 2009 rcu_read_lock(); 2010 if (!lock_task_sighand(p, &flags)) { 2011 rcu_read_unlock(); 2012 return; 2013 } 2014 2015 switch (who) { 2016 case RUSAGE_BOTH: 2017 case RUSAGE_CHILDREN: 2018 utime = p->signal->cutime; 2019 stime = p->signal->cstime; 2020 r->ru_nvcsw = p->signal->cnvcsw; 2021 r->ru_nivcsw = p->signal->cnivcsw; 2022 r->ru_minflt = p->signal->cmin_flt; 2023 r->ru_majflt = p->signal->cmaj_flt; 2024 2025 if (who == RUSAGE_CHILDREN) 2026 break; 2027 2028 case RUSAGE_SELF: 2029 utime = cputime_add(utime, p->signal->utime); 2030 stime = cputime_add(stime, p->signal->stime); 2031 r->ru_nvcsw += p->signal->nvcsw; 2032 r->ru_nivcsw += p->signal->nivcsw; 2033 r->ru_minflt += p->signal->min_flt; 2034 r->ru_majflt += p->signal->maj_flt; 2035 t = p; 2036 do { 2037 
utime = cputime_add(utime, t->utime); 2038 stime = cputime_add(stime, t->stime); 2039 r->ru_nvcsw += t->nvcsw; 2040 r->ru_nivcsw += t->nivcsw; 2041 r->ru_minflt += t->min_flt; 2042 r->ru_majflt += t->maj_flt; 2043 t = next_thread(t); 2044 } while (t != p); 2045 break; 2046 2047 default: 2048 BUG(); 2049 } 2050 2051 unlock_task_sighand(p, &flags); 2052 rcu_read_unlock(); 2053 2054 cputime_to_timeval(utime, &r->ru_utime); 2055 cputime_to_timeval(stime, &r->ru_stime); 2056 } 2057 2058 int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 2059 { 2060 struct rusage r; 2061 k_getrusage(p, who, &r); 2062 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 2063 } 2064 2065 asmlinkage long sys_getrusage(int who, struct rusage __user *ru) 2066 { 2067 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) 2068 return -EINVAL; 2069 return getrusage(current, who, ru); 2070 } 2071 2072 asmlinkage long sys_umask(int mask) 2073 { 2074 mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); 2075 return mask; 2076 } 2077 2078 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, 2079 unsigned long arg4, unsigned long arg5) 2080 { 2081 long error; 2082 2083 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 2084 if (error) 2085 return error; 2086 2087 switch (option) { 2088 case PR_SET_PDEATHSIG: 2089 if (!valid_signal(arg2)) { 2090 error = -EINVAL; 2091 break; 2092 } 2093 current->pdeath_signal = arg2; 2094 break; 2095 case PR_GET_PDEATHSIG: 2096 error = put_user(current->pdeath_signal, (int __user *)arg2); 2097 break; 2098 case PR_GET_DUMPABLE: 2099 error = current->mm->dumpable; 2100 break; 2101 case PR_SET_DUMPABLE: 2102 if (arg2 < 0 || arg2 > 1) { 2103 error = -EINVAL; 2104 break; 2105 } 2106 current->mm->dumpable = arg2; 2107 break; 2108 2109 case PR_SET_UNALIGN: 2110 error = SET_UNALIGN_CTL(current, arg2); 2111 break; 2112 case PR_GET_UNALIGN: 2113 error = GET_UNALIGN_CTL(current, arg2); 2114 break; 2115 case PR_SET_FPEMU: 2116 error = 
SET_FPEMU_CTL(current, arg2); 2117 break; 2118 case PR_GET_FPEMU: 2119 error = GET_FPEMU_CTL(current, arg2); 2120 break; 2121 case PR_SET_FPEXC: 2122 error = SET_FPEXC_CTL(current, arg2); 2123 break; 2124 case PR_GET_FPEXC: 2125 error = GET_FPEXC_CTL(current, arg2); 2126 break; 2127 case PR_GET_TIMING: 2128 error = PR_TIMING_STATISTICAL; 2129 break; 2130 case PR_SET_TIMING: 2131 if (arg2 == PR_TIMING_STATISTICAL) 2132 error = 0; 2133 else 2134 error = -EINVAL; 2135 break; 2136 2137 case PR_GET_KEEPCAPS: 2138 if (current->keep_capabilities) 2139 error = 1; 2140 break; 2141 case PR_SET_KEEPCAPS: 2142 if (arg2 != 0 && arg2 != 1) { 2143 error = -EINVAL; 2144 break; 2145 } 2146 current->keep_capabilities = arg2; 2147 break; 2148 case PR_SET_NAME: { 2149 struct task_struct *me = current; 2150 unsigned char ncomm[sizeof(me->comm)]; 2151 2152 ncomm[sizeof(me->comm)-1] = 0; 2153 if (strncpy_from_user(ncomm, (char __user *)arg2, 2154 sizeof(me->comm)-1) < 0) 2155 return -EFAULT; 2156 set_task_comm(me, ncomm); 2157 return 0; 2158 } 2159 case PR_GET_NAME: { 2160 struct task_struct *me = current; 2161 unsigned char tcomm[sizeof(me->comm)]; 2162 2163 get_task_comm(tcomm, me); 2164 if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm))) 2165 return -EFAULT; 2166 return 0; 2167 } 2168 case PR_GET_ENDIAN: 2169 error = GET_ENDIAN(current, arg2); 2170 break; 2171 case PR_SET_ENDIAN: 2172 error = SET_ENDIAN(current, arg2); 2173 break; 2174 2175 default: 2176 error = -EINVAL; 2177 break; 2178 } 2179 return error; 2180 } 2181 2182 asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, 2183 struct getcpu_cache __user *cache) 2184 { 2185 int err = 0; 2186 int cpu = raw_smp_processor_id(); 2187 if (cpup) 2188 err |= put_user(cpu, cpup); 2189 if (nodep) 2190 err |= put_user(cpu_to_node(cpu), nodep); 2191 if (cache) { 2192 /* 2193 * The cache is not needed for this implementation, 2194 * but make sure user programs pass something 2195 * valid. 
vsyscall implementations can instead make 2196 * good use of the cache. Only use t0 and t1 because 2197 * these are available in both 32bit and 64bit ABI (no 2198 * need for a compat_getcpu). 32bit has enough 2199 * padding 2200 */ 2201 unsigned long t0, t1; 2202 get_user(t0, &cache->blob[0]); 2203 get_user(t1, &cache->blob[1]); 2204 t0++; 2205 t1++; 2206 put_user(t0, &cache->blob[0]); 2207 put_user(t1, &cache->blob[1]); 2208 } 2209 return err ? -EFAULT : 0; 2210 } 2211