1 /* 2 * linux/kernel/sys.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/module.h> 8 #include <linux/mm.h> 9 #include <linux/utsname.h> 10 #include <linux/mman.h> 11 #include <linux/smp_lock.h> 12 #include <linux/notifier.h> 13 #include <linux/reboot.h> 14 #include <linux/prctl.h> 15 #include <linux/highuid.h> 16 #include <linux/fs.h> 17 #include <linux/kernel.h> 18 #include <linux/kexec.h> 19 #include <linux/workqueue.h> 20 #include <linux/capability.h> 21 #include <linux/device.h> 22 #include <linux/key.h> 23 #include <linux/times.h> 24 #include <linux/posix-timers.h> 25 #include <linux/security.h> 26 #include <linux/dcookies.h> 27 #include <linux/suspend.h> 28 #include <linux/tty.h> 29 #include <linux/signal.h> 30 #include <linux/cn_proc.h> 31 #include <linux/getcpu.h> 32 33 #include <linux/compat.h> 34 #include <linux/syscalls.h> 35 #include <linux/kprobes.h> 36 37 #include <asm/uaccess.h> 38 #include <asm/io.h> 39 #include <asm/unistd.h> 40 41 #ifndef SET_UNALIGN_CTL 42 # define SET_UNALIGN_CTL(a,b) (-EINVAL) 43 #endif 44 #ifndef GET_UNALIGN_CTL 45 # define GET_UNALIGN_CTL(a,b) (-EINVAL) 46 #endif 47 #ifndef SET_FPEMU_CTL 48 # define SET_FPEMU_CTL(a,b) (-EINVAL) 49 #endif 50 #ifndef GET_FPEMU_CTL 51 # define GET_FPEMU_CTL(a,b) (-EINVAL) 52 #endif 53 #ifndef SET_FPEXC_CTL 54 # define SET_FPEXC_CTL(a,b) (-EINVAL) 55 #endif 56 #ifndef GET_FPEXC_CTL 57 # define GET_FPEXC_CTL(a,b) (-EINVAL) 58 #endif 59 #ifndef GET_ENDIAN 60 # define GET_ENDIAN(a,b) (-EINVAL) 61 #endif 62 #ifndef SET_ENDIAN 63 # define SET_ENDIAN(a,b) (-EINVAL) 64 #endif 65 66 /* 67 * this is where the system-wide overflow UID and GID are defined, for 68 * architectures that now have 32-bit UID/GID but didn't in the past 69 */ 70 71 int overflowuid = DEFAULT_OVERFLOWUID; 72 int overflowgid = DEFAULT_OVERFLOWGID; 73 74 #ifdef CONFIG_UID16 75 EXPORT_SYMBOL(overflowuid); 76 EXPORT_SYMBOL(overflowgid); 77 #endif 78 79 /* 80 * the same as above, but for filesystems which can 
only store a 16-bit 81 * UID and GID. as such, this is needed on all architectures 82 */ 83 84 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; 85 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; 86 87 EXPORT_SYMBOL(fs_overflowuid); 88 EXPORT_SYMBOL(fs_overflowgid); 89 90 /* 91 * this indicates whether you can reboot with ctrl-alt-del: the default is yes 92 */ 93 94 int C_A_D = 1; 95 struct pid *cad_pid; 96 EXPORT_SYMBOL(cad_pid); 97 98 /* 99 * Notifier list for kernel code which wants to be called 100 * at shutdown. This is used to stop any idling DMA operations 101 * and the like. 102 */ 103 104 static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); 105 106 /* 107 * Notifier chain core routines. The exported routines below 108 * are layered on top of these, with appropriate locking added. 109 */ 110 111 static int notifier_chain_register(struct notifier_block **nl, 112 struct notifier_block *n) 113 { 114 while ((*nl) != NULL) { 115 if (n->priority > (*nl)->priority) 116 break; 117 nl = &((*nl)->next); 118 } 119 n->next = *nl; 120 rcu_assign_pointer(*nl, n); 121 return 0; 122 } 123 124 static int notifier_chain_unregister(struct notifier_block **nl, 125 struct notifier_block *n) 126 { 127 while ((*nl) != NULL) { 128 if ((*nl) == n) { 129 rcu_assign_pointer(*nl, n->next); 130 return 0; 131 } 132 nl = &((*nl)->next); 133 } 134 return -ENOENT; 135 } 136 137 static int __kprobes notifier_call_chain(struct notifier_block **nl, 138 unsigned long val, void *v) 139 { 140 int ret = NOTIFY_DONE; 141 struct notifier_block *nb, *next_nb; 142 143 nb = rcu_dereference(*nl); 144 while (nb) { 145 next_nb = rcu_dereference(nb->next); 146 ret = nb->notifier_call(nb, val, v); 147 if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) 148 break; 149 nb = next_nb; 150 } 151 return ret; 152 } 153 154 /* 155 * Atomic notifier chain routines. Registration and unregistration 156 * use a spinlock, and call_chain is synchronized by RCU (no locks). 
157 */ 158 159 /** 160 * atomic_notifier_chain_register - Add notifier to an atomic notifier chain 161 * @nh: Pointer to head of the atomic notifier chain 162 * @n: New entry in notifier chain 163 * 164 * Adds a notifier to an atomic notifier chain. 165 * 166 * Currently always returns zero. 167 */ 168 169 int atomic_notifier_chain_register(struct atomic_notifier_head *nh, 170 struct notifier_block *n) 171 { 172 unsigned long flags; 173 int ret; 174 175 spin_lock_irqsave(&nh->lock, flags); 176 ret = notifier_chain_register(&nh->head, n); 177 spin_unlock_irqrestore(&nh->lock, flags); 178 return ret; 179 } 180 181 EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); 182 183 /** 184 * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain 185 * @nh: Pointer to head of the atomic notifier chain 186 * @n: Entry to remove from notifier chain 187 * 188 * Removes a notifier from an atomic notifier chain. 189 * 190 * Returns zero on success or %-ENOENT on failure. 191 */ 192 int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, 193 struct notifier_block *n) 194 { 195 unsigned long flags; 196 int ret; 197 198 spin_lock_irqsave(&nh->lock, flags); 199 ret = notifier_chain_unregister(&nh->head, n); 200 spin_unlock_irqrestore(&nh->lock, flags); 201 synchronize_rcu(); 202 return ret; 203 } 204 205 EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); 206 207 /** 208 * atomic_notifier_call_chain - Call functions in an atomic notifier chain 209 * @nh: Pointer to head of the atomic notifier chain 210 * @val: Value passed unmodified to notifier function 211 * @v: Pointer passed unmodified to notifier function 212 * 213 * Calls each function in a notifier chain in turn. The functions 214 * run in an atomic context, so they must not block. 215 * This routine uses RCU to synchronize with changes to the chain. 
216 * 217 * If the return value of the notifier can be and'ed 218 * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain 219 * will return immediately, with the return value of 220 * the notifier function which halted execution. 221 * Otherwise the return value is the return value 222 * of the last notifier function called. 223 */ 224 225 int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, 226 unsigned long val, void *v) 227 { 228 int ret; 229 230 rcu_read_lock(); 231 ret = notifier_call_chain(&nh->head, val, v); 232 rcu_read_unlock(); 233 return ret; 234 } 235 236 EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); 237 238 /* 239 * Blocking notifier chain routines. All access to the chain is 240 * synchronized by an rwsem. 241 */ 242 243 /** 244 * blocking_notifier_chain_register - Add notifier to a blocking notifier chain 245 * @nh: Pointer to head of the blocking notifier chain 246 * @n: New entry in notifier chain 247 * 248 * Adds a notifier to a blocking notifier chain. 249 * Must be called in process context. 250 * 251 * Currently always returns zero. 252 */ 253 254 int blocking_notifier_chain_register(struct blocking_notifier_head *nh, 255 struct notifier_block *n) 256 { 257 int ret; 258 259 /* 260 * This code gets used during boot-up, when task switching is 261 * not yet working and interrupts must remain disabled. At 262 * such times we must not call down_write(). 263 */ 264 if (unlikely(system_state == SYSTEM_BOOTING)) 265 return notifier_chain_register(&nh->head, n); 266 267 down_write(&nh->rwsem); 268 ret = notifier_chain_register(&nh->head, n); 269 up_write(&nh->rwsem); 270 return ret; 271 } 272 273 EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); 274 275 /** 276 * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain 277 * @nh: Pointer to head of the blocking notifier chain 278 * @n: Entry to remove from notifier chain 279 * 280 * Removes a notifier from a blocking notifier chain. 
281 * Must be called from process context. 282 * 283 * Returns zero on success or %-ENOENT on failure. 284 */ 285 int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, 286 struct notifier_block *n) 287 { 288 int ret; 289 290 /* 291 * This code gets used during boot-up, when task switching is 292 * not yet working and interrupts must remain disabled. At 293 * such times we must not call down_write(). 294 */ 295 if (unlikely(system_state == SYSTEM_BOOTING)) 296 return notifier_chain_unregister(&nh->head, n); 297 298 down_write(&nh->rwsem); 299 ret = notifier_chain_unregister(&nh->head, n); 300 up_write(&nh->rwsem); 301 return ret; 302 } 303 304 EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); 305 306 /** 307 * blocking_notifier_call_chain - Call functions in a blocking notifier chain 308 * @nh: Pointer to head of the blocking notifier chain 309 * @val: Value passed unmodified to notifier function 310 * @v: Pointer passed unmodified to notifier function 311 * 312 * Calls each function in a notifier chain in turn. The functions 313 * run in a process context, so they are allowed to block. 314 * 315 * If the return value of the notifier can be and'ed 316 * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain 317 * will return immediately, with the return value of 318 * the notifier function which halted execution. 319 * Otherwise the return value is the return value 320 * of the last notifier function called. 
321 */ 322 323 int blocking_notifier_call_chain(struct blocking_notifier_head *nh, 324 unsigned long val, void *v) 325 { 326 int ret = NOTIFY_DONE; 327 328 /* 329 * We check the head outside the lock, but if this access is 330 * racy then it does not matter what the result of the test 331 * is, we re-check the list after having taken the lock anyway: 332 */ 333 if (rcu_dereference(nh->head)) { 334 down_read(&nh->rwsem); 335 ret = notifier_call_chain(&nh->head, val, v); 336 up_read(&nh->rwsem); 337 } 338 return ret; 339 } 340 341 EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); 342 343 /* 344 * Raw notifier chain routines. There is no protection; 345 * the caller must provide it. Use at your own risk! 346 */ 347 348 /** 349 * raw_notifier_chain_register - Add notifier to a raw notifier chain 350 * @nh: Pointer to head of the raw notifier chain 351 * @n: New entry in notifier chain 352 * 353 * Adds a notifier to a raw notifier chain. 354 * All locking must be provided by the caller. 355 * 356 * Currently always returns zero. 357 */ 358 359 int raw_notifier_chain_register(struct raw_notifier_head *nh, 360 struct notifier_block *n) 361 { 362 return notifier_chain_register(&nh->head, n); 363 } 364 365 EXPORT_SYMBOL_GPL(raw_notifier_chain_register); 366 367 /** 368 * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain 369 * @nh: Pointer to head of the raw notifier chain 370 * @n: Entry to remove from notifier chain 371 * 372 * Removes a notifier from a raw notifier chain. 373 * All locking must be provided by the caller. 374 * 375 * Returns zero on success or %-ENOENT on failure. 
376 */ 377 int raw_notifier_chain_unregister(struct raw_notifier_head *nh, 378 struct notifier_block *n) 379 { 380 return notifier_chain_unregister(&nh->head, n); 381 } 382 383 EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); 384 385 /** 386 * raw_notifier_call_chain - Call functions in a raw notifier chain 387 * @nh: Pointer to head of the raw notifier chain 388 * @val: Value passed unmodified to notifier function 389 * @v: Pointer passed unmodified to notifier function 390 * 391 * Calls each function in a notifier chain in turn. The functions 392 * run in an undefined context. 393 * All locking must be provided by the caller. 394 * 395 * If the return value of the notifier can be and'ed 396 * with %NOTIFY_STOP_MASK then raw_notifier_call_chain 397 * will return immediately, with the return value of 398 * the notifier function which halted execution. 399 * Otherwise the return value is the return value 400 * of the last notifier function called. 401 */ 402 403 int raw_notifier_call_chain(struct raw_notifier_head *nh, 404 unsigned long val, void *v) 405 { 406 return notifier_call_chain(&nh->head, val, v); 407 } 408 409 EXPORT_SYMBOL_GPL(raw_notifier_call_chain); 410 411 /* 412 * SRCU notifier chain routines. Registration and unregistration 413 * use a mutex, and call_chain is synchronized by SRCU (no locks). 414 */ 415 416 /** 417 * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain 418 * @nh: Pointer to head of the SRCU notifier chain 419 * @n: New entry in notifier chain 420 * 421 * Adds a notifier to an SRCU notifier chain. 422 * Must be called in process context. 423 * 424 * Currently always returns zero. 425 */ 426 427 int srcu_notifier_chain_register(struct srcu_notifier_head *nh, 428 struct notifier_block *n) 429 { 430 int ret; 431 432 /* 433 * This code gets used during boot-up, when task switching is 434 * not yet working and interrupts must remain disabled. At 435 * such times we must not call mutex_lock(). 
436 */ 437 if (unlikely(system_state == SYSTEM_BOOTING)) 438 return notifier_chain_register(&nh->head, n); 439 440 mutex_lock(&nh->mutex); 441 ret = notifier_chain_register(&nh->head, n); 442 mutex_unlock(&nh->mutex); 443 return ret; 444 } 445 446 EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); 447 448 /** 449 * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain 450 * @nh: Pointer to head of the SRCU notifier chain 451 * @n: Entry to remove from notifier chain 452 * 453 * Removes a notifier from an SRCU notifier chain. 454 * Must be called from process context. 455 * 456 * Returns zero on success or %-ENOENT on failure. 457 */ 458 int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, 459 struct notifier_block *n) 460 { 461 int ret; 462 463 /* 464 * This code gets used during boot-up, when task switching is 465 * not yet working and interrupts must remain disabled. At 466 * such times we must not call mutex_lock(). 467 */ 468 if (unlikely(system_state == SYSTEM_BOOTING)) 469 return notifier_chain_unregister(&nh->head, n); 470 471 mutex_lock(&nh->mutex); 472 ret = notifier_chain_unregister(&nh->head, n); 473 mutex_unlock(&nh->mutex); 474 synchronize_srcu(&nh->srcu); 475 return ret; 476 } 477 478 EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); 479 480 /** 481 * srcu_notifier_call_chain - Call functions in an SRCU notifier chain 482 * @nh: Pointer to head of the SRCU notifier chain 483 * @val: Value passed unmodified to notifier function 484 * @v: Pointer passed unmodified to notifier function 485 * 486 * Calls each function in a notifier chain in turn. The functions 487 * run in a process context, so they are allowed to block. 488 * 489 * If the return value of the notifier can be and'ed 490 * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain 491 * will return immediately, with the return value of 492 * the notifier function which halted execution. 
493 * Otherwise the return value is the return value 494 * of the last notifier function called. 495 */ 496 497 int srcu_notifier_call_chain(struct srcu_notifier_head *nh, 498 unsigned long val, void *v) 499 { 500 int ret; 501 int idx; 502 503 idx = srcu_read_lock(&nh->srcu); 504 ret = notifier_call_chain(&nh->head, val, v); 505 srcu_read_unlock(&nh->srcu, idx); 506 return ret; 507 } 508 509 EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); 510 511 /** 512 * srcu_init_notifier_head - Initialize an SRCU notifier head 513 * @nh: Pointer to head of the srcu notifier chain 514 * 515 * Unlike other sorts of notifier heads, SRCU notifier heads require 516 * dynamic initialization. Be sure to call this routine before 517 * calling any of the other SRCU notifier routines for this head. 518 * 519 * If an SRCU notifier head is deallocated, it must first be cleaned 520 * up by calling srcu_cleanup_notifier_head(). Otherwise the head's 521 * per-cpu data (used by the SRCU mechanism) will leak. 522 */ 523 524 void srcu_init_notifier_head(struct srcu_notifier_head *nh) 525 { 526 mutex_init(&nh->mutex); 527 if (init_srcu_struct(&nh->srcu) < 0) 528 BUG(); 529 nh->head = NULL; 530 } 531 532 EXPORT_SYMBOL_GPL(srcu_init_notifier_head); 533 534 /** 535 * register_reboot_notifier - Register function to be called at reboot time 536 * @nb: Info about notifier function to be called 537 * 538 * Registers a function with the list of functions 539 * to be called at reboot time. 540 * 541 * Currently always returns zero, as blocking_notifier_chain_register 542 * always returns zero. 543 */ 544 545 int register_reboot_notifier(struct notifier_block * nb) 546 { 547 return blocking_notifier_chain_register(&reboot_notifier_list, nb); 548 } 549 550 EXPORT_SYMBOL(register_reboot_notifier); 551 552 /** 553 * unregister_reboot_notifier - Unregister previously registered reboot notifier 554 * @nb: Hook to be unregistered 555 * 556 * Unregisters a previously registered reboot 557 * notifier function. 
558 * 559 * Returns zero on success, or %-ENOENT on failure. 560 */ 561 562 int unregister_reboot_notifier(struct notifier_block * nb) 563 { 564 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); 565 } 566 567 EXPORT_SYMBOL(unregister_reboot_notifier); 568 569 static int set_one_prio(struct task_struct *p, int niceval, int error) 570 { 571 int no_nice; 572 573 if (p->uid != current->euid && 574 p->euid != current->euid && !capable(CAP_SYS_NICE)) { 575 error = -EPERM; 576 goto out; 577 } 578 if (niceval < task_nice(p) && !can_nice(p, niceval)) { 579 error = -EACCES; 580 goto out; 581 } 582 no_nice = security_task_setnice(p, niceval); 583 if (no_nice) { 584 error = no_nice; 585 goto out; 586 } 587 if (error == -ESRCH) 588 error = 0; 589 set_user_nice(p, niceval); 590 out: 591 return error; 592 } 593 594 asmlinkage long sys_setpriority(int which, int who, int niceval) 595 { 596 struct task_struct *g, *p; 597 struct user_struct *user; 598 int error = -EINVAL; 599 600 if (which > 2 || which < 0) 601 goto out; 602 603 /* normalize: avoid signed division (rounding problems) */ 604 error = -ESRCH; 605 if (niceval < -20) 606 niceval = -20; 607 if (niceval > 19) 608 niceval = 19; 609 610 read_lock(&tasklist_lock); 611 switch (which) { 612 case PRIO_PROCESS: 613 if (!who) 614 who = current->pid; 615 p = find_task_by_pid(who); 616 if (p) 617 error = set_one_prio(p, niceval, error); 618 break; 619 case PRIO_PGRP: 620 if (!who) 621 who = process_group(current); 622 do_each_task_pid(who, PIDTYPE_PGID, p) { 623 error = set_one_prio(p, niceval, error); 624 } while_each_task_pid(who, PIDTYPE_PGID, p); 625 break; 626 case PRIO_USER: 627 user = current->user; 628 if (!who) 629 who = current->uid; 630 else 631 if ((who != current->uid) && !(user = find_user(who))) 632 goto out_unlock; /* No processes for this user */ 633 634 do_each_thread(g, p) 635 if (p->uid == who) 636 error = set_one_prio(p, niceval, error); 637 while_each_thread(g, p); 638 if (who != 
current->uid) 639 free_uid(user); /* For find_user() */ 640 break; 641 } 642 out_unlock: 643 read_unlock(&tasklist_lock); 644 out: 645 return error; 646 } 647 648 /* 649 * Ugh. To avoid negative return values, "getpriority()" will 650 * not return the normal nice-value, but a negated value that 651 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 652 * to stay compatible. 653 */ 654 asmlinkage long sys_getpriority(int which, int who) 655 { 656 struct task_struct *g, *p; 657 struct user_struct *user; 658 long niceval, retval = -ESRCH; 659 660 if (which > 2 || which < 0) 661 return -EINVAL; 662 663 read_lock(&tasklist_lock); 664 switch (which) { 665 case PRIO_PROCESS: 666 if (!who) 667 who = current->pid; 668 p = find_task_by_pid(who); 669 if (p) { 670 niceval = 20 - task_nice(p); 671 if (niceval > retval) 672 retval = niceval; 673 } 674 break; 675 case PRIO_PGRP: 676 if (!who) 677 who = process_group(current); 678 do_each_task_pid(who, PIDTYPE_PGID, p) { 679 niceval = 20 - task_nice(p); 680 if (niceval > retval) 681 retval = niceval; 682 } while_each_task_pid(who, PIDTYPE_PGID, p); 683 break; 684 case PRIO_USER: 685 user = current->user; 686 if (!who) 687 who = current->uid; 688 else 689 if ((who != current->uid) && !(user = find_user(who))) 690 goto out_unlock; /* No processes for this user */ 691 692 do_each_thread(g, p) 693 if (p->uid == who) { 694 niceval = 20 - task_nice(p); 695 if (niceval > retval) 696 retval = niceval; 697 } 698 while_each_thread(g, p); 699 if (who != current->uid) 700 free_uid(user); /* for find_user() */ 701 break; 702 } 703 out_unlock: 704 read_unlock(&tasklist_lock); 705 706 return retval; 707 } 708 709 /** 710 * emergency_restart - reboot the system 711 * 712 * Without shutting down any hardware or taking any locks 713 * reboot the system. This is called when we know we are in 714 * trouble so this is our best effort to reboot. This is 715 * safe to call in interrupt context. 
716 */ 717 void emergency_restart(void) 718 { 719 machine_emergency_restart(); 720 } 721 EXPORT_SYMBOL_GPL(emergency_restart); 722 723 static void kernel_restart_prepare(char *cmd) 724 { 725 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 726 system_state = SYSTEM_RESTART; 727 device_shutdown(); 728 } 729 730 /** 731 * kernel_restart - reboot the system 732 * @cmd: pointer to buffer containing command to execute for restart 733 * or %NULL 734 * 735 * Shutdown everything and perform a clean reboot. 736 * This is not safe to call in interrupt context. 737 */ 738 void kernel_restart(char *cmd) 739 { 740 kernel_restart_prepare(cmd); 741 if (!cmd) 742 printk(KERN_EMERG "Restarting system.\n"); 743 else 744 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 745 machine_restart(cmd); 746 } 747 EXPORT_SYMBOL_GPL(kernel_restart); 748 749 /** 750 * kernel_kexec - reboot the system 751 * 752 * Move into place and start executing a preloaded standalone 753 * executable. If nothing was preloaded return an error. 754 */ 755 static void kernel_kexec(void) 756 { 757 #ifdef CONFIG_KEXEC 758 struct kimage *image; 759 image = xchg(&kexec_image, NULL); 760 if (!image) 761 return; 762 kernel_restart_prepare(NULL); 763 printk(KERN_EMERG "Starting new kernel\n"); 764 machine_shutdown(); 765 machine_kexec(image); 766 #endif 767 } 768 769 void kernel_shutdown_prepare(enum system_states state) 770 { 771 blocking_notifier_call_chain(&reboot_notifier_list, 772 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 773 system_state = state; 774 device_shutdown(); 775 } 776 /** 777 * kernel_halt - halt the system 778 * 779 * Shutdown everything and perform a clean system halt. 
780 */ 781 void kernel_halt(void) 782 { 783 kernel_shutdown_prepare(SYSTEM_HALT); 784 printk(KERN_EMERG "System halted.\n"); 785 machine_halt(); 786 } 787 788 EXPORT_SYMBOL_GPL(kernel_halt); 789 790 /** 791 * kernel_power_off - power_off the system 792 * 793 * Shutdown everything and perform a clean system power_off. 794 */ 795 void kernel_power_off(void) 796 { 797 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 798 printk(KERN_EMERG "Power down.\n"); 799 machine_power_off(); 800 } 801 EXPORT_SYMBOL_GPL(kernel_power_off); 802 /* 803 * Reboot system call: for obvious reasons only root may call it, 804 * and even root needs to set up some magic numbers in the registers 805 * so that some mistake won't make this reboot the whole machine. 806 * You can also set the meaning of the ctrl-alt-del-key here. 807 * 808 * reboot doesn't sync: do that yourself before calling this. 809 */ 810 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) 811 { 812 char buffer[256]; 813 814 /* We only trust the superuser with rebooting the system. */ 815 if (!capable(CAP_SYS_BOOT)) 816 return -EPERM; 817 818 /* For safety, we require "magic" arguments. */ 819 if (magic1 != LINUX_REBOOT_MAGIC1 || 820 (magic2 != LINUX_REBOOT_MAGIC2 && 821 magic2 != LINUX_REBOOT_MAGIC2A && 822 magic2 != LINUX_REBOOT_MAGIC2B && 823 magic2 != LINUX_REBOOT_MAGIC2C)) 824 return -EINVAL; 825 826 /* Instead of trying to make the power_off code look like 827 * halt when pm_power_off is not set do it the easy way. 
828 */ 829 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) 830 cmd = LINUX_REBOOT_CMD_HALT; 831 832 lock_kernel(); 833 switch (cmd) { 834 case LINUX_REBOOT_CMD_RESTART: 835 kernel_restart(NULL); 836 break; 837 838 case LINUX_REBOOT_CMD_CAD_ON: 839 C_A_D = 1; 840 break; 841 842 case LINUX_REBOOT_CMD_CAD_OFF: 843 C_A_D = 0; 844 break; 845 846 case LINUX_REBOOT_CMD_HALT: 847 kernel_halt(); 848 unlock_kernel(); 849 do_exit(0); 850 break; 851 852 case LINUX_REBOOT_CMD_POWER_OFF: 853 kernel_power_off(); 854 unlock_kernel(); 855 do_exit(0); 856 break; 857 858 case LINUX_REBOOT_CMD_RESTART2: 859 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { 860 unlock_kernel(); 861 return -EFAULT; 862 } 863 buffer[sizeof(buffer) - 1] = '\0'; 864 865 kernel_restart(buffer); 866 break; 867 868 case LINUX_REBOOT_CMD_KEXEC: 869 kernel_kexec(); 870 unlock_kernel(); 871 return -EINVAL; 872 873 #ifdef CONFIG_SOFTWARE_SUSPEND 874 case LINUX_REBOOT_CMD_SW_SUSPEND: 875 { 876 int ret = software_suspend(); 877 unlock_kernel(); 878 return ret; 879 } 880 #endif 881 882 default: 883 unlock_kernel(); 884 return -EINVAL; 885 } 886 unlock_kernel(); 887 return 0; 888 } 889 890 static void deferred_cad(struct work_struct *dummy) 891 { 892 kernel_restart(NULL); 893 } 894 895 /* 896 * This function gets called by ctrl-alt-del - ie the keyboard interrupt. 897 * As it's called within an interrupt, it may NOT sync: the only choice 898 * is whether to reboot at once, or just ignore the ctrl-alt-del. 899 */ 900 void ctrl_alt_del(void) 901 { 902 static DECLARE_WORK(cad_work, deferred_cad); 903 904 if (C_A_D) 905 schedule_work(&cad_work); 906 else 907 kill_cad_pid(SIGINT, 1); 908 } 909 910 /* 911 * Unprivileged users may change the real gid to the effective gid 912 * or vice versa. (BSD-style) 913 * 914 * If you set the real gid at all, or set the effective gid to a value not 915 * equal to the real gid, then the saved gid is set to the new effective gid. 
916 * 917 * This makes it possible for a setgid program to completely drop its 918 * privileges, which is often a useful assertion to make when you are doing 919 * a security audit over a program. 920 * 921 * The general idea is that a program which uses just setregid() will be 922 * 100% compatible with BSD. A program which uses just setgid() will be 923 * 100% compatible with POSIX with saved IDs. 924 * 925 * SMP: There are not races, the GIDs are checked only by filesystem 926 * operations (as far as semantic preservation is concerned). 927 */ 928 asmlinkage long sys_setregid(gid_t rgid, gid_t egid) 929 { 930 int old_rgid = current->gid; 931 int old_egid = current->egid; 932 int new_rgid = old_rgid; 933 int new_egid = old_egid; 934 int retval; 935 936 retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE); 937 if (retval) 938 return retval; 939 940 if (rgid != (gid_t) -1) { 941 if ((old_rgid == rgid) || 942 (current->egid==rgid) || 943 capable(CAP_SETGID)) 944 new_rgid = rgid; 945 else 946 return -EPERM; 947 } 948 if (egid != (gid_t) -1) { 949 if ((old_rgid == egid) || 950 (current->egid == egid) || 951 (current->sgid == egid) || 952 capable(CAP_SETGID)) 953 new_egid = egid; 954 else 955 return -EPERM; 956 } 957 if (new_egid != old_egid) { 958 current->mm->dumpable = suid_dumpable; 959 smp_wmb(); 960 } 961 if (rgid != (gid_t) -1 || 962 (egid != (gid_t) -1 && egid != old_rgid)) 963 current->sgid = new_egid; 964 current->fsgid = new_egid; 965 current->egid = new_egid; 966 current->gid = new_rgid; 967 key_fsgid_changed(current); 968 proc_id_connector(current, PROC_EVENT_GID); 969 return 0; 970 } 971 972 /* 973 * setgid() is implemented like SysV w/ SAVED_IDS 974 * 975 * SMP: Same implicit races as above. 
976 */ 977 asmlinkage long sys_setgid(gid_t gid) 978 { 979 int old_egid = current->egid; 980 int retval; 981 982 retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); 983 if (retval) 984 return retval; 985 986 if (capable(CAP_SETGID)) { 987 if (old_egid != gid) { 988 current->mm->dumpable = suid_dumpable; 989 smp_wmb(); 990 } 991 current->gid = current->egid = current->sgid = current->fsgid = gid; 992 } else if ((gid == current->gid) || (gid == current->sgid)) { 993 if (old_egid != gid) { 994 current->mm->dumpable = suid_dumpable; 995 smp_wmb(); 996 } 997 current->egid = current->fsgid = gid; 998 } 999 else 1000 return -EPERM; 1001 1002 key_fsgid_changed(current); 1003 proc_id_connector(current, PROC_EVENT_GID); 1004 return 0; 1005 } 1006 1007 static int set_user(uid_t new_ruid, int dumpclear) 1008 { 1009 struct user_struct *new_user; 1010 1011 new_user = alloc_uid(new_ruid); 1012 if (!new_user) 1013 return -EAGAIN; 1014 1015 if (atomic_read(&new_user->processes) >= 1016 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 1017 new_user != &root_user) { 1018 free_uid(new_user); 1019 return -EAGAIN; 1020 } 1021 1022 switch_uid(new_user); 1023 1024 if (dumpclear) { 1025 current->mm->dumpable = suid_dumpable; 1026 smp_wmb(); 1027 } 1028 current->uid = new_ruid; 1029 return 0; 1030 } 1031 1032 /* 1033 * Unprivileged users may change the real uid to the effective uid 1034 * or vice versa. (BSD-style) 1035 * 1036 * If you set the real uid at all, or set the effective uid to a value not 1037 * equal to the real uid, then the saved uid is set to the new effective uid. 1038 * 1039 * This makes it possible for a setuid program to completely drop its 1040 * privileges, which is often a useful assertion to make when you are doing 1041 * a security audit over a program. 1042 * 1043 * The general idea is that a program which uses just setreuid() will be 1044 * 100% compatible with BSD. 
A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
	int prev_ruid, prev_euid, prev_suid;
	int next_ruid, next_euid;
	int retval;

	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
	if (retval)
		return retval;

	prev_ruid = current->uid;
	prev_euid = current->euid;
	prev_suid = current->suid;
	next_ruid = prev_ruid;
	next_euid = prev_euid;

	/* A value of (uid_t)-1 leaves the corresponding id untouched. */
	if (ruid != (uid_t) -1) {
		if (prev_ruid != ruid &&
		    current->euid != ruid &&
		    !capable(CAP_SETUID))
			return -EPERM;
		next_ruid = ruid;
	}

	if (euid != (uid_t) -1) {
		if (prev_ruid != euid &&
		    current->euid != euid &&
		    current->suid != euid &&
		    !capable(CAP_SETUID))
			return -EPERM;
		next_euid = euid;
	}

	/* A real-uid change goes through set_user(); any failure is -EAGAIN. */
	if (next_ruid != prev_ruid &&
	    set_user(next_ruid, next_euid != prev_euid) < 0)
		return -EAGAIN;

	if (next_euid != prev_euid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->fsuid = current->euid = next_euid;
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != prev_ruid))
		current->suid = current->euid;
	current->fsuid = current->euid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(prev_ruid, prev_euid, prev_suid, LSM_SETID_RE);
}



/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG. 
Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
asmlinkage long sys_setuid(uid_t uid)
{
	int prev_euid = current->euid;
	int prev_ruid, prev_suid, next_suid;
	int retval;

	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
	if (retval)
		return retval;

	prev_ruid = current->uid;
	prev_suid = current->suid;
	next_suid = prev_suid;

	if (capable(CAP_SETUID)) {
		/* With CAP_SETUID the real and saved uids follow @uid as well. */
		if (uid != prev_ruid && set_user(uid, prev_euid != uid) < 0)
			return -EAGAIN;
		next_suid = uid;
	} else {
		/* Otherwise @uid must be the current real or saved uid. */
		if (uid != current->uid && uid != next_suid)
			return -EPERM;
	}

	if (prev_euid != uid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->fsuid = current->euid = uid;
	current->suid = next_suid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(prev_ruid, prev_euid, prev_suid, LSM_SETID_ID);
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
1147 */ 1148 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) 1149 { 1150 int old_ruid = current->uid; 1151 int old_euid = current->euid; 1152 int old_suid = current->suid; 1153 int retval; 1154 1155 retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); 1156 if (retval) 1157 return retval; 1158 1159 if (!capable(CAP_SETUID)) { 1160 if ((ruid != (uid_t) -1) && (ruid != current->uid) && 1161 (ruid != current->euid) && (ruid != current->suid)) 1162 return -EPERM; 1163 if ((euid != (uid_t) -1) && (euid != current->uid) && 1164 (euid != current->euid) && (euid != current->suid)) 1165 return -EPERM; 1166 if ((suid != (uid_t) -1) && (suid != current->uid) && 1167 (suid != current->euid) && (suid != current->suid)) 1168 return -EPERM; 1169 } 1170 if (ruid != (uid_t) -1) { 1171 if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) 1172 return -EAGAIN; 1173 } 1174 if (euid != (uid_t) -1) { 1175 if (euid != current->euid) { 1176 current->mm->dumpable = suid_dumpable; 1177 smp_wmb(); 1178 } 1179 current->euid = euid; 1180 } 1181 current->fsuid = current->euid; 1182 if (suid != (uid_t) -1) 1183 current->suid = suid; 1184 1185 key_fsuid_changed(current); 1186 proc_id_connector(current, PROC_EVENT_UID); 1187 1188 return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); 1189 } 1190 1191 asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) 1192 { 1193 int retval; 1194 1195 if (!(retval = put_user(current->uid, ruid)) && 1196 !(retval = put_user(current->euid, euid))) 1197 retval = put_user(current->suid, suid); 1198 1199 return retval; 1200 } 1201 1202 /* 1203 * Same as above, but for rgid, egid, sgid. 
1204 */ 1205 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) 1206 { 1207 int retval; 1208 1209 retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); 1210 if (retval) 1211 return retval; 1212 1213 if (!capable(CAP_SETGID)) { 1214 if ((rgid != (gid_t) -1) && (rgid != current->gid) && 1215 (rgid != current->egid) && (rgid != current->sgid)) 1216 return -EPERM; 1217 if ((egid != (gid_t) -1) && (egid != current->gid) && 1218 (egid != current->egid) && (egid != current->sgid)) 1219 return -EPERM; 1220 if ((sgid != (gid_t) -1) && (sgid != current->gid) && 1221 (sgid != current->egid) && (sgid != current->sgid)) 1222 return -EPERM; 1223 } 1224 if (egid != (gid_t) -1) { 1225 if (egid != current->egid) { 1226 current->mm->dumpable = suid_dumpable; 1227 smp_wmb(); 1228 } 1229 current->egid = egid; 1230 } 1231 current->fsgid = current->egid; 1232 if (rgid != (gid_t) -1) 1233 current->gid = rgid; 1234 if (sgid != (gid_t) -1) 1235 current->sgid = sgid; 1236 1237 key_fsgid_changed(current); 1238 proc_id_connector(current, PROC_EVENT_GID); 1239 return 0; 1240 } 1241 1242 asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) 1243 { 1244 int retval; 1245 1246 if (!(retval = put_user(current->gid, rgid)) && 1247 !(retval = put_user(current->egid, egid))) 1248 retval = put_user(current->sgid, sgid); 1249 1250 return retval; 1251 } 1252 1253 1254 /* 1255 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 1256 * is used for "access()" and for the NFS daemon (letting nfsd stay at 1257 * whatever uid it wants to). It normally shadows "euid", except when 1258 * explicitly set by setfsuid() or for access.. 
1259 */ 1260 asmlinkage long sys_setfsuid(uid_t uid) 1261 { 1262 int old_fsuid; 1263 1264 old_fsuid = current->fsuid; 1265 if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) 1266 return old_fsuid; 1267 1268 if (uid == current->uid || uid == current->euid || 1269 uid == current->suid || uid == current->fsuid || 1270 capable(CAP_SETUID)) { 1271 if (uid != old_fsuid) { 1272 current->mm->dumpable = suid_dumpable; 1273 smp_wmb(); 1274 } 1275 current->fsuid = uid; 1276 } 1277 1278 key_fsuid_changed(current); 1279 proc_id_connector(current, PROC_EVENT_UID); 1280 1281 security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); 1282 1283 return old_fsuid; 1284 } 1285 1286 /* 1287 * Samma p� svenska.. 1288 */ 1289 asmlinkage long sys_setfsgid(gid_t gid) 1290 { 1291 int old_fsgid; 1292 1293 old_fsgid = current->fsgid; 1294 if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) 1295 return old_fsgid; 1296 1297 if (gid == current->gid || gid == current->egid || 1298 gid == current->sgid || gid == current->fsgid || 1299 capable(CAP_SETGID)) { 1300 if (gid != old_fsgid) { 1301 current->mm->dumpable = suid_dumpable; 1302 smp_wmb(); 1303 } 1304 current->fsgid = gid; 1305 key_fsgid_changed(current); 1306 proc_id_connector(current, PROC_EVENT_GID); 1307 } 1308 return old_fsgid; 1309 } 1310 1311 asmlinkage long sys_times(struct tms __user * tbuf) 1312 { 1313 /* 1314 * In the SMP world we might just be unlucky and have one of 1315 * the times increment as we use it. Since the value is an 1316 * atomically safe type this is just fine. Conceptually its 1317 * as if the syscall took an instant longer to occur. 
1318 */ 1319 if (tbuf) { 1320 struct tms tmp; 1321 struct task_struct *tsk = current; 1322 struct task_struct *t; 1323 cputime_t utime, stime, cutime, cstime; 1324 1325 spin_lock_irq(&tsk->sighand->siglock); 1326 utime = tsk->signal->utime; 1327 stime = tsk->signal->stime; 1328 t = tsk; 1329 do { 1330 utime = cputime_add(utime, t->utime); 1331 stime = cputime_add(stime, t->stime); 1332 t = next_thread(t); 1333 } while (t != tsk); 1334 1335 cutime = tsk->signal->cutime; 1336 cstime = tsk->signal->cstime; 1337 spin_unlock_irq(&tsk->sighand->siglock); 1338 1339 tmp.tms_utime = cputime_to_clock_t(utime); 1340 tmp.tms_stime = cputime_to_clock_t(stime); 1341 tmp.tms_cutime = cputime_to_clock_t(cutime); 1342 tmp.tms_cstime = cputime_to_clock_t(cstime); 1343 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 1344 return -EFAULT; 1345 } 1346 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 1347 } 1348 1349 /* 1350 * This needs some heavy checking ... 1351 * I just haven't the stomach for it. I also don't fully 1352 * understand sessions/pgrp etc. Let somebody who does explain it. 1353 * 1354 * OK, I think I have the protection semantics right.... this is really 1355 * only important on a multi-user system anyway, to make sure one user 1356 * can't send a signal to a process owned by another. -TYT, 12/12/91 1357 * 1358 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 1359 * LBT 04.03.94 1360 */ 1361 1362 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) 1363 { 1364 struct task_struct *p; 1365 struct task_struct *group_leader = current->group_leader; 1366 int err = -EINVAL; 1367 1368 if (!pid) 1369 pid = group_leader->pid; 1370 if (!pgid) 1371 pgid = pid; 1372 if (pgid < 0) 1373 return -EINVAL; 1374 1375 /* From this point forward we keep holding onto the tasklist lock 1376 * so that our parent does not change from under us. 
-DaveM 1377 */ 1378 write_lock_irq(&tasklist_lock); 1379 1380 err = -ESRCH; 1381 p = find_task_by_pid(pid); 1382 if (!p) 1383 goto out; 1384 1385 err = -EINVAL; 1386 if (!thread_group_leader(p)) 1387 goto out; 1388 1389 if (p->real_parent == group_leader) { 1390 err = -EPERM; 1391 if (process_session(p) != process_session(group_leader)) 1392 goto out; 1393 err = -EACCES; 1394 if (p->did_exec) 1395 goto out; 1396 } else { 1397 err = -ESRCH; 1398 if (p != group_leader) 1399 goto out; 1400 } 1401 1402 err = -EPERM; 1403 if (p->signal->leader) 1404 goto out; 1405 1406 if (pgid != pid) { 1407 struct task_struct *g = 1408 find_task_by_pid_type(PIDTYPE_PGID, pgid); 1409 1410 if (!g || process_session(g) != process_session(group_leader)) 1411 goto out; 1412 } 1413 1414 err = security_task_setpgid(p, pgid); 1415 if (err) 1416 goto out; 1417 1418 if (process_group(p) != pgid) { 1419 detach_pid(p, PIDTYPE_PGID); 1420 p->signal->pgrp = pgid; 1421 attach_pid(p, PIDTYPE_PGID, pgid); 1422 } 1423 1424 err = 0; 1425 out: 1426 /* All paths lead to here, thus we are safe. 
-DaveM */ 1427 write_unlock_irq(&tasklist_lock); 1428 return err; 1429 } 1430 1431 asmlinkage long sys_getpgid(pid_t pid) 1432 { 1433 if (!pid) 1434 return process_group(current); 1435 else { 1436 int retval; 1437 struct task_struct *p; 1438 1439 read_lock(&tasklist_lock); 1440 p = find_task_by_pid(pid); 1441 1442 retval = -ESRCH; 1443 if (p) { 1444 retval = security_task_getpgid(p); 1445 if (!retval) 1446 retval = process_group(p); 1447 } 1448 read_unlock(&tasklist_lock); 1449 return retval; 1450 } 1451 } 1452 1453 #ifdef __ARCH_WANT_SYS_GETPGRP 1454 1455 asmlinkage long sys_getpgrp(void) 1456 { 1457 /* SMP - assuming writes are word atomic this is fine */ 1458 return process_group(current); 1459 } 1460 1461 #endif 1462 1463 asmlinkage long sys_getsid(pid_t pid) 1464 { 1465 if (!pid) 1466 return process_session(current); 1467 else { 1468 int retval; 1469 struct task_struct *p; 1470 1471 read_lock(&tasklist_lock); 1472 p = find_task_by_pid(pid); 1473 1474 retval = -ESRCH; 1475 if (p) { 1476 retval = security_task_getsid(p); 1477 if (!retval) 1478 retval = process_session(p); 1479 } 1480 read_unlock(&tasklist_lock); 1481 return retval; 1482 } 1483 } 1484 1485 asmlinkage long sys_setsid(void) 1486 { 1487 struct task_struct *group_leader = current->group_leader; 1488 pid_t session; 1489 int err = -EPERM; 1490 1491 write_lock_irq(&tasklist_lock); 1492 1493 /* Fail if I am already a session leader */ 1494 if (group_leader->signal->leader) 1495 goto out; 1496 1497 session = group_leader->pid; 1498 /* Fail if a process group id already exists that equals the 1499 * proposed session id. 1500 * 1501 * Don't check if session id == 1 because kernel threads use this 1502 * session id and so the check will always fail and make it so 1503 * init cannot successfully call setsid. 
1504 */ 1505 if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session)) 1506 goto out; 1507 1508 group_leader->signal->leader = 1; 1509 __set_special_pids(session, session); 1510 1511 spin_lock(&group_leader->sighand->siglock); 1512 group_leader->signal->tty = NULL; 1513 group_leader->signal->tty_old_pgrp = 0; 1514 spin_unlock(&group_leader->sighand->siglock); 1515 1516 err = process_group(group_leader); 1517 out: 1518 write_unlock_irq(&tasklist_lock); 1519 return err; 1520 } 1521 1522 /* 1523 * Supplementary group IDs 1524 */ 1525 1526 /* init to 2 - one for init_task, one to ensure it is never freed */ 1527 struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; 1528 1529 struct group_info *groups_alloc(int gidsetsize) 1530 { 1531 struct group_info *group_info; 1532 int nblocks; 1533 int i; 1534 1535 nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK; 1536 /* Make sure we always allocate at least one indirect block pointer */ 1537 nblocks = nblocks ? : 1; 1538 group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER); 1539 if (!group_info) 1540 return NULL; 1541 group_info->ngroups = gidsetsize; 1542 group_info->nblocks = nblocks; 1543 atomic_set(&group_info->usage, 1); 1544 1545 if (gidsetsize <= NGROUPS_SMALL) 1546 group_info->blocks[0] = group_info->small_block; 1547 else { 1548 for (i = 0; i < nblocks; i++) { 1549 gid_t *b; 1550 b = (void *)__get_free_page(GFP_USER); 1551 if (!b) 1552 goto out_undo_partial_alloc; 1553 group_info->blocks[i] = b; 1554 } 1555 } 1556 return group_info; 1557 1558 out_undo_partial_alloc: 1559 while (--i >= 0) { 1560 free_page((unsigned long)group_info->blocks[i]); 1561 } 1562 kfree(group_info); 1563 return NULL; 1564 } 1565 1566 EXPORT_SYMBOL(groups_alloc); 1567 1568 void groups_free(struct group_info *group_info) 1569 { 1570 if (group_info->blocks[0] != group_info->small_block) { 1571 int i; 1572 for (i = 0; i < group_info->nblocks; i++) 1573 free_page((unsigned 
long)group_info->blocks[i]); 1574 } 1575 kfree(group_info); 1576 } 1577 1578 EXPORT_SYMBOL(groups_free); 1579 1580 /* export the group_info to a user-space array */ 1581 static int groups_to_user(gid_t __user *grouplist, 1582 struct group_info *group_info) 1583 { 1584 int i; 1585 int count = group_info->ngroups; 1586 1587 for (i = 0; i < group_info->nblocks; i++) { 1588 int cp_count = min(NGROUPS_PER_BLOCK, count); 1589 int off = i * NGROUPS_PER_BLOCK; 1590 int len = cp_count * sizeof(*grouplist); 1591 1592 if (copy_to_user(grouplist+off, group_info->blocks[i], len)) 1593 return -EFAULT; 1594 1595 count -= cp_count; 1596 } 1597 return 0; 1598 } 1599 1600 /* fill a group_info from a user-space array - it must be allocated already */ 1601 static int groups_from_user(struct group_info *group_info, 1602 gid_t __user *grouplist) 1603 { 1604 int i; 1605 int count = group_info->ngroups; 1606 1607 for (i = 0; i < group_info->nblocks; i++) { 1608 int cp_count = min(NGROUPS_PER_BLOCK, count); 1609 int off = i * NGROUPS_PER_BLOCK; 1610 int len = cp_count * sizeof(*grouplist); 1611 1612 if (copy_from_user(group_info->blocks[i], grouplist+off, len)) 1613 return -EFAULT; 1614 1615 count -= cp_count; 1616 } 1617 return 0; 1618 } 1619 1620 /* a simple Shell sort */ 1621 static void groups_sort(struct group_info *group_info) 1622 { 1623 int base, max, stride; 1624 int gidsetsize = group_info->ngroups; 1625 1626 for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1) 1627 ; /* nothing */ 1628 stride /= 3; 1629 1630 while (stride) { 1631 max = gidsetsize - stride; 1632 for (base = 0; base < max; base++) { 1633 int left = base; 1634 int right = left + stride; 1635 gid_t tmp = GROUP_AT(group_info, right); 1636 1637 while (left >= 0 && GROUP_AT(group_info, left) > tmp) { 1638 GROUP_AT(group_info, right) = 1639 GROUP_AT(group_info, left); 1640 right = left; 1641 left -= stride; 1642 } 1643 GROUP_AT(group_info, right) = tmp; 1644 } 1645 stride /= 3; 1646 } 1647 } 1648 1649 /* a 
simple bsearch */ 1650 int groups_search(struct group_info *group_info, gid_t grp) 1651 { 1652 unsigned int left, right; 1653 1654 if (!group_info) 1655 return 0; 1656 1657 left = 0; 1658 right = group_info->ngroups; 1659 while (left < right) { 1660 unsigned int mid = (left+right)/2; 1661 int cmp = grp - GROUP_AT(group_info, mid); 1662 if (cmp > 0) 1663 left = mid + 1; 1664 else if (cmp < 0) 1665 right = mid; 1666 else 1667 return 1; 1668 } 1669 return 0; 1670 } 1671 1672 /* validate and set current->group_info */ 1673 int set_current_groups(struct group_info *group_info) 1674 { 1675 int retval; 1676 struct group_info *old_info; 1677 1678 retval = security_task_setgroups(group_info); 1679 if (retval) 1680 return retval; 1681 1682 groups_sort(group_info); 1683 get_group_info(group_info); 1684 1685 task_lock(current); 1686 old_info = current->group_info; 1687 current->group_info = group_info; 1688 task_unlock(current); 1689 1690 put_group_info(old_info); 1691 1692 return 0; 1693 } 1694 1695 EXPORT_SYMBOL(set_current_groups); 1696 1697 asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) 1698 { 1699 int i = 0; 1700 1701 /* 1702 * SMP: Nobody else can change our grouplist. Thus we are 1703 * safe. 1704 */ 1705 1706 if (gidsetsize < 0) 1707 return -EINVAL; 1708 1709 /* no need to grab task_lock here; it cannot change */ 1710 i = current->group_info->ngroups; 1711 if (gidsetsize) { 1712 if (i > gidsetsize) { 1713 i = -EINVAL; 1714 goto out; 1715 } 1716 if (groups_to_user(grouplist, current->group_info)) { 1717 i = -EFAULT; 1718 goto out; 1719 } 1720 } 1721 out: 1722 return i; 1723 } 1724 1725 /* 1726 * SMP: Our groups are copy-on-write. We can set them safely 1727 * without another task interfering. 
1728 */ 1729 1730 asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) 1731 { 1732 struct group_info *group_info; 1733 int retval; 1734 1735 if (!capable(CAP_SETGID)) 1736 return -EPERM; 1737 if ((unsigned)gidsetsize > NGROUPS_MAX) 1738 return -EINVAL; 1739 1740 group_info = groups_alloc(gidsetsize); 1741 if (!group_info) 1742 return -ENOMEM; 1743 retval = groups_from_user(group_info, grouplist); 1744 if (retval) { 1745 put_group_info(group_info); 1746 return retval; 1747 } 1748 1749 retval = set_current_groups(group_info); 1750 put_group_info(group_info); 1751 1752 return retval; 1753 } 1754 1755 /* 1756 * Check whether we're fsgid/egid or in the supplemental group.. 1757 */ 1758 int in_group_p(gid_t grp) 1759 { 1760 int retval = 1; 1761 if (grp != current->fsgid) 1762 retval = groups_search(current->group_info, grp); 1763 return retval; 1764 } 1765 1766 EXPORT_SYMBOL(in_group_p); 1767 1768 int in_egroup_p(gid_t grp) 1769 { 1770 int retval = 1; 1771 if (grp != current->egid) 1772 retval = groups_search(current->group_info, grp); 1773 return retval; 1774 } 1775 1776 EXPORT_SYMBOL(in_egroup_p); 1777 1778 DECLARE_RWSEM(uts_sem); 1779 1780 EXPORT_SYMBOL(uts_sem); 1781 1782 asmlinkage long sys_newuname(struct new_utsname __user * name) 1783 { 1784 int errno = 0; 1785 1786 down_read(&uts_sem); 1787 if (copy_to_user(name, utsname(), sizeof *name)) 1788 errno = -EFAULT; 1789 up_read(&uts_sem); 1790 return errno; 1791 } 1792 1793 asmlinkage long sys_sethostname(char __user *name, int len) 1794 { 1795 int errno; 1796 char tmp[__NEW_UTS_LEN]; 1797 1798 if (!capable(CAP_SYS_ADMIN)) 1799 return -EPERM; 1800 if (len < 0 || len > __NEW_UTS_LEN) 1801 return -EINVAL; 1802 down_write(&uts_sem); 1803 errno = -EFAULT; 1804 if (!copy_from_user(tmp, name, len)) { 1805 memcpy(utsname()->nodename, tmp, len); 1806 utsname()->nodename[len] = 0; 1807 errno = 0; 1808 } 1809 up_write(&uts_sem); 1810 return errno; 1811 } 1812 1813 #ifdef __ARCH_WANT_SYS_GETHOSTNAME 1814 1815 
asmlinkage long sys_gethostname(char __user *name, int len) 1816 { 1817 int i, errno; 1818 1819 if (len < 0) 1820 return -EINVAL; 1821 down_read(&uts_sem); 1822 i = 1 + strlen(utsname()->nodename); 1823 if (i > len) 1824 i = len; 1825 errno = 0; 1826 if (copy_to_user(name, utsname()->nodename, i)) 1827 errno = -EFAULT; 1828 up_read(&uts_sem); 1829 return errno; 1830 } 1831 1832 #endif 1833 1834 /* 1835 * Only setdomainname; getdomainname can be implemented by calling 1836 * uname() 1837 */ 1838 asmlinkage long sys_setdomainname(char __user *name, int len) 1839 { 1840 int errno; 1841 char tmp[__NEW_UTS_LEN]; 1842 1843 if (!capable(CAP_SYS_ADMIN)) 1844 return -EPERM; 1845 if (len < 0 || len > __NEW_UTS_LEN) 1846 return -EINVAL; 1847 1848 down_write(&uts_sem); 1849 errno = -EFAULT; 1850 if (!copy_from_user(tmp, name, len)) { 1851 memcpy(utsname()->domainname, tmp, len); 1852 utsname()->domainname[len] = 0; 1853 errno = 0; 1854 } 1855 up_write(&uts_sem); 1856 return errno; 1857 } 1858 1859 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) 1860 { 1861 if (resource >= RLIM_NLIMITS) 1862 return -EINVAL; 1863 else { 1864 struct rlimit value; 1865 task_lock(current->group_leader); 1866 value = current->signal->rlim[resource]; 1867 task_unlock(current->group_leader); 1868 return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; 1869 } 1870 } 1871 1872 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1873 1874 /* 1875 * Back compatibility for getrlimit. Needed for some apps. 
1876 */ 1877 1878 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim) 1879 { 1880 struct rlimit x; 1881 if (resource >= RLIM_NLIMITS) 1882 return -EINVAL; 1883 1884 task_lock(current->group_leader); 1885 x = current->signal->rlim[resource]; 1886 task_unlock(current->group_leader); 1887 if (x.rlim_cur > 0x7FFFFFFF) 1888 x.rlim_cur = 0x7FFFFFFF; 1889 if (x.rlim_max > 0x7FFFFFFF) 1890 x.rlim_max = 0x7FFFFFFF; 1891 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1892 } 1893 1894 #endif 1895 1896 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) 1897 { 1898 struct rlimit new_rlim, *old_rlim; 1899 unsigned long it_prof_secs; 1900 int retval; 1901 1902 if (resource >= RLIM_NLIMITS) 1903 return -EINVAL; 1904 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1905 return -EFAULT; 1906 if (new_rlim.rlim_cur > new_rlim.rlim_max) 1907 return -EINVAL; 1908 old_rlim = current->signal->rlim + resource; 1909 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1910 !capable(CAP_SYS_RESOURCE)) 1911 return -EPERM; 1912 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) 1913 return -EPERM; 1914 1915 retval = security_task_setrlimit(resource, &new_rlim); 1916 if (retval) 1917 return retval; 1918 1919 task_lock(current->group_leader); 1920 *old_rlim = new_rlim; 1921 task_unlock(current->group_leader); 1922 1923 if (resource != RLIMIT_CPU) 1924 goto out; 1925 1926 /* 1927 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1928 * code if it rejected the user's attempt to set RLIMIT_CPU. 
This is a 1929 * very long-standing error, and fixing it now risks breakage of 1930 * applications, so we live with it 1931 */ 1932 if (new_rlim.rlim_cur == RLIM_INFINITY) 1933 goto out; 1934 1935 it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); 1936 if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { 1937 unsigned long rlim_cur = new_rlim.rlim_cur; 1938 cputime_t cputime; 1939 1940 if (rlim_cur == 0) { 1941 /* 1942 * The caller is asking for an immediate RLIMIT_CPU 1943 * expiry. But we use the zero value to mean "it was 1944 * never set". So let's cheat and make it one second 1945 * instead 1946 */ 1947 rlim_cur = 1; 1948 } 1949 cputime = secs_to_cputime(rlim_cur); 1950 read_lock(&tasklist_lock); 1951 spin_lock_irq(¤t->sighand->siglock); 1952 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); 1953 spin_unlock_irq(¤t->sighand->siglock); 1954 read_unlock(&tasklist_lock); 1955 } 1956 out: 1957 return 0; 1958 } 1959 1960 /* 1961 * It would make sense to put struct rusage in the task_struct, 1962 * except that would make the task_struct be *really big*. After 1963 * task_struct gets moved into malloc'ed memory, it would 1964 * make sense to do this. It will make moving the rest of the information 1965 * a lot simpler! (Which we're not doing right now because we're not 1966 * measuring them yet). 1967 * 1968 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1969 * races with threads incrementing their own counters. But since word 1970 * reads are atomic, we either get new values or old values and we don't 1971 * care which for the sums. We always take the siglock to protect reading 1972 * the c* fields from p->signal from races with exit.c updating those 1973 * fields when reaping, so a sample either gets all the additions of a 1974 * given child after it's reaped, or none so this sample is before reaping. 
1975 * 1976 * Locking: 1977 * We need to take the siglock for CHILDEREN, SELF and BOTH 1978 * for the cases current multithreaded, non-current single threaded 1979 * non-current multithreaded. Thread traversal is now safe with 1980 * the siglock held. 1981 * Strictly speaking, we donot need to take the siglock if we are current and 1982 * single threaded, as no one else can take our signal_struct away, no one 1983 * else can reap the children to update signal->c* counters, and no one else 1984 * can race with the signal-> fields. If we do not take any lock, the 1985 * signal-> fields could be read out of order while another thread was just 1986 * exiting. So we should place a read memory barrier when we avoid the lock. 1987 * On the writer side, write memory barrier is implied in __exit_signal 1988 * as __exit_signal releases the siglock spinlock after updating the signal-> 1989 * fields. But we don't do this yet to keep things simple. 1990 * 1991 */ 1992 1993 static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 1994 { 1995 struct task_struct *t; 1996 unsigned long flags; 1997 cputime_t utime, stime; 1998 1999 memset((char *) r, 0, sizeof *r); 2000 utime = stime = cputime_zero; 2001 2002 rcu_read_lock(); 2003 if (!lock_task_sighand(p, &flags)) { 2004 rcu_read_unlock(); 2005 return; 2006 } 2007 2008 switch (who) { 2009 case RUSAGE_BOTH: 2010 case RUSAGE_CHILDREN: 2011 utime = p->signal->cutime; 2012 stime = p->signal->cstime; 2013 r->ru_nvcsw = p->signal->cnvcsw; 2014 r->ru_nivcsw = p->signal->cnivcsw; 2015 r->ru_minflt = p->signal->cmin_flt; 2016 r->ru_majflt = p->signal->cmaj_flt; 2017 2018 if (who == RUSAGE_CHILDREN) 2019 break; 2020 2021 case RUSAGE_SELF: 2022 utime = cputime_add(utime, p->signal->utime); 2023 stime = cputime_add(stime, p->signal->stime); 2024 r->ru_nvcsw += p->signal->nvcsw; 2025 r->ru_nivcsw += p->signal->nivcsw; 2026 r->ru_minflt += p->signal->min_flt; 2027 r->ru_majflt += p->signal->maj_flt; 2028 t = p; 2029 do { 2030 
utime = cputime_add(utime, t->utime); 2031 stime = cputime_add(stime, t->stime); 2032 r->ru_nvcsw += t->nvcsw; 2033 r->ru_nivcsw += t->nivcsw; 2034 r->ru_minflt += t->min_flt; 2035 r->ru_majflt += t->maj_flt; 2036 t = next_thread(t); 2037 } while (t != p); 2038 break; 2039 2040 default: 2041 BUG(); 2042 } 2043 2044 unlock_task_sighand(p, &flags); 2045 rcu_read_unlock(); 2046 2047 cputime_to_timeval(utime, &r->ru_utime); 2048 cputime_to_timeval(stime, &r->ru_stime); 2049 } 2050 2051 int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 2052 { 2053 struct rusage r; 2054 k_getrusage(p, who, &r); 2055 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 2056 } 2057 2058 asmlinkage long sys_getrusage(int who, struct rusage __user *ru) 2059 { 2060 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) 2061 return -EINVAL; 2062 return getrusage(current, who, ru); 2063 } 2064 2065 asmlinkage long sys_umask(int mask) 2066 { 2067 mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); 2068 return mask; 2069 } 2070 2071 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, 2072 unsigned long arg4, unsigned long arg5) 2073 { 2074 long error; 2075 2076 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 2077 if (error) 2078 return error; 2079 2080 switch (option) { 2081 case PR_SET_PDEATHSIG: 2082 if (!valid_signal(arg2)) { 2083 error = -EINVAL; 2084 break; 2085 } 2086 current->pdeath_signal = arg2; 2087 break; 2088 case PR_GET_PDEATHSIG: 2089 error = put_user(current->pdeath_signal, (int __user *)arg2); 2090 break; 2091 case PR_GET_DUMPABLE: 2092 error = current->mm->dumpable; 2093 break; 2094 case PR_SET_DUMPABLE: 2095 if (arg2 < 0 || arg2 > 1) { 2096 error = -EINVAL; 2097 break; 2098 } 2099 current->mm->dumpable = arg2; 2100 break; 2101 2102 case PR_SET_UNALIGN: 2103 error = SET_UNALIGN_CTL(current, arg2); 2104 break; 2105 case PR_GET_UNALIGN: 2106 error = GET_UNALIGN_CTL(current, arg2); 2107 break; 2108 case PR_SET_FPEMU: 2109 error = 
SET_FPEMU_CTL(current, arg2); 2110 break; 2111 case PR_GET_FPEMU: 2112 error = GET_FPEMU_CTL(current, arg2); 2113 break; 2114 case PR_SET_FPEXC: 2115 error = SET_FPEXC_CTL(current, arg2); 2116 break; 2117 case PR_GET_FPEXC: 2118 error = GET_FPEXC_CTL(current, arg2); 2119 break; 2120 case PR_GET_TIMING: 2121 error = PR_TIMING_STATISTICAL; 2122 break; 2123 case PR_SET_TIMING: 2124 if (arg2 == PR_TIMING_STATISTICAL) 2125 error = 0; 2126 else 2127 error = -EINVAL; 2128 break; 2129 2130 case PR_GET_KEEPCAPS: 2131 if (current->keep_capabilities) 2132 error = 1; 2133 break; 2134 case PR_SET_KEEPCAPS: 2135 if (arg2 != 0 && arg2 != 1) { 2136 error = -EINVAL; 2137 break; 2138 } 2139 current->keep_capabilities = arg2; 2140 break; 2141 case PR_SET_NAME: { 2142 struct task_struct *me = current; 2143 unsigned char ncomm[sizeof(me->comm)]; 2144 2145 ncomm[sizeof(me->comm)-1] = 0; 2146 if (strncpy_from_user(ncomm, (char __user *)arg2, 2147 sizeof(me->comm)-1) < 0) 2148 return -EFAULT; 2149 set_task_comm(me, ncomm); 2150 return 0; 2151 } 2152 case PR_GET_NAME: { 2153 struct task_struct *me = current; 2154 unsigned char tcomm[sizeof(me->comm)]; 2155 2156 get_task_comm(tcomm, me); 2157 if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm))) 2158 return -EFAULT; 2159 return 0; 2160 } 2161 case PR_GET_ENDIAN: 2162 error = GET_ENDIAN(current, arg2); 2163 break; 2164 case PR_SET_ENDIAN: 2165 error = SET_ENDIAN(current, arg2); 2166 break; 2167 2168 default: 2169 error = -EINVAL; 2170 break; 2171 } 2172 return error; 2173 } 2174 2175 asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, 2176 struct getcpu_cache __user *cache) 2177 { 2178 int err = 0; 2179 int cpu = raw_smp_processor_id(); 2180 if (cpup) 2181 err |= put_user(cpu, cpup); 2182 if (nodep) 2183 err |= put_user(cpu_to_node(cpu), nodep); 2184 if (cache) { 2185 /* 2186 * The cache is not needed for this implementation, 2187 * but make sure user programs pass something 2188 * valid. 
vsyscall implementations can instead make 2189 * good use of the cache. Only use t0 and t1 because 2190 * these are available in both 32bit and 64bit ABI (no 2191 * need for a compat_getcpu). 32bit has enough 2192 * padding 2193 */ 2194 unsigned long t0, t1; 2195 get_user(t0, &cache->blob[0]); 2196 get_user(t1, &cache->blob[1]); 2197 t0++; 2198 t1++; 2199 put_user(t0, &cache->blob[0]); 2200 put_user(t1, &cache->blob[1]); 2201 } 2202 return err ? -EFAULT : 0; 2203 } 2204