1 /* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/security.h> 15 #include <linux/file.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/pagemap.h> 19 #include <linux/swap.h> 20 #include <linux/skbuff.h> 21 #include <linux/netlink.h> 22 #include <linux/ptrace.h> 23 #include <linux/xattr.h> 24 #include <linux/hugetlb.h> 25 #include <linux/mount.h> 26 #include <linux/sched.h> 27 #include <linux/prctl.h> 28 #include <linux/securebits.h> 29 30 int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 31 { 32 NETLINK_CB(skb).eff_cap = current->cap_effective; 33 return 0; 34 } 35 36 int cap_netlink_recv(struct sk_buff *skb, int cap) 37 { 38 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 39 return -EPERM; 40 return 0; 41 } 42 43 EXPORT_SYMBOL(cap_netlink_recv); 44 45 /* 46 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 47 * function. That is, it has the reverse semantics: cap_capable() 48 * returns 0 when a task has a capability, but the kernel's capable() 49 * returns 1 for this case. 50 */ 51 int cap_capable (struct task_struct *tsk, int cap) 52 { 53 /* Derived from include/linux/sched.h:capable. */ 54 if (cap_raised(tsk->cap_effective, cap)) 55 return 0; 56 return -EPERM; 57 } 58 59 int cap_settime(struct timespec *ts, struct timezone *tz) 60 { 61 if (!capable(CAP_SYS_TIME)) 62 return -EPERM; 63 return 0; 64 } 65 66 int cap_ptrace (struct task_struct *parent, struct task_struct *child, 67 unsigned int mode) 68 { 69 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 70 if (!cap_issubset(child->cap_permitted, parent->cap_permitted) && 71 !__capable(parent, CAP_SYS_PTRACE)) 72 return -EPERM; 73 return 0; 74 } 75 76 int cap_capget (struct task_struct *target, kernel_cap_t *effective, 77 kernel_cap_t *inheritable, kernel_cap_t *permitted) 78 { 79 /* Derived from kernel/capability.c:sys_capget. */ 80 *effective = target->cap_effective; 81 *inheritable = target->cap_inheritable; 82 *permitted = target->cap_permitted; 83 return 0; 84 } 85 86 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 87 88 static inline int cap_block_setpcap(struct task_struct *target) 89 { 90 /* 91 * No support for remote process capability manipulation with 92 * filesystem capability support. 93 */ 94 return (target != current); 95 } 96 97 static inline int cap_inh_is_capped(void) 98 { 99 /* 100 * Return 1 if changes to the inheritable set are limited 101 * to the old permitted set. That is, if the current task 102 * does *not* possess the CAP_SETPCAP capability. 103 */ 104 return (cap_capable(current, CAP_SETPCAP) != 0); 105 } 106 107 static inline int cap_limit_ptraced_target(void) { return 1; } 108 109 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 110 111 static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 112 static inline int cap_inh_is_capped(void) { return 1; } 113 static inline int cap_limit_ptraced_target(void) 114 { 115 return !capable(CAP_SETPCAP); 116 } 117 118 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 119 120 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 121 kernel_cap_t *inheritable, kernel_cap_t *permitted) 122 { 123 if (cap_block_setpcap(target)) { 124 return -EPERM; 125 } 126 if (cap_inh_is_capped() 127 && !cap_issubset(*inheritable, 128 cap_combine(target->cap_inheritable, 129 current->cap_permitted))) { 130 /* incapable of using this inheritable set */ 131 return -EPERM; 132 } 133 if (!cap_issubset(*inheritable, 134 cap_combine(target->cap_inheritable, 135 current->cap_bset))) { 136 /* no new pI capabilities outside bounding set */ 137 return -EPERM; 138 } 139 140 /* verify restrictions on target's new Permitted set */ 141 if (!cap_issubset (*permitted, 142 cap_combine (target->cap_permitted, 143 current->cap_permitted))) { 144 return -EPERM; 145 } 146 147 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 148 if (!cap_issubset (*effective, *permitted)) { 149 return -EPERM; 150 } 151 152 return 0; 153 } 154 155 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 156 kernel_cap_t *inheritable, kernel_cap_t *permitted) 157 { 158 target->cap_effective = *effective; 159 target->cap_inheritable = *inheritable; 160 target->cap_permitted = *permitted; 161 } 162 163 static inline void bprm_clear_caps(struct linux_binprm *bprm) 164 { 165 cap_clear(bprm->cap_inheritable); 166 cap_clear(bprm->cap_permitted); 167 bprm->cap_effective = false; 168 } 169 170 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 171 172 int cap_inode_need_killpriv(struct dentry *dentry) 173 { 174 struct inode *inode = dentry->d_inode; 175 int error; 176 177 if (!inode->i_op || !inode->i_op->getxattr) 178 return 0; 179 180 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 181 if (error <= 0) 182 return 0; 183 return 1; 184 } 185 186 int cap_inode_killpriv(struct dentry *dentry) 187 { 188 struct inode *inode = dentry->d_inode; 189 190 if (!inode->i_op || !inode->i_op->removexattr) 191 return 0; 192 193 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 194 } 195 196 static inline int cap_from_disk(struct vfs_cap_data *caps, 197 struct linux_binprm *bprm, unsigned size) 198 { 199 __u32 magic_etc; 200 unsigned tocopy, i; 201 202 if (size < sizeof(magic_etc)) 203 return -EINVAL; 204 205 magic_etc = le32_to_cpu(caps->magic_etc); 206 207 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 208 case VFS_CAP_REVISION_1: 209 if (size != XATTR_CAPS_SZ_1) 210 return -EINVAL; 211 tocopy = VFS_CAP_U32_1; 212 break; 213 case VFS_CAP_REVISION_2: 214 if (size != XATTR_CAPS_SZ_2) 215 return -EINVAL; 216 tocopy = VFS_CAP_U32_2; 217 break; 218 default: 219 return -EINVAL; 220 } 221 222 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { 223 bprm->cap_effective = true; 224 } else { 225 bprm->cap_effective = false; 226 } 227 228 for (i = 0; i < tocopy; ++i) { 229 bprm->cap_permitted.cap[i] = 230 le32_to_cpu(caps->data[i].permitted); 231 bprm->cap_inheritable.cap[i] = 232 le32_to_cpu(caps->data[i].inheritable); 233 } 234 while (i < VFS_CAP_U32) { 235 bprm->cap_permitted.cap[i] = 0; 236 bprm->cap_inheritable.cap[i] = 0; 237 i++; 238 } 239 240 return 0; 241 } 242 243 /* Locate any VFS capabilities: */ 244 static int get_file_caps(struct linux_binprm *bprm) 245 { 246 struct dentry *dentry; 247 int rc = 0; 248 struct vfs_cap_data vcaps; 249 struct inode *inode; 250 251 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 252 bprm_clear_caps(bprm); 253 return 0; 254 } 255 256 dentry = dget(bprm->file->f_dentry); 257 inode = dentry->d_inode; 258 if (!inode->i_op || !inode->i_op->getxattr) 259 goto out; 260 261 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, 262 XATTR_CAPS_SZ); 263 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 264 /* no data, that's ok */ 265 rc = 0; 266 goto out; 267 } 268 if (rc < 0) 269 goto out; 270 271 rc = cap_from_disk(&vcaps, bprm, rc); 272 if (rc) 273 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 274 __func__, rc, bprm->filename); 275 276 out: 277 dput(dentry); 278 if (rc) 279 bprm_clear_caps(bprm); 280 281 return rc; 282 } 283 284 #else 285 int cap_inode_need_killpriv(struct dentry *dentry) 286 { 287 return 0; 288 } 289 290 int cap_inode_killpriv(struct dentry *dentry) 291 { 292 return 0; 293 } 294 295 static inline int get_file_caps(struct linux_binprm *bprm) 296 { 297 bprm_clear_caps(bprm); 298 return 0; 299 } 300 #endif 301 302 int cap_bprm_set_security (struct linux_binprm *bprm) 303 { 304 int ret; 305 306 ret = get_file_caps(bprm); 307 if (ret) 308 printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n", 309 __func__, ret, bprm->filename); 310 311 /* To support inheritance of root-permissions and suid-root 312 * executables under compatibility mode, we raise all three 313 * capability sets for the file. 314 * 315 * If only the real uid is 0, we only raise the inheritable 316 * and permitted sets of the executable file. 317 */ 318 319 if (!issecure (SECURE_NOROOT)) { 320 if (bprm->e_uid == 0 || current->uid == 0) { 321 cap_set_full (bprm->cap_inheritable); 322 cap_set_full (bprm->cap_permitted); 323 } 324 if (bprm->e_uid == 0) 325 bprm->cap_effective = true; 326 } 327 328 return ret; 329 } 330 331 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 332 { 333 /* Derived from fs/exec.c:compute_creds. */ 334 kernel_cap_t new_permitted, working; 335 336 new_permitted = cap_intersect(bprm->cap_permitted, 337 current->cap_bset); 338 working = cap_intersect(bprm->cap_inheritable, 339 current->cap_inheritable); 340 new_permitted = cap_combine(new_permitted, working); 341 342 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 343 !cap_issubset (new_permitted, current->cap_permitted)) { 344 set_dumpable(current->mm, suid_dumpable); 345 current->pdeath_signal = 0; 346 347 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 348 if (!capable(CAP_SETUID)) { 349 bprm->e_uid = current->uid; 350 bprm->e_gid = current->gid; 351 } 352 if (cap_limit_ptraced_target()) { 353 new_permitted = 354 cap_intersect(new_permitted, 355 current->cap_permitted); 356 } 357 } 358 } 359 360 current->suid = current->euid = current->fsuid = bprm->e_uid; 361 current->sgid = current->egid = current->fsgid = bprm->e_gid; 362 363 /* For init, we want to retain the capabilities set 364 * in the init_task struct. Thus we skip the usual 365 * capability rules */ 366 if (!is_global_init(current)) { 367 current->cap_permitted = new_permitted; 368 if (bprm->cap_effective) 369 current->cap_effective = new_permitted; 370 else 371 cap_clear(current->cap_effective); 372 } 373 374 /* AUD: Audit candidate if current->cap_effective is set */ 375 376 current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); 377 } 378 379 int cap_bprm_secureexec (struct linux_binprm *bprm) 380 { 381 if (current->uid != 0) { 382 if (bprm->cap_effective) 383 return 1; 384 if (!cap_isclear(bprm->cap_permitted)) 385 return 1; 386 if (!cap_isclear(bprm->cap_inheritable)) 387 return 1; 388 } 389 390 return (current->euid != current->uid || 391 current->egid != current->gid); 392 } 393 394 int cap_inode_setxattr(struct dentry *dentry, const char *name, 395 const void *value, size_t size, int flags) 396 { 397 if (!strcmp(name, XATTR_NAME_CAPS)) { 398 if (!capable(CAP_SETFCAP)) 399 return -EPERM; 400 return 0; 401 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 402 sizeof(XATTR_SECURITY_PREFIX) - 1) && 403 !capable(CAP_SYS_ADMIN)) 404 return -EPERM; 405 return 0; 406 } 407 408 int cap_inode_removexattr(struct dentry *dentry, const char *name) 409 { 410 if (!strcmp(name, XATTR_NAME_CAPS)) { 411 if (!capable(CAP_SETFCAP)) 412 return -EPERM; 413 return 0; 414 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 415 sizeof(XATTR_SECURITY_PREFIX) - 1) && 416 !capable(CAP_SYS_ADMIN)) 417 return -EPERM; 418 return 0; 419 } 420 421 /* moved from kernel/sys.c. */ 422 /* 423 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 424 * a process after a call to setuid, setreuid, or setresuid. 425 * 426 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 427 * {r,e,s}uid != 0, the permitted and effective capabilities are 428 * cleared. 429 * 430 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 431 * capabilities of the process are cleared. 432 * 433 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 434 * capabilities are set to the permitted capabilities. 435 * 436 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 437 * never happen. 438 * 439 * -astor 440 * 441 * cevans - New behaviour, Oct '99 442 * A process may, via prctl(), elect to keep its capabilities when it 443 * calls setuid() and switches away from uid==0. Both permitted and 444 * effective sets will be retained. 445 * Without this change, it was impossible for a daemon to drop only some 446 * of its privilege. The call to setuid(!=0) would drop all privileges! 447 * Keeping uid 0 is not an option because uid 0 owns too many vital 448 * files.. 449 * Thanks to Olaf Kirch and Peter Benie for spotting this. 450 */ 451 static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 452 int old_suid) 453 { 454 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 455 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 456 !issecure(SECURE_KEEP_CAPS)) { 457 cap_clear (current->cap_permitted); 458 cap_clear (current->cap_effective); 459 } 460 if (old_euid == 0 && current->euid != 0) { 461 cap_clear (current->cap_effective); 462 } 463 if (old_euid != 0 && current->euid == 0) { 464 current->cap_effective = current->cap_permitted; 465 } 466 } 467 468 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 469 int flags) 470 { 471 switch (flags) { 472 case LSM_SETID_RE: 473 case LSM_SETID_ID: 474 case LSM_SETID_RES: 475 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 476 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 477 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 478 } 479 break; 480 case LSM_SETID_FS: 481 { 482 uid_t old_fsuid = old_ruid; 483 484 /* Copied from kernel/sys.c:setfsuid. */ 485 486 /* 487 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 488 * if not, we might be a bit too harsh here. 489 */ 490 491 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 492 if (old_fsuid == 0 && current->fsuid != 0) { 493 current->cap_effective = 494 cap_drop_fs_set( 495 current->cap_effective); 496 } 497 if (old_fsuid != 0 && current->fsuid == 0) { 498 current->cap_effective = 499 cap_raise_fs_set( 500 current->cap_effective, 501 current->cap_permitted); 502 } 503 } 504 break; 505 } 506 default: 507 return -EINVAL; 508 } 509 510 return 0; 511 } 512 513 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 514 /* 515 * Rationale: code calling task_setscheduler, task_setioprio, and 516 * task_setnice, assumes that 517 * . if capable(cap_sys_nice), then those actions should be allowed 518 * . if not capable(cap_sys_nice), but acting on your own processes, 519 * then those actions should be allowed 520 * This is insufficient now since you can call code without suid, but 521 * yet with increased caps. 522 * So we check for increased caps on the target process. 523 */ 524 static inline int cap_safe_nice(struct task_struct *p) 525 { 526 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 527 !__capable(current, CAP_SYS_NICE)) 528 return -EPERM; 529 return 0; 530 } 531 532 int cap_task_setscheduler (struct task_struct *p, int policy, 533 struct sched_param *lp) 534 { 535 return cap_safe_nice(p); 536 } 537 538 int cap_task_setioprio (struct task_struct *p, int ioprio) 539 { 540 return cap_safe_nice(p); 541 } 542 543 int cap_task_setnice (struct task_struct *p, int nice) 544 { 545 return cap_safe_nice(p); 546 } 547 548 /* 549 * called from kernel/sys.c for prctl(PR_CABSET_DROP) 550 * done without task_capability_lock() because it introduces 551 * no new races - i.e. only another task doing capget() on 552 * this task could get inconsistent info. There can be no 553 * racing writer bc a task can only change its own caps. 554 */ 555 static long cap_prctl_drop(unsigned long cap) 556 { 557 if (!capable(CAP_SETPCAP)) 558 return -EPERM; 559 if (!cap_valid(cap)) 560 return -EINVAL; 561 cap_lower(current->cap_bset, cap); 562 return 0; 563 } 564 565 #else 566 int cap_task_setscheduler (struct task_struct *p, int policy, 567 struct sched_param *lp) 568 { 569 return 0; 570 } 571 int cap_task_setioprio (struct task_struct *p, int ioprio) 572 { 573 return 0; 574 } 575 int cap_task_setnice (struct task_struct *p, int nice) 576 { 577 return 0; 578 } 579 #endif 580 581 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 582 unsigned long arg4, unsigned long arg5, long *rc_p) 583 { 584 long error = 0; 585 586 switch (option) { 587 case PR_CAPBSET_READ: 588 if (!cap_valid(arg2)) 589 error = -EINVAL; 590 else 591 error = !!cap_raised(current->cap_bset, arg2); 592 break; 593 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 594 case PR_CAPBSET_DROP: 595 error = cap_prctl_drop(arg2); 596 break; 597 598 /* 599 * The next four prctl's remain to assist with transitioning a 600 * system from legacy UID=0 based privilege (when filesystem 601 * capabilities are not in use) to a system using filesystem 602 * capabilities only - as the POSIX.1e draft intended. 603 * 604 * Note: 605 * 606 * PR_SET_SECUREBITS = 607 * issecure_mask(SECURE_KEEP_CAPS_LOCKED) 608 * | issecure_mask(SECURE_NOROOT) 609 * | issecure_mask(SECURE_NOROOT_LOCKED) 610 * | issecure_mask(SECURE_NO_SETUID_FIXUP) 611 * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) 612 * 613 * will ensure that the current process and all of its 614 * children will be locked into a pure 615 * capability-based-privilege environment. 616 */ 617 case PR_SET_SECUREBITS: 618 if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) 619 & (current->securebits ^ arg2)) /*[1]*/ 620 || ((current->securebits & SECURE_ALL_LOCKS 621 & ~arg2)) /*[2]*/ 622 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ 623 || (cap_capable(current, CAP_SETPCAP) != 0)) { /*[4]*/ 624 /* 625 * [1] no changing of bits that are locked 626 * [2] no unlocking of locks 627 * [3] no setting of unsupported bits 628 * [4] doing anything requires privilege (go read about 629 * the "sendmail capabilities bug") 630 */ 631 error = -EPERM; /* cannot change a locked bit */ 632 } else { 633 current->securebits = arg2; 634 } 635 break; 636 case PR_GET_SECUREBITS: 637 error = current->securebits; 638 break; 639 640 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 641 642 case PR_GET_KEEPCAPS: 643 if (issecure(SECURE_KEEP_CAPS)) 644 error = 1; 645 break; 646 case PR_SET_KEEPCAPS: 647 if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ 648 error = -EINVAL; 649 else if (issecure(SECURE_KEEP_CAPS_LOCKED)) 650 error = -EPERM; 651 else if (arg2) 652 current->securebits |= issecure_mask(SECURE_KEEP_CAPS); 653 else 654 current->securebits &= 655 ~issecure_mask(SECURE_KEEP_CAPS); 656 break; 657 658 default: 659 /* No functionality available - continue with default */ 660 return 0; 661 } 662 663 /* Functionality provided */ 664 *rc_p = error; 665 return 1; 666 } 667 668 void cap_task_reparent_to_init (struct task_struct *p) 669 { 670 cap_set_init_eff(p->cap_effective); 671 cap_clear(p->cap_inheritable); 672 cap_set_full(p->cap_permitted); 673 p->securebits = SECUREBITS_DEFAULT; 674 return; 675 } 676 677 int cap_syslog (int type) 678 { 679 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 680 return -EPERM; 681 return 0; 682 } 683 684 int cap_vm_enough_memory(struct mm_struct *mm, long pages) 685 { 686 int cap_sys_admin = 0; 687 688 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 689 cap_sys_admin = 1; 690 return __vm_enough_memory(mm, pages, cap_sys_admin); 691 } 692 693