xref: /linux/security/commoncap.c (revision 185000fc556372b7fb7f26516c325f212030dbd3)
1 /* Common capabilities, needed by capability.o and root_plug.o
2  *
3  *	This program is free software; you can redistribute it and/or modify
4  *	it under the terms of the GNU General Public License as published by
5  *	the Free Software Foundation; either version 2 of the License, or
6  *	(at your option) any later version.
7  *
8  */
9 
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/kernel.h>
14 #include <linux/security.h>
15 #include <linux/file.h>
16 #include <linux/mm.h>
17 #include <linux/mman.h>
18 #include <linux/pagemap.h>
19 #include <linux/swap.h>
20 #include <linux/skbuff.h>
21 #include <linux/netlink.h>
22 #include <linux/ptrace.h>
23 #include <linux/xattr.h>
24 #include <linux/hugetlb.h>
25 #include <linux/mount.h>
26 #include <linux/sched.h>
27 #include <linux/prctl.h>
28 #include <linux/securebits.h>
29 
30 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
31 {
32 	NETLINK_CB(skb).eff_cap = current->cap_effective;
33 	return 0;
34 }
35 
36 int cap_netlink_recv(struct sk_buff *skb, int cap)
37 {
38 	if (!cap_raised(NETLINK_CB(skb).eff_cap, cap))
39 		return -EPERM;
40 	return 0;
41 }
42 
43 EXPORT_SYMBOL(cap_netlink_recv);
44 
45 /*
46  * NOTE WELL: cap_capable() cannot be used like the kernel's capable()
47  * function.  That is, it has the reverse semantics: cap_capable()
48  * returns 0 when a task has a capability, but the kernel's capable()
49  * returns 1 for this case.
50  */
51 int cap_capable (struct task_struct *tsk, int cap)
52 {
53 	/* Derived from include/linux/sched.h:capable. */
54 	if (cap_raised(tsk->cap_effective, cap))
55 		return 0;
56 	return -EPERM;
57 }
58 
59 int cap_settime(struct timespec *ts, struct timezone *tz)
60 {
61 	if (!capable(CAP_SYS_TIME))
62 		return -EPERM;
63 	return 0;
64 }
65 
66 int cap_ptrace (struct task_struct *parent, struct task_struct *child,
67 		unsigned int mode)
68 {
69 	/* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */
70 	if (!cap_issubset(child->cap_permitted, parent->cap_permitted) &&
71 	    !__capable(parent, CAP_SYS_PTRACE))
72 		return -EPERM;
73 	return 0;
74 }
75 
76 int cap_capget (struct task_struct *target, kernel_cap_t *effective,
77 		kernel_cap_t *inheritable, kernel_cap_t *permitted)
78 {
79 	/* Derived from kernel/capability.c:sys_capget. */
80 	*effective = target->cap_effective;
81 	*inheritable = target->cap_inheritable;
82 	*permitted = target->cap_permitted;
83 	return 0;
84 }
85 
86 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
87 
88 static inline int cap_block_setpcap(struct task_struct *target)
89 {
90 	/*
91 	 * No support for remote process capability manipulation with
92 	 * filesystem capability support.
93 	 */
94 	return (target != current);
95 }
96 
97 static inline int cap_inh_is_capped(void)
98 {
99 	/*
100 	 * Return 1 if changes to the inheritable set are limited
101 	 * to the old permitted set. That is, if the current task
102 	 * does *not* possess the CAP_SETPCAP capability.
103 	 */
104 	return (cap_capable(current, CAP_SETPCAP) != 0);
105 }
106 
/*
 * With filesystem capabilities enabled, a traced exec target is always
 * limited: unconditionally returns 1.
 */
static inline int cap_limit_ptraced_target(void)
{
	return 1;
}
108 
109 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
110 
/*
 * Without filesystem capabilities no target is blocked: always
 * returns 0, whatever @t is.
 */
static inline int cap_block_setpcap(struct task_struct *t)
{
	return 0;
}
/*
 * Without filesystem capabilities the inheritable set is always
 * capped: unconditionally returns 1.
 */
static inline int cap_inh_is_capped(void)
{
	return 1;
}
113 static inline int cap_limit_ptraced_target(void)
114 {
115 	return !capable(CAP_SETPCAP);
116 }
117 
118 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
119 
120 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
121 		      kernel_cap_t *inheritable, kernel_cap_t *permitted)
122 {
123 	if (cap_block_setpcap(target)) {
124 		return -EPERM;
125 	}
126 	if (cap_inh_is_capped()
127 	    && !cap_issubset(*inheritable,
128 			     cap_combine(target->cap_inheritable,
129 					 current->cap_permitted))) {
130 		/* incapable of using this inheritable set */
131 		return -EPERM;
132 	}
133 	if (!cap_issubset(*inheritable,
134 			   cap_combine(target->cap_inheritable,
135 				       current->cap_bset))) {
136 		/* no new pI capabilities outside bounding set */
137 		return -EPERM;
138 	}
139 
140 	/* verify restrictions on target's new Permitted set */
141 	if (!cap_issubset (*permitted,
142 			   cap_combine (target->cap_permitted,
143 					current->cap_permitted))) {
144 		return -EPERM;
145 	}
146 
147 	/* verify the _new_Effective_ is a subset of the _new_Permitted_ */
148 	if (!cap_issubset (*effective, *permitted)) {
149 		return -EPERM;
150 	}
151 
152 	return 0;
153 }
154 
155 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective,
156 		     kernel_cap_t *inheritable, kernel_cap_t *permitted)
157 {
158 	target->cap_effective = *effective;
159 	target->cap_inheritable = *inheritable;
160 	target->cap_permitted = *permitted;
161 }
162 
163 static inline void bprm_clear_caps(struct linux_binprm *bprm)
164 {
165 	cap_clear(bprm->cap_inheritable);
166 	cap_clear(bprm->cap_permitted);
167 	bprm->cap_effective = false;
168 }
169 
170 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
171 
172 int cap_inode_need_killpriv(struct dentry *dentry)
173 {
174 	struct inode *inode = dentry->d_inode;
175 	int error;
176 
177 	if (!inode->i_op || !inode->i_op->getxattr)
178 	       return 0;
179 
180 	error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0);
181 	if (error <= 0)
182 		return 0;
183 	return 1;
184 }
185 
186 int cap_inode_killpriv(struct dentry *dentry)
187 {
188 	struct inode *inode = dentry->d_inode;
189 
190 	if (!inode->i_op || !inode->i_op->removexattr)
191 	       return 0;
192 
193 	return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS);
194 }
195 
196 static inline int cap_from_disk(struct vfs_cap_data *caps,
197 				struct linux_binprm *bprm, unsigned size)
198 {
199 	__u32 magic_etc;
200 	unsigned tocopy, i;
201 
202 	if (size < sizeof(magic_etc))
203 		return -EINVAL;
204 
205 	magic_etc = le32_to_cpu(caps->magic_etc);
206 
207 	switch ((magic_etc & VFS_CAP_REVISION_MASK)) {
208 	case VFS_CAP_REVISION_1:
209 		if (size != XATTR_CAPS_SZ_1)
210 			return -EINVAL;
211 		tocopy = VFS_CAP_U32_1;
212 		break;
213 	case VFS_CAP_REVISION_2:
214 		if (size != XATTR_CAPS_SZ_2)
215 			return -EINVAL;
216 		tocopy = VFS_CAP_U32_2;
217 		break;
218 	default:
219 		return -EINVAL;
220 	}
221 
222 	if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) {
223 		bprm->cap_effective = true;
224 	} else {
225 		bprm->cap_effective = false;
226 	}
227 
228 	for (i = 0; i < tocopy; ++i) {
229 		bprm->cap_permitted.cap[i] =
230 			le32_to_cpu(caps->data[i].permitted);
231 		bprm->cap_inheritable.cap[i] =
232 			le32_to_cpu(caps->data[i].inheritable);
233 	}
234 	while (i < VFS_CAP_U32) {
235 		bprm->cap_permitted.cap[i] = 0;
236 		bprm->cap_inheritable.cap[i] = 0;
237 		i++;
238 	}
239 
240 	return 0;
241 }
242 
243 /* Locate any VFS capabilities: */
244 static int get_file_caps(struct linux_binprm *bprm)
245 {
246 	struct dentry *dentry;
247 	int rc = 0;
248 	struct vfs_cap_data vcaps;
249 	struct inode *inode;
250 
251 	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) {
252 		bprm_clear_caps(bprm);
253 		return 0;
254 	}
255 
256 	dentry = dget(bprm->file->f_dentry);
257 	inode = dentry->d_inode;
258 	if (!inode->i_op || !inode->i_op->getxattr)
259 		goto out;
260 
261 	rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps,
262 				   XATTR_CAPS_SZ);
263 	if (rc == -ENODATA || rc == -EOPNOTSUPP) {
264 		/* no data, that's ok */
265 		rc = 0;
266 		goto out;
267 	}
268 	if (rc < 0)
269 		goto out;
270 
271 	rc = cap_from_disk(&vcaps, bprm, rc);
272 	if (rc)
273 		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
274 			__func__, rc, bprm->filename);
275 
276 out:
277 	dput(dentry);
278 	if (rc)
279 		bprm_clear_caps(bprm);
280 
281 	return rc;
282 }
283 
284 #else
/*
 * Variant built when filesystem capabilities are disabled: there is
 * never anything to strip, so always returns 0.
 */
int cap_inode_need_killpriv(struct dentry *dentry)
{
	return 0;
}
289 
/*
 * Variant built when filesystem capabilities are disabled: does
 * nothing and always returns 0.
 */
int cap_inode_killpriv(struct dentry *dentry)
{
	return 0;
}
294 
/*
 * Variant built when filesystem capabilities are disabled: a file
 * never contributes capabilities, so just clear @bprm's capability
 * state.  Always returns 0.
 */
static inline int get_file_caps(struct linux_binprm *bprm)
{
	bprm_clear_caps(bprm);

	return 0;
}
300 #endif
301 
302 int cap_bprm_set_security (struct linux_binprm *bprm)
303 {
304 	int ret;
305 
306 	ret = get_file_caps(bprm);
307 	if (ret)
308 		printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n",
309 			__func__, ret, bprm->filename);
310 
311 	/*  To support inheritance of root-permissions and suid-root
312 	 *  executables under compatibility mode, we raise all three
313 	 *  capability sets for the file.
314 	 *
315 	 *  If only the real uid is 0, we only raise the inheritable
316 	 *  and permitted sets of the executable file.
317 	 */
318 
319 	if (!issecure (SECURE_NOROOT)) {
320 		if (bprm->e_uid == 0 || current->uid == 0) {
321 			cap_set_full (bprm->cap_inheritable);
322 			cap_set_full (bprm->cap_permitted);
323 		}
324 		if (bprm->e_uid == 0)
325 			bprm->cap_effective = true;
326 	}
327 
328 	return ret;
329 }
330 
331 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
332 {
333 	/* Derived from fs/exec.c:compute_creds. */
334 	kernel_cap_t new_permitted, working;
335 
336 	new_permitted = cap_intersect(bprm->cap_permitted,
337 				 current->cap_bset);
338 	working = cap_intersect(bprm->cap_inheritable,
339 				 current->cap_inheritable);
340 	new_permitted = cap_combine(new_permitted, working);
341 
342 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
343 	    !cap_issubset (new_permitted, current->cap_permitted)) {
344 		set_dumpable(current->mm, suid_dumpable);
345 		current->pdeath_signal = 0;
346 
347 		if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
348 			if (!capable(CAP_SETUID)) {
349 				bprm->e_uid = current->uid;
350 				bprm->e_gid = current->gid;
351 			}
352 			if (cap_limit_ptraced_target()) {
353 				new_permitted =
354 					cap_intersect(new_permitted,
355 						      current->cap_permitted);
356 			}
357 		}
358 	}
359 
360 	current->suid = current->euid = current->fsuid = bprm->e_uid;
361 	current->sgid = current->egid = current->fsgid = bprm->e_gid;
362 
363 	/* For init, we want to retain the capabilities set
364 	 * in the init_task struct. Thus we skip the usual
365 	 * capability rules */
366 	if (!is_global_init(current)) {
367 		current->cap_permitted = new_permitted;
368 		if (bprm->cap_effective)
369 			current->cap_effective = new_permitted;
370 		else
371 			cap_clear(current->cap_effective);
372 	}
373 
374 	/* AUD: Audit candidate if current->cap_effective is set */
375 
376 	current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
377 }
378 
379 int cap_bprm_secureexec (struct linux_binprm *bprm)
380 {
381 	if (current->uid != 0) {
382 		if (bprm->cap_effective)
383 			return 1;
384 		if (!cap_isclear(bprm->cap_permitted))
385 			return 1;
386 		if (!cap_isclear(bprm->cap_inheritable))
387 			return 1;
388 	}
389 
390 	return (current->euid != current->uid ||
391 		current->egid != current->gid);
392 }
393 
394 int cap_inode_setxattr(struct dentry *dentry, const char *name,
395 		       const void *value, size_t size, int flags)
396 {
397 	if (!strcmp(name, XATTR_NAME_CAPS)) {
398 		if (!capable(CAP_SETFCAP))
399 			return -EPERM;
400 		return 0;
401 	} else if (!strncmp(name, XATTR_SECURITY_PREFIX,
402 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
403 	    !capable(CAP_SYS_ADMIN))
404 		return -EPERM;
405 	return 0;
406 }
407 
408 int cap_inode_removexattr(struct dentry *dentry, const char *name)
409 {
410 	if (!strcmp(name, XATTR_NAME_CAPS)) {
411 		if (!capable(CAP_SETFCAP))
412 			return -EPERM;
413 		return 0;
414 	} else if (!strncmp(name, XATTR_SECURITY_PREFIX,
415 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
416 	    !capable(CAP_SYS_ADMIN))
417 		return -EPERM;
418 	return 0;
419 }
420 
421 /* moved from kernel/sys.c. */
422 /*
423  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
424  * a process after a call to setuid, setreuid, or setresuid.
425  *
426  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
427  *  {r,e,s}uid != 0, the permitted and effective capabilities are
428  *  cleared.
429  *
430  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
431  *  capabilities of the process are cleared.
432  *
433  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
434  *  capabilities are set to the permitted capabilities.
435  *
436  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
437  *  never happen.
438  *
439  *  -astor
440  *
441  * cevans - New behaviour, Oct '99
442  * A process may, via prctl(), elect to keep its capabilities when it
443  * calls setuid() and switches away from uid==0. Both permitted and
444  * effective sets will be retained.
445  * Without this change, it was impossible for a daemon to drop only some
446  * of its privilege. The call to setuid(!=0) would drop all privileges!
447  * Keeping uid 0 is not an option because uid 0 owns too many vital
448  * files..
449  * Thanks to Olaf Kirch and Peter Benie for spotting this.
450  */
451 static inline void cap_emulate_setxuid (int old_ruid, int old_euid,
452 					int old_suid)
453 {
454 	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
455 	    (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
456 	    !issecure(SECURE_KEEP_CAPS)) {
457 		cap_clear (current->cap_permitted);
458 		cap_clear (current->cap_effective);
459 	}
460 	if (old_euid == 0 && current->euid != 0) {
461 		cap_clear (current->cap_effective);
462 	}
463 	if (old_euid != 0 && current->euid == 0) {
464 		current->cap_effective = current->cap_permitted;
465 	}
466 }
467 
468 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
469 			  int flags)
470 {
471 	switch (flags) {
472 	case LSM_SETID_RE:
473 	case LSM_SETID_ID:
474 	case LSM_SETID_RES:
475 		/* Copied from kernel/sys.c:setreuid/setuid/setresuid. */
476 		if (!issecure (SECURE_NO_SETUID_FIXUP)) {
477 			cap_emulate_setxuid (old_ruid, old_euid, old_suid);
478 		}
479 		break;
480 	case LSM_SETID_FS:
481 		{
482 			uid_t old_fsuid = old_ruid;
483 
484 			/* Copied from kernel/sys.c:setfsuid. */
485 
486 			/*
487 			 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
488 			 *          if not, we might be a bit too harsh here.
489 			 */
490 
491 			if (!issecure (SECURE_NO_SETUID_FIXUP)) {
492 				if (old_fsuid == 0 && current->fsuid != 0) {
493 					current->cap_effective =
494 						cap_drop_fs_set(
495 						    current->cap_effective);
496 				}
497 				if (old_fsuid != 0 && current->fsuid == 0) {
498 					current->cap_effective =
499 						cap_raise_fs_set(
500 						    current->cap_effective,
501 						    current->cap_permitted);
502 				}
503 			}
504 			break;
505 		}
506 	default:
507 		return -EINVAL;
508 	}
509 
510 	return 0;
511 }
512 
513 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
514 /*
515  * Rationale: code calling task_setscheduler, task_setioprio, and
516  * task_setnice, assumes that
517  *   . if capable(cap_sys_nice), then those actions should be allowed
518  *   . if not capable(cap_sys_nice), but acting on your own processes,
519  *   	then those actions should be allowed
520  * This is insufficient now since you can call code without suid, but
521  * yet with increased caps.
522  * So we check for increased caps on the target process.
523  */
524 static inline int cap_safe_nice(struct task_struct *p)
525 {
526 	if (!cap_issubset(p->cap_permitted, current->cap_permitted) &&
527 	    !__capable(current, CAP_SYS_NICE))
528 		return -EPERM;
529 	return 0;
530 }
531 
/*
 * Scheduler-change check: defers entirely to cap_safe_nice() on @p.
 * @policy and @lp are not examined.
 */
int cap_task_setscheduler (struct task_struct *p, int policy,
			   struct sched_param *lp)
{
	int rc = cap_safe_nice(p);

	return rc;
}
537 
/*
 * I/O-priority-change check: defers entirely to cap_safe_nice() on @p.
 * @ioprio is not examined.
 */
int cap_task_setioprio (struct task_struct *p, int ioprio)
{
	int rc = cap_safe_nice(p);

	return rc;
}
542 
/*
 * Nice-value-change check: defers entirely to cap_safe_nice() on @p.
 * @nice is not examined.
 */
int cap_task_setnice (struct task_struct *p, int nice)
{
	int rc = cap_safe_nice(p);

	return rc;
}
547 
548 /*
549  * called from kernel/sys.c for prctl(PR_CABSET_DROP)
550  * done without task_capability_lock() because it introduces
551  * no new races - i.e. only another task doing capget() on
552  * this task could get inconsistent info.  There can be no
553  * racing writer bc a task can only change its own caps.
554  */
555 static long cap_prctl_drop(unsigned long cap)
556 {
557 	if (!capable(CAP_SETPCAP))
558 		return -EPERM;
559 	if (!cap_valid(cap))
560 		return -EINVAL;
561 	cap_lower(current->cap_bset, cap);
562 	return 0;
563 }
564 
565 #else
/*
 * Variant built when filesystem capabilities are disabled: no extra
 * restriction is applied, always returns 0.
 */
int cap_task_setscheduler (struct task_struct *p, int policy,
			   struct sched_param *lp)
{
	return 0;
}

570 }
/*
 * Variant built when filesystem capabilities are disabled: no extra
 * restriction is applied, always returns 0.
 */
int cap_task_setioprio (struct task_struct *p, int ioprio)
{
	return 0;
}

/*
 * Variant built when filesystem capabilities are disabled: no extra
 * restriction is applied, always returns 0.
 */
int cap_task_setnice (struct task_struct *p, int nice)
{
	return 0;
}
579 #endif
580 
581 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
582 		   unsigned long arg4, unsigned long arg5, long *rc_p)
583 {
584 	long error = 0;
585 
586 	switch (option) {
587 	case PR_CAPBSET_READ:
588 		if (!cap_valid(arg2))
589 			error = -EINVAL;
590 		else
591 			error = !!cap_raised(current->cap_bset, arg2);
592 		break;
593 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
594 	case PR_CAPBSET_DROP:
595 		error = cap_prctl_drop(arg2);
596 		break;
597 
598 	/*
599 	 * The next four prctl's remain to assist with transitioning a
600 	 * system from legacy UID=0 based privilege (when filesystem
601 	 * capabilities are not in use) to a system using filesystem
602 	 * capabilities only - as the POSIX.1e draft intended.
603 	 *
604 	 * Note:
605 	 *
606 	 *  PR_SET_SECUREBITS =
607 	 *      issecure_mask(SECURE_KEEP_CAPS_LOCKED)
608 	 *    | issecure_mask(SECURE_NOROOT)
609 	 *    | issecure_mask(SECURE_NOROOT_LOCKED)
610 	 *    | issecure_mask(SECURE_NO_SETUID_FIXUP)
611 	 *    | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
612 	 *
613 	 * will ensure that the current process and all of its
614 	 * children will be locked into a pure
615 	 * capability-based-privilege environment.
616 	 */
617 	case PR_SET_SECUREBITS:
618 		if ((((current->securebits & SECURE_ALL_LOCKS) >> 1)
619 		     & (current->securebits ^ arg2))                  /*[1]*/
620 		    || ((current->securebits & SECURE_ALL_LOCKS
621 			 & ~arg2))                                    /*[2]*/
622 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
623 		    || (cap_capable(current, CAP_SETPCAP) != 0)) {    /*[4]*/
624 			/*
625 			 * [1] no changing of bits that are locked
626 			 * [2] no unlocking of locks
627 			 * [3] no setting of unsupported bits
628 			 * [4] doing anything requires privilege (go read about
629 			 *     the "sendmail capabilities bug")
630 			 */
631 			error = -EPERM;  /* cannot change a locked bit */
632 		} else {
633 			current->securebits = arg2;
634 		}
635 		break;
636 	case PR_GET_SECUREBITS:
637 		error = current->securebits;
638 		break;
639 
640 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
641 
642 	case PR_GET_KEEPCAPS:
643 		if (issecure(SECURE_KEEP_CAPS))
644 			error = 1;
645 		break;
646 	case PR_SET_KEEPCAPS:
647 		if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
648 			error = -EINVAL;
649 		else if (issecure(SECURE_KEEP_CAPS_LOCKED))
650 			error = -EPERM;
651 		else if (arg2)
652 			current->securebits |= issecure_mask(SECURE_KEEP_CAPS);
653 		else
654 			current->securebits &=
655 				~issecure_mask(SECURE_KEEP_CAPS);
656 		break;
657 
658 	default:
659 		/* No functionality available - continue with default */
660 		return 0;
661 	}
662 
663 	/* Functionality provided */
664 	*rc_p = error;
665 	return 1;
666 }
667 
668 void cap_task_reparent_to_init (struct task_struct *p)
669 {
670 	cap_set_init_eff(p->cap_effective);
671 	cap_clear(p->cap_inheritable);
672 	cap_set_full(p->cap_permitted);
673 	p->securebits = SECUREBITS_DEFAULT;
674 	return;
675 }
676 
677 int cap_syslog (int type)
678 {
679 	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
680 		return -EPERM;
681 	return 0;
682 }
683 
684 int cap_vm_enough_memory(struct mm_struct *mm, long pages)
685 {
686 	int cap_sys_admin = 0;
687 
688 	if (cap_capable(current, CAP_SYS_ADMIN) == 0)
689 		cap_sys_admin = 1;
690 	return __vm_enough_memory(mm, pages, cap_sys_admin);
691 }
692 
693