// SPDX-License-Identifier: GPL-2.0-only
/*
 * Landlock - System call implementations and user space interfaces
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 * Copyright © 2021-2025 Microsoft Corporation
 */

#include <asm/current.h>
#include <linux/anon_inodes.h>
#include <linux/bitops.h>
#include <linux/build_bug.h>
#include <linux/capability.h>
#include <linux/cleanup.h>
#include <linux/compiler_types.h>
#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/stddef.h>
#include <linux/syscalls.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <uapi/linux/landlock.h>

#include "cred.h"
#include "domain.h"
#include "fs.h"
#include "limits.h"
#include "net.h"
#include "ruleset.h"
#include "setup.h"
#include "tsync.h"

/*
 * Returns true when landlock_initialized is set.  Otherwise, warns once that
 * user space requested Landlock while it is disabled, and returns false.
 * Every syscall below maps a false return to -EOPNOTSUPP.
 */
static bool is_initialized(void)
{
	if (likely(landlock_initialized))
		return true;

	pr_warn_once(
		"Disabled but requested by user space. "
		"You should enable Landlock at boot time: "
		"https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n");
	return false;
}

/**
 * copy_min_struct_from_user - Safe future-proof argument copying
 *
 * Extend copy_struct_from_user() to check for consistent user buffer.
 *
 * @dst: Kernel space pointer (a NULL @dst is rejected at build time).
 * @ksize: Actual size of the data pointed to by @dst (must be greater than
 *         zero and not smaller than @ksize_min, both checked at build time).
 * @ksize_min: Minimal required size to be copied.
 * @src: User space pointer or NULL.
 * @usize: (Alleged) size of the data pointed to by @src.
 *
 * Return: -EFAULT if @src is NULL, -EINVAL if @usize is smaller than
 * @ksize_min, -E2BIG if @usize is greater than PAGE_SIZE, otherwise the value
 * returned by copy_struct_from_user().
 */
static __always_inline int
copy_min_struct_from_user(void *const dst, const size_t ksize,
			  const size_t ksize_min, const void __user *const src,
			  const size_t usize)
{
	/* Checks buffer inconsistencies. */
	BUILD_BUG_ON(!dst);
	if (!src)
		return -EFAULT;

	/* Checks size ranges. */
	BUILD_BUG_ON(ksize <= 0);
	BUILD_BUG_ON(ksize < ksize_min);
	if (usize < ksize_min)
		return -EINVAL;
	if (usize > PAGE_SIZE)
		return -E2BIG;

	/* Copies user buffer and fills with zeros. */
	return copy_struct_from_user(dst, ksize, src, usize);
}

/*
 * This function only contains arithmetic operations with constants, leading to
 * BUILD_BUG_ON().  The related code is evaluated and checked at build time,
 * but it is then ignored thanks to compiler optimizations.
 */
static void build_check_abi(void)
{
	struct landlock_ruleset_attr ruleset_attr;
	struct landlock_path_beneath_attr path_beneath_attr;
	struct landlock_net_port_attr net_port_attr;
	size_t ruleset_size, path_beneath_size, net_port_size;

	/*
	 * For each user space ABI structure, first checks that there is no
	 * hole in it (i.e. the sum of the member sizes equals the struct
	 * size), then checks that all architectures have the same struct
	 * size.
	 */
	ruleset_size = sizeof(ruleset_attr.handled_access_fs);
	ruleset_size += sizeof(ruleset_attr.handled_access_net);
	ruleset_size += sizeof(ruleset_attr.scoped);
	BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size);
	BUILD_BUG_ON(sizeof(ruleset_attr) != 24);

	path_beneath_size = sizeof(path_beneath_attr.allowed_access);
	path_beneath_size += sizeof(path_beneath_attr.parent_fd);
	BUILD_BUG_ON(sizeof(path_beneath_attr) != path_beneath_size);
	BUILD_BUG_ON(sizeof(path_beneath_attr) != 12);

	net_port_size = sizeof(net_port_attr.allowed_access);
	net_port_size += sizeof(net_port_attr.port);
	BUILD_BUG_ON(sizeof(net_port_attr) != net_port_size);
	BUILD_BUG_ON(sizeof(net_port_attr) != 16);
}

/* Ruleset handling */

/*
 * Release handler of a ruleset file: puts the ruleset stored in the file's
 * private data (set by sys_landlock_create_ruleset()).
 */
static int fop_ruleset_release(struct inode *const inode,
			       struct file *const filp)
{
	struct landlock_ruleset *ruleset = filp->private_data;

	landlock_put_ruleset(ruleset);
	return 0;
}

static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
			      const size_t size, loff_t *const ppos)
{
	/* Dummy handler to enable FMODE_CAN_READ. */
	return -EINVAL;
}

static ssize_t fop_dummy_write(struct file *const filp,
			       const char __user *const buf, const size_t size,
			       loff_t *const ppos)
{
	/* Dummy handler to enable FMODE_CAN_WRITE. */
	return -EINVAL;
}

/*
 * A ruleset file descriptor enables to build a ruleset by adding (i.e.
 * writing) rule after rule, without relying on the task's context.  This
 * reentrant design is also used in a read way to enforce the ruleset on the
 * current task.
 *
 * The address of this structure is also what identifies a file as a ruleset
 * file (see get_ruleset_from_fd() and get_path_from_fd()).
 */
static const struct file_operations ruleset_fops = {
	.release = fop_ruleset_release,
	.read = fop_dummy_read,
	.write = fop_dummy_write,
};

/*
 * The Landlock ABI version should be incremented for each new Landlock-related
 * user space visible change (e.g. Landlock syscalls).  This version should
 * only be incremented once per Linux release.  When incrementing, the date in
 * Documentation/userspace-api/landlock.rst should be updated to reflect the
 * UAPI change.
 * If the change involves a fix that requires userspace awareness, also update
 * the errata documentation in Documentation/userspace-api/landlock.rst .
 */
const int landlock_abi_version = 8;

/**
 * sys_landlock_create_ruleset - Create a new ruleset
 *
 * @attr: Pointer to a &struct landlock_ruleset_attr identifying the scope of
 *        the new ruleset.
 * @size: Size of the pointed &struct landlock_ruleset_attr (needed for
 *        backward and forward compatibility).
 * @flags: Supported values:
 *
 *         - %LANDLOCK_CREATE_RULESET_VERSION
 *         - %LANDLOCK_CREATE_RULESET_ERRATA
 *
 * This system call enables to create a new Landlock ruleset, and returns the
 * related file descriptor on success.
 *
 * If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is
 * set, then @attr must be NULL and @size must be 0.  In this case, no ruleset
 * is created: the ABI version, respectively the errata value, is returned
 * instead of a file descriptor.  These two flags cannot be combined.
 *
 * Possible returned errors are:
 *
 * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
 * - %EINVAL: unknown @flags, or unknown access, or unknown scope, or too small @size;
 * - %E2BIG: @attr or @size inconsistencies;
 * - %EFAULT: @attr or @size inconsistencies;
 * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs.
 *
 * .. kernel-doc:: include/uapi/linux/landlock.h
 *     :identifiers: landlock_create_ruleset_flags
 */
SYSCALL_DEFINE3(landlock_create_ruleset,
		const struct landlock_ruleset_attr __user *const, attr,
		const size_t, size, const __u32, flags)
{
	struct landlock_ruleset_attr ruleset_attr;
	struct landlock_ruleset *ruleset;
	int err, ruleset_fd;

	/* Build-time checks. */
	build_check_abi();

	if (!is_initialized())
		return -EOPNOTSUPP;

	if (flags) {
		/* Query mode: no attribute may be passed along with a flag. */
		if (attr || size)
			return -EINVAL;

		if (flags == LANDLOCK_CREATE_RULESET_VERSION)
			return landlock_abi_version;

		if (flags == LANDLOCK_CREATE_RULESET_ERRATA)
			return landlock_errata;

		/* Unknown flag, or combination of flags. */
		return -EINVAL;
	}

	/*
	 * Copies raw user space buffer.  Only the handled_access_fs field is
	 * mandatory; any field missing from a shorter user buffer is zeroed.
	 */
	err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr),
					offsetofend(typeof(ruleset_attr),
						    handled_access_fs),
					attr, size);
	if (err)
		return err;

	/* Checks content (and 32-bits cast). */
	if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) !=
	    LANDLOCK_MASK_ACCESS_FS)
		return -EINVAL;

	/* Checks network content (and 32-bits cast). */
	if ((ruleset_attr.handled_access_net | LANDLOCK_MASK_ACCESS_NET) !=
	    LANDLOCK_MASK_ACCESS_NET)
		return -EINVAL;

	/* Checks IPC scoping content (and 32-bits cast). */
	if ((ruleset_attr.scoped | LANDLOCK_MASK_SCOPE) != LANDLOCK_MASK_SCOPE)
		return -EINVAL;

	/* Checks arguments and transforms to kernel struct. */
	ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs,
					  ruleset_attr.handled_access_net,
					  ruleset_attr.scoped);
	if (IS_ERR(ruleset))
		return PTR_ERR(ruleset);

	/* Creates anonymous FD referring to the ruleset. */
	ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
				      ruleset, O_RDWR | O_CLOEXEC);
	/* On failure, no file owns the ruleset: drops it here. */
	if (ruleset_fd < 0)
		landlock_put_ruleset(ruleset);
	return ruleset_fd;
}

/*
 * Returns an owned ruleset from a FD.  It is thus needed to call
 * landlock_put_ruleset() on the return value.
 *
 * @fd: File descriptor expected to refer to a ruleset file.
 * @mode: File mode bit(s) of which at least one must be set in the file's
 *        f_mode (callers in this file pass FMODE_CAN_WRITE or
 *        FMODE_CAN_READ).
 *
 * On error, returns an ERR_PTR():
 *
 * - -EBADF: @fd does not resolve to a file;
 * - -EBADFD: the file is not a ruleset file (its f_op is not &ruleset_fops);
 * - -EPERM: none of the @mode bits is set in the file's f_mode;
 * - -EINVAL: the ruleset does not have exactly one layer (also warns once).
 */
static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
						    const fmode_t mode)
{
	CLASS(fd, ruleset_f)(fd);
	struct landlock_ruleset *ruleset;

	if (fd_empty(ruleset_f))
		return ERR_PTR(-EBADF);

	/* Checks FD type and access right. */
	if (fd_file(ruleset_f)->f_op != &ruleset_fops)
		return ERR_PTR(-EBADFD);
	if (!(fd_file(ruleset_f)->f_mode & mode))
		return ERR_PTR(-EPERM);
	ruleset = fd_file(ruleset_f)->private_data;
	if (WARN_ON_ONCE(ruleset->num_layers != 1))
		return ERR_PTR(-EINVAL);
	landlock_get_ruleset(ruleset);
	return ruleset;
}

/* Path handling */

/*
 * @fd: File descriptor to get the path from (same type as
 *      &landlock_path_beneath_attr.parent_fd, checked at build time).
 * @path: Must call path_put(@path) after the call if it succeeded.
 *
 * Returns 0 on success, -EBADF if @fd does not resolve to a file, or -EBADFD
 * if the file is of a forbidden kind (see the check below).
 */
static int get_path_from_fd(const s32 fd, struct path *const path)
{
	CLASS(fd_raw, f)(fd);

	BUILD_BUG_ON(!__same_type(
		fd, ((struct landlock_path_beneath_attr *)NULL)->parent_fd));

	if (fd_empty(f))
		return -EBADF;
	/*
	 * Forbids ruleset FDs, internal filesystems (e.g. nsfs), including
	 * pseudo filesystems that will never be mountable (e.g. sockfs,
	 * pipefs).
	 */
	if ((fd_file(f)->f_op == &ruleset_fops) ||
	    (fd_file(f)->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
	    (fd_file(f)->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
	    IS_PRIVATE(d_backing_inode(fd_file(f)->f_path.dentry)))
		return -EBADFD;

	/* Copies the file's path and takes a reference on the copy. */
	*path = fd_file(f)->f_path;
	path_get(path);
	return 0;
}

/*
 * Handles %LANDLOCK_RULE_PATH_BENEATH for sys_landlock_add_rule(): copies a
 * &struct landlock_path_beneath_attr from @rule_attr, validates it against
 * @ruleset, and appends the corresponding filesystem rule.
 *
 * Returns 0 on success, -EFAULT if the copy from user space failed, -ENOMSG
 * if allowed_access is empty, -EINVAL if allowed_access is not a subset of
 * the accesses handled by @ruleset, or the error returned by
 * get_path_from_fd() or landlock_append_fs_rule().
 */
static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
				 const void __user *const rule_attr)
{
	struct landlock_path_beneath_attr path_beneath_attr;
	struct path path;
	int res, err;
	access_mask_t mask;

	/* Copies raw user space buffer. */
	res = copy_from_user(&path_beneath_attr, rule_attr,
			     sizeof(path_beneath_attr));
	if (res)
		return -EFAULT;

	/*
	 * Informs about useless rule: empty allowed_access (i.e. deny rules)
	 * are ignored in path walks.
	 */
	if (!path_beneath_attr.allowed_access)
		return -ENOMSG;

	/* Checks that allowed_access matches the @ruleset constraints. */
	mask = ruleset->access_masks[0].fs;
	if ((path_beneath_attr.allowed_access | mask) != mask)
		return -EINVAL;

	/* Gets and checks the new rule. */
	err = get_path_from_fd(path_beneath_attr.parent_fd, &path);
	if (err)
		return err;

	/* Imports the new rule. */
	err = landlock_append_fs_rule(ruleset, &path,
				      path_beneath_attr.allowed_access);
	/* Releases the reference taken by get_path_from_fd(). */
	path_put(&path);
	return err;
}

/*
 * Handles %LANDLOCK_RULE_NET_PORT for sys_landlock_add_rule(): copies a
 * &struct landlock_net_port_attr from @rule_attr, validates it against
 * @ruleset, and appends the corresponding network rule.
 *
 * Returns -EFAULT if the copy from user space failed, -ENOMSG if
 * allowed_access is empty, -EINVAL if allowed_access is not a subset of the
 * network accesses handled by @ruleset or if the port is greater than
 * U16_MAX, otherwise the value returned by landlock_append_net_rule().
 */
static int add_rule_net_port(struct landlock_ruleset *ruleset,
			     const void __user *const rule_attr)
{
	struct landlock_net_port_attr net_port_attr;
	int res;
	access_mask_t mask;

	/* Copies raw user space buffer. */
	res = copy_from_user(&net_port_attr, rule_attr, sizeof(net_port_attr));
	if (res)
		return -EFAULT;

	/*
	 * Informs about useless rule: empty allowed_access (i.e. deny rules)
	 * are ignored by network actions.
	 */
	if (!net_port_attr.allowed_access)
		return -ENOMSG;

	/* Checks that allowed_access matches the @ruleset constraints. */
	mask = landlock_get_net_access_mask(ruleset, 0);
	if ((net_port_attr.allowed_access | mask) != mask)
		return -EINVAL;

	/* Denies inserting a rule with port greater than 65535. */
	if (net_port_attr.port > U16_MAX)
		return -EINVAL;

	/* Imports the new rule. */
	return landlock_append_net_rule(ruleset, net_port_attr.port,
					net_port_attr.allowed_access);
}

/**
 * sys_landlock_add_rule - Add a new rule to a ruleset
 *
 * @ruleset_fd: File descriptor tied to the ruleset that should be extended
 *		with the new rule.
 * @rule_type: Identify the structure type pointed to by @rule_attr:
 *             %LANDLOCK_RULE_PATH_BENEATH or %LANDLOCK_RULE_NET_PORT.
 * @rule_attr: Pointer to a rule (matching the @rule_type).
 * @flags: Must be 0.
 *
 * This system call enables to define a new rule and add it to an existing
 * ruleset.
 *
 * Possible returned errors are:
 *
 * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
 * - %EAFNOSUPPORT: @rule_type is %LANDLOCK_RULE_NET_PORT but TCP/IP is not
 *   supported by the running kernel;
 * - %EINVAL: @flags is not 0, or @rule_type is unknown;
 * - %EINVAL: The rule accesses are inconsistent (i.e.
 *   &landlock_path_beneath_attr.allowed_access or
 *   &landlock_net_port_attr.allowed_access is not a subset of the ruleset
 *   handled accesses);
 * - %EINVAL: &landlock_net_port_attr.port is greater than 65535;
 * - %ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access is
 *   0);
 * - %EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
 *   member of @rule_attr is not a file descriptor as expected;
 * - %EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
 *   @rule_attr is not the expected file descriptor type;
 * - %EPERM: @ruleset_fd has no write access to the underlying ruleset;
 * - %EFAULT: @rule_attr was not a valid address.
 */
SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
		const enum landlock_rule_type, rule_type,
		const void __user *const, rule_attr, const __u32, flags)
{
	/* Scope-based cleanup: see the __free() annotation. */
	struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;

	if (!is_initialized())
		return -EOPNOTSUPP;

	/* No flag for now. */
	if (flags)
		return -EINVAL;

	/* Gets and checks the ruleset. */
	ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
	if (IS_ERR(ruleset))
		return PTR_ERR(ruleset);

	/* Dispatches to the handler matching the rule type. */
	switch (rule_type) {
	case LANDLOCK_RULE_PATH_BENEATH:
		return add_rule_path_beneath(ruleset, rule_attr);
	case LANDLOCK_RULE_NET_PORT:
		return add_rule_net_port(ruleset, rule_attr);
	default:
		return -EINVAL;
	}
}

/* Enforcement */

/**
 * sys_landlock_restrict_self - Enforce a ruleset on the calling thread
 *
 * @ruleset_fd: File descriptor tied to the ruleset to merge with the target.
 *              It may be -1 only when @flags is exactly
 *              %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF, in which case no
 *              ruleset is merged.
 * @flags: Supported values:
 *
 *         - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF
 *         - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON
 *         - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
 *         - %LANDLOCK_RESTRICT_SELF_TSYNC
 *
 * This system call enforces a Landlock ruleset on the current thread.
 * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
 * namespace or is running with no_new_privs.  This avoids scenarios where
 * unprivileged tasks can affect the behavior of privileged children.
 *
 * If %LANDLOCK_RESTRICT_SELF_TSYNC is set, landlock_restrict_sibling_threads()
 * is called before the new credentials are committed; if it fails, the new
 * credentials are aborted and its error is returned.
 *
 * Possible returned errors are:
 *
 * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
 * - %EINVAL: @flags contains an unknown bit;
 * - %EBADF: @ruleset_fd is not a file descriptor for the current thread;
 * - %EBADFD: @ruleset_fd is not a ruleset file descriptor;
 * - %EPERM: @ruleset_fd has no read access to the underlying ruleset, or the
 *   current thread is not running with no_new_privs, or it doesn't have
 *   %CAP_SYS_ADMIN in its namespace;
 * - %ENOMEM: the new credentials could not be prepared;
 * - %E2BIG: The maximum number of stacked rulesets is reached for the current
 *   thread.
 *
 * .. kernel-doc:: include/uapi/linux/landlock.h
 *     :identifiers: landlock_restrict_self_flags
 */
SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
		flags)
{
	/* Scope-based cleanup: see the __free() annotation. */
	struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;
	struct cred *new_cred;
	struct landlock_cred_security *new_llcred;
	/* Only read in the CONFIG_AUDIT sections below, hence __maybe_unused. */
	bool __maybe_unused log_same_exec, log_new_exec, log_subdomains,
		prev_log_subdomains;

	if (!is_initialized())
		return -EOPNOTSUPP;

	/*
	 * Similar checks as for seccomp(2), except that an -EPERM may be
	 * returned.
	 */
	if (!task_no_new_privs(current) &&
	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	/* Rejects any bit outside of the supported flags. */
	if ((flags | LANDLOCK_MASK_RESTRICT_SELF) !=
	    LANDLOCK_MASK_RESTRICT_SELF)
		return -EINVAL;

	/* Translates "off" flag to boolean. */
	log_same_exec = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF);
	/* Translates "on" flag to boolean. */
	log_new_exec = !!(flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON);
	/* Translates "off" flag to boolean. */
	log_subdomains = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);

	/*
	 * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
	 * -1 as ruleset_fd, but no other flag must be set.  In every other
	 * case, ruleset_fd must refer to a readable ruleset.
	 */
	if (!(ruleset_fd == -1 &&
	      flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
		/* Gets and checks the ruleset. */
		ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
		if (IS_ERR(ruleset))
			return PTR_ERR(ruleset);
	}

	/* Prepares new credentials. */
	new_cred = prepare_creds();
	if (!new_cred)
		return -ENOMEM;

	new_llcred = landlock_cred(new_cred);

#ifdef CONFIG_AUDIT
	/*
	 * log_subdomains_off is sticky: it stays set if it was already set,
	 * and gets set if this call requests it.
	 */
	prev_log_subdomains = !new_llcred->log_subdomains_off;
	new_llcred->log_subdomains_off = !prev_log_subdomains ||
					 !log_subdomains;
#endif /* CONFIG_AUDIT */

	/*
	 * The only case when a ruleset may not be set is if
	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
	 * We could optimize this case by not calling commit_creds() if this flag
	 * was already set, but it is not worth the complexity.
	 */
	if (ruleset) {
		/*
		 * There is no possible race condition while copying and
		 * manipulating the current credentials because they are
		 * dedicated per thread.
		 */
		struct landlock_ruleset *const new_dom =
			landlock_merge_ruleset(new_llcred->domain, ruleset);
		if (IS_ERR(new_dom)) {
			abort_creds(new_cred);
			return PTR_ERR(new_dom);
		}

#ifdef CONFIG_AUDIT
		new_dom->hierarchy->log_same_exec = log_same_exec;
		new_dom->hierarchy->log_new_exec = log_new_exec;
		/*
		 * Disables logging for the new domain when both exec-related
		 * logs are off, or when subdomain logging was already off
		 * before this call.
		 */
		if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
			new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
#endif /* CONFIG_AUDIT */

		/* Replaces the old (prepared) domain. */
		landlock_put_ruleset(new_llcred->domain);
		new_llcred->domain = new_dom;

#ifdef CONFIG_AUDIT
		/* Sets the bit of the layer that was just added. */
		new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
#endif /* CONFIG_AUDIT */
	}

	if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) {
		/*
		 * Must happen before commit_creds(): on failure, the prepared
		 * credentials are dropped and the current ones are untouched
		 * by this function.
		 */
		const int err = landlock_restrict_sibling_threads(
			current_cred(), new_cred);
		if (err) {
			abort_creds(new_cred);
			return err;
		}
	}

	return commit_creds(new_cred);
}