1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Landlock - Filesystem management and hooks 4 * 5 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> 6 * Copyright © 2018-2020 ANSSI 7 * Copyright © 2021-2025 Microsoft Corporation 8 * Copyright © 2022 Günther Noack <gnoack3000@gmail.com> 9 * Copyright © 2023-2024 Google LLC 10 */ 11 12 #include <asm/ioctls.h> 13 #include <kunit/test.h> 14 #include <linux/atomic.h> 15 #include <linux/bitops.h> 16 #include <linux/bits.h> 17 #include <linux/compiler_types.h> 18 #include <linux/dcache.h> 19 #include <linux/err.h> 20 #include <linux/falloc.h> 21 #include <linux/fs.h> 22 #include <linux/init.h> 23 #include <linux/kernel.h> 24 #include <linux/limits.h> 25 #include <linux/list.h> 26 #include <linux/lsm_audit.h> 27 #include <linux/lsm_hooks.h> 28 #include <linux/mount.h> 29 #include <linux/namei.h> 30 #include <linux/path.h> 31 #include <linux/pid.h> 32 #include <linux/rcupdate.h> 33 #include <linux/sched/signal.h> 34 #include <linux/spinlock.h> 35 #include <linux/stat.h> 36 #include <linux/types.h> 37 #include <linux/wait_bit.h> 38 #include <linux/workqueue.h> 39 #include <uapi/linux/fiemap.h> 40 #include <uapi/linux/landlock.h> 41 42 #include "access.h" 43 #include "audit.h" 44 #include "common.h" 45 #include "cred.h" 46 #include "domain.h" 47 #include "fs.h" 48 #include "limits.h" 49 #include "object.h" 50 #include "ruleset.h" 51 #include "setup.h" 52 53 /* Underlying object management */ 54 55 static void release_inode(struct landlock_object *const object) 56 __releases(object->lock) 57 { 58 struct inode *const inode = object->underobj; 59 struct super_block *sb; 60 61 if (!inode) { 62 spin_unlock(&object->lock); 63 return; 64 } 65 66 /* 67 * Protects against concurrent use by hook_sb_delete() of the reference 68 * to the underlying inode. 69 */ 70 object->underobj = NULL; 71 /* 72 * Makes sure that if the filesystem is concurrently unmounted, 73 * hook_sb_delete() will wait for us to finish iput(). 74 */ 75 sb = inode->i_sb; 76 atomic_long_inc(&landlock_superblock(sb)->inode_refs); 77 spin_unlock(&object->lock); 78 /* 79 * Because object->underobj was not NULL, hook_sb_delete() and 80 * get_inode_object() guarantee that it is safe to reset 81 * landlock_inode(inode)->object while it is not NULL. It is therefore 82 * not necessary to lock inode->i_lock. 83 */ 84 rcu_assign_pointer(landlock_inode(inode)->object, NULL); 85 /* 86 * Now, new rules can safely be tied to @inode with get_inode_object(). 87 */ 88 89 iput(inode); 90 if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs)) 91 wake_up_var(&landlock_superblock(sb)->inode_refs); 92 } 93 94 static const struct landlock_object_underops landlock_fs_underops = { 95 .release = release_inode 96 }; 97 98 /* IOCTL helpers */ 99 100 /** 101 * is_masked_device_ioctl - Determine whether an IOCTL command is always 102 * permitted with Landlock for device files. These commands can not be 103 * restricted on device files by enforcing a Landlock policy. 104 * 105 * @cmd: The IOCTL command that is supposed to be run. 106 * 107 * By default, any IOCTL on a device file requires the 108 * LANDLOCK_ACCESS_FS_IOCTL_DEV right. However, we blanket-permit some 109 * commands, if: 110 * 111 * 1. The command is implemented in fs/ioctl.c's do_vfs_ioctl(), 112 * not in f_ops->unlocked_ioctl() or f_ops->compat_ioctl(). 113 * 114 * 2. The command is harmless when invoked on devices. 115 * 116 * We also permit commands that do not make sense for devices, but where the 117 * do_vfs_ioctl() implementation returns a more conventional error code. 118 * 119 * Any new IOCTL commands that are implemented in fs/ioctl.c's do_vfs_ioctl() 120 * should be considered for inclusion here. 121 * 122 * Returns: true if the IOCTL @cmd can not be restricted with Landlock for 123 * device files. 124 */ 125 static __attribute_const__ bool is_masked_device_ioctl(const unsigned int cmd) 126 { 127 switch (cmd) { 128 /* 129 * FIOCLEX, FIONCLEX, FIONBIO and FIOASYNC manipulate the FD's 130 * close-on-exec and the file's buffered-IO and async flags. These 131 * operations are also available through fcntl(2), and are 132 * unconditionally permitted in Landlock. 133 */ 134 case FIOCLEX: 135 case FIONCLEX: 136 case FIONBIO: 137 case FIOASYNC: 138 /* 139 * FIOQSIZE queries the size of a regular file, directory, or link. 140 * 141 * We still permit it, because it always returns -ENOTTY for 142 * other file types. 143 */ 144 case FIOQSIZE: 145 /* 146 * FIFREEZE and FITHAW freeze and thaw the file system which the 147 * given file belongs to. Requires CAP_SYS_ADMIN. 148 * 149 * These commands operate on the file system's superblock rather 150 * than on the file itself. The same operations can also be 151 * done through any other file or directory on the same file 152 * system, so it is safe to permit these. 153 */ 154 case FIFREEZE: 155 case FITHAW: 156 /* 157 * FS_IOC_FIEMAP queries information about the allocation of 158 * blocks within a file. 159 * 160 * This IOCTL command only makes sense for regular files and is 161 * not implemented by devices. It is harmless to permit. 162 */ 163 case FS_IOC_FIEMAP: 164 /* 165 * FIGETBSZ queries the file system's block size for a file or 166 * directory. 167 * 168 * This command operates on the file system's superblock rather 169 * than on the file itself. The same operation can also be done 170 * through any other file or directory on the same file system, 171 * so it is safe to permit it. 172 */ 173 case FIGETBSZ: 174 /* 175 * FICLONE, FICLONERANGE and FIDEDUPERANGE make files share 176 * their underlying storage ("reflink") between source and 177 * destination FDs, on file systems which support that. 178 * 179 * These IOCTL commands only apply to regular files 180 * and are harmless to permit for device files. 181 */ 182 case FICLONE: 183 case FICLONERANGE: 184 case FIDEDUPERANGE: 185 /* 186 * FS_IOC_GETFSUUID and FS_IOC_GETFSSYSFSPATH both operate on 187 * the file system superblock, not on the specific file, so 188 * these operations are available through any other file on the 189 * same file system as well. 190 */ 191 case FS_IOC_GETFSUUID: 192 case FS_IOC_GETFSSYSFSPATH: 193 return true; 194 195 /* 196 * FIONREAD, FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_IOC_FSGETXATTR and 197 * FS_IOC_FSSETXATTR are forwarded to device implementations. 198 */ 199 200 /* 201 * file_ioctl() commands (FIBMAP, FS_IOC_RESVSP, FS_IOC_RESVSP64, 202 * FS_IOC_UNRESVSP, FS_IOC_UNRESVSP64 and FS_IOC_ZERO_RANGE) are 203 * forwarded to device implementations, so not permitted. 204 */ 205 206 /* Other commands are guarded by the access right. */ 207 default: 208 return false; 209 } 210 } 211 212 /* 213 * is_masked_device_ioctl_compat - same as the helper above, but checking the 214 * "compat" IOCTL commands. 215 * 216 * The IOCTL commands with special handling in compat-mode should behave the 217 * same as their non-compat counterparts. 218 */ 219 static __attribute_const__ bool 220 is_masked_device_ioctl_compat(const unsigned int cmd) 221 { 222 switch (cmd) { 223 /* FICLONE is permitted, same as in the non-compat variant. */ 224 case FICLONE: 225 return true; 226 227 #if defined(CONFIG_X86_64) 228 /* 229 * FS_IOC_RESVSP_32, FS_IOC_RESVSP64_32, FS_IOC_UNRESVSP_32, 230 * FS_IOC_UNRESVSP64_32, FS_IOC_ZERO_RANGE_32: not blanket-permitted, 231 * for consistency with their non-compat variants. 232 */ 233 case FS_IOC_RESVSP_32: 234 case FS_IOC_RESVSP64_32: 235 case FS_IOC_UNRESVSP_32: 236 case FS_IOC_UNRESVSP64_32: 237 case FS_IOC_ZERO_RANGE_32: 238 #endif 239 240 /* 241 * FS_IOC32_GETFLAGS, FS_IOC32_SETFLAGS are forwarded to their device 242 * implementations. 243 */ 244 case FS_IOC32_GETFLAGS: 245 case FS_IOC32_SETFLAGS: 246 return false; 247 default: 248 return is_masked_device_ioctl(cmd); 249 } 250 } 251 252 /* Ruleset management */ 253 254 static struct landlock_object *get_inode_object(struct inode *const inode) 255 { 256 struct landlock_object *object, *new_object; 257 struct landlock_inode_security *inode_sec = landlock_inode(inode); 258 259 rcu_read_lock(); 260 retry: 261 object = rcu_dereference(inode_sec->object); 262 if (object) { 263 if (likely(refcount_inc_not_zero(&object->usage))) { 264 rcu_read_unlock(); 265 return object; 266 } 267 /* 268 * We are racing with release_inode(), the object is going 269 * away. Wait for release_inode(), then retry. 270 */ 271 spin_lock(&object->lock); 272 spin_unlock(&object->lock); 273 goto retry; 274 } 275 rcu_read_unlock(); 276 277 /* 278 * If there is no object tied to @inode, then create a new one (without 279 * holding any locks). 280 */ 281 new_object = landlock_create_object(&landlock_fs_underops, inode); 282 if (IS_ERR(new_object)) 283 return new_object; 284 285 /* 286 * Protects against concurrent calls to get_inode_object() or 287 * hook_sb_delete(). 288 */ 289 spin_lock(&inode->i_lock); 290 if (unlikely(rcu_access_pointer(inode_sec->object))) { 291 /* Someone else just created the object, bail out and retry. */ 292 spin_unlock(&inode->i_lock); 293 kfree(new_object); 294 295 rcu_read_lock(); 296 goto retry; 297 } 298 299 /* 300 * @inode will be released by hook_sb_delete() on its superblock 301 * shutdown, or by release_inode() when no more ruleset references the 302 * related object. 303 */ 304 ihold(inode); 305 rcu_assign_pointer(inode_sec->object, new_object); 306 spin_unlock(&inode->i_lock); 307 return new_object; 308 } 309 310 /* All access rights that can be tied to files. */ 311 /* clang-format off */ 312 #define ACCESS_FILE ( \ 313 LANDLOCK_ACCESS_FS_EXECUTE | \ 314 LANDLOCK_ACCESS_FS_WRITE_FILE | \ 315 LANDLOCK_ACCESS_FS_READ_FILE | \ 316 LANDLOCK_ACCESS_FS_TRUNCATE | \ 317 LANDLOCK_ACCESS_FS_IOCTL_DEV) 318 /* clang-format on */ 319 320 /* 321 * @path: Should have been checked by get_path_from_fd(). 322 */ 323 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, 324 const struct path *const path, 325 access_mask_t access_rights) 326 { 327 int err; 328 struct landlock_id id = { 329 .type = LANDLOCK_KEY_INODE, 330 }; 331 332 /* Files only get access rights that make sense. */ 333 if (!d_is_dir(path->dentry) && 334 !access_mask_subset(access_rights, ACCESS_FILE)) 335 return -EINVAL; 336 if (WARN_ON_ONCE(ruleset->num_layers != 1)) 337 return -EINVAL; 338 339 /* Transforms relative access rights to absolute ones. */ 340 access_rights |= LANDLOCK_MASK_ACCESS_FS & 341 ~landlock_get_fs_access_mask(ruleset, 0); 342 id.key.object = get_inode_object(d_backing_inode(path->dentry)); 343 if (IS_ERR(id.key.object)) 344 return PTR_ERR(id.key.object); 345 mutex_lock(&ruleset->lock); 346 err = landlock_insert_rule(ruleset, id, access_rights); 347 mutex_unlock(&ruleset->lock); 348 /* 349 * No need to check for an error because landlock_insert_rule() 350 * increments the refcount for the new object if needed. 351 */ 352 landlock_put_object(id.key.object); 353 return err; 354 } 355 356 /* Access-control management */ 357 358 /* 359 * The lifetime of the returned rule is tied to @domain. 360 * 361 * Returns NULL if no rule is found or if @dentry is negative. 362 */ 363 static const struct landlock_rule * 364 find_rule(const struct landlock_ruleset *const domain, 365 const struct dentry *const dentry) 366 { 367 const struct landlock_rule *rule; 368 const struct inode *inode; 369 struct landlock_id id = { 370 .type = LANDLOCK_KEY_INODE, 371 }; 372 373 /* Ignores nonexistent leafs. */ 374 if (d_is_negative(dentry)) 375 return NULL; 376 377 inode = d_backing_inode(dentry); 378 rcu_read_lock(); 379 id.key.object = rcu_dereference(landlock_inode(inode)->object); 380 rule = landlock_find_rule(domain, id); 381 rcu_read_unlock(); 382 return rule; 383 } 384 385 /* 386 * Allows access to pseudo filesystems that will never be mountable (e.g. 387 * sockfs, pipefs), but can still be reachable through 388 * /proc/<pid>/fd/<file-descriptor> 389 */ 390 static bool is_nouser_or_private(const struct dentry *dentry) 391 { 392 return (dentry->d_sb->s_flags & SB_NOUSER) || 393 (d_is_positive(dentry) && 394 unlikely(IS_PRIVATE(d_backing_inode(dentry)))); 395 } 396 397 static const struct access_masks any_fs = { 398 .fs = ~0, 399 }; 400 401 /* 402 * Returns true iff the child file with the given src_child access rights under 403 * src_parent would result in having the same or fewer access rights if it were 404 * moved under new_parent. 405 */ 406 static bool may_refer(const struct layer_access_masks *const src_parent, 407 const struct layer_access_masks *const src_child, 408 const struct layer_access_masks *const new_parent, 409 const bool child_is_dir) 410 { 411 for (size_t i = 0; i < ARRAY_SIZE(new_parent->access); i++) { 412 access_mask_t child_access = src_parent->access[i] & 413 src_child->access[i]; 414 access_mask_t parent_access = new_parent->access[i]; 415 416 if (!child_is_dir) { 417 child_access &= ACCESS_FILE; 418 parent_access &= ACCESS_FILE; 419 } 420 421 if (!access_mask_subset(child_access, parent_access)) 422 return false; 423 } 424 return true; 425 } 426 427 /* 428 * Check that a destination file hierarchy has more restrictions than a source 429 * file hierarchy. This is only used for link and rename actions. 430 * 431 * Returns: true if child1 may be moved from parent1 to parent2 without 432 * increasing its access rights. If child2 is set, an additional condition is 433 * that child2 may be used from parent2 to parent1 without increasing its access 434 * rights. 435 */ 436 static bool no_more_access(const struct layer_access_masks *const parent1, 437 const struct layer_access_masks *const child1, 438 const bool child1_is_dir, 439 const struct layer_access_masks *const parent2, 440 const struct layer_access_masks *const child2, 441 const bool child2_is_dir) 442 { 443 if (!may_refer(parent1, child1, parent2, child1_is_dir)) 444 return false; 445 446 if (!child2) 447 return true; 448 449 return may_refer(parent2, child2, parent1, child2_is_dir); 450 } 451 452 #define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) 453 #define NMA_FALSE(...) KUNIT_EXPECT_FALSE(test, no_more_access(__VA_ARGS__)) 454 455 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 456 457 static void test_no_more_access(struct kunit *const test) 458 { 459 const struct layer_access_masks rx0 = { 460 .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | 461 LANDLOCK_ACCESS_FS_READ_FILE, 462 }; 463 const struct layer_access_masks mx0 = { 464 .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | 465 LANDLOCK_ACCESS_FS_MAKE_REG, 466 }; 467 const struct layer_access_masks x0 = { 468 .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, 469 }; 470 const struct layer_access_masks x1 = { 471 .access[1] = LANDLOCK_ACCESS_FS_EXECUTE, 472 }; 473 const struct layer_access_masks x01 = { 474 .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, 475 .access[1] = LANDLOCK_ACCESS_FS_EXECUTE, 476 }; 477 const struct layer_access_masks allows_all = {}; 478 479 /* Checks without restriction. */ 480 NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); 481 NMA_TRUE(&allows_all, &x0, false, &allows_all, NULL, false); 482 NMA_FALSE(&x0, &x0, false, &allows_all, NULL, false); 483 484 /* 485 * Checks that we can only refer a file if no more access could be 486 * inherited. 487 */ 488 NMA_TRUE(&x0, &x0, false, &rx0, NULL, false); 489 NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false); 490 NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false); 491 NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false); 492 493 /* Checks allowed referring with different nested domains. */ 494 NMA_TRUE(&x0, &x1, false, &x0, NULL, false); 495 NMA_TRUE(&x1, &x0, false, &x0, NULL, false); 496 NMA_TRUE(&x0, &x01, false, &x0, NULL, false); 497 NMA_TRUE(&x0, &x01, false, &rx0, NULL, false); 498 NMA_TRUE(&x01, &x0, false, &x0, NULL, false); 499 NMA_TRUE(&x01, &x0, false, &rx0, NULL, false); 500 NMA_FALSE(&x01, &x01, false, &x0, NULL, false); 501 502 /* Checks that file access rights are also enforced for a directory. */ 503 NMA_FALSE(&rx0, &rx0, true, &x0, NULL, false); 504 505 /* Checks that directory access rights don't impact file referring... */ 506 NMA_TRUE(&mx0, &mx0, false, &x0, NULL, false); 507 /* ...but only directory referring. */ 508 NMA_FALSE(&mx0, &mx0, true, &x0, NULL, false); 509 510 /* Checks directory exchange. */ 511 NMA_TRUE(&mx0, &mx0, true, &mx0, &mx0, true); 512 NMA_TRUE(&mx0, &mx0, true, &mx0, &x0, true); 513 NMA_FALSE(&mx0, &mx0, true, &x0, &mx0, true); 514 NMA_FALSE(&mx0, &mx0, true, &x0, &x0, true); 515 NMA_FALSE(&mx0, &mx0, true, &x1, &x1, true); 516 517 /* Checks file exchange with directory access rights... */ 518 NMA_TRUE(&mx0, &mx0, false, &mx0, &mx0, false); 519 NMA_TRUE(&mx0, &mx0, false, &mx0, &x0, false); 520 NMA_TRUE(&mx0, &mx0, false, &x0, &mx0, false); 521 NMA_TRUE(&mx0, &mx0, false, &x0, &x0, false); 522 /* ...and with file access rights. */ 523 NMA_TRUE(&rx0, &rx0, false, &rx0, &rx0, false); 524 NMA_TRUE(&rx0, &rx0, false, &rx0, &x0, false); 525 NMA_FALSE(&rx0, &rx0, false, &x0, &rx0, false); 526 NMA_FALSE(&rx0, &rx0, false, &x0, &x0, false); 527 NMA_FALSE(&rx0, &rx0, false, &x1, &x1, false); 528 529 /* 530 * Allowing the following requests should not be a security risk 531 * because domain 0 denies execute access, and domain 1 is always 532 * nested with domain 0. However, adding an exception for this case 533 * would mean to check all nested domains to make sure none can get 534 * more privileges (e.g. processes only sandboxed by domain 0). 535 * Moreover, this behavior (i.e. composition of N domains) could then 536 * be inconsistent compared to domain 1's ruleset alone (e.g. it might 537 * be denied to link/rename with domain 1's ruleset, whereas it would 538 * be allowed if nested on top of domain 0). Another drawback would be 539 * to create a cover channel that could enable sandboxed processes to 540 * infer most of the filesystem restrictions from their domain. To 541 * make it simple, efficient, safe, and more consistent, this case is 542 * always denied. 543 */ 544 NMA_FALSE(&x1, &x1, false, &x0, NULL, false); 545 NMA_FALSE(&x1, &x1, false, &rx0, NULL, false); 546 NMA_FALSE(&x1, &x1, true, &x0, NULL, false); 547 NMA_FALSE(&x1, &x1, true, &rx0, NULL, false); 548 549 /* Checks the same case of exclusive domains with a file... */ 550 NMA_TRUE(&x1, &x1, false, &x01, NULL, false); 551 NMA_FALSE(&x1, &x1, false, &x01, &x0, false); 552 NMA_FALSE(&x1, &x1, false, &x01, &x01, false); 553 NMA_FALSE(&x1, &x1, false, &x0, &x0, false); 554 /* ...and with a directory. */ 555 NMA_FALSE(&x1, &x1, false, &x0, &x0, true); 556 NMA_FALSE(&x1, &x1, true, &x0, &x0, false); 557 NMA_FALSE(&x1, &x1, true, &x0, &x0, true); 558 } 559 560 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 561 562 #undef NMA_TRUE 563 #undef NMA_FALSE 564 565 static bool is_layer_masks_allowed(const struct layer_access_masks *masks) 566 { 567 return !memchr_inv(&masks->access, 0, sizeof(masks->access)); 568 } 569 570 /* 571 * Removes @masks accesses that are not requested. 572 * 573 * Returns true if the request is allowed, false otherwise. 574 */ 575 static bool scope_to_request(const access_mask_t access_request, 576 struct layer_access_masks *masks) 577 { 578 bool saw_unfulfilled_access = false; 579 580 if (WARN_ON_ONCE(!masks)) 581 return true; 582 583 for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { 584 masks->access[i] &= access_request; 585 if (masks->access[i]) 586 saw_unfulfilled_access = true; 587 } 588 return !saw_unfulfilled_access; 589 } 590 591 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 592 593 static void test_scope_to_request_with_exec_none(struct kunit *const test) 594 { 595 /* Allows everything. */ 596 struct layer_access_masks masks = {}; 597 598 /* Checks and scopes with execute. */ 599 KUNIT_EXPECT_TRUE(test, 600 scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &masks)); 601 KUNIT_EXPECT_EQ(test, 0, masks.access[0]); 602 } 603 604 static void test_scope_to_request_with_exec_some(struct kunit *const test) 605 { 606 /* Denies execute and write. */ 607 struct layer_access_masks masks = { 608 .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, 609 .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE, 610 }; 611 612 /* Checks and scopes with execute. */ 613 KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, 614 &masks)); 615 KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE, masks.access[0]); 616 KUNIT_EXPECT_EQ(test, 0, masks.access[1]); 617 } 618 619 static void test_scope_to_request_without_access(struct kunit *const test) 620 { 621 /* Denies execute and write. */ 622 struct layer_access_masks masks = { 623 .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, 624 .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE, 625 }; 626 627 /* Checks and scopes without access request. */ 628 KUNIT_EXPECT_TRUE(test, scope_to_request(0, &masks)); 629 KUNIT_EXPECT_EQ(test, 0, masks.access[0]); 630 KUNIT_EXPECT_EQ(test, 0, masks.access[1]); 631 } 632 633 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 634 635 /* 636 * Returns true if there is at least one access right different than 637 * LANDLOCK_ACCESS_FS_REFER. 638 */ 639 static bool is_eacces(const struct layer_access_masks *masks, 640 const access_mask_t access_request) 641 { 642 if (!masks) 643 return false; 644 645 for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { 646 /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ 647 if (masks->access[i] & access_request & 648 ~LANDLOCK_ACCESS_FS_REFER) 649 return true; 650 } 651 return false; 652 } 653 654 #define IE_TRUE(...) KUNIT_EXPECT_TRUE(test, is_eacces(__VA_ARGS__)) 655 #define IE_FALSE(...) KUNIT_EXPECT_FALSE(test, is_eacces(__VA_ARGS__)) 656 657 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 658 659 static void test_is_eacces_with_none(struct kunit *const test) 660 { 661 const struct layer_access_masks masks = {}; 662 663 IE_FALSE(&masks, 0); 664 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); 665 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); 666 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); 667 } 668 669 static void test_is_eacces_with_refer(struct kunit *const test) 670 { 671 const struct layer_access_masks masks = { 672 .access[0] = LANDLOCK_ACCESS_FS_REFER, 673 }; 674 675 IE_FALSE(&masks, 0); 676 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); 677 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); 678 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); 679 } 680 681 static void test_is_eacces_with_write(struct kunit *const test) 682 { 683 const struct layer_access_masks masks = { 684 .access[0] = LANDLOCK_ACCESS_FS_WRITE_FILE, 685 }; 686 687 IE_FALSE(&masks, 0); 688 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); 689 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); 690 691 IE_TRUE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); 692 } 693 694 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 695 696 #undef IE_TRUE 697 #undef IE_FALSE 698 699 /** 700 * is_access_to_paths_allowed - Check accesses for requests with a common path 701 * 702 * @domain: Domain to check against. 703 * @path: File hierarchy to walk through. For refer checks, this would be 704 * the common mountpoint. 705 * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is 706 * equal to @layer_masks_parent2 (if any). This is tied to the unique 707 * requested path for most actions, or the source in case of a refer action 708 * (i.e. rename or link), or the source and destination in case of 709 * RENAME_EXCHANGE. 710 * @layer_masks_parent1: Pointer to a matrix of layer masks per access 711 * masks, identifying the layers that forbid a specific access. Bits from 712 * this matrix can be unset according to the @path walk. An empty matrix 713 * means that @domain allows all possible Landlock accesses (i.e. not only 714 * those identified by @access_request_parent1). This matrix can 715 * initially refer to domain layer masks and, when the accesses for the 716 * destination and source are the same, to requested layer masks. 717 * @log_request_parent1: Audit request to fill if the related access is denied. 718 * @dentry_child1: Dentry to the initial child of the parent1 path. This 719 * pointer must be NULL for non-refer actions (i.e. not link nor rename). 720 * @access_request_parent2: Similar to @access_request_parent1 but for a 721 * request involving a source and a destination. This refers to the 722 * destination, except in case of RENAME_EXCHANGE where it also refers to 723 * the source. Must be set to 0 when using a simple path request. 724 * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer 725 * action. This must be NULL otherwise. 726 * @log_request_parent2: Audit request to fill if the related access is denied. 727 * @dentry_child2: Dentry to the initial child of the parent2 path. This 728 * pointer is only set for RENAME_EXCHANGE actions and must be NULL 729 * otherwise. 730 * 731 * This helper first checks that the destination has a superset of restrictions 732 * compared to the source (if any) for a common path. Because of 733 * RENAME_EXCHANGE actions, source and destinations may be swapped. It then 734 * checks that the collected accesses and the remaining ones are enough to 735 * allow the request. 736 * 737 * Returns: 738 * - true if the access request is granted; 739 * - false otherwise. 740 */ 741 static bool 742 is_access_to_paths_allowed(const struct landlock_ruleset *const domain, 743 const struct path *const path, 744 const access_mask_t access_request_parent1, 745 struct layer_access_masks *layer_masks_parent1, 746 struct landlock_request *const log_request_parent1, 747 struct dentry *const dentry_child1, 748 const access_mask_t access_request_parent2, 749 struct layer_access_masks *layer_masks_parent2, 750 struct landlock_request *const log_request_parent2, 751 struct dentry *const dentry_child2) 752 { 753 bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, 754 child1_is_directory = true, child2_is_directory = true; 755 struct path walker_path; 756 access_mask_t access_masked_parent1, access_masked_parent2; 757 struct layer_access_masks _layer_masks_child1, _layer_masks_child2; 758 struct layer_access_masks *layer_masks_child1 = NULL, 759 *layer_masks_child2 = NULL; 760 761 if (!access_request_parent1 && !access_request_parent2) 762 return true; 763 764 if (WARN_ON_ONCE(!path)) 765 return true; 766 767 if (is_nouser_or_private(path->dentry)) 768 return true; 769 770 if (WARN_ON_ONCE(!layer_masks_parent1)) 771 return false; 772 773 allowed_parent1 = is_layer_masks_allowed(layer_masks_parent1); 774 775 if (unlikely(layer_masks_parent2)) { 776 if (WARN_ON_ONCE(!dentry_child1)) 777 return false; 778 779 allowed_parent2 = is_layer_masks_allowed(layer_masks_parent2); 780 781 /* 782 * For a double request, first check for potential privilege 783 * escalation by looking at domain handled accesses (which are 784 * a superset of the meaningful requested accesses). 785 */ 786 access_masked_parent1 = access_masked_parent2 = 787 landlock_union_access_masks(domain).fs; 788 is_dom_check = true; 789 } else { 790 if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) 791 return false; 792 /* For a simple request, only check for requested accesses. */ 793 access_masked_parent1 = access_request_parent1; 794 access_masked_parent2 = access_request_parent2; 795 is_dom_check = false; 796 } 797 798 if (unlikely(dentry_child1)) { 799 if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 800 &_layer_masks_child1, 801 LANDLOCK_KEY_INODE)) 802 landlock_unmask_layers(find_rule(domain, dentry_child1), 803 &_layer_masks_child1); 804 layer_masks_child1 = &_layer_masks_child1; 805 child1_is_directory = d_is_dir(dentry_child1); 806 } 807 if (unlikely(dentry_child2)) { 808 if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 809 &_layer_masks_child2, 810 LANDLOCK_KEY_INODE)) 811 landlock_unmask_layers(find_rule(domain, dentry_child2), 812 &_layer_masks_child2); 813 layer_masks_child2 = &_layer_masks_child2; 814 child2_is_directory = d_is_dir(dentry_child2); 815 } 816 817 walker_path = *path; 818 path_get(&walker_path); 819 /* 820 * We need to walk through all the hierarchy to not miss any relevant 821 * restriction. 822 */ 823 while (true) { 824 const struct landlock_rule *rule; 825 826 /* 827 * If at least all accesses allowed on the destination are 828 * already allowed on the source, respectively if there is at 829 * least as much as restrictions on the destination than on the 830 * source, then we can safely refer files from the source to 831 * the destination without risking a privilege escalation. 832 * This also applies in the case of RENAME_EXCHANGE, which 833 * implies checks on both direction. This is crucial for 834 * standalone multilayered security policies. Furthermore, 835 * this helps avoid policy writers to shoot themselves in the 836 * foot. 837 */ 838 if (unlikely(is_dom_check && 839 no_more_access( 840 layer_masks_parent1, layer_masks_child1, 841 child1_is_directory, layer_masks_parent2, 842 layer_masks_child2, 843 child2_is_directory))) { 844 /* 845 * Now, downgrades the remaining checks from domain 846 * handled accesses to requested accesses. 847 */ 848 is_dom_check = false; 849 access_masked_parent1 = access_request_parent1; 850 access_masked_parent2 = access_request_parent2; 851 852 allowed_parent1 = 853 allowed_parent1 || 854 scope_to_request(access_masked_parent1, 855 layer_masks_parent1); 856 allowed_parent2 = 857 allowed_parent2 || 858 scope_to_request(access_masked_parent2, 859 layer_masks_parent2); 860 861 /* Stops when all accesses are granted. */ 862 if (allowed_parent1 && allowed_parent2) 863 break; 864 } 865 866 rule = find_rule(domain, walker_path.dentry); 867 allowed_parent1 = 868 allowed_parent1 || 869 landlock_unmask_layers(rule, layer_masks_parent1); 870 allowed_parent2 = 871 allowed_parent2 || 872 landlock_unmask_layers(rule, layer_masks_parent2); 873 874 /* Stops when a rule from each layer grants access. */ 875 if (allowed_parent1 && allowed_parent2) 876 break; 877 878 jump_up: 879 if (walker_path.dentry == walker_path.mnt->mnt_root) { 880 if (follow_up(&walker_path)) { 881 /* Ignores hidden mount points. */ 882 goto jump_up; 883 } else { 884 /* 885 * Stops at the real root. Denies access 886 * because not all layers have granted access. 887 */ 888 break; 889 } 890 } 891 892 if (unlikely(IS_ROOT(walker_path.dentry))) { 893 if (likely(walker_path.mnt->mnt_flags & MNT_INTERNAL)) { 894 /* 895 * Stops and allows access when reaching disconnected root 896 * directories that are part of internal filesystems (e.g. nsfs, 897 * which is reachable through /proc/<pid>/ns/<namespace>). 898 */ 899 allowed_parent1 = true; 900 allowed_parent2 = true; 901 break; 902 } 903 904 /* 905 * We reached a disconnected root directory from a bind mount. 906 * Let's continue the walk with the mount point we missed. 907 */ 908 dput(walker_path.dentry); 909 walker_path.dentry = walker_path.mnt->mnt_root; 910 dget(walker_path.dentry); 911 } else { 912 struct dentry *const parent_dentry = 913 dget_parent(walker_path.dentry); 914 915 dput(walker_path.dentry); 916 walker_path.dentry = parent_dentry; 917 } 918 } 919 path_put(&walker_path); 920 921 /* 922 * Check CONFIG_AUDIT to enable elision of log_request_parent* and 923 * associated caller's stack variables thanks to dead code elimination. 924 */ 925 #ifdef CONFIG_AUDIT 926 if (!allowed_parent1 && log_request_parent1) { 927 log_request_parent1->type = LANDLOCK_REQUEST_FS_ACCESS; 928 log_request_parent1->audit.type = LSM_AUDIT_DATA_PATH; 929 log_request_parent1->audit.u.path = *path; 930 log_request_parent1->access = access_masked_parent1; 931 log_request_parent1->layer_masks = layer_masks_parent1; 932 } 933 934 if (!allowed_parent2 && log_request_parent2) { 935 log_request_parent2->type = LANDLOCK_REQUEST_FS_ACCESS; 936 log_request_parent2->audit.type = LSM_AUDIT_DATA_PATH; 937 log_request_parent2->audit.u.path = *path; 938 log_request_parent2->access = access_masked_parent2; 939 log_request_parent2->layer_masks = layer_masks_parent2; 940 } 941 #endif /* CONFIG_AUDIT */ 942 943 return allowed_parent1 && allowed_parent2; 944 } 945 946 static int current_check_access_path(const struct path *const path, 947 access_mask_t access_request) 948 { 949 const struct access_masks masks = { 950 .fs = access_request, 951 }; 952 const struct landlock_cred_security *const subject = 953 landlock_get_applicable_subject(current_cred(), masks, NULL); 954 struct layer_access_masks layer_masks; 955 struct landlock_request request = {}; 956 957 if (!subject) 958 return 0; 959 960 access_request = landlock_init_layer_masks(subject->domain, 961 access_request, &layer_masks, 962 LANDLOCK_KEY_INODE); 963 if (is_access_to_paths_allowed(subject->domain, path, access_request, 964 &layer_masks, &request, NULL, 0, NULL, 965 NULL, NULL)) 966 return 0; 967 968 landlock_log_denial(subject, &request); 969 return -EACCES; 970 } 971 972 static __attribute_const__ access_mask_t get_mode_access(const umode_t mode) 973 { 974 switch (mode & S_IFMT) { 975 case S_IFLNK: 976 return LANDLOCK_ACCESS_FS_MAKE_SYM; 977 case S_IFDIR: 978 return LANDLOCK_ACCESS_FS_MAKE_DIR; 979 case S_IFCHR: 980 return LANDLOCK_ACCESS_FS_MAKE_CHAR; 981 case S_IFBLK: 982 return LANDLOCK_ACCESS_FS_MAKE_BLOCK; 983 case S_IFIFO: 984 return LANDLOCK_ACCESS_FS_MAKE_FIFO; 985 case S_IFSOCK: 986 return LANDLOCK_ACCESS_FS_MAKE_SOCK; 987 case S_IFREG: 988 case 0: 989 /* A zero mode translates to S_IFREG. */ 990 default: 991 /* Treats weird files as regular files. */ 992 return LANDLOCK_ACCESS_FS_MAKE_REG; 993 } 994 } 995 996 static access_mask_t maybe_remove(const struct dentry *const dentry) 997 { 998 if (d_is_negative(dentry)) 999 return 0; 1000 return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : 1001 LANDLOCK_ACCESS_FS_REMOVE_FILE; 1002 } 1003 1004 /** 1005 * collect_domain_accesses - Walk through a file path and collect accesses 1006 * 1007 * @domain: Domain to check against. 1008 * @mnt_root: Last directory to check. 1009 * @dir: Directory to start the walk from. 1010 * @layer_masks_dom: Where to store the collected accesses. 1011 * 1012 * This helper is useful to begin a path walk from the @dir directory to a 1013 * @mnt_root directory used as a mount point. This mount point is the common 1014 * ancestor between the source and the destination of a renamed and linked 1015 * file. While walking from @dir to @mnt_root, we record all the domain's 1016 * allowed accesses in @layer_masks_dom. 1017 * 1018 * Because of disconnected directories, this walk may not reach @mnt_dir. In 1019 * this case, the walk will continue to @mnt_dir after this call. 1020 * 1021 * This is similar to is_access_to_paths_allowed() but much simpler because it 1022 * only handles walking on the same mount point and only checks one set of 1023 * accesses. 1024 * 1025 * Returns: 1026 * - true if all the domain access rights are allowed for @dir; 1027 * - false if the walk reached @mnt_root. 1028 */ 1029 static bool collect_domain_accesses(const struct landlock_ruleset *const domain, 1030 const struct dentry *const mnt_root, 1031 struct dentry *dir, 1032 struct layer_access_masks *layer_masks_dom) 1033 { 1034 bool ret = false; 1035 1036 if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) 1037 return true; 1038 if (is_nouser_or_private(dir)) 1039 return true; 1040 1041 if (!landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 1042 layer_masks_dom, LANDLOCK_KEY_INODE)) 1043 return true; 1044 1045 dget(dir); 1046 while (true) { 1047 struct dentry *parent_dentry; 1048 1049 /* Gets all layers allowing all domain accesses. */ 1050 if (landlock_unmask_layers(find_rule(domain, dir), 1051 layer_masks_dom)) { 1052 /* 1053 * Stops when all handled accesses are allowed by at 1054 * least one rule in each layer. 1055 */ 1056 ret = true; 1057 break; 1058 } 1059 1060 /* 1061 * Stops at the mount point or the filesystem root for a disconnected 1062 * directory. 1063 */ 1064 if (dir == mnt_root || unlikely(IS_ROOT(dir))) 1065 break; 1066 1067 parent_dentry = dget_parent(dir); 1068 dput(dir); 1069 dir = parent_dentry; 1070 } 1071 dput(dir); 1072 return ret; 1073 } 1074 1075 /** 1076 * current_check_refer_path - Check if a rename or link action is allowed 1077 * 1078 * @old_dentry: File or directory requested to be moved or linked. 1079 * @new_dir: Destination parent directory. 1080 * @new_dentry: Destination file or directory. 1081 * @removable: Sets to true if it is a rename operation. 1082 * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. 1083 * 1084 * Because of its unprivileged constraints, Landlock relies on file hierarchies 1085 * (and not only inodes) to tie access rights to files. Being able to link or 1086 * rename a file hierarchy brings some challenges. Indeed, moving or linking a 1087 * file (i.e. creating a new reference to an inode) can have an impact on the 1088 * actions allowed for a set of files if it would change its parent directory 1089 * (i.e. reparenting). 1090 * 1091 * To avoid trivial access right bypasses, Landlock first checks if the file or 1092 * directory requested to be moved would gain new access rights inherited from 1093 * its new hierarchy. Before returning any error, Landlock then checks that 1094 * the parent source hierarchy and the destination hierarchy would allow the 1095 * link or rename action. If it is not the case, an error with EACCES is 1096 * returned to inform user space that there is no way to remove or create the 1097 * requested source file type. If it should be allowed but the new inherited 1098 * access rights would be greater than the source access rights, then the 1099 * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables 1100 * user space to abort the whole operation if there is no way to do it, or to 1101 * manually copy the source to the destination if this remains allowed, e.g. 1102 * because file creation is allowed on the destination directory but not direct 1103 * linking. 1104 * 1105 * To achieve this goal, the kernel needs to compare two file hierarchies: the 1106 * one identifying the source file or directory (including itself), and the 1107 * destination one. This can be seen as a multilayer partial ordering problem. 1108 * The kernel walks through these paths and collects in a matrix the access 1109 * rights that are denied per layer. These matrices are then compared to see 1110 * if the destination one has more (or the same) restrictions as the source 1111 * one. If this is the case, the requested action will not return EXDEV, which 1112 * doesn't mean the action is allowed. The parent hierarchy of the source 1113 * (i.e. parent directory), and the destination hierarchy must also be checked 1114 * to verify that they explicitly allow such action (i.e. referencing, 1115 * creation and potentially removal rights). The kernel implementation is then 1116 * required to rely on potentially four matrices of access rights: one for the 1117 * source file or directory (i.e. the child), a potentially other one for the 1118 * other source/destination (in case of RENAME_EXCHANGE), one for the source 1119 * parent hierarchy and a last one for the destination hierarchy. These 1120 * ephemeral matrices take some space on the stack, which limits the number of 1121 * layers to a deemed reasonable number: 16. 1122 * 1123 * Returns: 1124 * - 0 if access is allowed; 1125 * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; 1126 * - -EACCES if file removal or creation is denied. 1127 */ 1128 static int current_check_refer_path(struct dentry *const old_dentry, 1129 const struct path *const new_dir, 1130 struct dentry *const new_dentry, 1131 const bool removable, const bool exchange) 1132 { 1133 const struct landlock_cred_security *const subject = 1134 landlock_get_applicable_subject(current_cred(), any_fs, NULL); 1135 bool allow_parent1, allow_parent2; 1136 access_mask_t access_request_parent1, access_request_parent2; 1137 struct path mnt_dir; 1138 struct dentry *old_parent; 1139 struct layer_access_masks layer_masks_parent1 = {}, 1140 layer_masks_parent2 = {}; 1141 struct landlock_request request1 = {}, request2 = {}; 1142 1143 if (!subject) 1144 return 0; 1145 1146 if (unlikely(d_is_negative(old_dentry))) 1147 return -ENOENT; 1148 if (exchange) { 1149 if (unlikely(d_is_negative(new_dentry))) 1150 return -ENOENT; 1151 access_request_parent1 = 1152 get_mode_access(d_backing_inode(new_dentry)->i_mode); 1153 } else { 1154 access_request_parent1 = 0; 1155 } 1156 access_request_parent2 = 1157 get_mode_access(d_backing_inode(old_dentry)->i_mode); 1158 if (removable) { 1159 access_request_parent1 |= maybe_remove(old_dentry); 1160 access_request_parent2 |= maybe_remove(new_dentry); 1161 } 1162 1163 /* The mount points are the same for old and new paths, cf. EXDEV. */ 1164 if (old_dentry->d_parent == new_dir->dentry) { 1165 /* 1166 * The LANDLOCK_ACCESS_FS_REFER access right is not required 1167 * for same-directory referer (i.e. no reparenting). 1168 */ 1169 access_request_parent1 = landlock_init_layer_masks( 1170 subject->domain, 1171 access_request_parent1 | access_request_parent2, 1172 &layer_masks_parent1, LANDLOCK_KEY_INODE); 1173 if (is_access_to_paths_allowed(subject->domain, new_dir, 1174 access_request_parent1, 1175 &layer_masks_parent1, &request1, 1176 NULL, 0, NULL, NULL, NULL)) 1177 return 0; 1178 1179 landlock_log_denial(subject, &request1); 1180 return -EACCES; 1181 } 1182 1183 access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; 1184 access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; 1185 1186 /* Saves the common mount point. */ 1187 mnt_dir.mnt = new_dir->mnt; 1188 mnt_dir.dentry = new_dir->mnt->mnt_root; 1189 1190 /* 1191 * old_dentry may be the root of the common mount point and 1192 * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and 1193 * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because 1194 * we keep a reference to old_dentry. 1195 */ 1196 old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry : 1197 old_dentry->d_parent; 1198 1199 /* new_dir->dentry is equal to new_dentry->d_parent */ 1200 allow_parent1 = collect_domain_accesses(subject->domain, mnt_dir.dentry, 1201 old_parent, 1202 &layer_masks_parent1); 1203 allow_parent2 = collect_domain_accesses(subject->domain, mnt_dir.dentry, 1204 new_dir->dentry, 1205 &layer_masks_parent2); 1206 1207 if (allow_parent1 && allow_parent2) 1208 return 0; 1209 1210 /* 1211 * To be able to compare source and destination domain access rights, 1212 * take into account the @old_dentry access rights aggregated with its 1213 * parent access rights. This will be useful to compare with the 1214 * destination parent access rights. 1215 */ 1216 if (is_access_to_paths_allowed( 1217 subject->domain, &mnt_dir, access_request_parent1, 1218 &layer_masks_parent1, &request1, old_dentry, 1219 access_request_parent2, &layer_masks_parent2, &request2, 1220 exchange ? new_dentry : NULL)) 1221 return 0; 1222 1223 if (request1.access) { 1224 request1.audit.u.path.dentry = old_parent; 1225 landlock_log_denial(subject, &request1); 1226 } 1227 if (request2.access) { 1228 request2.audit.u.path.dentry = new_dir->dentry; 1229 landlock_log_denial(subject, &request2); 1230 } 1231 1232 /* 1233 * This prioritizes EACCES over EXDEV for all actions, including 1234 * renames with RENAME_EXCHANGE. 1235 */ 1236 if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) || 1237 is_eacces(&layer_masks_parent2, access_request_parent2))) 1238 return -EACCES; 1239 1240 /* 1241 * Gracefully forbids reparenting if the destination directory 1242 * hierarchy is not a superset of restrictions of the source directory 1243 * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the 1244 * source or the destination. 1245 */ 1246 return -EXDEV; 1247 } 1248 1249 /* Inode hooks */ 1250 1251 static void hook_inode_free_security_rcu(void *inode_security) 1252 { 1253 struct landlock_inode_security *inode_sec; 1254 1255 /* 1256 * All inodes must already have been untied from their object by 1257 * release_inode() or hook_sb_delete(). 1258 */ 1259 inode_sec = inode_security + landlock_blob_sizes.lbs_inode; 1260 WARN_ON_ONCE(inode_sec->object); 1261 } 1262 1263 /* Super-block hooks */ 1264 1265 /* 1266 * Release the inodes used in a security policy. 1267 * 1268 * Cf. fsnotify_unmount_inodes() and evict_inodes() 1269 */ 1270 static void hook_sb_delete(struct super_block *const sb) 1271 { 1272 struct inode *inode, *prev_inode = NULL; 1273 1274 if (!landlock_initialized) 1275 return; 1276 1277 spin_lock(&sb->s_inode_list_lock); 1278 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1279 struct landlock_object *object; 1280 1281 /* Only handles referenced inodes. */ 1282 if (!icount_read(inode)) 1283 continue; 1284 1285 /* 1286 * Protects against concurrent modification of inode (e.g. 1287 * from get_inode_object()). 1288 */ 1289 spin_lock(&inode->i_lock); 1290 /* 1291 * Checks I_FREEING and I_WILL_FREE to protect against a race 1292 * condition when release_inode() just called iput(), which 1293 * could lead to a NULL dereference of inode->security or a 1294 * second call to iput() for the same Landlock object. Also 1295 * checks I_NEW because such inode cannot be tied to an object. 1296 */ 1297 if (inode_state_read(inode) & 1298 (I_FREEING | I_WILL_FREE | I_NEW)) { 1299 spin_unlock(&inode->i_lock); 1300 continue; 1301 } 1302 1303 rcu_read_lock(); 1304 object = rcu_dereference(landlock_inode(inode)->object); 1305 if (!object) { 1306 rcu_read_unlock(); 1307 spin_unlock(&inode->i_lock); 1308 continue; 1309 } 1310 /* Keeps a reference to this inode until the next loop walk. */ 1311 __iget(inode); 1312 spin_unlock(&inode->i_lock); 1313 1314 /* 1315 * If there is no concurrent release_inode() ongoing, then we 1316 * are in charge of calling iput() on this inode, otherwise we 1317 * will just wait for it to finish. 1318 */ 1319 spin_lock(&object->lock); 1320 if (object->underobj == inode) { 1321 object->underobj = NULL; 1322 spin_unlock(&object->lock); 1323 rcu_read_unlock(); 1324 1325 /* 1326 * Because object->underobj was not NULL, 1327 * release_inode() and get_inode_object() guarantee 1328 * that it is safe to reset 1329 * landlock_inode(inode)->object while it is not NULL. 1330 * It is therefore not necessary to lock inode->i_lock. 1331 */ 1332 rcu_assign_pointer(landlock_inode(inode)->object, NULL); 1333 /* 1334 * At this point, we own the ihold() reference that was 1335 * originally set up by get_inode_object() and the 1336 * __iget() reference that we just set in this loop 1337 * walk. Therefore there are at least two references 1338 * on the inode. 1339 */ 1340 iput_not_last(inode); 1341 } else { 1342 spin_unlock(&object->lock); 1343 rcu_read_unlock(); 1344 } 1345 1346 if (prev_inode) { 1347 /* 1348 * At this point, we still own the __iget() reference 1349 * that we just set in this loop walk. Therefore we 1350 * can drop the list lock and know that the inode won't 1351 * disappear from under us until the next loop walk. 1352 */ 1353 spin_unlock(&sb->s_inode_list_lock); 1354 /* 1355 * We can now actually put the inode reference from the 1356 * previous loop walk, which is not needed anymore. 1357 */ 1358 iput(prev_inode); 1359 cond_resched(); 1360 spin_lock(&sb->s_inode_list_lock); 1361 } 1362 prev_inode = inode; 1363 } 1364 spin_unlock(&sb->s_inode_list_lock); 1365 1366 /* Puts the inode reference from the last loop walk, if any. */ 1367 if (prev_inode) 1368 iput(prev_inode); 1369 /* Waits for pending iput() in release_inode(). */ 1370 wait_var_event(&landlock_superblock(sb)->inode_refs, 1371 !atomic_long_read(&landlock_superblock(sb)->inode_refs)); 1372 } 1373 1374 static void 1375 log_fs_change_topology_path(const struct landlock_cred_security *const subject, 1376 size_t handle_layer, const struct path *const path) 1377 { 1378 landlock_log_denial(subject, &(struct landlock_request) { 1379 .type = LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY, 1380 .audit = { 1381 .type = LSM_AUDIT_DATA_PATH, 1382 .u.path = *path, 1383 }, 1384 .layer_plus_one = handle_layer + 1, 1385 }); 1386 } 1387 1388 static void log_fs_change_topology_dentry( 1389 const struct landlock_cred_security *const subject, size_t handle_layer, 1390 struct dentry *const dentry) 1391 { 1392 landlock_log_denial(subject, &(struct landlock_request) { 1393 .type = LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY, 1394 .audit = { 1395 .type = LSM_AUDIT_DATA_DENTRY, 1396 .u.dentry = dentry, 1397 }, 1398 .layer_plus_one = handle_layer + 1, 1399 }); 1400 } 1401 1402 /* 1403 * Because a Landlock security policy is defined according to the filesystem 1404 * topology (i.e. the mount namespace), changing it may grant access to files 1405 * not previously allowed. 1406 * 1407 * To make it simple, deny any filesystem topology modification by landlocked 1408 * processes. Non-landlocked processes may still change the namespace of a 1409 * landlocked process, but this kind of threat must be handled by a system-wide 1410 * access-control security policy. 1411 * 1412 * This could be lifted in the future if Landlock can safely handle mount 1413 * namespace updates requested by a landlocked process. Indeed, we could 1414 * update the current domain (which is currently read-only) by taking into 1415 * account the accesses of the source and the destination of a new mount point. 1416 * However, it would also require to make all the child domains dynamically 1417 * inherit these new constraints. Anyway, for backward compatibility reasons, 1418 * a dedicated user space option would be required (e.g. as a ruleset flag). 1419 */ 1420 static int hook_sb_mount(const char *const dev_name, 1421 const struct path *const path, const char *const type, 1422 const unsigned long flags, void *const data) 1423 { 1424 size_t handle_layer; 1425 const struct landlock_cred_security *const subject = 1426 landlock_get_applicable_subject(current_cred(), any_fs, 1427 &handle_layer); 1428 1429 if (!subject) 1430 return 0; 1431 1432 log_fs_change_topology_path(subject, handle_layer, path); 1433 return -EPERM; 1434 } 1435 1436 static int hook_move_mount(const struct path *const from_path, 1437 const struct path *const to_path) 1438 { 1439 size_t handle_layer; 1440 const struct landlock_cred_security *const subject = 1441 landlock_get_applicable_subject(current_cred(), any_fs, 1442 &handle_layer); 1443 1444 if (!subject) 1445 return 0; 1446 1447 log_fs_change_topology_path(subject, handle_layer, to_path); 1448 return -EPERM; 1449 } 1450 1451 /* 1452 * Removing a mount point may reveal a previously hidden file hierarchy, which 1453 * may then grant access to files, which may have previously been forbidden. 1454 */ 1455 static int hook_sb_umount(struct vfsmount *const mnt, const int flags) 1456 { 1457 size_t handle_layer; 1458 const struct landlock_cred_security *const subject = 1459 landlock_get_applicable_subject(current_cred(), any_fs, 1460 &handle_layer); 1461 1462 if (!subject) 1463 return 0; 1464 1465 log_fs_change_topology_dentry(subject, handle_layer, mnt->mnt_root); 1466 return -EPERM; 1467 } 1468 1469 static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) 1470 { 1471 size_t handle_layer; 1472 const struct landlock_cred_security *const subject = 1473 landlock_get_applicable_subject(current_cred(), any_fs, 1474 &handle_layer); 1475 1476 if (!subject) 1477 return 0; 1478 1479 log_fs_change_topology_dentry(subject, handle_layer, sb->s_root); 1480 return -EPERM; 1481 } 1482 1483 /* 1484 * pivot_root(2), like mount(2), changes the current mount namespace. It must 1485 * then be forbidden for a landlocked process. 1486 * 1487 * However, chroot(2) may be allowed because it only changes the relative root 1488 * directory of the current process. Moreover, it can be used to restrict the 1489 * view of the filesystem. 1490 */ 1491 static int hook_sb_pivotroot(const struct path *const old_path, 1492 const struct path *const new_path) 1493 { 1494 size_t handle_layer; 1495 const struct landlock_cred_security *const subject = 1496 landlock_get_applicable_subject(current_cred(), any_fs, 1497 &handle_layer); 1498 1499 if (!subject) 1500 return 0; 1501 1502 log_fs_change_topology_path(subject, handle_layer, new_path); 1503 return -EPERM; 1504 } 1505 1506 /* Path hooks */ 1507 1508 static int hook_path_link(struct dentry *const old_dentry, 1509 const struct path *const new_dir, 1510 struct dentry *const new_dentry) 1511 { 1512 return current_check_refer_path(old_dentry, new_dir, new_dentry, false, 1513 false); 1514 } 1515 1516 static int hook_path_rename(const struct path *const old_dir, 1517 struct dentry *const old_dentry, 1518 const struct path *const new_dir, 1519 struct dentry *const new_dentry, 1520 const unsigned int flags) 1521 { 1522 /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ 1523 return current_check_refer_path(old_dentry, new_dir, new_dentry, true, 1524 !!(flags & RENAME_EXCHANGE)); 1525 } 1526 1527 static int hook_path_mkdir(const struct path *const dir, 1528 struct dentry *const dentry, const umode_t mode) 1529 { 1530 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); 1531 } 1532 1533 static int hook_path_mknod(const struct path *const dir, 1534 struct dentry *const dentry, const umode_t mode, 1535 const unsigned int dev) 1536 { 1537 return current_check_access_path(dir, get_mode_access(mode)); 1538 } 1539 1540 static int hook_path_symlink(const struct path *const dir, 1541 struct dentry *const dentry, 1542 const char *const old_name) 1543 { 1544 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); 1545 } 1546 1547 static int hook_path_unlink(const struct path *const dir, 1548 struct dentry *const dentry) 1549 { 1550 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); 1551 } 1552 1553 static int hook_path_rmdir(const struct path *const dir, 1554 struct dentry *const dentry) 1555 { 1556 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); 1557 } 1558 1559 static int hook_path_truncate(const struct path *const path) 1560 { 1561 return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); 1562 } 1563 1564 /* File hooks */ 1565 1566 /** 1567 * get_required_file_open_access - Get access needed to open a file 1568 * 1569 * @file: File being opened. 1570 * 1571 * Returns the access rights that are required for opening the given file, 1572 * depending on the file type and open mode. 1573 */ 1574 static access_mask_t 1575 get_required_file_open_access(const struct file *const file) 1576 { 1577 access_mask_t access = 0; 1578 1579 if (file->f_mode & FMODE_READ) { 1580 /* A directory can only be opened in read mode. */ 1581 if (S_ISDIR(file_inode(file)->i_mode)) 1582 return LANDLOCK_ACCESS_FS_READ_DIR; 1583 access = LANDLOCK_ACCESS_FS_READ_FILE; 1584 } 1585 if (file->f_mode & FMODE_WRITE) 1586 access |= LANDLOCK_ACCESS_FS_WRITE_FILE; 1587 /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ 1588 if (file->f_flags & __FMODE_EXEC) 1589 access |= LANDLOCK_ACCESS_FS_EXECUTE; 1590 return access; 1591 } 1592 1593 static int hook_file_alloc_security(struct file *const file) 1594 { 1595 /* 1596 * Grants all access rights, even if most of them are not checked later 1597 * on. It is more consistent. 1598 * 1599 * Notably, file descriptors for regular files can also be acquired 1600 * without going through the file_open hook, for example when using 1601 * memfd_create(2). 1602 */ 1603 landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; 1604 return 0; 1605 } 1606 1607 static bool is_device(const struct file *const file) 1608 { 1609 const struct inode *inode = file_inode(file); 1610 1611 return S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); 1612 } 1613 1614 static int hook_file_open(struct file *const file) 1615 { 1616 struct layer_access_masks layer_masks = {}; 1617 access_mask_t open_access_request, full_access_request, allowed_access, 1618 optional_access; 1619 const struct landlock_cred_security *const subject = 1620 landlock_get_applicable_subject(file->f_cred, any_fs, NULL); 1621 struct landlock_request request = {}; 1622 1623 if (!subject) 1624 return 0; 1625 1626 /* 1627 * Because a file may be opened with O_PATH, get_required_file_open_access() 1628 * may return 0. This case will be handled with a future Landlock 1629 * evolution. 1630 */ 1631 open_access_request = get_required_file_open_access(file); 1632 1633 /* 1634 * We look up more access than what we immediately need for open(), so 1635 * that we can later authorize operations on opened files. 1636 */ 1637 optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; 1638 if (is_device(file)) 1639 optional_access |= LANDLOCK_ACCESS_FS_IOCTL_DEV; 1640 1641 full_access_request = open_access_request | optional_access; 1642 1643 if (is_access_to_paths_allowed( 1644 subject->domain, &file->f_path, 1645 landlock_init_layer_masks(subject->domain, 1646 full_access_request, &layer_masks, 1647 LANDLOCK_KEY_INODE), 1648 &layer_masks, &request, NULL, 0, NULL, NULL, NULL)) { 1649 allowed_access = full_access_request; 1650 } else { 1651 /* 1652 * Calculate the actual allowed access rights from layer_masks. 1653 * Remove the access rights from the full access request which 1654 * are still unfulfilled in any of the layers. 1655 */ 1656 allowed_access = full_access_request; 1657 for (size_t i = 0; i < ARRAY_SIZE(layer_masks.access); i++) 1658 allowed_access &= ~layer_masks.access[i]; 1659 } 1660 1661 /* 1662 * For operations on already opened files (i.e. ftruncate()), it is the 1663 * access rights at the time of open() which decide whether the 1664 * operation is permitted. Therefore, we record the relevant subset of 1665 * file access rights in the opened struct file. 1666 */ 1667 landlock_file(file)->allowed_access = allowed_access; 1668 #ifdef CONFIG_AUDIT 1669 landlock_file(file)->deny_masks = landlock_get_deny_masks( 1670 _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks); 1671 #endif /* CONFIG_AUDIT */ 1672 1673 if (access_mask_subset(open_access_request, allowed_access)) 1674 return 0; 1675 1676 /* Sets access to reflect the actual request. */ 1677 request.access = open_access_request; 1678 landlock_log_denial(subject, &request); 1679 return -EACCES; 1680 } 1681 1682 static int hook_file_truncate(struct file *const file) 1683 { 1684 /* 1685 * Allows truncation if the truncate right was available at the time of 1686 * opening the file, to get a consistent access check as for read, write 1687 * and execute operations. 1688 * 1689 * Note: For checks done based on the file's Landlock allowed access, we 1690 * enforce them independently of whether the current thread is in a 1691 * Landlock domain, so that open files passed between independent 1692 * processes retain their behaviour. 1693 */ 1694 if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) 1695 return 0; 1696 1697 landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) { 1698 .type = LANDLOCK_REQUEST_FS_ACCESS, 1699 .audit = { 1700 .type = LSM_AUDIT_DATA_FILE, 1701 .u.file = file, 1702 }, 1703 .all_existing_optional_access = _LANDLOCK_ACCESS_FS_OPTIONAL, 1704 .access = LANDLOCK_ACCESS_FS_TRUNCATE, 1705 #ifdef CONFIG_AUDIT 1706 .deny_masks = landlock_file(file)->deny_masks, 1707 #endif /* CONFIG_AUDIT */ 1708 }); 1709 return -EACCES; 1710 } 1711 1712 static int hook_file_ioctl_common(const struct file *const file, 1713 const unsigned int cmd, const bool is_compat) 1714 { 1715 access_mask_t allowed_access = landlock_file(file)->allowed_access; 1716 1717 /* 1718 * It is the access rights at the time of opening the file which 1719 * determine whether IOCTL can be used on the opened file later. 1720 * 1721 * The access right is attached to the opened file in hook_file_open(). 1722 */ 1723 if (allowed_access & LANDLOCK_ACCESS_FS_IOCTL_DEV) 1724 return 0; 1725 1726 if (!is_device(file)) 1727 return 0; 1728 1729 if (unlikely(is_compat) ? is_masked_device_ioctl_compat(cmd) : 1730 is_masked_device_ioctl(cmd)) 1731 return 0; 1732 1733 landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) { 1734 .type = LANDLOCK_REQUEST_FS_ACCESS, 1735 .audit = { 1736 .type = LSM_AUDIT_DATA_IOCTL_OP, 1737 .u.op = &(struct lsm_ioctlop_audit) { 1738 .path = file->f_path, 1739 .cmd = cmd, 1740 }, 1741 }, 1742 .all_existing_optional_access = _LANDLOCK_ACCESS_FS_OPTIONAL, 1743 .access = LANDLOCK_ACCESS_FS_IOCTL_DEV, 1744 #ifdef CONFIG_AUDIT 1745 .deny_masks = landlock_file(file)->deny_masks, 1746 #endif /* CONFIG_AUDIT */ 1747 }); 1748 return -EACCES; 1749 } 1750 1751 static int hook_file_ioctl(struct file *file, unsigned int cmd, 1752 unsigned long arg) 1753 { 1754 return hook_file_ioctl_common(file, cmd, false); 1755 } 1756 1757 static int hook_file_ioctl_compat(struct file *file, unsigned int cmd, 1758 unsigned long arg) 1759 { 1760 return hook_file_ioctl_common(file, cmd, true); 1761 } 1762 1763 /* 1764 * Always allow sending signals between threads of the same process. This 1765 * ensures consistency with hook_task_kill(). 1766 */ 1767 static bool control_current_fowner(struct fown_struct *const fown) 1768 { 1769 struct task_struct *p; 1770 1771 /* 1772 * Lock already held by __f_setown(), see commit 26f204380a3c ("fs: Fix 1773 * file_set_fowner LSM hook inconsistencies"). 1774 */ 1775 lockdep_assert_held(&fown->lock); 1776 1777 /* 1778 * Some callers (e.g. fcntl_dirnotify) may not be in an RCU read-side 1779 * critical section. 1780 */ 1781 guard(rcu)(); 1782 p = pid_task(fown->pid, fown->pid_type); 1783 if (!p) 1784 return true; 1785 1786 return !same_thread_group(p, current); 1787 } 1788 1789 static void hook_file_set_fowner(struct file *file) 1790 { 1791 struct landlock_ruleset *prev_dom; 1792 struct landlock_cred_security fown_subject = {}; 1793 size_t fown_layer = 0; 1794 1795 if (control_current_fowner(file_f_owner(file))) { 1796 static const struct access_masks signal_scope = { 1797 .scope = LANDLOCK_SCOPE_SIGNAL, 1798 }; 1799 const struct landlock_cred_security *new_subject = 1800 landlock_get_applicable_subject( 1801 current_cred(), signal_scope, &fown_layer); 1802 if (new_subject) { 1803 landlock_get_ruleset(new_subject->domain); 1804 fown_subject = *new_subject; 1805 } 1806 } 1807 1808 prev_dom = landlock_file(file)->fown_subject.domain; 1809 landlock_file(file)->fown_subject = fown_subject; 1810 #ifdef CONFIG_AUDIT 1811 landlock_file(file)->fown_layer = fown_layer; 1812 #endif /* CONFIG_AUDIT*/ 1813 1814 /* May be called in an RCU read-side critical section. */ 1815 landlock_put_ruleset_deferred(prev_dom); 1816 } 1817 1818 static void hook_file_free_security(struct file *file) 1819 { 1820 landlock_put_ruleset_deferred(landlock_file(file)->fown_subject.domain); 1821 } 1822 1823 static struct security_hook_list landlock_hooks[] __ro_after_init = { 1824 LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu), 1825 1826 LSM_HOOK_INIT(sb_delete, hook_sb_delete), 1827 LSM_HOOK_INIT(sb_mount, hook_sb_mount), 1828 LSM_HOOK_INIT(move_mount, hook_move_mount), 1829 LSM_HOOK_INIT(sb_umount, hook_sb_umount), 1830 LSM_HOOK_INIT(sb_remount, hook_sb_remount), 1831 LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), 1832 1833 LSM_HOOK_INIT(path_link, hook_path_link), 1834 LSM_HOOK_INIT(path_rename, hook_path_rename), 1835 LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), 1836 LSM_HOOK_INIT(path_mknod, hook_path_mknod), 1837 LSM_HOOK_INIT(path_symlink, hook_path_symlink), 1838 LSM_HOOK_INIT(path_unlink, hook_path_unlink), 1839 LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), 1840 LSM_HOOK_INIT(path_truncate, hook_path_truncate), 1841 1842 LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), 1843 LSM_HOOK_INIT(file_open, hook_file_open), 1844 LSM_HOOK_INIT(file_truncate, hook_file_truncate), 1845 LSM_HOOK_INIT(file_ioctl, hook_file_ioctl), 1846 LSM_HOOK_INIT(file_ioctl_compat, hook_file_ioctl_compat), 1847 LSM_HOOK_INIT(file_set_fowner, hook_file_set_fowner), 1848 LSM_HOOK_INIT(file_free_security, hook_file_free_security), 1849 }; 1850 1851 __init void landlock_add_fs_hooks(void) 1852 { 1853 security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), 1854 &landlock_lsmid); 1855 } 1856 1857 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 1858 1859 /* clang-format off */ 1860 static struct kunit_case test_cases[] = { 1861 KUNIT_CASE(test_no_more_access), 1862 KUNIT_CASE(test_scope_to_request_with_exec_none), 1863 KUNIT_CASE(test_scope_to_request_with_exec_some), 1864 KUNIT_CASE(test_scope_to_request_without_access), 1865 KUNIT_CASE(test_is_eacces_with_none), 1866 KUNIT_CASE(test_is_eacces_with_refer), 1867 KUNIT_CASE(test_is_eacces_with_write), 1868 {} 1869 }; 1870 /* clang-format on */ 1871 1872 static struct kunit_suite test_suite = { 1873 .name = "landlock_fs", 1874 .test_cases = test_cases, 1875 }; 1876 1877 kunit_test_suite(test_suite); 1878 1879 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 1880