// SPDX-License-Identifier: GPL-2.0-only
/*
 * Landlock - Filesystem management and hooks
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 * Copyright © 2021-2025 Microsoft Corporation
 * Copyright © 2022 Günther Noack <gnoack3000@gmail.com>
 * Copyright © 2023-2024 Google LLC
 */

#include <asm/ioctls.h>
#include <kunit/test.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/lsm_audit.h>
#include <linux/lsm_hooks.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/net.h>
#include <linux/path.h>
#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/workqueue.h>
#include <net/af_unix.h>
#include <uapi/linux/fiemap.h>
#include <uapi/linux/landlock.h>

#include "access.h"
#include "audit.h"
#include "common.h"
#include "cred.h"
#include "domain.h"
#include "fs.h"
#include "limits.h"
#include "object.h"
#include "ruleset.h"
#include "setup.h"

/* Underlying object management */

/*
 * Release callback of a Landlock object backed by an inode.
 *
 * @object: Object to detach from its inode.  Its lock is held on entry (see
 * the __releases() annotation) and is dropped on every path of this function.
 *
 * When an inode is still attached, the object/inode link is cut in both
 * directions and the inode reference is dropped with iput().  The
 * superblock's inode_refs counter is raised around iput() and its waiters are
 * woken up when it drops back to zero.
 */
static void release_inode(struct landlock_object *const object)
	__releases(object->lock)
{
	struct inode *const inode = object->underobj;
	struct super_block *sb;

	/* Nothing is attached anymore: only the lock needs to be released. */
	if (!inode) {
		spin_unlock(&object->lock);
		return;
	}

	/*
	 * Protects against concurrent use by hook_sb_delete() of the reference
	 * to the underlying inode.
	 */
	object->underobj = NULL;
	/*
	 * Makes sure that if the filesystem is concurrently unmounted,
	 * hook_sb_delete() will wait for us to finish iput().
	 */
	sb = inode->i_sb;
	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
	spin_unlock(&object->lock);
	/*
	 * Because object->underobj was not NULL, hook_sb_delete() and
	 * get_inode_object() guarantee that it is safe to reset
	 * landlock_inode(inode)->object while it is not NULL.  It is therefore
	 * not necessary to lock inode->i_lock.
	 */
	rcu_assign_pointer(landlock_inode(inode)->object, NULL);
	/*
	 * Now, new rules can safely be tied to @inode with get_inode_object().
	 */

	iput(inode);
	/* Pairs with the increment above; wakes up waiters on the last drop. */
	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
		wake_up_var(&landlock_superblock(sb)->inode_refs);
}

/* Operations handed to landlock_create_object() for inode-backed objects. */
static const struct landlock_object_underops landlock_fs_underops = {
	.release = release_inode
};

/* IOCTL helpers */

/**
 * is_masked_device_ioctl - Determine whether an IOCTL command is always
 * permitted with Landlock for device files.  These commands can not be
 * restricted on device files by enforcing a Landlock policy.
 *
 * @cmd: The IOCTL command that is supposed to be run.
 *
 * By default, any IOCTL on a device file requires the
 * LANDLOCK_ACCESS_FS_IOCTL_DEV right.  However, we blanket-permit some
 * commands, if:
 *
 * 1. The command is implemented in fs/ioctl.c's do_vfs_ioctl(),
 *    not in f_ops->unlocked_ioctl() or f_ops->compat_ioctl().
 *
 * 2. The command is harmless when invoked on devices.
 *
 * We also permit commands that do not make sense for devices, but where the
 * do_vfs_ioctl() implementation returns a more conventional error code.
 *
 * Any new IOCTL commands that are implemented in fs/ioctl.c's do_vfs_ioctl()
 * should be considered for inclusion here.
 *
 * Return: True if the IOCTL @cmd can not be restricted with Landlock for
 * device files, false otherwise.
 */
static __attribute_const__ bool is_masked_device_ioctl(const unsigned int cmd)
{
	/* Every case label below falls through to the single "return true". */
	switch (cmd) {
	/*
	 * FIOCLEX, FIONCLEX, FIONBIO and FIOASYNC manipulate the FD's
	 * close-on-exec and the file's buffered-IO and async flags.  These
	 * operations are also available through fcntl(2), and are
	 * unconditionally permitted in Landlock.
	 */
	case FIOCLEX:
	case FIONCLEX:
	case FIONBIO:
	case FIOASYNC:
	/*
	 * FIOQSIZE queries the size of a regular file, directory, or link.
	 *
	 * We still permit it, because it always returns -ENOTTY for
	 * other file types.
	 */
	case FIOQSIZE:
	/*
	 * FIFREEZE and FITHAW freeze and thaw the file system which the
	 * given file belongs to.  Requires CAP_SYS_ADMIN.
	 *
	 * These commands operate on the file system's superblock rather
	 * than on the file itself.  The same operations can also be
	 * done through any other file or directory on the same file
	 * system, so it is safe to permit these.
	 */
	case FIFREEZE:
	case FITHAW:
	/*
	 * FS_IOC_FIEMAP queries information about the allocation of
	 * blocks within a file.
	 *
	 * This IOCTL command only makes sense for regular files and is
	 * not implemented by devices. It is harmless to permit.
	 */
	case FS_IOC_FIEMAP:
	/*
	 * FIGETBSZ queries the file system's block size for a file or
	 * directory.
	 *
	 * This command operates on the file system's superblock rather
	 * than on the file itself.  The same operation can also be done
	 * through any other file or directory on the same file system,
	 * so it is safe to permit it.
	 */
	case FIGETBSZ:
	/*
	 * FICLONE, FICLONERANGE and FIDEDUPERANGE make files share
	 * their underlying storage ("reflink") between source and
	 * destination FDs, on file systems which support that.
	 *
	 * These IOCTL commands only apply to regular files
	 * and are harmless to permit for device files.
	 */
	case FICLONE:
	case FICLONERANGE:
	case FIDEDUPERANGE:
	/*
	 * FS_IOC_GETFSUUID and FS_IOC_GETFSSYSFSPATH both operate on
	 * the file system superblock, not on the specific file, so
	 * these operations are available through any other file on the
	 * same file system as well.
	 */
	case FS_IOC_GETFSUUID:
	case FS_IOC_GETFSSYSFSPATH:
		return true;

	/*
	 * FIONREAD, FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_IOC_FSGETXATTR and
	 * FS_IOC_FSSETXATTR are forwarded to device implementations.
	 */

	/*
	 * file_ioctl() commands (FIBMAP, FS_IOC_RESVSP, FS_IOC_RESVSP64,
	 * FS_IOC_UNRESVSP, FS_IOC_UNRESVSP64 and FS_IOC_ZERO_RANGE) are
	 * forwarded to device implementations, so not permitted.
	 */

	/* Other commands are guarded by the access right. */
	default:
		return false;
	}
}

/*
 * is_masked_device_ioctl_compat - same as the helper above, but checking the
 * "compat" IOCTL commands.
 *
 * The IOCTL commands with special handling in compat-mode should behave the
 * same as their non-compat counterparts.
 *
 * Commands without an explicit case here are delegated to
 * is_masked_device_ioctl().
 */
static __attribute_const__ bool
is_masked_device_ioctl_compat(const unsigned int cmd)
{
	switch (cmd) {
	/* FICLONE is permitted, same as in the non-compat variant. */
	case FICLONE:
		return true;

#if defined(CONFIG_X86_64)
	/*
	 * FS_IOC_RESVSP_32, FS_IOC_RESVSP64_32, FS_IOC_UNRESVSP_32,
	 * FS_IOC_UNRESVSP64_32, FS_IOC_ZERO_RANGE_32: not blanket-permitted,
	 * for consistency with their non-compat variants.
	 */
	case FS_IOC_RESVSP_32:
	case FS_IOC_RESVSP64_32:
	case FS_IOC_UNRESVSP_32:
	case FS_IOC_UNRESVSP64_32:
	case FS_IOC_ZERO_RANGE_32:
#endif

	/*
	 * FS_IOC32_GETFLAGS, FS_IOC32_SETFLAGS are forwarded to their device
	 * implementations.
	 */
	case FS_IOC32_GETFLAGS:
	case FS_IOC32_SETFLAGS:
		return false;
	default:
		return is_masked_device_ioctl(cmd);
	}
}

/* Ruleset management */

/*
 * Gets the Landlock object tied to @inode, creating and tying a new one if
 * there is none yet.
 *
 * @inode: Inode to look up or to tie a new object to.
 *
 * An existing object is returned once its usage counter has been successfully
 * incremented.  A new object is tied under inode->i_lock, together with an
 * extra inode reference taken with ihold().
 *
 * Return: The object, or the error pointer returned by
 * landlock_create_object().
 */
static struct landlock_object *get_inode_object(struct inode *const inode)
{
	struct landlock_object *object, *new_object;
	struct landlock_inode_security *inode_sec = landlock_inode(inode);

	rcu_read_lock();
retry:
	object = rcu_dereference(inode_sec->object);
	if (object) {
		if (likely(refcount_inc_not_zero(&object->usage))) {
			rcu_read_unlock();
			return object;
		}
		/*
		 * We are racing with release_inode(), the object is going
		 * away.  Wait for release_inode(), then retry.
		 */
		spin_lock(&object->lock);
		spin_unlock(&object->lock);
		goto retry;
	}
	rcu_read_unlock();

	/*
	 * If there is no object tied to @inode, then create a new one (without
	 * holding any locks).
	 */
	new_object = landlock_create_object(&landlock_fs_underops, inode);
	if (IS_ERR(new_object))
		return new_object;

	/*
	 * Protects against concurrent calls to get_inode_object() or
	 * hook_sb_delete().
	 */
	spin_lock(&inode->i_lock);
	if (unlikely(rcu_access_pointer(inode_sec->object))) {
		/* Someone else just created the object, bail out and retry. */
		spin_unlock(&inode->i_lock);
		kfree(new_object);

		/* The retry label expects the RCU read lock to be held. */
		rcu_read_lock();
		goto retry;
	}

	/*
	 * @inode will be released by hook_sb_delete() on its superblock
	 * shutdown, or by release_inode() when no more ruleset references the
	 * related object.
	 */
	ihold(inode);
	rcu_assign_pointer(inode_sec->object, new_object);
	spin_unlock(&inode->i_lock);
	return new_object;
}

/* All access rights that can be tied to files. 
*/ 313 /* clang-format off */ 314 #define ACCESS_FILE ( \ 315 LANDLOCK_ACCESS_FS_EXECUTE | \ 316 LANDLOCK_ACCESS_FS_WRITE_FILE | \ 317 LANDLOCK_ACCESS_FS_READ_FILE | \ 318 LANDLOCK_ACCESS_FS_TRUNCATE | \ 319 LANDLOCK_ACCESS_FS_IOCTL_DEV | \ 320 LANDLOCK_ACCESS_FS_RESOLVE_UNIX) 321 /* clang-format on */ 322 323 /* 324 * @path: Should have been checked by get_path_from_fd(). 325 */ 326 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, 327 const struct path *const path, 328 access_mask_t access_rights, const u32 flags) 329 { 330 int err; 331 struct landlock_id id = { 332 .type = LANDLOCK_KEY_INODE, 333 }; 334 335 /* Files only get access rights that make sense. */ 336 if (!d_is_dir(path->dentry) && 337 !access_mask_subset(access_rights, ACCESS_FILE)) 338 return -EINVAL; 339 if (WARN_ON_ONCE(ruleset->num_layers != 1)) 340 return -EINVAL; 341 342 /* Transforms relative access rights to absolute ones. */ 343 access_rights |= LANDLOCK_MASK_ACCESS_FS & 344 ~landlock_get_fs_access_mask(ruleset, 0); 345 id.key.object = get_inode_object(d_backing_inode(path->dentry)); 346 if (IS_ERR(id.key.object)) 347 return PTR_ERR(id.key.object); 348 mutex_lock(&ruleset->lock); 349 err = landlock_insert_rule(ruleset, id, access_rights, flags); 350 mutex_unlock(&ruleset->lock); 351 /* 352 * No need to check for an error because landlock_insert_rule() 353 * increments the refcount for the new object if needed. 354 */ 355 landlock_put_object(id.key.object); 356 return err; 357 } 358 359 /* Access-control management */ 360 361 /* 362 * The lifetime of the returned rule is tied to @domain. 363 * 364 * Returns NULL if no rule is found or if @dentry is negative. 
365 */ 366 static const struct landlock_rule * 367 find_rule(const struct landlock_ruleset *const domain, 368 const struct dentry *const dentry) 369 { 370 const struct landlock_rule *rule; 371 const struct inode *inode; 372 struct landlock_id id = { 373 .type = LANDLOCK_KEY_INODE, 374 }; 375 376 /* Ignores nonexistent leafs. */ 377 if (d_is_negative(dentry)) 378 return NULL; 379 380 inode = d_backing_inode(dentry); 381 rcu_read_lock(); 382 id.key.object = rcu_dereference(landlock_inode(inode)->object); 383 rule = landlock_find_rule(domain, id); 384 rcu_read_unlock(); 385 return rule; 386 } 387 388 /* 389 * Allows access to pseudo filesystems that will never be mountable (e.g. 390 * sockfs, pipefs), but can still be reachable through 391 * /proc/<pid>/fd/<file-descriptor> 392 */ 393 static bool is_nouser_or_private(const struct dentry *dentry) 394 { 395 return (dentry->d_sb->s_flags & SB_NOUSER) || 396 (d_is_positive(dentry) && 397 unlikely(IS_PRIVATE(d_backing_inode(dentry)))); 398 } 399 400 static const struct access_masks any_fs = { 401 .fs = ~0, 402 }; 403 404 /* 405 * Returns true iff the child file with the given src_child access rights under 406 * src_parent would result in having the same or fewer access rights if it were 407 * moved under new_parent. 
408 */ 409 static bool may_refer(const struct layer_masks *const src_parent, 410 const struct layer_masks *const src_child, 411 const struct layer_masks *const new_parent, 412 const bool child_is_dir) 413 { 414 for (size_t i = 0; i < ARRAY_SIZE(new_parent->layers); i++) { 415 access_mask_t child_access = src_parent->layers[i].access & 416 src_child->layers[i].access; 417 access_mask_t parent_access = new_parent->layers[i].access; 418 419 if (!child_is_dir) { 420 child_access &= ACCESS_FILE; 421 parent_access &= ACCESS_FILE; 422 } 423 424 if (!access_mask_subset(child_access, parent_access)) 425 return false; 426 } 427 return true; 428 } 429 430 /* 431 * Check that a destination file hierarchy has more restrictions than a source 432 * file hierarchy. This is only used for link and rename actions. 433 * 434 * Return: True if child1 may be moved from parent1 to parent2 without 435 * increasing its access rights (if child2 is set, an additional condition is 436 * that child2 may be used from parent2 to parent1 without increasing its access 437 * rights), false otherwise. 438 */ 439 static bool no_more_access(const struct layer_masks *const parent1, 440 const struct layer_masks *const child1, 441 const bool child1_is_dir, 442 const struct layer_masks *const parent2, 443 const struct layer_masks *const child2, 444 const bool child2_is_dir) 445 { 446 if (!may_refer(parent1, child1, parent2, child1_is_dir)) 447 return false; 448 449 if (!child2) 450 return true; 451 452 return may_refer(parent2, child2, parent1, child2_is_dir); 453 } 454 455 #define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) 456 #define NMA_FALSE(...) 
KUNIT_EXPECT_FALSE(test, no_more_access(__VA_ARGS__)) 457 458 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 459 460 static void test_no_more_access(struct kunit *const test) 461 { 462 const struct layer_masks rx0 = { 463 .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE | 464 LANDLOCK_ACCESS_FS_READ_FILE, 465 }; 466 const struct layer_masks mx0 = { 467 .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE | 468 LANDLOCK_ACCESS_FS_MAKE_REG, 469 }; 470 const struct layer_masks x0 = { 471 .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE, 472 }; 473 const struct layer_masks x1 = { 474 .layers[1].access = LANDLOCK_ACCESS_FS_EXECUTE, 475 }; 476 const struct layer_masks x01 = { 477 .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE, 478 .layers[1].access = LANDLOCK_ACCESS_FS_EXECUTE, 479 }; 480 const struct layer_masks allows_all = {}; 481 482 /* Checks without restriction. */ 483 NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); 484 NMA_TRUE(&allows_all, &x0, false, &allows_all, NULL, false); 485 NMA_FALSE(&x0, &x0, false, &allows_all, NULL, false); 486 487 /* 488 * Checks that we can only refer a file if no more access could be 489 * inherited. 490 */ 491 NMA_TRUE(&x0, &x0, false, &rx0, NULL, false); 492 NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false); 493 NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false); 494 NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false); 495 496 /* Checks allowed referring with different nested domains. */ 497 NMA_TRUE(&x0, &x1, false, &x0, NULL, false); 498 NMA_TRUE(&x1, &x0, false, &x0, NULL, false); 499 NMA_TRUE(&x0, &x01, false, &x0, NULL, false); 500 NMA_TRUE(&x0, &x01, false, &rx0, NULL, false); 501 NMA_TRUE(&x01, &x0, false, &x0, NULL, false); 502 NMA_TRUE(&x01, &x0, false, &rx0, NULL, false); 503 NMA_FALSE(&x01, &x01, false, &x0, NULL, false); 504 505 /* Checks that file access rights are also enforced for a directory. */ 506 NMA_FALSE(&rx0, &rx0, true, &x0, NULL, false); 507 508 /* Checks that directory access rights don't impact file referring... 
*/ 509 NMA_TRUE(&mx0, &mx0, false, &x0, NULL, false); 510 /* ...but only directory referring. */ 511 NMA_FALSE(&mx0, &mx0, true, &x0, NULL, false); 512 513 /* Checks directory exchange. */ 514 NMA_TRUE(&mx0, &mx0, true, &mx0, &mx0, true); 515 NMA_TRUE(&mx0, &mx0, true, &mx0, &x0, true); 516 NMA_FALSE(&mx0, &mx0, true, &x0, &mx0, true); 517 NMA_FALSE(&mx0, &mx0, true, &x0, &x0, true); 518 NMA_FALSE(&mx0, &mx0, true, &x1, &x1, true); 519 520 /* Checks file exchange with directory access rights... */ 521 NMA_TRUE(&mx0, &mx0, false, &mx0, &mx0, false); 522 NMA_TRUE(&mx0, &mx0, false, &mx0, &x0, false); 523 NMA_TRUE(&mx0, &mx0, false, &x0, &mx0, false); 524 NMA_TRUE(&mx0, &mx0, false, &x0, &x0, false); 525 /* ...and with file access rights. */ 526 NMA_TRUE(&rx0, &rx0, false, &rx0, &rx0, false); 527 NMA_TRUE(&rx0, &rx0, false, &rx0, &x0, false); 528 NMA_FALSE(&rx0, &rx0, false, &x0, &rx0, false); 529 NMA_FALSE(&rx0, &rx0, false, &x0, &x0, false); 530 NMA_FALSE(&rx0, &rx0, false, &x1, &x1, false); 531 532 /* 533 * Allowing the following requests should not be a security risk 534 * because domain 0 denies execute access, and domain 1 is always 535 * nested with domain 0. However, adding an exception for this case 536 * would mean to check all nested domains to make sure none can get 537 * more privileges (e.g. processes only sandboxed by domain 0). 538 * Moreover, this behavior (i.e. composition of N domains) could then 539 * be inconsistent compared to domain 1's ruleset alone (e.g. it might 540 * be denied to link/rename with domain 1's ruleset, whereas it would 541 * be allowed if nested on top of domain 0). Another drawback would be 542 * to create a cover channel that could enable sandboxed processes to 543 * infer most of the filesystem restrictions from their domain. To 544 * make it simple, efficient, safe, and more consistent, this case is 545 * always denied. 
546 */ 547 NMA_FALSE(&x1, &x1, false, &x0, NULL, false); 548 NMA_FALSE(&x1, &x1, false, &rx0, NULL, false); 549 NMA_FALSE(&x1, &x1, true, &x0, NULL, false); 550 NMA_FALSE(&x1, &x1, true, &rx0, NULL, false); 551 552 /* Checks the same case of exclusive domains with a file... */ 553 NMA_TRUE(&x1, &x1, false, &x01, NULL, false); 554 NMA_FALSE(&x1, &x1, false, &x01, &x0, false); 555 NMA_FALSE(&x1, &x1, false, &x01, &x01, false); 556 NMA_FALSE(&x1, &x1, false, &x0, &x0, false); 557 /* ...and with a directory. */ 558 NMA_FALSE(&x1, &x1, false, &x0, &x0, true); 559 NMA_FALSE(&x1, &x1, true, &x0, &x0, false); 560 NMA_FALSE(&x1, &x1, true, &x0, &x0, true); 561 } 562 563 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 564 565 #undef NMA_TRUE 566 #undef NMA_FALSE 567 568 static bool is_layer_masks_allowed(const struct layer_masks *masks) 569 { 570 for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) { 571 if (masks->layers[i].access) 572 return false; 573 } 574 return true; 575 } 576 577 /* 578 * Removes @masks accesses that are not requested. 579 * 580 * Returns true if the request is allowed, false otherwise. 581 */ 582 static bool scope_to_request(const access_mask_t access_request, 583 struct layer_masks *masks) 584 { 585 bool saw_unfulfilled_access = false; 586 587 if (WARN_ON_ONCE(!masks)) 588 return true; 589 590 for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) { 591 masks->layers[i].access &= access_request; 592 if (masks->layers[i].access) 593 saw_unfulfilled_access = true; 594 } 595 return !saw_unfulfilled_access; 596 } 597 598 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 599 600 static void test_scope_to_request_with_exec_none(struct kunit *const test) 601 { 602 /* Allows everything. */ 603 struct layer_masks masks = {}; 604 605 /* Checks and scopes with execute. 
*/ 606 KUNIT_EXPECT_TRUE(test, 607 scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &masks)); 608 KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[0].access); 609 } 610 611 static void test_scope_to_request_with_exec_some(struct kunit *const test) 612 { 613 /* Denies execute and write. */ 614 struct layer_masks masks = { 615 .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE, 616 .layers[1].access = LANDLOCK_ACCESS_FS_WRITE_FILE, 617 }; 618 619 /* Checks and scopes with execute. */ 620 KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, 621 &masks)); 622 /* 623 * These casts to access_mask_t are needed because typeof(), used in 624 * KUNIT_EXPECT_EQ(), does not work on bitfields. 625 */ 626 KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE, 627 (access_mask_t)masks.layers[0].access); 628 KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[1].access); 629 } 630 631 static void test_scope_to_request_without_access(struct kunit *const test) 632 { 633 /* Denies execute and write. */ 634 struct layer_masks masks = { 635 .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE, 636 .layers[1].access = LANDLOCK_ACCESS_FS_WRITE_FILE, 637 }; 638 639 /* Checks and scopes without access request. */ 640 KUNIT_EXPECT_TRUE(test, scope_to_request(0, &masks)); 641 KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[0].access); 642 KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[1].access); 643 } 644 645 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 646 647 /* 648 * Returns true if there is at least one access right different than 649 * LANDLOCK_ACCESS_FS_REFER. 650 */ 651 static bool is_eacces(const struct layer_masks *masks, 652 const access_mask_t access_request) 653 { 654 if (!masks) 655 return false; 656 657 for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) { 658 /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. 
*/ 659 if (masks->layers[i].access & access_request & 660 ~LANDLOCK_ACCESS_FS_REFER) 661 return true; 662 } 663 return false; 664 } 665 666 #define IE_TRUE(...) KUNIT_EXPECT_TRUE(test, is_eacces(__VA_ARGS__)) 667 #define IE_FALSE(...) KUNIT_EXPECT_FALSE(test, is_eacces(__VA_ARGS__)) 668 669 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 670 671 static void test_is_eacces_with_none(struct kunit *const test) 672 { 673 const struct layer_masks masks = {}; 674 675 IE_FALSE(&masks, 0); 676 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); 677 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); 678 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); 679 } 680 681 static void test_is_eacces_with_refer(struct kunit *const test) 682 { 683 const struct layer_masks masks = { 684 .layers[0].access = LANDLOCK_ACCESS_FS_REFER, 685 }; 686 687 IE_FALSE(&masks, 0); 688 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); 689 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); 690 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); 691 } 692 693 static void test_is_eacces_with_write(struct kunit *const test) 694 { 695 const struct layer_masks masks = { 696 .layers[0].access = LANDLOCK_ACCESS_FS_WRITE_FILE, 697 }; 698 699 IE_FALSE(&masks, 0); 700 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); 701 IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); 702 703 IE_TRUE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); 704 } 705 706 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 707 708 #undef IE_TRUE 709 #undef IE_FALSE 710 711 /** 712 * is_access_to_paths_allowed - Check accesses for requests with a common path 713 * 714 * @domain: Domain to check against. 715 * @path: File hierarchy to walk through. For refer checks, this would be 716 * the common mountpoint. 717 * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is 718 * equal to @layer_masks_parent2 (if any). This is tied to the unique 719 * requested path for most actions, or the source in case of a refer action 720 * (i.e. 
rename or link), or the source and destination in case of
 *     RENAME_EXCHANGE.
 * @layer_masks_parent1: Pointer to a matrix of layer masks per access
 *     masks, identifying the layers that forbid a specific access.  Bits from
 *     this matrix can be unset according to the @path walk.  An empty matrix
 *     means that @domain allows all possible Landlock accesses (i.e. not only
 *     those identified by @access_request_parent1).  This matrix can
 *     initially refer to domain layer masks and, when the accesses for the
 *     destination and source are the same, to requested layer masks.
 * @log_request_parent1: Audit request to fill if the related access is denied.
 * @dentry_child1: Dentry to the initial child of the parent1 path.  This
 *     pointer must be NULL for non-refer actions (i.e. not link nor rename).
 * @access_request_parent2: Similar to @access_request_parent1 but for a
 *     request involving a source and a destination.  This refers to the
 *     destination, except in case of RENAME_EXCHANGE where it also refers to
 *     the source.  Must be set to 0 when using a simple path request.
 * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
 *     action.  This must be NULL otherwise.
 * @log_request_parent2: Audit request to fill if the related access is denied.
 * @dentry_child2: Dentry to the initial child of the parent2 path.  This
 *     pointer is only set for RENAME_EXCHANGE actions and must be NULL
 *     otherwise.
 *
 * This helper first checks that the destination has a superset of restrictions
 * compared to the source (if any) for a common path.  Because of
 * RENAME_EXCHANGE actions, source and destinations may be swapped.  It then
 * checks that the collected accesses and the remaining ones are enough to
 * allow the request.
 *
 * Return: True if the access request is granted, false otherwise.
 */
static bool
is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
			   const struct path *const path,
			   const access_mask_t access_request_parent1,
			   struct layer_masks *layer_masks_parent1,
			   struct landlock_request *const log_request_parent1,
			   struct dentry *const dentry_child1,
			   const access_mask_t access_request_parent2,
			   struct layer_masks *layer_masks_parent2,
			   struct landlock_request *const log_request_parent2,
			   struct dentry *const dentry_child2)
{
	bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
	     child1_is_directory = true, child2_is_directory = true;
	struct path walker_path;
	access_mask_t access_masked_parent1, access_masked_parent2;
	struct layer_masks _layer_masks_child1, _layer_masks_child2;
	struct layer_masks *layer_masks_child1 = NULL,
			   *layer_masks_child2 = NULL;

	/* Nothing is requested: nothing to check. */
	if (!access_request_parent1 && !access_request_parent2)
		return true;

	if (WARN_ON_ONCE(!path))
		return true;

	if (is_nouser_or_private(path->dentry))
		return true;

	if (WARN_ON_ONCE(!layer_masks_parent1))
		return false;

	allowed_parent1 = is_layer_masks_allowed(layer_masks_parent1);

	if (unlikely(layer_masks_parent2)) {
		/* A double request needs the source child. */
		if (WARN_ON_ONCE(!dentry_child1))
			return false;

		allowed_parent2 = is_layer_masks_allowed(layer_masks_parent2);

		/*
		 * For a double request, first check for potential privilege
		 * escalation by looking at domain handled accesses (which are
		 * a superset of the meaningful requested accesses).
		 */
		access_masked_parent1 = access_masked_parent2 =
			landlock_union_access_masks(domain).fs;
		is_dom_check = true;
	} else {
		if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
			return false;
		/* For a simple request, only check for requested accesses. */
		access_masked_parent1 = access_request_parent1;
		access_masked_parent2 = access_request_parent2;
		is_dom_check = false;
	}

	if (unlikely(dentry_child1)) {
		/*
		 * Get the layer masks for the child dentries for use by domain
		 * check later.
		 */
		if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
					      &_layer_masks_child1,
					      LANDLOCK_KEY_INODE))
			landlock_unmask_layers(find_rule(domain, dentry_child1),
					       &_layer_masks_child1);
		layer_masks_child1 = &_layer_masks_child1;
		child1_is_directory = d_is_dir(dentry_child1);
	}
	if (unlikely(dentry_child2)) {
		/* Same as above, for the second child. */
		if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
					      &_layer_masks_child2,
					      LANDLOCK_KEY_INODE))
			landlock_unmask_layers(find_rule(domain, dentry_child2),
					       &_layer_masks_child2);
		layer_masks_child2 = &_layer_masks_child2;
		child2_is_directory = d_is_dir(dentry_child2);
	}

	/* Walks with a private copy of @path, released by path_put() below. */
	walker_path = *path;
	path_get(&walker_path);
	/*
	 * We need to walk through all the hierarchy to not miss any relevant
	 * restriction.
	 */
	while (true) {
		const struct landlock_rule *rule;

		/*
		 * If at least all accesses allowed on the destination are
		 * already allowed on the source, respectively if there is at
		 * least as much as restrictions on the destination than on the
		 * source, then we can safely refer files from the source to
		 * the destination without risking a privilege escalation.
		 * This also applies in the case of RENAME_EXCHANGE, which
		 * implies checks on both direction.  This is crucial for
		 * standalone multilayered security policies.  Furthermore,
		 * this helps avoid policy writers to shoot themselves in the
		 * foot.
		 */
		if (unlikely(is_dom_check &&
			     no_more_access(
				     layer_masks_parent1, layer_masks_child1,
				     child1_is_directory, layer_masks_parent2,
				     layer_masks_child2,
				     child2_is_directory))) {
			/*
			 * Now, downgrades the remaining checks from domain
			 * handled accesses to requested accesses.
			 */
			is_dom_check = false;
			access_masked_parent1 = access_request_parent1;
			access_masked_parent2 = access_request_parent2;

			allowed_parent1 =
				allowed_parent1 ||
				scope_to_request(access_masked_parent1,
						 layer_masks_parent1);
			allowed_parent2 =
				allowed_parent2 ||
				scope_to_request(access_masked_parent2,
						 layer_masks_parent2);

			/* Stops when all accesses are granted. */
			if (allowed_parent1 && allowed_parent2)
				break;
		}

		rule = find_rule(domain, walker_path.dentry);
		allowed_parent1 =
			allowed_parent1 ||
			landlock_unmask_layers(rule, layer_masks_parent1);
		allowed_parent2 =
			allowed_parent2 ||
			landlock_unmask_layers(rule, layer_masks_parent2);

		/* Stops when a rule from each layer grants access. */
		if (allowed_parent1 && allowed_parent2)
			break;

jump_up:
		if (walker_path.dentry == walker_path.mnt->mnt_root) {
			if (follow_up(&walker_path)) {
				/* Ignores hidden mount points. */
				goto jump_up;
			} else {
				/*
				 * Stops at the real root.  Denies access
				 * because not all layers have granted access.
				 */
				break;
			}
		}

		if (unlikely(IS_ROOT(walker_path.dentry))) {
			if (likely(walker_path.mnt->mnt_flags & MNT_INTERNAL)) {
				/*
				 * Stops and allows access when reaching disconnected root
				 * directories that are part of internal filesystems (e.g. nsfs,
				 * which is reachable through /proc/<pid>/ns/<namespace>).
				 */
				allowed_parent1 = true;
				allowed_parent2 = true;
				break;
			}

			/*
			 * We reached a disconnected root directory from a bind mount.
			 * Let's continue the walk with the mount point we missed.
			 */
			dput(walker_path.dentry);
			walker_path.dentry = walker_path.mnt->mnt_root;
			dget(walker_path.dentry);
		} else {
			/* Regular step: moves up to the parent dentry. */
			struct dentry *const parent_dentry =
				dget_parent(walker_path.dentry);

			dput(walker_path.dentry);
			walker_path.dentry = parent_dentry;
		}
	}
	path_put(&walker_path);

	/*
	 * Check CONFIG_AUDIT to enable elision of log_request_parent* and
	 * associated caller's stack variables thanks to dead code elimination.
	 */
#ifdef CONFIG_AUDIT
	if (!allowed_parent1 && log_request_parent1) {
		log_request_parent1->type = LANDLOCK_REQUEST_FS_ACCESS;
		log_request_parent1->audit.type = LSM_AUDIT_DATA_PATH;
		log_request_parent1->audit.u.path = *path;
		log_request_parent1->access = access_masked_parent1;
		log_request_parent1->layer_masks = layer_masks_parent1;
	}

	if (!allowed_parent2 && log_request_parent2) {
		log_request_parent2->type = LANDLOCK_REQUEST_FS_ACCESS;
		log_request_parent2->audit.type = LSM_AUDIT_DATA_PATH;
		log_request_parent2->audit.u.path = *path;
		log_request_parent2->access = access_masked_parent2;
		log_request_parent2->layer_masks = layer_masks_parent2;
	}
#endif /* CONFIG_AUDIT */

	return allowed_parent1 && allowed_parent2;
}

/*
 * Checks @access_request on @path for the current credentials.
 *
 * Returns 0 when no subject is applicable for this request or when the access
 * is allowed.  Otherwise the denial is logged and -EACCES is returned.
 */
static int current_check_access_path(const struct path *const path,
				     access_mask_t access_request)
{
	const struct access_masks masks = {
		.fs = access_request,
	};
	const struct landlock_cred_security *const subject =
		landlock_get_applicable_subject(current_cred(), masks, NULL);
	struct layer_masks layer_masks;
	struct landlock_request request = {};

	if (!subject)
		return 0;

	/* The request is replaced by what landlock_init_layer_masks() returns. */
	access_request = landlock_init_layer_masks(subject->domain,
						   access_request, &layer_masks,
						   LANDLOCK_KEY_INODE);
	if (is_access_to_paths_allowed(subject->domain, path, access_request,
				       &layer_masks, &request, NULL, 0, NULL,
				       NULL, NULL))
		return 0;

	landlock_log_denial(subject, &request);
	return -EACCES;
}

/*
 * Maps the file type bits of @mode to the LANDLOCK_ACCESS_FS_MAKE_* right of
 * that type.
 */
static __attribute_const__ access_mask_t get_mode_access(const umode_t mode)
{
	switch (mode & S_IFMT) {
	case S_IFLNK:
		return LANDLOCK_ACCESS_FS_MAKE_SYM;
	case S_IFDIR:
		return LANDLOCK_ACCESS_FS_MAKE_DIR;
	case S_IFCHR:
		return LANDLOCK_ACCESS_FS_MAKE_CHAR;
	case S_IFBLK:
		return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
	case S_IFIFO:
		return LANDLOCK_ACCESS_FS_MAKE_FIFO;
	case S_IFSOCK:
		return LANDLOCK_ACCESS_FS_MAKE_SOCK;
	case S_IFREG:
	case 0:
		/* A zero mode translates to S_IFREG. */
	default:
		/* Treats weird files as regular files. */
		return LANDLOCK_ACCESS_FS_MAKE_REG;
	}
}

/*
 * Returns the removal right matching @dentry's type, or 0 if @dentry is
 * negative.
 */
static access_mask_t maybe_remove(const struct dentry *const dentry)
{
	if (d_is_negative(dentry))
		return 0;
	return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
				  LANDLOCK_ACCESS_FS_REMOVE_FILE;
}

/**
 * collect_domain_accesses - Walk through a file path and collect accesses
 *
 * @domain: Domain to check against.
 * @mnt_root: Last directory to check.
 * @dir: Directory to start the walk from.
 * @layer_masks_dom: Where to store the collected accesses.
 *
 * This helper is useful to begin a path walk from the @dir directory to a
 * @mnt_root directory used as a mount point.  This mount point is the common
 * ancestor between the source and the destination of a renamed and linked
 * file.  While walking from @dir to @mnt_root, we record all the domain's
 * allowed accesses in @layer_masks_dom.
 *
 * Because of disconnected directories, this walk may not reach @mnt_root.  In
 * this case, the walk will continue to @mnt_root after this call.
1034 * 1035 * This is similar to is_access_to_paths_allowed() but much simpler because it 1036 * only handles walking on the same mount point and only checks one set of 1037 * accesses. 1038 * 1039 * Return: True if all the domain access rights are allowed for @dir, false if 1040 * the walk reached @mnt_root. 1041 */ 1042 static bool collect_domain_accesses(const struct landlock_ruleset *const domain, 1043 const struct dentry *const mnt_root, 1044 struct dentry *dir, 1045 struct layer_masks *layer_masks_dom) 1046 { 1047 bool ret = false; 1048 1049 if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) 1050 return true; 1051 if (is_nouser_or_private(dir)) 1052 return true; 1053 1054 if (!landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 1055 layer_masks_dom, LANDLOCK_KEY_INODE)) 1056 return true; 1057 1058 dget(dir); 1059 while (true) { 1060 struct dentry *parent_dentry; 1061 1062 /* Gets all layers allowing all domain accesses. */ 1063 if (landlock_unmask_layers(find_rule(domain, dir), 1064 layer_masks_dom)) { 1065 /* 1066 * Stops when all handled accesses are allowed by at 1067 * least one rule in each layer. 1068 */ 1069 ret = true; 1070 break; 1071 } 1072 1073 /* 1074 * Stops at the mount point or the filesystem root for a disconnected 1075 * directory. 1076 */ 1077 if (dir == mnt_root || unlikely(IS_ROOT(dir))) 1078 break; 1079 1080 parent_dentry = dget_parent(dir); 1081 dput(dir); 1082 dir = parent_dentry; 1083 } 1084 dput(dir); 1085 return ret; 1086 } 1087 1088 /** 1089 * current_check_refer_path - Check if a rename or link action is allowed 1090 * 1091 * @old_dentry: File or directory requested to be moved or linked. 1092 * @new_dir: Destination parent directory. 1093 * @new_dentry: Destination file or directory. 1094 * @removable: Sets to true if it is a rename operation. 1095 * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. 
*
 * Because of its unprivileged constraints, Landlock relies on file hierarchies
 * (and not only inodes) to tie access rights to files. Being able to link or
 * rename a file hierarchy brings some challenges. Indeed, moving or linking a
 * file (i.e. creating a new reference to an inode) can have an impact on the
 * actions allowed for a set of files if it would change its parent directory
 * (i.e. reparenting).
 *
 * To avoid trivial access right bypasses, Landlock first checks if the file or
 * directory requested to be moved would gain new access rights inherited from
 * its new hierarchy. Before returning any error, Landlock then checks that
 * the parent source hierarchy and the destination hierarchy would allow the
 * link or rename action. If it is not the case, an error with EACCES is
 * returned to inform user space that there is no way to remove or create the
 * requested source file type. If it should be allowed but the new inherited
 * access rights would be greater than the source access rights, then the
 * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables
 * user space to abort the whole operation if there is no way to do it, or to
 * manually copy the source to the destination if this remains allowed, e.g.
 * because file creation is allowed on the destination directory but not direct
 * linking.
 *
 * To achieve this goal, the kernel needs to compare two file hierarchies: the
 * one identifying the source file or directory (including itself), and the
 * destination one. This can be seen as a multilayer partial ordering problem.
 * The kernel walks through these paths and collects in a matrix the access
 * rights that are denied per layer. These matrices are then compared to see
 * if the destination one has more (or the same) restrictions as the source
 * one. If this is the case, the requested action will not return EXDEV, which
 * doesn't mean the action is allowed. The parent hierarchy of the source
 * (i.e. parent directory), and the destination hierarchy must also be checked
 * to verify that they explicitly allow such action (i.e. referencing,
 * creation and potentially removal rights). The kernel implementation is then
 * required to rely on potentially four matrices of access rights: one for the
 * source file or directory (i.e. the child), potentially another one for the
 * other source/destination (in case of RENAME_EXCHANGE), one for the source
 * parent hierarchy and a last one for the destination hierarchy. These
 * ephemeral matrices take some space on the stack, which limits the number of
 * layers to a number deemed reasonable: 16.
 *
 * Return: 0 if access is allowed, -ENOENT if @old_dentry (or @new_dentry when
 * @exchange is set) is negative, -EXDEV if @old_dentry would inherit new
 * access rights from @new_dir, or -EACCES if file removal or creation is
 * denied.
 */
static int current_check_refer_path(struct dentry *const old_dentry,
				    const struct path *const new_dir,
				    struct dentry *const new_dentry,
				    const bool removable, const bool exchange)
{
	const struct landlock_cred_security *const subject =
		landlock_get_applicable_subject(current_cred(), any_fs, NULL);
	bool allow_parent1, allow_parent2;
	access_mask_t access_request_parent1, access_request_parent2;
	struct path mnt_dir;
	struct dentry *old_parent;
	struct layer_masks layer_masks_parent1 = {}, layer_masks_parent2 = {};
	struct landlock_request request1 = {}, request2 = {};

	if (!subject)
		return 0;

	if (unlikely(d_is_negative(old_dentry)))
		return -ENOENT;
	/*
	 * parent1 is the source parent hierarchy: with an exchange it must
	 * allow creating the type of file coming from the destination.
	 */
	if (exchange) {
		if (unlikely(d_is_negative(new_dentry)))
			return -ENOENT;
		access_request_parent1 =
			get_mode_access(d_backing_inode(new_dentry)->i_mode);
	} else {
		access_request_parent1 = 0;
	}
	/* parent2 is the destination: it must allow creating the source type. */
	access_request_parent2 =
		get_mode_access(d_backing_inode(old_dentry)->i_mode);
	if (removable) {
		access_request_parent1 |= maybe_remove(old_dentry);
		access_request_parent2 |= maybe_remove(new_dentry);
	}

	/* The mount points are the same for old and new paths, cf. EXDEV. */
	if (old_dentry->d_parent == new_dir->dentry) {
		/*
		 * The LANDLOCK_ACCESS_FS_REFER access right is not required
		 * for same-directory referer (i.e. no reparenting).
		 */
		access_request_parent1 = landlock_init_layer_masks(
			subject->domain,
			access_request_parent1 | access_request_parent2,
			&layer_masks_parent1, LANDLOCK_KEY_INODE);
		if (is_access_to_paths_allowed(subject->domain, new_dir,
					       access_request_parent1,
					       &layer_masks_parent1, &request1,
					       NULL, 0, NULL, NULL, NULL))
			return 0;

		landlock_log_denial(subject, &request1);
		return -EACCES;
	}

	access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
	access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;

	/* Saves the common mount point. */
	mnt_dir.mnt = new_dir->mnt;
	mnt_dir.dentry = new_dir->mnt->mnt_root;

	/*
	 * old_dentry may be the root of the common mount point and
	 * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and
	 * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because
	 * we keep a reference to old_dentry.
	 */
	old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry :
						      old_dentry->d_parent;

	/* new_dir->dentry is equal to new_dentry->d_parent */
	allow_parent1 = collect_domain_accesses(subject->domain, mnt_dir.dentry,
						old_parent,
						&layer_masks_parent1);
	allow_parent2 = collect_domain_accesses(subject->domain, mnt_dir.dentry,
						new_dir->dentry,
						&layer_masks_parent2);
	if (allow_parent1 && allow_parent2)
		return 0;

	/*
	 * To be able to compare source and destination domain access rights,
	 * take into account the @old_dentry access rights aggregated with its
	 * parent access rights. This will be useful to compare with the
	 * destination parent access rights.
	 */
	if (is_access_to_paths_allowed(
		    subject->domain, &mnt_dir, access_request_parent1,
		    &layer_masks_parent1, &request1, old_dentry,
		    access_request_parent2, &layer_masks_parent2, &request2,
		    exchange ? new_dentry : NULL))
		return 0;

	/*
	 * The requests were filled against the mount point path: report each
	 * denial on the parent directory it relates to instead.
	 */
	if (request1.access) {
		request1.audit.u.path.dentry = old_parent;
		landlock_log_denial(subject, &request1);
	}
	if (request2.access) {
		request2.audit.u.path.dentry = new_dir->dentry;
		landlock_log_denial(subject, &request2);
	}

	/*
	 * This prioritizes EACCES over EXDEV for all actions, including
	 * renames with RENAME_EXCHANGE.
	 */
	if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) ||
		   is_eacces(&layer_masks_parent2, access_request_parent2)))
		return -EACCES;

	/*
	 * Gracefully forbids reparenting if the destination directory
	 * hierarchy is not a superset of restrictions of the source directory
	 * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
	 * source or the destination.
	 */
	return -EXDEV;
}

/* Inode hooks */

/*
 * Sanity check on an inode security blob: warns (once) if the Landlock part
 * of @inode_security, located at offset landlock_blob_sizes.lbs_inode, still
 * references an object.
 */
static void hook_inode_free_security_rcu(void *inode_security)
{
	struct landlock_inode_security *inode_sec;

	/*
	 * All inodes must already have been untied from their object by
	 * release_inode() or hook_sb_delete().
	 */
	inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
	WARN_ON_ONCE(inode_sec->object);
}

/* Super-block hooks */

/*
 * Release the inodes used in a security policy.
 *
 * Cf. fsnotify_unmount_inodes() and evict_inodes()
 */
static void hook_sb_delete(struct super_block *const sb)
{
	struct inode *inode, *prev_inode = NULL;

	if (!landlock_initialized)
		return;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct landlock_object *object;

		/* Only handles referenced inodes. */
		if (!icount_read_once(inode))
			continue;

		/*
		 * Protects against concurrent modification of inode (e.g.
		 * from get_inode_object()).
		 */
		spin_lock(&inode->i_lock);
		/*
		 * Checks I_FREEING and I_WILL_FREE to protect against a race
		 * condition when release_inode() just called iput(), which
		 * could lead to a NULL dereference of inode->security or a
		 * second call to iput() for the same Landlock object. Also
		 * checks I_NEW because such inode cannot be tied to an object.
		 */
		if (inode_state_read(inode) &
		    (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		rcu_read_lock();
		object = rcu_dereference(landlock_inode(inode)->object);
		if (!object) {
			rcu_read_unlock();
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* Keeps a reference to this inode until the next loop walk. */
		__iget(inode);
		spin_unlock(&inode->i_lock);

		/*
		 * If there is no concurrent release_inode() ongoing, then we
		 * are in charge of calling iput() on this inode, otherwise we
		 * will just wait for it to finish.
		 */
		spin_lock(&object->lock);
		if (object->underobj == inode) {
			object->underobj = NULL;
			spin_unlock(&object->lock);
			rcu_read_unlock();

			/*
			 * Because object->underobj was not NULL,
			 * release_inode() and get_inode_object() guarantee
			 * that it is safe to reset
			 * landlock_inode(inode)->object while it is not NULL.
			 * It is therefore not necessary to lock inode->i_lock.
			 */
			rcu_assign_pointer(landlock_inode(inode)->object, NULL);
			/*
			 * At this point, we own the ihold() reference that was
			 * originally set up by get_inode_object() and the
			 * __iget() reference that we just set in this loop
			 * walk. Therefore there are at least two references
			 * on the inode.
			 */
			iput_not_last(inode);
		} else {
			spin_unlock(&object->lock);
			rcu_read_unlock();
		}

		if (prev_inode) {
			/*
			 * At this point, we still own the __iget() reference
			 * that we just set in this loop walk. Therefore we
			 * can drop the list lock and know that the inode won't
			 * disappear from under us until the next loop walk.
			 */
			spin_unlock(&sb->s_inode_list_lock);
			/*
			 * We can now actually put the inode reference from the
			 * previous loop walk, which is not needed anymore.
			 */
			iput(prev_inode);
			cond_resched();
			spin_lock(&sb->s_inode_list_lock);
		}
		prev_inode = inode;
	}
	spin_unlock(&sb->s_inode_list_lock);

	/* Puts the inode reference from the last loop walk, if any. */
	if (prev_inode)
		iput(prev_inode);
	/* Waits for pending iput() in release_inode(). */
	wait_var_event(&landlock_superblock(sb)->inode_refs,
		       !atomic_long_read(&landlock_superblock(sb)->inode_refs));
}

/*
 * Logs a LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY denial for @subject, identifying
 * the target with @path. @handle_layer is stored incremented by one in the
 * request's layer_plus_one field.
 */
static void
log_fs_change_topology_path(const struct landlock_cred_security *const subject,
			    size_t handle_layer, const struct path *const path)
{
	landlock_log_denial(subject, &(struct landlock_request) {
		.type = LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY,
		.audit = {
			.type = LSM_AUDIT_DATA_PATH,
			.u.path = *path,
		},
		.layer_plus_one = handle_layer + 1,
	});
}

/*
 * Same as log_fs_change_topology_path() but the target is identified with
 * @dentry (LSM_AUDIT_DATA_DENTRY) instead of a path.
 */
static void log_fs_change_topology_dentry(
	const struct landlock_cred_security *const subject, size_t handle_layer,
	struct dentry *const dentry)
{
	landlock_log_denial(subject, &(struct landlock_request) {
		.type = LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY,
		.audit = {
			.type = LSM_AUDIT_DATA_DENTRY,
			.u.dentry = dentry,
		},
		.layer_plus_one = handle_layer + 1,
	});
}

/*
 * Because a Landlock security policy is defined according to the filesystem
 * topology (i.e.
the mount namespace), changing it may grant access to files 1415 * not previously allowed. 1416 * 1417 * To make it simple, deny any filesystem topology modification by landlocked 1418 * processes. Non-landlocked processes may still change the namespace of a 1419 * landlocked process, but this kind of threat must be handled by a system-wide 1420 * access-control security policy. 1421 * 1422 * This could be lifted in the future if Landlock can safely handle mount 1423 * namespace updates requested by a landlocked process. Indeed, we could 1424 * update the current domain (which is currently read-only) by taking into 1425 * account the accesses of the source and the destination of a new mount point. 1426 * However, it would also require to make all the child domains dynamically 1427 * inherit these new constraints. Anyway, for backward compatibility reasons, 1428 * a dedicated user space option would be required (e.g. as a ruleset flag). 1429 */ 1430 static int hook_sb_mount(const char *const dev_name, 1431 const struct path *const path, const char *const type, 1432 const unsigned long flags, void *const data) 1433 { 1434 size_t handle_layer; 1435 const struct landlock_cred_security *const subject = 1436 landlock_get_applicable_subject(current_cred(), any_fs, 1437 &handle_layer); 1438 1439 if (!subject) 1440 return 0; 1441 1442 log_fs_change_topology_path(subject, handle_layer, path); 1443 return -EPERM; 1444 } 1445 1446 static int hook_move_mount(const struct path *const from_path, 1447 const struct path *const to_path) 1448 { 1449 size_t handle_layer; 1450 const struct landlock_cred_security *const subject = 1451 landlock_get_applicable_subject(current_cred(), any_fs, 1452 &handle_layer); 1453 1454 if (!subject) 1455 return 0; 1456 1457 log_fs_change_topology_path(subject, handle_layer, to_path); 1458 return -EPERM; 1459 } 1460 1461 /* 1462 * Removing a mount point may reveal a previously hidden file hierarchy, which 1463 * may then grant access to files, which may 
have previously been forbidden. 1464 */ 1465 static int hook_sb_umount(struct vfsmount *const mnt, const int flags) 1466 { 1467 size_t handle_layer; 1468 const struct landlock_cred_security *const subject = 1469 landlock_get_applicable_subject(current_cred(), any_fs, 1470 &handle_layer); 1471 1472 if (!subject) 1473 return 0; 1474 1475 log_fs_change_topology_dentry(subject, handle_layer, mnt->mnt_root); 1476 return -EPERM; 1477 } 1478 1479 static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) 1480 { 1481 size_t handle_layer; 1482 const struct landlock_cred_security *const subject = 1483 landlock_get_applicable_subject(current_cred(), any_fs, 1484 &handle_layer); 1485 1486 if (!subject) 1487 return 0; 1488 1489 log_fs_change_topology_dentry(subject, handle_layer, sb->s_root); 1490 return -EPERM; 1491 } 1492 1493 /* 1494 * pivot_root(2), like mount(2), changes the current mount namespace. It must 1495 * then be forbidden for a landlocked process. 1496 * 1497 * However, chroot(2) may be allowed because it only changes the relative root 1498 * directory of the current process. Moreover, it can be used to restrict the 1499 * view of the filesystem. 
1500 */ 1501 static int hook_sb_pivotroot(const struct path *const old_path, 1502 const struct path *const new_path) 1503 { 1504 size_t handle_layer; 1505 const struct landlock_cred_security *const subject = 1506 landlock_get_applicable_subject(current_cred(), any_fs, 1507 &handle_layer); 1508 1509 if (!subject) 1510 return 0; 1511 1512 log_fs_change_topology_path(subject, handle_layer, new_path); 1513 return -EPERM; 1514 } 1515 1516 /* Path hooks */ 1517 1518 static int hook_path_link(struct dentry *const old_dentry, 1519 const struct path *const new_dir, 1520 struct dentry *const new_dentry) 1521 { 1522 return current_check_refer_path(old_dentry, new_dir, new_dentry, false, 1523 false); 1524 } 1525 1526 static int hook_path_rename(const struct path *const old_dir, 1527 struct dentry *const old_dentry, 1528 const struct path *const new_dir, 1529 struct dentry *const new_dentry, 1530 const unsigned int flags) 1531 { 1532 /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ 1533 return current_check_refer_path(old_dentry, new_dir, new_dentry, true, 1534 !!(flags & RENAME_EXCHANGE)); 1535 } 1536 1537 static int hook_path_mkdir(const struct path *const dir, 1538 struct dentry *const dentry, const umode_t mode) 1539 { 1540 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); 1541 } 1542 1543 static int hook_path_mknod(const struct path *const dir, 1544 struct dentry *const dentry, const umode_t mode, 1545 const unsigned int dev) 1546 { 1547 return current_check_access_path(dir, get_mode_access(mode)); 1548 } 1549 1550 static int hook_path_symlink(const struct path *const dir, 1551 struct dentry *const dentry, 1552 const char *const old_name) 1553 { 1554 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); 1555 } 1556 1557 static int hook_path_unlink(const struct path *const dir, 1558 struct dentry *const dentry) 1559 { 1560 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); 1561 } 1562 1563 static int 
hook_path_rmdir(const struct path *const dir, 1564 struct dentry *const dentry) 1565 { 1566 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); 1567 } 1568 1569 static int hook_path_truncate(const struct path *const path) 1570 { 1571 return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); 1572 } 1573 1574 /** 1575 * unmask_scoped_access - Remove access right bits in @masks in all layers 1576 * where @client and @server have the same domain 1577 * 1578 * This does the same as domain_is_scoped(), but unmasks bits in @masks. 1579 * It can not return early as domain_is_scoped() does. 1580 * 1581 * A scoped access for a given access right bit is allowed iff, for all layer 1582 * depths where the access bit is set, the client and server domain are the 1583 * same. This function clears the access rights @access in @masks at all layer 1584 * depths where the client and server domain are the same, so that, when they 1585 * are all cleared, the access is allowed. 1586 * 1587 * @client: Client domain 1588 * @server: Server domain 1589 * @masks: Layer access masks to unmask 1590 * @access: Access bits that control scoping 1591 */ 1592 static void unmask_scoped_access(const struct landlock_ruleset *const client, 1593 const struct landlock_ruleset *const server, 1594 struct layer_masks *const masks, 1595 const access_mask_t access) 1596 { 1597 int client_layer, server_layer; 1598 const struct landlock_hierarchy *client_walker, *server_walker; 1599 1600 /* This should not happen. */ 1601 if (WARN_ON_ONCE(!client)) 1602 return; 1603 1604 /* Server has no Landlock domain; nothing to clear. */ 1605 if (!server) 1606 return; 1607 1608 /* 1609 * client_layer must be able to represent all numbers from 1610 * LANDLOCK_MAX_NUM_LAYERS - 1 to -1 for the loop below to terminate. 1611 * (It must be large enough, and it must be signed.) 
*/
	BUILD_BUG_ON(!is_signed_type(typeof(client_layer)));
	BUILD_BUG_ON(LANDLOCK_MAX_NUM_LAYERS - 1 >
		     type_max(typeof(client_layer)));

	client_layer = client->num_layers - 1;
	client_walker = client->hierarchy;
	server_layer = server->num_layers - 1;
	server_walker = server->hierarchy;

	/*
	 * Clears the access bits at all layers where the client domain is the
	 * same as the server domain. We start the walk at min(client_layer,
	 * server_layer). The bits of the layers above that depth cannot be
	 * cleared because either the client or the server has no domain at
	 * these depths.
	 */
	for (; client_layer > server_layer; client_layer--)
		client_walker = client_walker->parent;

	for (; server_layer > client_layer; server_layer--)
		server_walker = server_walker->parent;

	/* Both walkers are now at the same depth: compare them layer by layer. */
	for (; client_layer >= 0; client_layer--) {
		if (masks->layers[client_layer].access & access &&
		    client_walker == server_walker)
			masks->layers[client_layer].access &= ~access;

		client_walker = client_walker->parent;
		server_walker = server_walker->parent;
	}
}

/*
 * Checks LANDLOCK_ACCESS_FS_RESOLVE_UNIX for a lookup of the socket @other
 * through @path. The right is first unmasked in the layers shared with the
 * domain of the credentials that opened the peer socket file, then the
 * remaining layers are checked against @path.
 *
 * Returns 0 if the lookup is allowed (including coredump lookups, no
 * applicable subject, or a dead/detached peer socket), -EACCES otherwise.
 */
static int hook_unix_find(const struct path *const path, struct sock *other,
			  int flags)
{
	const struct landlock_ruleset *dom_other;
	const struct landlock_cred_security *subject;
	struct layer_masks layer_masks;
	struct landlock_request request = {};
	static const struct access_masks fs_resolve_unix = {
		.fs = LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
	};

	/* Lookup for the purpose of saving coredumps is OK. */
	if (unlikely(flags & SOCK_COREDUMP))
		return 0;

	subject = landlock_get_applicable_subject(current_cred(),
						  fs_resolve_unix, NULL);

	if (!subject)
		return 0;

	/*
	 * Ignoring return value: that the domains apply was already checked in
	 * landlock_get_applicable_subject() above.
	 */
	landlock_init_layer_masks(subject->domain, fs_resolve_unix.fs,
				  &layer_masks, LANDLOCK_KEY_INODE);

	/* Checks the layers in which we are connecting within the same domain. */
	unix_state_lock(other);
	if (unlikely(sock_flag(other, SOCK_DEAD) || !other->sk_socket ||
		     !other->sk_socket->file)) {
		unix_state_unlock(other);
		/*
		 * We rely on the caller to catch the (non-reversible) SOCK_DEAD
		 * condition and retry the lookup. If we returned an error
		 * here, the lookup would not get retried.
		 */
		return 0;
	}
	dom_other = landlock_cred(other->sk_socket->file->f_cred)->domain;

	/* Access to the same (or a lower) domain is always allowed. */
	unmask_scoped_access(subject->domain, dom_other, &layer_masks,
			     fs_resolve_unix.fs);
	unix_state_unlock(other);

	/* Checks the connections to allow-listed paths. */
	if (is_access_to_paths_allowed(subject->domain, path,
				       fs_resolve_unix.fs, &layer_masks,
				       &request, NULL, 0, NULL, NULL, NULL))
		return 0;

	landlock_log_denial(subject, &request);
	return -EACCES;
}

/* File hooks */

/**
 * get_required_file_open_access - Get access needed to open a file
 *
 * @file: File being opened.
 *
 * Return: The access rights that are required for opening the given file,
 * depending on the file type and open mode.
 */
static access_mask_t
get_required_file_open_access(const struct file *const file)
{
	access_mask_t access = 0;

	if (file->f_mode & FMODE_READ) {
		/* A directory can only be opened in read mode. */
		if (S_ISDIR(file_inode(file)->i_mode))
			return LANDLOCK_ACCESS_FS_READ_DIR;
		access = LANDLOCK_ACCESS_FS_READ_FILE;
	}
	if (file->f_mode & FMODE_WRITE)
		access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
	/* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
	if (file->f_flags & __FMODE_EXEC)
		access |= LANDLOCK_ACCESS_FS_EXECUTE;
	return access;
}

/* Initializes the allowed accesses recorded in the file security blob. */
static int hook_file_alloc_security(struct file *const file)
{
	/*
	 * Grants all access rights, even if most of them are not checked later
	 * on. It is more consistent.
	 *
	 * Notably, file descriptors for regular files can also be acquired
	 * without going through the file_open hook, for example when using
	 * memfd_create(2).
	 */
	landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS;
	return 0;
}

/* Returns true if @file refers to a block or character device inode. */
static bool is_device(const struct file *const file)
{
	const struct inode *inode = file_inode(file);

	return S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
}

/*
 * Checks the rights required to open @file against the domain of the opener's
 * credentials (file->f_cred), and records in the file security blob the
 * subset of requested and optional rights that is allowed, for later checks
 * on the opened file.
 *
 * Returns 0 if there is no applicable subject or if all the rights required
 * for the open mode are allowed, -EACCES otherwise.
 */
static int hook_file_open(struct file *const file)
{
	struct layer_masks layer_masks = {};
	access_mask_t open_access_request, full_access_request, allowed_access,
		optional_access;
	const struct landlock_cred_security *const subject =
		landlock_get_applicable_subject(file->f_cred, any_fs, NULL);
	struct landlock_request request = {};

	if (!subject)
		return 0;

	/*
	 * Because a file may be opened with O_PATH, get_required_file_open_access()
	 * may return 0. This case will be handled with a future Landlock
	 * evolution.
	 */
	open_access_request = get_required_file_open_access(file);

	/*
	 * We look up more access than what we immediately need for open(), so
	 * that we can later authorize operations on opened files.
	 */
	optional_access = LANDLOCK_ACCESS_FS_TRUNCATE;
	if (is_device(file))
		optional_access |= LANDLOCK_ACCESS_FS_IOCTL_DEV;

	full_access_request = open_access_request | optional_access;

	if (is_access_to_paths_allowed(
		    subject->domain, &file->f_path,
		    landlock_init_layer_masks(subject->domain,
					      full_access_request, &layer_masks,
					      LANDLOCK_KEY_INODE),
		    &layer_masks, &request, NULL, 0, NULL, NULL, NULL)) {
		allowed_access = full_access_request;
	} else {
		/*
		 * Calculate the actual allowed access rights from layer_masks.
		 * Remove the access rights from the full access request which
		 * are still unfulfilled in any of the layers.
		 */
		allowed_access = full_access_request;
		for (size_t i = 0; i < ARRAY_SIZE(layer_masks.layers); i++)
			allowed_access &= ~layer_masks.layers[i].access;
	}

	/*
	 * For operations on already opened files (e.g. ftruncate()), it is the
	 * access rights at the time of open() which decide whether the
	 * operation is permitted. Therefore, we record the relevant subset of
	 * file access rights in the opened struct file.
	 */
	landlock_file(file)->allowed_access = allowed_access;
#ifdef CONFIG_AUDIT
	landlock_file(file)->deny_masks = landlock_get_deny_masks(
		_LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks);
	landlock_file(file)->quiet_optional_accesses =
		landlock_get_quiet_optional_accesses(
			_LANDLOCK_ACCESS_FS_OPTIONAL,
			landlock_file(file)->deny_masks, &layer_masks);
#endif /* CONFIG_AUDIT */

	/* A denied optional right alone does not make the open fail. */
	if (access_mask_subset(open_access_request, allowed_access))
		return 0;

	/* Sets access to reflect the actual request. */
	request.access = open_access_request;
	landlock_log_denial(subject, &request);
	return -EACCES;
}

/*
 * Checks a truncation of the opened @file against the rights recorded at
 * open time. Returns 0 if allowed, -EACCES (after logging) otherwise.
 */
static int hook_file_truncate(struct file *const file)
{
	/*
	 * Allows truncation if the truncate right was available at the time of
	 * opening the file, to get a consistent access check as for read, write
	 * and execute operations.
	 *
	 * Note: For checks done based on the file's Landlock allowed access, we
	 * enforce them independently of whether the current thread is in a
	 * Landlock domain, so that open files passed between independent
	 * processes retain their behaviour.
	 */
	if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE)
		return 0;

	landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) {
		.type = LANDLOCK_REQUEST_FS_ACCESS,
		.audit = {
			.type = LSM_AUDIT_DATA_FILE,
			.u.file = file,
		},
		.all_existing_optional_access = _LANDLOCK_ACCESS_FS_OPTIONAL,
		.access = LANDLOCK_ACCESS_FS_TRUNCATE,
#ifdef CONFIG_AUDIT
		.deny_masks = landlock_file(file)->deny_masks,
		.quiet_optional_accesses = landlock_file(file)->quiet_optional_accesses,
#endif /* CONFIG_AUDIT */
	});
	return -EACCES;
}

/*
 * Common IOCTL check for the native and compat entry points.
 *
 * Returns 0 if LANDLOCK_ACCESS_FS_IOCTL_DEV was recorded for @file at open
 * time, if @file is not a device, or if @cmd is one of the always-permitted
 * commands (looked up with the compat variant when @is_compat is set);
 * -EACCES (after logging) otherwise.
 */
static int hook_file_ioctl_common(const struct file *const file,
				  const unsigned int cmd, const bool is_compat)
{
	access_mask_t allowed_access = landlock_file(file)->allowed_access;

	/*
	 * It is the access rights at the time of opening the file which
	 * determine whether IOCTL can be used on the opened file later.
	 *
	 * The access right is attached to the opened file in hook_file_open().
	 */
	if (allowed_access & LANDLOCK_ACCESS_FS_IOCTL_DEV)
		return 0;

	if (!is_device(file))
		return 0;

	if (unlikely(is_compat) ? is_masked_device_ioctl_compat(cmd) :
				  is_masked_device_ioctl(cmd))
		return 0;

	landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) {
		.type = LANDLOCK_REQUEST_FS_ACCESS,
		.audit = {
			.type = LSM_AUDIT_DATA_IOCTL_OP,
			.u.op = &(struct lsm_ioctlop_audit) {
				.path = file->f_path,
				.cmd = cmd,
			},
		},
		.all_existing_optional_access = _LANDLOCK_ACCESS_FS_OPTIONAL,
		.access = LANDLOCK_ACCESS_FS_IOCTL_DEV,
#ifdef CONFIG_AUDIT
		.deny_masks = landlock_file(file)->deny_masks,
		.quiet_optional_accesses = landlock_file(file)->quiet_optional_accesses,
#endif /* CONFIG_AUDIT */
	});
	return -EACCES;
}

/* Native IOCTL entry point; @arg is not used by the check. */
static int hook_file_ioctl(struct file *file, unsigned int cmd,
			   unsigned long arg)
{
	return hook_file_ioctl_common(file, cmd, false);
}

/* Compat IOCTL entry point; @arg is not used by the check. */
static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
				  unsigned long arg)
{
	return hook_file_ioctl_common(file, cmd, true);
}

/*
 * Always allow sending signals between threads of the same process. This
 * ensures consistency with hook_task_kill().
 *
 * Returns false only when the owner is a single task or thread group
 * (PIDTYPE_PID/PIDTYPE_TGID) whose task is found and belongs to the thread
 * group of current; returns true in every other case.
 */
static bool control_current_fowner(struct fown_struct *const fown)
{
	struct task_struct *p;

	/*
	 * Lock already held by __f_setown(), see commit 26f204380a3c ("fs: Fix
	 * file_set_fowner LSM hook inconsistencies").
	 */
	lockdep_assert_held(&fown->lock);

	/*
	 * A process-group or session owner (PIDTYPE_PGID/PIDTYPE_SID) fans the
	 * signal out to every member at delivery time, so record the domain and
	 * let hook_file_send_sigiotask() check the live scope per recipient.
	 */
	if (fown->pid_type != PIDTYPE_PID && fown->pid_type != PIDTYPE_TGID)
		return true;

	/*
	 * Some callers (e.g. fcntl_dirnotify) may not be in an RCU read-side
	 * critical section.
	 */
	guard(rcu)();
	p = pid_task(fown->pid, fown->pid_type);
	if (!p)
		return true;

	return !same_thread_group(p, current);
}

static void hook_file_set_fowner(struct file *file)
{
	struct landlock_ruleset *prev_dom;
	struct landlock_cred_security fown_subject = {};
	struct pid *prev_tg, *fown_tg = NULL;
	size_t fown_layer = 0;

	if (control_current_fowner(file_f_owner(file))) {
		static const struct access_masks signal_scope = {
			.scope = LANDLOCK_SCOPE_SIGNAL,
		};
		const struct landlock_cred_security *new_subject =
			landlock_get_applicable_subject(
				current_cred(), signal_scope, &fown_layer);
		if (new_subject) {
			landlock_get_ruleset(new_subject->domain);
			fown_subject = *new_subject;
			fown_tg = get_pid(task_tgid(current));
		}
	}

	prev_dom = landlock_file(file)->fown_subject.domain;
	prev_tg = landlock_file(file)->fown_tg;
	landlock_file(file)->fown_subject = fown_subject;
	landlock_file(file)->fown_tg = fown_tg;
#ifdef CONFIG_AUDIT
	landlock_file(file)->fown_layer = fown_layer;
#endif /* CONFIG_AUDIT*/

	/* May be called in an RCU read-side critical section.
*/ 1970 landlock_put_ruleset_deferred(prev_dom); 1971 put_pid(prev_tg); 1972 } 1973 1974 static void hook_file_free_security(struct file *file) 1975 { 1976 put_pid(landlock_file(file)->fown_tg); 1977 landlock_put_ruleset_deferred(landlock_file(file)->fown_subject.domain); 1978 } 1979 1980 static struct security_hook_list landlock_hooks[] __ro_after_init = { 1981 LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu), 1982 1983 LSM_HOOK_INIT(sb_delete, hook_sb_delete), 1984 LSM_HOOK_INIT(sb_mount, hook_sb_mount), 1985 LSM_HOOK_INIT(move_mount, hook_move_mount), 1986 LSM_HOOK_INIT(sb_umount, hook_sb_umount), 1987 LSM_HOOK_INIT(sb_remount, hook_sb_remount), 1988 LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), 1989 1990 LSM_HOOK_INIT(path_link, hook_path_link), 1991 LSM_HOOK_INIT(path_rename, hook_path_rename), 1992 LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), 1993 LSM_HOOK_INIT(path_mknod, hook_path_mknod), 1994 LSM_HOOK_INIT(path_symlink, hook_path_symlink), 1995 LSM_HOOK_INIT(path_unlink, hook_path_unlink), 1996 LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), 1997 LSM_HOOK_INIT(path_truncate, hook_path_truncate), 1998 LSM_HOOK_INIT(unix_find, hook_unix_find), 1999 2000 LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), 2001 LSM_HOOK_INIT(file_open, hook_file_open), 2002 LSM_HOOK_INIT(file_truncate, hook_file_truncate), 2003 LSM_HOOK_INIT(file_ioctl, hook_file_ioctl), 2004 LSM_HOOK_INIT(file_ioctl_compat, hook_file_ioctl_compat), 2005 LSM_HOOK_INIT(file_set_fowner, hook_file_set_fowner), 2006 LSM_HOOK_INIT(file_free_security, hook_file_free_security), 2007 }; 2008 2009 __init void landlock_add_fs_hooks(void) 2010 { 2011 security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), 2012 &landlock_lsmid); 2013 } 2014 2015 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST 2016 2017 /* clang-format off */ 2018 static struct kunit_case test_cases[] = { 2019 KUNIT_CASE(test_no_more_access), 2020 KUNIT_CASE(test_scope_to_request_with_exec_none), 2021 
KUNIT_CASE(test_scope_to_request_with_exec_some), 2022 KUNIT_CASE(test_scope_to_request_without_access), 2023 KUNIT_CASE(test_is_eacces_with_none), 2024 KUNIT_CASE(test_is_eacces_with_refer), 2025 KUNIT_CASE(test_is_eacces_with_write), 2026 {} 2027 }; 2028 /* clang-format on */ 2029 2030 static struct kunit_suite test_suite = { 2031 .name = "landlock_fs", 2032 .test_cases = test_cases, 2033 }; 2034 2035 kunit_test_suite(test_suite); 2036 2037 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ 2038