1cb2c7d1aSMickaël Salaün // SPDX-License-Identifier: GPL-2.0-only 2cb2c7d1aSMickaël Salaün /* 3cb2c7d1aSMickaël Salaün * Landlock LSM - Filesystem management and hooks 4cb2c7d1aSMickaël Salaün * 5cb2c7d1aSMickaël Salaün * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> 6cb2c7d1aSMickaël Salaün * Copyright © 2018-2020 ANSSI 7b91c3e4eSMickaël Salaün * Copyright © 2021-2022 Microsoft Corporation 8cb2c7d1aSMickaël Salaün */ 9cb2c7d1aSMickaël Salaün 10cb2c7d1aSMickaël Salaün #include <linux/atomic.h> 11cb2c7d1aSMickaël Salaün #include <linux/bitops.h> 12cb2c7d1aSMickaël Salaün #include <linux/bits.h> 13cb2c7d1aSMickaël Salaün #include <linux/compiler_types.h> 14cb2c7d1aSMickaël Salaün #include <linux/dcache.h> 15cb2c7d1aSMickaël Salaün #include <linux/err.h> 16cb2c7d1aSMickaël Salaün #include <linux/fs.h> 17cb2c7d1aSMickaël Salaün #include <linux/init.h> 18cb2c7d1aSMickaël Salaün #include <linux/kernel.h> 19cb2c7d1aSMickaël Salaün #include <linux/limits.h> 20cb2c7d1aSMickaël Salaün #include <linux/list.h> 21cb2c7d1aSMickaël Salaün #include <linux/lsm_hooks.h> 22cb2c7d1aSMickaël Salaün #include <linux/mount.h> 23cb2c7d1aSMickaël Salaün #include <linux/namei.h> 24cb2c7d1aSMickaël Salaün #include <linux/path.h> 25cb2c7d1aSMickaël Salaün #include <linux/rcupdate.h> 26cb2c7d1aSMickaël Salaün #include <linux/spinlock.h> 27cb2c7d1aSMickaël Salaün #include <linux/stat.h> 28cb2c7d1aSMickaël Salaün #include <linux/types.h> 29cb2c7d1aSMickaël Salaün #include <linux/wait_bit.h> 30cb2c7d1aSMickaël Salaün #include <linux/workqueue.h> 31cb2c7d1aSMickaël Salaün #include <uapi/linux/landlock.h> 32cb2c7d1aSMickaël Salaün 33cb2c7d1aSMickaël Salaün #include "common.h" 34cb2c7d1aSMickaël Salaün #include "cred.h" 35cb2c7d1aSMickaël Salaün #include "fs.h" 36cb2c7d1aSMickaël Salaün #include "limits.h" 37cb2c7d1aSMickaël Salaün #include "object.h" 38cb2c7d1aSMickaël Salaün #include "ruleset.h" 39cb2c7d1aSMickaël Salaün #include "setup.h" 40cb2c7d1aSMickaël Salaün 41cb2c7d1aSMickaël 
/* Underlying object management */

/*
 * release_inode - Detach @object from its underlying inode
 *
 * @object: Landlock object whose ->underobj is the inode to release.
 *
 * Entered with @object->lock held and always returns with it released (see
 * the __releases() annotation).  When an inode is still attached, the object's
 * back-reference is cleared, the inode's Landlock object pointer is reset to
 * NULL and the inode reference is dropped with iput().  The superblock's
 * inode_refs counter is raised around the iput() and waiters are woken when it
 * drops back to zero.
 *
 * Installed as the ->release operation of landlock_fs_underops.
 */
static void release_inode(struct landlock_object *const object)
	__releases(object->lock)
{
	struct inode *const inode = object->underobj;
	struct super_block *sb;

	/* Nothing attached: only the lock has to be released. */
	if (!inode) {
		spin_unlock(&object->lock);
		return;
	}

	/*
	 * Protects against concurrent use by hook_sb_delete() of the reference
	 * to the underlying inode.
	 */
	object->underobj = NULL;
	/*
	 * Makes sure that if the filesystem is concurrently unmounted,
	 * hook_sb_delete() will wait for us to finish iput().
	 */
	sb = inode->i_sb;
	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
	spin_unlock(&object->lock);
	/*
	 * Because object->underobj was not NULL, hook_sb_delete() and
	 * get_inode_object() guarantee that it is safe to reset
	 * landlock_inode(inode)->object while it is not NULL.  It is therefore
	 * not necessary to lock inode->i_lock.
	 */
	rcu_assign_pointer(landlock_inode(inode)->object, NULL);
	/*
	 * Now, new rules can safely be tied to @inode with get_inode_object().
	 */

	iput(inode);
	/* Pairs with the increment above; wakes whoever waits on inode_refs. */
	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
		wake_up_var(&landlock_superblock(sb)->inode_refs);
}

/* Operations handed to landlock_create_object() for inode-backed objects. */
static const struct landlock_object_underops landlock_fs_underops = {
	.release = release_inode
};

/* Ruleset management */

/*
 * get_inode_object - Get the Landlock object tied to @inode, creating it if
 * there is none
 *
 * @inode: Inode to look up.
 *
 * Fast path: under the RCU read lock, takes a reference on the object already
 * attached to @inode with refcount_inc_not_zero().
 *
 * Slow path: allocates a new object with landlock_create_object(), then
 * attaches it to @inode under inode->i_lock after taking an inode reference
 * with ihold().  If another object got attached in the meantime, the new one
 * is freed and the lookup restarts.
 *
 * Returns the object, or the ERR_PTR() value returned by
 * landlock_create_object() on failure.
 */
static struct landlock_object *get_inode_object(struct inode *const inode)
{
	struct landlock_object *object, *new_object;
	struct landlock_inode_security *inode_sec = landlock_inode(inode);

	rcu_read_lock();
retry:
	/* Reached with the RCU read lock held. */
	object = rcu_dereference(inode_sec->object);
	if (object) {
		if (likely(refcount_inc_not_zero(&object->usage))) {
			rcu_read_unlock();
			return object;
		}
		/*
		 * We are racing with release_inode(), the object is going
		 * away.  Wait for release_inode(), then retry.
		 */
		spin_lock(&object->lock);
		spin_unlock(&object->lock);
		goto retry;
	}
	rcu_read_unlock();

	/*
	 * If there is no object tied to @inode, then create a new one (without
	 * holding any locks).
	 */
	new_object = landlock_create_object(&landlock_fs_underops, inode);
	if (IS_ERR(new_object))
		return new_object;

	/*
	 * Protects against concurrent calls to get_inode_object() or
	 * hook_sb_delete().
	 */
	spin_lock(&inode->i_lock);
	if (unlikely(rcu_access_pointer(inode_sec->object))) {
		/* Someone else just created the object, bail out and retry. */
		spin_unlock(&inode->i_lock);
		/* @new_object was never attached to @inode. */
		kfree(new_object);

		/* The retry label expects the RCU read lock to be held. */
		rcu_read_lock();
		goto retry;
	}

	/*
	 * @inode will be released by hook_sb_delete() on its superblock
	 * shutdown, or by release_inode() when no more ruleset references the
	 * related object.
	 */
	ihold(inode);
	rcu_assign_pointer(inode_sec->object, new_object);
	spin_unlock(&inode->i_lock);
	return new_object;
}
*/ 1456cc2df8eSMickaël Salaün /* clang-format off */ 146cb2c7d1aSMickaël Salaün #define ACCESS_FILE ( \ 147cb2c7d1aSMickaël Salaün LANDLOCK_ACCESS_FS_EXECUTE | \ 148cb2c7d1aSMickaël Salaün LANDLOCK_ACCESS_FS_WRITE_FILE | \ 149b9f5ce27SGünther Noack LANDLOCK_ACCESS_FS_READ_FILE | \ 150b9f5ce27SGünther Noack LANDLOCK_ACCESS_FS_TRUNCATE) 1516cc2df8eSMickaël Salaün /* clang-format on */ 152cb2c7d1aSMickaël Salaün 153cb2c7d1aSMickaël Salaün /* 15455e55920SMickaël Salaün * All access rights that are denied by default whether they are handled or not 15555e55920SMickaël Salaün * by a ruleset/layer. This must be ORed with all ruleset->fs_access_masks[] 15655e55920SMickaël Salaün * entries when we need to get the absolute handled access masks. 15755e55920SMickaël Salaün */ 15855e55920SMickaël Salaün /* clang-format off */ 15955e55920SMickaël Salaün #define ACCESS_INITIALLY_DENIED ( \ 16055e55920SMickaël Salaün LANDLOCK_ACCESS_FS_REFER) 16155e55920SMickaël Salaün /* clang-format on */ 16255e55920SMickaël Salaün 16355e55920SMickaël Salaün /* 164cb2c7d1aSMickaël Salaün * @path: Should have been checked by get_path_from_fd(). 165cb2c7d1aSMickaël Salaün */ 166cb2c7d1aSMickaël Salaün int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, 1675f2ff33eSMickaël Salaün const struct path *const path, 1685f2ff33eSMickaël Salaün access_mask_t access_rights) 169cb2c7d1aSMickaël Salaün { 170cb2c7d1aSMickaël Salaün int err; 171cb2c7d1aSMickaël Salaün struct landlock_object *object; 172cb2c7d1aSMickaël Salaün 173cb2c7d1aSMickaël Salaün /* Files only get access rights that make sense. */ 17406a1c40aSMickaël Salaün if (!d_is_dir(path->dentry) && 17506a1c40aSMickaël Salaün (access_rights | ACCESS_FILE) != ACCESS_FILE) 176cb2c7d1aSMickaël Salaün return -EINVAL; 177cb2c7d1aSMickaël Salaün if (WARN_ON_ONCE(ruleset->num_layers != 1)) 178cb2c7d1aSMickaël Salaün return -EINVAL; 179cb2c7d1aSMickaël Salaün 180cb2c7d1aSMickaël Salaün /* Transforms relative access rights to absolute ones. 
*/ 18155e55920SMickaël Salaün access_rights |= 18255e55920SMickaël Salaün LANDLOCK_MASK_ACCESS_FS & 18355e55920SMickaël Salaün ~(ruleset->fs_access_masks[0] | ACCESS_INITIALLY_DENIED); 184cb2c7d1aSMickaël Salaün object = get_inode_object(d_backing_inode(path->dentry)); 185cb2c7d1aSMickaël Salaün if (IS_ERR(object)) 186cb2c7d1aSMickaël Salaün return PTR_ERR(object); 187cb2c7d1aSMickaël Salaün mutex_lock(&ruleset->lock); 188cb2c7d1aSMickaël Salaün err = landlock_insert_rule(ruleset, object, access_rights); 189cb2c7d1aSMickaël Salaün mutex_unlock(&ruleset->lock); 190cb2c7d1aSMickaël Salaün /* 191cb2c7d1aSMickaël Salaün * No need to check for an error because landlock_insert_rule() 192cb2c7d1aSMickaël Salaün * increments the refcount for the new object if needed. 193cb2c7d1aSMickaël Salaün */ 194cb2c7d1aSMickaël Salaün landlock_put_object(object); 195cb2c7d1aSMickaël Salaün return err; 196cb2c7d1aSMickaël Salaün } 197cb2c7d1aSMickaël Salaün 198cb2c7d1aSMickaël Salaün /* Access-control management */ 199cb2c7d1aSMickaël Salaün 2002cd7cd6eSMickaël Salaün /* 2012cd7cd6eSMickaël Salaün * The lifetime of the returned rule is tied to @domain. 2022cd7cd6eSMickaël Salaün * 2032cd7cd6eSMickaël Salaün * Returns NULL if no rule is found or if @dentry is negative. 2042cd7cd6eSMickaël Salaün */ 2052cd7cd6eSMickaël Salaün static inline const struct landlock_rule * 2062cd7cd6eSMickaël Salaün find_rule(const struct landlock_ruleset *const domain, 2072cd7cd6eSMickaël Salaün const struct dentry *const dentry) 208cb2c7d1aSMickaël Salaün { 209cb2c7d1aSMickaël Salaün const struct landlock_rule *rule; 210cb2c7d1aSMickaël Salaün const struct inode *inode; 211cb2c7d1aSMickaël Salaün 2122cd7cd6eSMickaël Salaün /* Ignores nonexistent leafs. 
*/ 2132cd7cd6eSMickaël Salaün if (d_is_negative(dentry)) 2142cd7cd6eSMickaël Salaün return NULL; 2152cd7cd6eSMickaël Salaün 2162cd7cd6eSMickaël Salaün inode = d_backing_inode(dentry); 217cb2c7d1aSMickaël Salaün rcu_read_lock(); 21806a1c40aSMickaël Salaün rule = landlock_find_rule( 21906a1c40aSMickaël Salaün domain, rcu_dereference(landlock_inode(inode)->object)); 220cb2c7d1aSMickaël Salaün rcu_read_unlock(); 2212cd7cd6eSMickaël Salaün return rule; 2222cd7cd6eSMickaël Salaün } 2232cd7cd6eSMickaël Salaün 2248ba0005fSMickaël Salaün /* 2258ba0005fSMickaël Salaün * @layer_masks is read and may be updated according to the access request and 2268ba0005fSMickaël Salaün * the matching rule. 2278ba0005fSMickaël Salaün * 2288ba0005fSMickaël Salaün * Returns true if the request is allowed (i.e. relevant layer masks for the 2298ba0005fSMickaël Salaün * request are empty). 2308ba0005fSMickaël Salaün */ 2318ba0005fSMickaël Salaün static inline bool 2328ba0005fSMickaël Salaün unmask_layers(const struct landlock_rule *const rule, 2332cd7cd6eSMickaël Salaün const access_mask_t access_request, 2348ba0005fSMickaël Salaün layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) 2352cd7cd6eSMickaël Salaün { 2362cd7cd6eSMickaël Salaün size_t layer_level; 2372cd7cd6eSMickaël Salaün 2388ba0005fSMickaël Salaün if (!access_request || !layer_masks) 2398ba0005fSMickaël Salaün return true; 240cb2c7d1aSMickaël Salaün if (!rule) 2418ba0005fSMickaël Salaün return false; 242cb2c7d1aSMickaël Salaün 243cb2c7d1aSMickaël Salaün /* 244cb2c7d1aSMickaël Salaün * An access is granted if, for each policy layer, at least one rule 2458ba0005fSMickaël Salaün * encountered on the pathwalk grants the requested access, 2468ba0005fSMickaël Salaün * regardless of its position in the layer stack. We must then check 247cb2c7d1aSMickaël Salaün * the remaining layers for each inode, from the first added layer to 2488ba0005fSMickaël Salaün * the last one. 
When there is multiple requested accesses, for each 2498ba0005fSMickaël Salaün * policy layer, the full set of requested accesses may not be granted 2508ba0005fSMickaël Salaün * by only one rule, but by the union (binary OR) of multiple rules. 2518ba0005fSMickaël Salaün * E.g. /a/b <execute> + /a <read> => /a/b <execute + read> 252cb2c7d1aSMickaël Salaün */ 2532cd7cd6eSMickaël Salaün for (layer_level = 0; layer_level < rule->num_layers; layer_level++) { 2542cd7cd6eSMickaël Salaün const struct landlock_layer *const layer = 2552cd7cd6eSMickaël Salaün &rule->layers[layer_level]; 25675c542d6SMickaël Salaün const layer_mask_t layer_bit = BIT_ULL(layer->level - 1); 2578ba0005fSMickaël Salaün const unsigned long access_req = access_request; 2588ba0005fSMickaël Salaün unsigned long access_bit; 2598ba0005fSMickaël Salaün bool is_empty; 260cb2c7d1aSMickaël Salaün 2618ba0005fSMickaël Salaün /* 2628ba0005fSMickaël Salaün * Records in @layer_masks which layer grants access to each 2638ba0005fSMickaël Salaün * requested access. 2648ba0005fSMickaël Salaün */ 2658ba0005fSMickaël Salaün is_empty = true; 2668ba0005fSMickaël Salaün for_each_set_bit(access_bit, &access_req, 2678ba0005fSMickaël Salaün ARRAY_SIZE(*layer_masks)) { 2688ba0005fSMickaël Salaün if (layer->access & BIT_ULL(access_bit)) 2698ba0005fSMickaël Salaün (*layer_masks)[access_bit] &= ~layer_bit; 2708ba0005fSMickaël Salaün is_empty = is_empty && !(*layer_masks)[access_bit]; 271cb2c7d1aSMickaël Salaün } 2728ba0005fSMickaël Salaün if (is_empty) 2738ba0005fSMickaël Salaün return true; 274cb2c7d1aSMickaël Salaün } 2758ba0005fSMickaël Salaün return false; 276cb2c7d1aSMickaël Salaün } 277cb2c7d1aSMickaël Salaün 2789da82b20SMickaël Salaün /* 2799da82b20SMickaël Salaün * Allows access to pseudo filesystems that will never be mountable (e.g. 
2809da82b20SMickaël Salaün * sockfs, pipefs), but can still be reachable through 2819da82b20SMickaël Salaün * /proc/<pid>/fd/<file-descriptor> 2829da82b20SMickaël Salaün */ 2839da82b20SMickaël Salaün static inline bool is_nouser_or_private(const struct dentry *dentry) 2849da82b20SMickaël Salaün { 2859da82b20SMickaël Salaün return (dentry->d_sb->s_flags & SB_NOUSER) || 2869da82b20SMickaël Salaün (d_is_positive(dentry) && 2879da82b20SMickaël Salaün unlikely(IS_PRIVATE(d_backing_inode(dentry)))); 2889da82b20SMickaël Salaün } 2899da82b20SMickaël Salaün 290b91c3e4eSMickaël Salaün static inline access_mask_t 291b91c3e4eSMickaël Salaün get_handled_accesses(const struct landlock_ruleset *const domain) 292b91c3e4eSMickaël Salaün { 29355e55920SMickaël Salaün access_mask_t access_dom = ACCESS_INITIALLY_DENIED; 294b91c3e4eSMickaël Salaün size_t layer_level; 295b91c3e4eSMickaël Salaün 29655e55920SMickaël Salaün for (layer_level = 0; layer_level < domain->num_layers; layer_level++) 29755e55920SMickaël Salaün access_dom |= domain->fs_access_masks[layer_level]; 29855e55920SMickaël Salaün return access_dom & LANDLOCK_MASK_ACCESS_FS; 299b91c3e4eSMickaël Salaün } 300b91c3e4eSMickaël Salaün 30152a13488SGünther Noack /** 30252a13488SGünther Noack * init_layer_masks - Initialize layer masks from an access request 30352a13488SGünther Noack * 30452a13488SGünther Noack * Populates @layer_masks such that for each access right in @access_request, 30552a13488SGünther Noack * the bits for all the layers are set where this access right is handled. 30652a13488SGünther Noack * 30752a13488SGünther Noack * @domain: The domain that defines the current restrictions. 30852a13488SGünther Noack * @access_request: The requested access rights to check. 30952a13488SGünther Noack * @layer_masks: The layer masks to populate. 
31052a13488SGünther Noack * 31152a13488SGünther Noack * Returns: An access mask where each access right bit is set which is handled 31252a13488SGünther Noack * in any of the active layers in @domain. 31352a13488SGünther Noack */ 314b91c3e4eSMickaël Salaün static inline access_mask_t 315b91c3e4eSMickaël Salaün init_layer_masks(const struct landlock_ruleset *const domain, 316b91c3e4eSMickaël Salaün const access_mask_t access_request, 317b91c3e4eSMickaël Salaün layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) 318b91c3e4eSMickaël Salaün { 319b91c3e4eSMickaël Salaün access_mask_t handled_accesses = 0; 320b91c3e4eSMickaël Salaün size_t layer_level; 321b91c3e4eSMickaël Salaün 322b91c3e4eSMickaël Salaün memset(layer_masks, 0, sizeof(*layer_masks)); 323b91c3e4eSMickaël Salaün /* An empty access request can happen because of O_WRONLY | O_RDWR. */ 324b91c3e4eSMickaël Salaün if (!access_request) 325b91c3e4eSMickaël Salaün return 0; 326b91c3e4eSMickaël Salaün 327b91c3e4eSMickaël Salaün /* Saves all handled accesses per layer. */ 328b91c3e4eSMickaël Salaün for (layer_level = 0; layer_level < domain->num_layers; layer_level++) { 329b91c3e4eSMickaël Salaün const unsigned long access_req = access_request; 330b91c3e4eSMickaël Salaün unsigned long access_bit; 331b91c3e4eSMickaël Salaün 332b91c3e4eSMickaël Salaün for_each_set_bit(access_bit, &access_req, 333b91c3e4eSMickaël Salaün ARRAY_SIZE(*layer_masks)) { 33455e55920SMickaël Salaün /* 33555e55920SMickaël Salaün * Artificially handles all initially denied by default 33655e55920SMickaël Salaün * access rights. 
33755e55920SMickaël Salaün */ 33855e55920SMickaël Salaün if (BIT_ULL(access_bit) & 33955e55920SMickaël Salaün (domain->fs_access_masks[layer_level] | 34055e55920SMickaël Salaün ACCESS_INITIALLY_DENIED)) { 341b91c3e4eSMickaël Salaün (*layer_masks)[access_bit] |= 342b91c3e4eSMickaël Salaün BIT_ULL(layer_level); 343b91c3e4eSMickaël Salaün handled_accesses |= BIT_ULL(access_bit); 344b91c3e4eSMickaël Salaün } 345b91c3e4eSMickaël Salaün } 346b91c3e4eSMickaël Salaün } 347b91c3e4eSMickaël Salaün return handled_accesses; 348b91c3e4eSMickaël Salaün } 349b91c3e4eSMickaël Salaün 350b91c3e4eSMickaël Salaün /* 351b91c3e4eSMickaël Salaün * Check that a destination file hierarchy has more restrictions than a source 352b91c3e4eSMickaël Salaün * file hierarchy. This is only used for link and rename actions. 353b91c3e4eSMickaël Salaün * 354b91c3e4eSMickaël Salaün * @layer_masks_child2: Optional child masks. 355b91c3e4eSMickaël Salaün */ 356b91c3e4eSMickaël Salaün static inline bool no_more_access( 357b91c3e4eSMickaël Salaün const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], 358b91c3e4eSMickaël Salaün const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], 359b91c3e4eSMickaël Salaün const bool child1_is_directory, 360b91c3e4eSMickaël Salaün const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], 361b91c3e4eSMickaël Salaün const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], 362b91c3e4eSMickaël Salaün const bool child2_is_directory) 363b91c3e4eSMickaël Salaün { 364b91c3e4eSMickaël Salaün unsigned long access_bit; 365b91c3e4eSMickaël Salaün 366b91c3e4eSMickaël Salaün for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); 367b91c3e4eSMickaël Salaün access_bit++) { 368b91c3e4eSMickaël Salaün /* Ignores accesses that only make sense for directories. 
*/ 369b91c3e4eSMickaël Salaün const bool is_file_access = 370b91c3e4eSMickaël Salaün !!(BIT_ULL(access_bit) & ACCESS_FILE); 371b91c3e4eSMickaël Salaün 372b91c3e4eSMickaël Salaün if (child1_is_directory || is_file_access) { 373b91c3e4eSMickaël Salaün /* 374b91c3e4eSMickaël Salaün * Checks if the destination restrictions are a 375b91c3e4eSMickaël Salaün * superset of the source ones (i.e. inherited access 376b91c3e4eSMickaël Salaün * rights without child exceptions): 377b91c3e4eSMickaël Salaün * restrictions(parent2) >= restrictions(child1) 378b91c3e4eSMickaël Salaün */ 379b91c3e4eSMickaël Salaün if ((((*layer_masks_parent1)[access_bit] & 380b91c3e4eSMickaël Salaün (*layer_masks_child1)[access_bit]) | 381b91c3e4eSMickaël Salaün (*layer_masks_parent2)[access_bit]) != 382b91c3e4eSMickaël Salaün (*layer_masks_parent2)[access_bit]) 383b91c3e4eSMickaël Salaün return false; 384b91c3e4eSMickaël Salaün } 385b91c3e4eSMickaël Salaün 386b91c3e4eSMickaël Salaün if (!layer_masks_child2) 387b91c3e4eSMickaël Salaün continue; 388b91c3e4eSMickaël Salaün if (child2_is_directory || is_file_access) { 389b91c3e4eSMickaël Salaün /* 390b91c3e4eSMickaël Salaün * Checks inverted restrictions for RENAME_EXCHANGE: 391b91c3e4eSMickaël Salaün * restrictions(parent1) >= restrictions(child2) 392b91c3e4eSMickaël Salaün */ 393b91c3e4eSMickaël Salaün if ((((*layer_masks_parent2)[access_bit] & 394b91c3e4eSMickaël Salaün (*layer_masks_child2)[access_bit]) | 395b91c3e4eSMickaël Salaün (*layer_masks_parent1)[access_bit]) != 396b91c3e4eSMickaël Salaün (*layer_masks_parent1)[access_bit]) 397b91c3e4eSMickaël Salaün return false; 398b91c3e4eSMickaël Salaün } 399b91c3e4eSMickaël Salaün } 400b91c3e4eSMickaël Salaün return true; 401b91c3e4eSMickaël Salaün } 402b91c3e4eSMickaël Salaün 403b91c3e4eSMickaël Salaün /* 404b91c3e4eSMickaël Salaün * Removes @layer_masks accesses that are not requested. 405b91c3e4eSMickaël Salaün * 406b91c3e4eSMickaël Salaün * Returns true if the request is allowed, false otherwise. 
407b91c3e4eSMickaël Salaün */ 408b91c3e4eSMickaël Salaün static inline bool 409b91c3e4eSMickaël Salaün scope_to_request(const access_mask_t access_request, 410b91c3e4eSMickaël Salaün layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) 411b91c3e4eSMickaël Salaün { 412b91c3e4eSMickaël Salaün const unsigned long access_req = access_request; 413b91c3e4eSMickaël Salaün unsigned long access_bit; 414b91c3e4eSMickaël Salaün 415b91c3e4eSMickaël Salaün if (WARN_ON_ONCE(!layer_masks)) 416b91c3e4eSMickaël Salaün return true; 417b91c3e4eSMickaël Salaün 418b91c3e4eSMickaël Salaün for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) 419b91c3e4eSMickaël Salaün (*layer_masks)[access_bit] = 0; 420b91c3e4eSMickaël Salaün return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); 421b91c3e4eSMickaël Salaün } 422b91c3e4eSMickaël Salaün 423b91c3e4eSMickaël Salaün /* 424b91c3e4eSMickaël Salaün * Returns true if there is at least one access right different than 425b91c3e4eSMickaël Salaün * LANDLOCK_ACCESS_FS_REFER. 426b91c3e4eSMickaël Salaün */ 427b91c3e4eSMickaël Salaün static inline bool 428b91c3e4eSMickaël Salaün is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], 4295f2ff33eSMickaël Salaün const access_mask_t access_request) 430cb2c7d1aSMickaël Salaün { 431b91c3e4eSMickaël Salaün unsigned long access_bit; 432b91c3e4eSMickaël Salaün /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. 
*/ 433b91c3e4eSMickaël Salaün const unsigned long access_check = access_request & 434b91c3e4eSMickaël Salaün ~LANDLOCK_ACCESS_FS_REFER; 435cb2c7d1aSMickaël Salaün 436b91c3e4eSMickaël Salaün if (!layer_masks) 437b91c3e4eSMickaël Salaün return false; 438b91c3e4eSMickaël Salaün 439b91c3e4eSMickaël Salaün for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { 440b91c3e4eSMickaël Salaün if ((*layer_masks)[access_bit]) 441b91c3e4eSMickaël Salaün return true; 442b91c3e4eSMickaël Salaün } 443b91c3e4eSMickaël Salaün return false; 444b91c3e4eSMickaël Salaün } 445b91c3e4eSMickaël Salaün 446b91c3e4eSMickaël Salaün /** 447106794c4SGünther Noack * is_access_to_paths_allowed - Check accesses for requests with a common path 448b91c3e4eSMickaël Salaün * 449b91c3e4eSMickaël Salaün * @domain: Domain to check against. 450b91c3e4eSMickaël Salaün * @path: File hierarchy to walk through. 451b91c3e4eSMickaël Salaün * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is 452b91c3e4eSMickaël Salaün * equal to @layer_masks_parent2 (if any). This is tied to the unique 453b91c3e4eSMickaël Salaün * requested path for most actions, or the source in case of a refer action 454b91c3e4eSMickaël Salaün * (i.e. rename or link), or the source and destination in case of 455b91c3e4eSMickaël Salaün * RENAME_EXCHANGE. 456b91c3e4eSMickaël Salaün * @layer_masks_parent1: Pointer to a matrix of layer masks per access 457b91c3e4eSMickaël Salaün * masks, identifying the layers that forbid a specific access. Bits from 458b91c3e4eSMickaël Salaün * this matrix can be unset according to the @path walk. An empty matrix 459b91c3e4eSMickaël Salaün * means that @domain allows all possible Landlock accesses (i.e. not only 460b91c3e4eSMickaël Salaün * those identified by @access_request_parent1). 
This matrix can 461b91c3e4eSMickaël Salaün * initially refer to domain layer masks and, when the accesses for the 462b91c3e4eSMickaël Salaün * destination and source are the same, to requested layer masks. 463b91c3e4eSMickaël Salaün * @dentry_child1: Dentry to the initial child of the parent1 path. This 464b91c3e4eSMickaël Salaün * pointer must be NULL for non-refer actions (i.e. not link nor rename). 465b91c3e4eSMickaël Salaün * @access_request_parent2: Similar to @access_request_parent1 but for a 466b91c3e4eSMickaël Salaün * request involving a source and a destination. This refers to the 467b91c3e4eSMickaël Salaün * destination, except in case of RENAME_EXCHANGE where it also refers to 468b91c3e4eSMickaël Salaün * the source. Must be set to 0 when using a simple path request. 469b91c3e4eSMickaël Salaün * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer 470b91c3e4eSMickaël Salaün * action. This must be NULL otherwise. 471b91c3e4eSMickaël Salaün * @dentry_child2: Dentry to the initial child of the parent2 path. This 472b91c3e4eSMickaël Salaün * pointer is only set for RENAME_EXCHANGE actions and must be NULL 473b91c3e4eSMickaël Salaün * otherwise. 474b91c3e4eSMickaël Salaün * 475b91c3e4eSMickaël Salaün * This helper first checks that the destination has a superset of restrictions 476b91c3e4eSMickaël Salaün * compared to the source (if any) for a common path. Because of 477b91c3e4eSMickaël Salaün * RENAME_EXCHANGE actions, source and destinations may be swapped. It then 478b91c3e4eSMickaël Salaün * checks that the collected accesses and the remaining ones are enough to 479b91c3e4eSMickaël Salaün * allow the request. 480b91c3e4eSMickaël Salaün * 481b91c3e4eSMickaël Salaün * Returns: 482106794c4SGünther Noack * - true if the access request is granted; 483106794c4SGünther Noack * - false otherwise. 
484b91c3e4eSMickaël Salaün */ 485106794c4SGünther Noack static bool is_access_to_paths_allowed( 486b91c3e4eSMickaël Salaün const struct landlock_ruleset *const domain, 487b91c3e4eSMickaël Salaün const struct path *const path, 488b91c3e4eSMickaël Salaün const access_mask_t access_request_parent1, 489b91c3e4eSMickaël Salaün layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], 490b91c3e4eSMickaël Salaün const struct dentry *const dentry_child1, 491b91c3e4eSMickaël Salaün const access_mask_t access_request_parent2, 492b91c3e4eSMickaël Salaün layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], 493b91c3e4eSMickaël Salaün const struct dentry *const dentry_child2) 494b91c3e4eSMickaël Salaün { 495b91c3e4eSMickaël Salaün bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, 496b91c3e4eSMickaël Salaün child1_is_directory = true, child2_is_directory = true; 497b91c3e4eSMickaël Salaün struct path walker_path; 498b91c3e4eSMickaël Salaün access_mask_t access_masked_parent1, access_masked_parent2; 499b91c3e4eSMickaël Salaün layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], 500b91c3e4eSMickaël Salaün _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; 501b91c3e4eSMickaël Salaün layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, 502b91c3e4eSMickaël Salaün (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; 503b91c3e4eSMickaël Salaün 504b91c3e4eSMickaël Salaün if (!access_request_parent1 && !access_request_parent2) 505106794c4SGünther Noack return true; 506cb2c7d1aSMickaël Salaün if (WARN_ON_ONCE(!domain || !path)) 507106794c4SGünther Noack return true; 5089da82b20SMickaël Salaün if (is_nouser_or_private(path->dentry)) 509106794c4SGünther Noack return true; 510b91c3e4eSMickaël Salaün if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1)) 511106794c4SGünther Noack return false; 512cb2c7d1aSMickaël Salaün 513b91c3e4eSMickaël Salaün if (unlikely(layer_masks_parent2)) { 514b91c3e4eSMickaël Salaün if 
(WARN_ON_ONCE(!dentry_child1)) 515106794c4SGünther Noack return false; 516b91c3e4eSMickaël Salaün /* 517b91c3e4eSMickaël Salaün * For a double request, first check for potential privilege 518b91c3e4eSMickaël Salaün * escalation by looking at domain handled accesses (which are 519b91c3e4eSMickaël Salaün * a superset of the meaningful requested accesses). 520b91c3e4eSMickaël Salaün */ 521b91c3e4eSMickaël Salaün access_masked_parent1 = access_masked_parent2 = 522b91c3e4eSMickaël Salaün get_handled_accesses(domain); 523b91c3e4eSMickaël Salaün is_dom_check = true; 524b91c3e4eSMickaël Salaün } else { 525b91c3e4eSMickaël Salaün if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) 526106794c4SGünther Noack return false; 527b91c3e4eSMickaël Salaün /* For a simple request, only check for requested accesses. */ 528b91c3e4eSMickaël Salaün access_masked_parent1 = access_request_parent1; 529b91c3e4eSMickaël Salaün access_masked_parent2 = access_request_parent2; 530b91c3e4eSMickaël Salaün is_dom_check = false; 531b91c3e4eSMickaël Salaün } 5328ba0005fSMickaël Salaün 533b91c3e4eSMickaël Salaün if (unlikely(dentry_child1)) { 534b91c3e4eSMickaël Salaün unmask_layers(find_rule(domain, dentry_child1), 535b91c3e4eSMickaël Salaün init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 536b91c3e4eSMickaël Salaün &_layer_masks_child1), 537b91c3e4eSMickaël Salaün &_layer_masks_child1); 538b91c3e4eSMickaël Salaün layer_masks_child1 = &_layer_masks_child1; 539b91c3e4eSMickaël Salaün child1_is_directory = d_is_dir(dentry_child1); 5408ba0005fSMickaël Salaün } 541b91c3e4eSMickaël Salaün if (unlikely(dentry_child2)) { 542b91c3e4eSMickaël Salaün unmask_layers(find_rule(domain, dentry_child2), 543b91c3e4eSMickaël Salaün init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 544b91c3e4eSMickaël Salaün &_layer_masks_child2), 545b91c3e4eSMickaël Salaün &_layer_masks_child2); 546b91c3e4eSMickaël Salaün layer_masks_child2 = &_layer_masks_child2; 547b91c3e4eSMickaël Salaün child2_is_directory = 
d_is_dir(dentry_child2); 5488ba0005fSMickaël Salaün } 549cb2c7d1aSMickaël Salaün 550cb2c7d1aSMickaël Salaün walker_path = *path; 551cb2c7d1aSMickaël Salaün path_get(&walker_path); 552cb2c7d1aSMickaël Salaün /* 553cb2c7d1aSMickaël Salaün * We need to walk through all the hierarchy to not miss any relevant 554cb2c7d1aSMickaël Salaün * restriction. 555cb2c7d1aSMickaël Salaün */ 556cb2c7d1aSMickaël Salaün while (true) { 557cb2c7d1aSMickaël Salaün struct dentry *parent_dentry; 558b91c3e4eSMickaël Salaün const struct landlock_rule *rule; 559cb2c7d1aSMickaël Salaün 560b91c3e4eSMickaël Salaün /* 561b91c3e4eSMickaël Salaün * If at least all accesses allowed on the destination are 562b91c3e4eSMickaël Salaün * already allowed on the source, respectively if there is at 563b91c3e4eSMickaël Salaün * least as much as restrictions on the destination than on the 564b91c3e4eSMickaël Salaün * source, then we can safely refer files from the source to 565b91c3e4eSMickaël Salaün * the destination without risking a privilege escalation. 566b91c3e4eSMickaël Salaün * This also applies in the case of RENAME_EXCHANGE, which 567b91c3e4eSMickaël Salaün * implies checks on both direction. This is crucial for 568b91c3e4eSMickaël Salaün * standalone multilayered security policies. Furthermore, 569b91c3e4eSMickaël Salaün * this helps avoid policy writers to shoot themselves in the 570b91c3e4eSMickaël Salaün * foot. 
571b91c3e4eSMickaël Salaün */ 572b91c3e4eSMickaël Salaün if (unlikely(is_dom_check && 573b91c3e4eSMickaël Salaün no_more_access( 574b91c3e4eSMickaël Salaün layer_masks_parent1, layer_masks_child1, 575b91c3e4eSMickaël Salaün child1_is_directory, layer_masks_parent2, 576b91c3e4eSMickaël Salaün layer_masks_child2, 577b91c3e4eSMickaël Salaün child2_is_directory))) { 578b91c3e4eSMickaël Salaün allowed_parent1 = scope_to_request( 579b91c3e4eSMickaël Salaün access_request_parent1, layer_masks_parent1); 580b91c3e4eSMickaël Salaün allowed_parent2 = scope_to_request( 581b91c3e4eSMickaël Salaün access_request_parent2, layer_masks_parent2); 582b91c3e4eSMickaël Salaün 583b91c3e4eSMickaël Salaün /* Stops when all accesses are granted. */ 584b91c3e4eSMickaël Salaün if (allowed_parent1 && allowed_parent2) 585b91c3e4eSMickaël Salaün break; 586b91c3e4eSMickaël Salaün 587b91c3e4eSMickaël Salaün /* 588b91c3e4eSMickaël Salaün * Now, downgrades the remaining checks from domain 589b91c3e4eSMickaël Salaün * handled accesses to requested accesses. 590b91c3e4eSMickaël Salaün */ 591b91c3e4eSMickaël Salaün is_dom_check = false; 592b91c3e4eSMickaël Salaün access_masked_parent1 = access_request_parent1; 593b91c3e4eSMickaël Salaün access_masked_parent2 = access_request_parent2; 594b91c3e4eSMickaël Salaün } 595b91c3e4eSMickaël Salaün 596b91c3e4eSMickaël Salaün rule = find_rule(domain, walker_path.dentry); 597b91c3e4eSMickaël Salaün allowed_parent1 = unmask_layers(rule, access_masked_parent1, 598b91c3e4eSMickaël Salaün layer_masks_parent1); 599b91c3e4eSMickaël Salaün allowed_parent2 = unmask_layers(rule, access_masked_parent2, 600b91c3e4eSMickaël Salaün layer_masks_parent2); 601b91c3e4eSMickaël Salaün 602cb2c7d1aSMickaël Salaün /* Stops when a rule from each layer grants access. 
*/ 603b91c3e4eSMickaël Salaün if (allowed_parent1 && allowed_parent2) 604cb2c7d1aSMickaël Salaün break; 605cb2c7d1aSMickaël Salaün 606cb2c7d1aSMickaël Salaün jump_up: 607cb2c7d1aSMickaël Salaün if (walker_path.dentry == walker_path.mnt->mnt_root) { 608cb2c7d1aSMickaël Salaün if (follow_up(&walker_path)) { 609cb2c7d1aSMickaël Salaün /* Ignores hidden mount points. */ 610cb2c7d1aSMickaël Salaün goto jump_up; 611cb2c7d1aSMickaël Salaün } else { 612cb2c7d1aSMickaël Salaün /* 613cb2c7d1aSMickaël Salaün * Stops at the real root. Denies access 614cb2c7d1aSMickaël Salaün * because not all layers have granted access. 615cb2c7d1aSMickaël Salaün */ 616cb2c7d1aSMickaël Salaün break; 617cb2c7d1aSMickaël Salaün } 618cb2c7d1aSMickaël Salaün } 619cb2c7d1aSMickaël Salaün if (unlikely(IS_ROOT(walker_path.dentry))) { 620cb2c7d1aSMickaël Salaün /* 621cb2c7d1aSMickaël Salaün * Stops at disconnected root directories. Only allows 622cb2c7d1aSMickaël Salaün * access to internal filesystems (e.g. nsfs, which is 623cb2c7d1aSMickaël Salaün * reachable through /proc/<pid>/ns/<namespace>). 
624cb2c7d1aSMickaël Salaün */ 625b91c3e4eSMickaël Salaün allowed_parent1 = allowed_parent2 = 626b91c3e4eSMickaël Salaün !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); 627cb2c7d1aSMickaël Salaün break; 628cb2c7d1aSMickaël Salaün } 629cb2c7d1aSMickaël Salaün parent_dentry = dget_parent(walker_path.dentry); 630cb2c7d1aSMickaël Salaün dput(walker_path.dentry); 631cb2c7d1aSMickaël Salaün walker_path.dentry = parent_dentry; 632cb2c7d1aSMickaël Salaün } 633cb2c7d1aSMickaël Salaün path_put(&walker_path); 634b91c3e4eSMickaël Salaün 635106794c4SGünther Noack return allowed_parent1 && allowed_parent2; 636b91c3e4eSMickaël Salaün } 637b91c3e4eSMickaël Salaün 638b91c3e4eSMickaël Salaün static inline int check_access_path(const struct landlock_ruleset *const domain, 639b91c3e4eSMickaël Salaün const struct path *const path, 640b91c3e4eSMickaël Salaün access_mask_t access_request) 641b91c3e4eSMickaël Salaün { 642b91c3e4eSMickaël Salaün layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; 643b91c3e4eSMickaël Salaün 644b91c3e4eSMickaël Salaün access_request = init_layer_masks(domain, access_request, &layer_masks); 645106794c4SGünther Noack if (is_access_to_paths_allowed(domain, path, access_request, 646106794c4SGünther Noack &layer_masks, NULL, 0, NULL, NULL)) 647106794c4SGünther Noack return 0; 648106794c4SGünther Noack return -EACCES; 649cb2c7d1aSMickaël Salaün } 650cb2c7d1aSMickaël Salaün 651cb2c7d1aSMickaël Salaün static inline int current_check_access_path(const struct path *const path, 6525f2ff33eSMickaël Salaün const access_mask_t access_request) 653cb2c7d1aSMickaël Salaün { 654cb2c7d1aSMickaël Salaün const struct landlock_ruleset *const dom = 655cb2c7d1aSMickaël Salaün landlock_get_current_domain(); 656cb2c7d1aSMickaël Salaün 657cb2c7d1aSMickaël Salaün if (!dom) 658cb2c7d1aSMickaël Salaün return 0; 659cb2c7d1aSMickaël Salaün return check_access_path(dom, path, access_request); 660cb2c7d1aSMickaël Salaün } 661cb2c7d1aSMickaël Salaün 6629da82b20SMickaël Salaün static inline 
access_mask_t get_mode_access(const umode_t mode) 6639da82b20SMickaël Salaün { 6649da82b20SMickaël Salaün switch (mode & S_IFMT) { 6659da82b20SMickaël Salaün case S_IFLNK: 6669da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_SYM; 6679da82b20SMickaël Salaün case 0: 6689da82b20SMickaël Salaün /* A zero mode translates to S_IFREG. */ 6699da82b20SMickaël Salaün case S_IFREG: 6709da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_REG; 6719da82b20SMickaël Salaün case S_IFDIR: 6729da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_DIR; 6739da82b20SMickaël Salaün case S_IFCHR: 6749da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_CHAR; 6759da82b20SMickaël Salaün case S_IFBLK: 6769da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_BLOCK; 6779da82b20SMickaël Salaün case S_IFIFO: 6789da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_FIFO; 6799da82b20SMickaël Salaün case S_IFSOCK: 6809da82b20SMickaël Salaün return LANDLOCK_ACCESS_FS_MAKE_SOCK; 6819da82b20SMickaël Salaün default: 6829da82b20SMickaël Salaün WARN_ON_ONCE(1); 6839da82b20SMickaël Salaün return 0; 6849da82b20SMickaël Salaün } 6859da82b20SMickaël Salaün } 6869da82b20SMickaël Salaün 6879da82b20SMickaël Salaün static inline access_mask_t maybe_remove(const struct dentry *const dentry) 6889da82b20SMickaël Salaün { 6899da82b20SMickaël Salaün if (d_is_negative(dentry)) 6909da82b20SMickaël Salaün return 0; 6919da82b20SMickaël Salaün return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : 6929da82b20SMickaël Salaün LANDLOCK_ACCESS_FS_REMOVE_FILE; 6939da82b20SMickaël Salaün } 6949da82b20SMickaël Salaün 695b91c3e4eSMickaël Salaün /** 696b91c3e4eSMickaël Salaün * collect_domain_accesses - Walk through a file path and collect accesses 697b91c3e4eSMickaël Salaün * 698b91c3e4eSMickaël Salaün * @domain: Domain to check against. 699b91c3e4eSMickaël Salaün * @mnt_root: Last directory to check. 700b91c3e4eSMickaël Salaün * @dir: Directory to start the walk from. 
701b91c3e4eSMickaël Salaün * @layer_masks_dom: Where to store the collected accesses. 702b91c3e4eSMickaël Salaün * 703b91c3e4eSMickaël Salaün * This helper is useful to begin a path walk from the @dir directory to a 704b91c3e4eSMickaël Salaün * @mnt_root directory used as a mount point. This mount point is the common 705b91c3e4eSMickaël Salaün * ancestor between the source and the destination of a renamed and linked 706b91c3e4eSMickaël Salaün * file. While walking from @dir to @mnt_root, we record all the domain's 707b91c3e4eSMickaël Salaün * allowed accesses in @layer_masks_dom. 708b91c3e4eSMickaël Salaün * 709106794c4SGünther Noack * This is similar to is_access_to_paths_allowed() but much simpler because it 710106794c4SGünther Noack * only handles walking on the same mount point and only checks one set of 711106794c4SGünther Noack * accesses. 712b91c3e4eSMickaël Salaün * 713b91c3e4eSMickaël Salaün * Returns: 714b91c3e4eSMickaël Salaün * - true if all the domain access rights are allowed for @dir; 715b91c3e4eSMickaël Salaün * - false if the walk reached @mnt_root. 
716b91c3e4eSMickaël Salaün */ 717b91c3e4eSMickaël Salaün static bool collect_domain_accesses( 718b91c3e4eSMickaël Salaün const struct landlock_ruleset *const domain, 719b91c3e4eSMickaël Salaün const struct dentry *const mnt_root, struct dentry *dir, 720b91c3e4eSMickaël Salaün layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) 721b91c3e4eSMickaël Salaün { 722b91c3e4eSMickaël Salaün unsigned long access_dom; 723b91c3e4eSMickaël Salaün bool ret = false; 724b91c3e4eSMickaël Salaün 725b91c3e4eSMickaël Salaün if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) 726b91c3e4eSMickaël Salaün return true; 727b91c3e4eSMickaël Salaün if (is_nouser_or_private(dir)) 728b91c3e4eSMickaël Salaün return true; 729b91c3e4eSMickaël Salaün 730b91c3e4eSMickaël Salaün access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, 731b91c3e4eSMickaël Salaün layer_masks_dom); 732b91c3e4eSMickaël Salaün 733b91c3e4eSMickaël Salaün dget(dir); 734b91c3e4eSMickaël Salaün while (true) { 735b91c3e4eSMickaël Salaün struct dentry *parent_dentry; 736b91c3e4eSMickaël Salaün 737b91c3e4eSMickaël Salaün /* Gets all layers allowing all domain accesses. */ 738b91c3e4eSMickaël Salaün if (unmask_layers(find_rule(domain, dir), access_dom, 739b91c3e4eSMickaël Salaün layer_masks_dom)) { 740b91c3e4eSMickaël Salaün /* 741b91c3e4eSMickaël Salaün * Stops when all handled accesses are allowed by at 742b91c3e4eSMickaël Salaün * least one rule in each layer. 743b91c3e4eSMickaël Salaün */ 744b91c3e4eSMickaël Salaün ret = true; 745b91c3e4eSMickaël Salaün break; 746b91c3e4eSMickaël Salaün } 747b91c3e4eSMickaël Salaün 748b91c3e4eSMickaël Salaün /* We should not reach a root other than @mnt_root. 
*/ 749b91c3e4eSMickaël Salaün if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir))) 750b91c3e4eSMickaël Salaün break; 751b91c3e4eSMickaël Salaün 752b91c3e4eSMickaël Salaün parent_dentry = dget_parent(dir); 753b91c3e4eSMickaël Salaün dput(dir); 754b91c3e4eSMickaël Salaün dir = parent_dentry; 755b91c3e4eSMickaël Salaün } 756b91c3e4eSMickaël Salaün dput(dir); 757b91c3e4eSMickaël Salaün return ret; 758b91c3e4eSMickaël Salaün } 759b91c3e4eSMickaël Salaün 760b91c3e4eSMickaël Salaün /** 761b91c3e4eSMickaël Salaün * current_check_refer_path - Check if a rename or link action is allowed 762b91c3e4eSMickaël Salaün * 763b91c3e4eSMickaël Salaün * @old_dentry: File or directory requested to be moved or linked. 764b91c3e4eSMickaël Salaün * @new_dir: Destination parent directory. 765b91c3e4eSMickaël Salaün * @new_dentry: Destination file or directory. 766b91c3e4eSMickaël Salaün * @removable: Sets to true if it is a rename operation. 767b91c3e4eSMickaël Salaün * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. 768b91c3e4eSMickaël Salaün * 769b91c3e4eSMickaël Salaün * Because of its unprivileged constraints, Landlock relies on file hierarchies 770b91c3e4eSMickaël Salaün * (and not only inodes) to tie access rights to files. Being able to link or 771b91c3e4eSMickaël Salaün * rename a file hierarchy brings some challenges. Indeed, moving or linking a 772b91c3e4eSMickaël Salaün * file (i.e. creating a new reference to an inode) can have an impact on the 773b91c3e4eSMickaël Salaün * actions allowed for a set of files if it would change its parent directory 774b91c3e4eSMickaël Salaün * (i.e. reparenting). 775b91c3e4eSMickaël Salaün * 776b91c3e4eSMickaël Salaün * To avoid trivial access right bypasses, Landlock first checks if the file or 777b91c3e4eSMickaël Salaün * directory requested to be moved would gain new access rights inherited from 778b91c3e4eSMickaël Salaün * its new hierarchy. 
Before returning any error, Landlock then checks that 779b91c3e4eSMickaël Salaün * the parent source hierarchy and the destination hierarchy would allow the 780b91c3e4eSMickaël Salaün * link or rename action. If it is not the case, an error with EACCES is 781b91c3e4eSMickaël Salaün * returned to inform user space that there is no way to remove or create the 782b91c3e4eSMickaël Salaün * requested source file type. If it should be allowed but the new inherited 783b91c3e4eSMickaël Salaün * access rights would be greater than the source access rights, then the 784b91c3e4eSMickaël Salaün * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables 785b91c3e4eSMickaël Salaün * user space to abort the whole operation if there is no way to do it, or to 786b91c3e4eSMickaël Salaün * manually copy the source to the destination if this remains allowed, e.g. 787b91c3e4eSMickaël Salaün * because file creation is allowed on the destination directory but not direct 788b91c3e4eSMickaël Salaün * linking. 789b91c3e4eSMickaël Salaün * 790b91c3e4eSMickaël Salaün * To achieve this goal, the kernel needs to compare two file hierarchies: the 791b91c3e4eSMickaël Salaün * one identifying the source file or directory (including itself), and the 792b91c3e4eSMickaël Salaün * destination one. This can be seen as a multilayer partial ordering problem. 793b91c3e4eSMickaël Salaün * The kernel walks through these paths and collects in a matrix the access 794b91c3e4eSMickaël Salaün * rights that are denied per layer. These matrices are then compared to see 795b91c3e4eSMickaël Salaün * if the destination one has more (or the same) restrictions as the source 796b91c3e4eSMickaël Salaün * one. If this is the case, the requested action will not return EXDEV, which 797b91c3e4eSMickaël Salaün * doesn't mean the action is allowed. The parent hierarchy of the source 798b91c3e4eSMickaël Salaün * (i.e. 
parent directory), and the destination hierarchy must also be checked 799b91c3e4eSMickaël Salaün * to verify that they explicitly allow such action (i.e. referencing, 800b91c3e4eSMickaël Salaün * creation and potentially removal rights). The kernel implementation is then 801b91c3e4eSMickaël Salaün * required to rely on potentially four matrices of access rights: one for the 802b91c3e4eSMickaël Salaün * source file or directory (i.e. the child), a potentially other one for the 803b91c3e4eSMickaël Salaün * other source/destination (in case of RENAME_EXCHANGE), one for the source 804b91c3e4eSMickaël Salaün * parent hierarchy and a last one for the destination hierarchy. These 805b91c3e4eSMickaël Salaün * ephemeral matrices take some space on the stack, which limits the number of 806b91c3e4eSMickaël Salaün * layers to a deemed reasonable number: 16. 807b91c3e4eSMickaël Salaün * 808b91c3e4eSMickaël Salaün * Returns: 809b91c3e4eSMickaël Salaün * - 0 if access is allowed; 810b91c3e4eSMickaël Salaün * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; 811b91c3e4eSMickaël Salaün * - -EACCES if file removal or creation is denied. 
812b91c3e4eSMickaël Salaün */ 813b91c3e4eSMickaël Salaün static int current_check_refer_path(struct dentry *const old_dentry, 814b91c3e4eSMickaël Salaün const struct path *const new_dir, 815b91c3e4eSMickaël Salaün struct dentry *const new_dentry, 816b91c3e4eSMickaël Salaün const bool removable, const bool exchange) 817b91c3e4eSMickaël Salaün { 818b91c3e4eSMickaël Salaün const struct landlock_ruleset *const dom = 819b91c3e4eSMickaël Salaün landlock_get_current_domain(); 820b91c3e4eSMickaël Salaün bool allow_parent1, allow_parent2; 821b91c3e4eSMickaël Salaün access_mask_t access_request_parent1, access_request_parent2; 822b91c3e4eSMickaël Salaün struct path mnt_dir; 823b91c3e4eSMickaël Salaün layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], 824b91c3e4eSMickaël Salaün layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; 825b91c3e4eSMickaël Salaün 826b91c3e4eSMickaël Salaün if (!dom) 827b91c3e4eSMickaël Salaün return 0; 828b91c3e4eSMickaël Salaün if (WARN_ON_ONCE(dom->num_layers < 1)) 829b91c3e4eSMickaël Salaün return -EACCES; 830b91c3e4eSMickaël Salaün if (unlikely(d_is_negative(old_dentry))) 831b91c3e4eSMickaël Salaün return -ENOENT; 832b91c3e4eSMickaël Salaün if (exchange) { 833b91c3e4eSMickaël Salaün if (unlikely(d_is_negative(new_dentry))) 834b91c3e4eSMickaël Salaün return -ENOENT; 835b91c3e4eSMickaël Salaün access_request_parent1 = 836b91c3e4eSMickaël Salaün get_mode_access(d_backing_inode(new_dentry)->i_mode); 837b91c3e4eSMickaël Salaün } else { 838b91c3e4eSMickaël Salaün access_request_parent1 = 0; 839b91c3e4eSMickaël Salaün } 840b91c3e4eSMickaël Salaün access_request_parent2 = 841b91c3e4eSMickaël Salaün get_mode_access(d_backing_inode(old_dentry)->i_mode); 842b91c3e4eSMickaël Salaün if (removable) { 843b91c3e4eSMickaël Salaün access_request_parent1 |= maybe_remove(old_dentry); 844b91c3e4eSMickaël Salaün access_request_parent2 |= maybe_remove(new_dentry); 845b91c3e4eSMickaël Salaün } 846b91c3e4eSMickaël Salaün 847b91c3e4eSMickaël Salaün /* The mount points are 
the same for old and new paths, cf. EXDEV. */ 848b91c3e4eSMickaël Salaün if (old_dentry->d_parent == new_dir->dentry) { 849b91c3e4eSMickaël Salaün /* 850b91c3e4eSMickaël Salaün * The LANDLOCK_ACCESS_FS_REFER access right is not required 851b91c3e4eSMickaël Salaün * for same-directory referer (i.e. no reparenting). 852b91c3e4eSMickaël Salaün */ 853b91c3e4eSMickaël Salaün access_request_parent1 = init_layer_masks( 854b91c3e4eSMickaël Salaün dom, access_request_parent1 | access_request_parent2, 855b91c3e4eSMickaël Salaün &layer_masks_parent1); 856106794c4SGünther Noack if (is_access_to_paths_allowed( 857106794c4SGünther Noack dom, new_dir, access_request_parent1, 858106794c4SGünther Noack &layer_masks_parent1, NULL, 0, NULL, NULL)) 859106794c4SGünther Noack return 0; 860106794c4SGünther Noack return -EACCES; 861b91c3e4eSMickaël Salaün } 862b91c3e4eSMickaël Salaün 863b91c3e4eSMickaël Salaün access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; 864b91c3e4eSMickaël Salaün access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; 865b91c3e4eSMickaël Salaün 866b91c3e4eSMickaël Salaün /* Saves the common mount point. 
*/ 867b91c3e4eSMickaël Salaün mnt_dir.mnt = new_dir->mnt; 868b91c3e4eSMickaël Salaün mnt_dir.dentry = new_dir->mnt->mnt_root; 869b91c3e4eSMickaël Salaün 870b91c3e4eSMickaël Salaün /* new_dir->dentry is equal to new_dentry->d_parent */ 871b91c3e4eSMickaël Salaün allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, 872b91c3e4eSMickaël Salaün old_dentry->d_parent, 873b91c3e4eSMickaël Salaün &layer_masks_parent1); 874b91c3e4eSMickaël Salaün allow_parent2 = collect_domain_accesses( 875b91c3e4eSMickaël Salaün dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); 876b91c3e4eSMickaël Salaün 877b91c3e4eSMickaël Salaün if (allow_parent1 && allow_parent2) 878b91c3e4eSMickaël Salaün return 0; 879b91c3e4eSMickaël Salaün 880b91c3e4eSMickaël Salaün /* 881b91c3e4eSMickaël Salaün * To be able to compare source and destination domain access rights, 882b91c3e4eSMickaël Salaün * take into account the @old_dentry access rights aggregated with its 883b91c3e4eSMickaël Salaün * parent access rights. This will be useful to compare with the 884b91c3e4eSMickaël Salaün * destination parent access rights. 885b91c3e4eSMickaël Salaün */ 886106794c4SGünther Noack if (is_access_to_paths_allowed( 887106794c4SGünther Noack dom, &mnt_dir, access_request_parent1, &layer_masks_parent1, 888106794c4SGünther Noack old_dentry, access_request_parent2, &layer_masks_parent2, 889106794c4SGünther Noack exchange ? new_dentry : NULL)) 890106794c4SGünther Noack return 0; 891106794c4SGünther Noack 892106794c4SGünther Noack /* 893106794c4SGünther Noack * This prioritizes EACCES over EXDEV for all actions, including 894106794c4SGünther Noack * renames with RENAME_EXCHANGE. 
895106794c4SGünther Noack */ 896106794c4SGünther Noack if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) || 897106794c4SGünther Noack is_eacces(&layer_masks_parent2, access_request_parent2))) 898106794c4SGünther Noack return -EACCES; 899106794c4SGünther Noack 900106794c4SGünther Noack /* 901106794c4SGünther Noack * Gracefully forbids reparenting if the destination directory 902106794c4SGünther Noack * hierarchy is not a superset of restrictions of the source directory 903106794c4SGünther Noack * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the 904106794c4SGünther Noack * source or the destination. 905106794c4SGünther Noack */ 906106794c4SGünther Noack return -EXDEV; 907b91c3e4eSMickaël Salaün } 908b91c3e4eSMickaël Salaün 909cb2c7d1aSMickaël Salaün /* Inode hooks */ 910cb2c7d1aSMickaël Salaün 911cb2c7d1aSMickaël Salaün static void hook_inode_free_security(struct inode *const inode) 912cb2c7d1aSMickaël Salaün { 913cb2c7d1aSMickaël Salaün /* 914cb2c7d1aSMickaël Salaün * All inodes must already have been untied from their object by 915cb2c7d1aSMickaël Salaün * release_inode() or hook_sb_delete(). 916cb2c7d1aSMickaël Salaün */ 917cb2c7d1aSMickaël Salaün WARN_ON_ONCE(landlock_inode(inode)->object); 918cb2c7d1aSMickaël Salaün } 919cb2c7d1aSMickaël Salaün 920cb2c7d1aSMickaël Salaün /* Super-block hooks */ 921cb2c7d1aSMickaël Salaün 922cb2c7d1aSMickaël Salaün /* 923cb2c7d1aSMickaël Salaün * Release the inodes used in a security policy. 924cb2c7d1aSMickaël Salaün * 925cb2c7d1aSMickaël Salaün * Cf. 
fsnotify_unmount_inodes() and invalidate_inodes() 926cb2c7d1aSMickaël Salaün */ 927cb2c7d1aSMickaël Salaün static void hook_sb_delete(struct super_block *const sb) 928cb2c7d1aSMickaël Salaün { 929cb2c7d1aSMickaël Salaün struct inode *inode, *prev_inode = NULL; 930cb2c7d1aSMickaël Salaün 931cb2c7d1aSMickaël Salaün if (!landlock_initialized) 932cb2c7d1aSMickaël Salaün return; 933cb2c7d1aSMickaël Salaün 934cb2c7d1aSMickaël Salaün spin_lock(&sb->s_inode_list_lock); 935cb2c7d1aSMickaël Salaün list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 936cb2c7d1aSMickaël Salaün struct landlock_object *object; 937cb2c7d1aSMickaël Salaün 938cb2c7d1aSMickaël Salaün /* Only handles referenced inodes. */ 939cb2c7d1aSMickaël Salaün if (!atomic_read(&inode->i_count)) 940cb2c7d1aSMickaël Salaün continue; 941cb2c7d1aSMickaël Salaün 942cb2c7d1aSMickaël Salaün /* 943cb2c7d1aSMickaël Salaün * Protects against concurrent modification of inode (e.g. 944cb2c7d1aSMickaël Salaün * from get_inode_object()). 945cb2c7d1aSMickaël Salaün */ 946cb2c7d1aSMickaël Salaün spin_lock(&inode->i_lock); 947cb2c7d1aSMickaël Salaün /* 948cb2c7d1aSMickaël Salaün * Checks I_FREEING and I_WILL_FREE to protect against a race 949cb2c7d1aSMickaël Salaün * condition when release_inode() just called iput(), which 950cb2c7d1aSMickaël Salaün * could lead to a NULL dereference of inode->security or a 951cb2c7d1aSMickaël Salaün * second call to iput() for the same Landlock object. Also 952cb2c7d1aSMickaël Salaün * checks I_NEW because such inode cannot be tied to an object. 
953cb2c7d1aSMickaël Salaün */ 954cb2c7d1aSMickaël Salaün if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) { 955cb2c7d1aSMickaël Salaün spin_unlock(&inode->i_lock); 956cb2c7d1aSMickaël Salaün continue; 957cb2c7d1aSMickaël Salaün } 958cb2c7d1aSMickaël Salaün 959cb2c7d1aSMickaël Salaün rcu_read_lock(); 960cb2c7d1aSMickaël Salaün object = rcu_dereference(landlock_inode(inode)->object); 961cb2c7d1aSMickaël Salaün if (!object) { 962cb2c7d1aSMickaël Salaün rcu_read_unlock(); 963cb2c7d1aSMickaël Salaün spin_unlock(&inode->i_lock); 964cb2c7d1aSMickaël Salaün continue; 965cb2c7d1aSMickaël Salaün } 966cb2c7d1aSMickaël Salaün /* Keeps a reference to this inode until the next loop walk. */ 967cb2c7d1aSMickaël Salaün __iget(inode); 968cb2c7d1aSMickaël Salaün spin_unlock(&inode->i_lock); 969cb2c7d1aSMickaël Salaün 970cb2c7d1aSMickaël Salaün /* 971cb2c7d1aSMickaël Salaün * If there is no concurrent release_inode() ongoing, then we 972cb2c7d1aSMickaël Salaün * are in charge of calling iput() on this inode, otherwise we 973cb2c7d1aSMickaël Salaün * will just wait for it to finish. 974cb2c7d1aSMickaël Salaün */ 975cb2c7d1aSMickaël Salaün spin_lock(&object->lock); 976cb2c7d1aSMickaël Salaün if (object->underobj == inode) { 977cb2c7d1aSMickaël Salaün object->underobj = NULL; 978cb2c7d1aSMickaël Salaün spin_unlock(&object->lock); 979cb2c7d1aSMickaël Salaün rcu_read_unlock(); 980cb2c7d1aSMickaël Salaün 981cb2c7d1aSMickaël Salaün /* 982cb2c7d1aSMickaël Salaün * Because object->underobj was not NULL, 983cb2c7d1aSMickaël Salaün * release_inode() and get_inode_object() guarantee 984cb2c7d1aSMickaël Salaün * that it is safe to reset 985cb2c7d1aSMickaël Salaün * landlock_inode(inode)->object while it is not NULL. 986cb2c7d1aSMickaël Salaün * It is therefore not necessary to lock inode->i_lock. 
987cb2c7d1aSMickaël Salaün */ 988cb2c7d1aSMickaël Salaün rcu_assign_pointer(landlock_inode(inode)->object, NULL); 989cb2c7d1aSMickaël Salaün /* 990cb2c7d1aSMickaël Salaün * At this point, we own the ihold() reference that was 991cb2c7d1aSMickaël Salaün * originally set up by get_inode_object() and the 992cb2c7d1aSMickaël Salaün * __iget() reference that we just set in this loop 993cb2c7d1aSMickaël Salaün * walk. Therefore the following call to iput() will 994cb2c7d1aSMickaël Salaün * not sleep nor drop the inode because there is now at 995cb2c7d1aSMickaël Salaün * least two references to it. 996cb2c7d1aSMickaël Salaün */ 997cb2c7d1aSMickaël Salaün iput(inode); 998cb2c7d1aSMickaël Salaün } else { 999cb2c7d1aSMickaël Salaün spin_unlock(&object->lock); 1000cb2c7d1aSMickaël Salaün rcu_read_unlock(); 1001cb2c7d1aSMickaël Salaün } 1002cb2c7d1aSMickaël Salaün 1003cb2c7d1aSMickaël Salaün if (prev_inode) { 1004cb2c7d1aSMickaël Salaün /* 1005cb2c7d1aSMickaël Salaün * At this point, we still own the __iget() reference 1006cb2c7d1aSMickaël Salaün * that we just set in this loop walk. Therefore we 1007cb2c7d1aSMickaël Salaün * can drop the list lock and know that the inode won't 1008cb2c7d1aSMickaël Salaün * disappear from under us until the next loop walk. 1009cb2c7d1aSMickaël Salaün */ 1010cb2c7d1aSMickaël Salaün spin_unlock(&sb->s_inode_list_lock); 1011cb2c7d1aSMickaël Salaün /* 1012cb2c7d1aSMickaël Salaün * We can now actually put the inode reference from the 1013cb2c7d1aSMickaël Salaün * previous loop walk, which is not needed anymore. 
1014cb2c7d1aSMickaël Salaün */ 1015cb2c7d1aSMickaël Salaün iput(prev_inode); 1016cb2c7d1aSMickaël Salaün cond_resched(); 1017cb2c7d1aSMickaël Salaün spin_lock(&sb->s_inode_list_lock); 1018cb2c7d1aSMickaël Salaün } 1019cb2c7d1aSMickaël Salaün prev_inode = inode; 1020cb2c7d1aSMickaël Salaün } 1021cb2c7d1aSMickaël Salaün spin_unlock(&sb->s_inode_list_lock); 1022cb2c7d1aSMickaël Salaün 1023cb2c7d1aSMickaël Salaün /* Puts the inode reference from the last loop walk, if any. */ 1024cb2c7d1aSMickaël Salaün if (prev_inode) 1025cb2c7d1aSMickaël Salaün iput(prev_inode); 1026cb2c7d1aSMickaël Salaün /* Waits for pending iput() in release_inode(). */ 102706a1c40aSMickaël Salaün wait_var_event(&landlock_superblock(sb)->inode_refs, 102806a1c40aSMickaël Salaün !atomic_long_read(&landlock_superblock(sb)->inode_refs)); 1029cb2c7d1aSMickaël Salaün } 1030cb2c7d1aSMickaël Salaün 1031cb2c7d1aSMickaël Salaün /* 1032cb2c7d1aSMickaël Salaün * Because a Landlock security policy is defined according to the filesystem 1033cb2c7d1aSMickaël Salaün * topology (i.e. the mount namespace), changing it may grant access to files 1034cb2c7d1aSMickaël Salaün * not previously allowed. 1035cb2c7d1aSMickaël Salaün * 1036cb2c7d1aSMickaël Salaün * To make it simple, deny any filesystem topology modification by landlocked 1037cb2c7d1aSMickaël Salaün * processes. Non-landlocked processes may still change the namespace of a 1038cb2c7d1aSMickaël Salaün * landlocked process, but this kind of threat must be handled by a system-wide 1039cb2c7d1aSMickaël Salaün * access-control security policy. 1040cb2c7d1aSMickaël Salaün * 1041cb2c7d1aSMickaël Salaün * This could be lifted in the future if Landlock can safely handle mount 1042cb2c7d1aSMickaël Salaün * namespace updates requested by a landlocked process. 
Indeed, we could 1043cb2c7d1aSMickaël Salaün * update the current domain (which is currently read-only) by taking into 1044cb2c7d1aSMickaël Salaün * account the accesses of the source and the destination of a new mount point. 1045cb2c7d1aSMickaël Salaün * However, it would also require to make all the child domains dynamically 1046cb2c7d1aSMickaël Salaün * inherit these new constraints. Anyway, for backward compatibility reasons, 1047cb2c7d1aSMickaël Salaün * a dedicated user space option would be required (e.g. as a ruleset flag). 1048cb2c7d1aSMickaël Salaün */ 1049cb2c7d1aSMickaël Salaün static int hook_sb_mount(const char *const dev_name, 1050cb2c7d1aSMickaël Salaün const struct path *const path, const char *const type, 1051cb2c7d1aSMickaël Salaün const unsigned long flags, void *const data) 1052cb2c7d1aSMickaël Salaün { 1053cb2c7d1aSMickaël Salaün if (!landlock_get_current_domain()) 1054cb2c7d1aSMickaël Salaün return 0; 1055cb2c7d1aSMickaël Salaün return -EPERM; 1056cb2c7d1aSMickaël Salaün } 1057cb2c7d1aSMickaël Salaün 1058cb2c7d1aSMickaël Salaün static int hook_move_mount(const struct path *const from_path, 1059cb2c7d1aSMickaël Salaün const struct path *const to_path) 1060cb2c7d1aSMickaël Salaün { 1061cb2c7d1aSMickaël Salaün if (!landlock_get_current_domain()) 1062cb2c7d1aSMickaël Salaün return 0; 1063cb2c7d1aSMickaël Salaün return -EPERM; 1064cb2c7d1aSMickaël Salaün } 1065cb2c7d1aSMickaël Salaün 1066cb2c7d1aSMickaël Salaün /* 1067cb2c7d1aSMickaël Salaün * Removing a mount point may reveal a previously hidden file hierarchy, which 1068cb2c7d1aSMickaël Salaün * may then grant access to files, which may have previously been forbidden. 
1069cb2c7d1aSMickaël Salaün */ 1070cb2c7d1aSMickaël Salaün static int hook_sb_umount(struct vfsmount *const mnt, const int flags) 1071cb2c7d1aSMickaël Salaün { 1072cb2c7d1aSMickaël Salaün if (!landlock_get_current_domain()) 1073cb2c7d1aSMickaël Salaün return 0; 1074cb2c7d1aSMickaël Salaün return -EPERM; 1075cb2c7d1aSMickaël Salaün } 1076cb2c7d1aSMickaël Salaün 1077cb2c7d1aSMickaël Salaün static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) 1078cb2c7d1aSMickaël Salaün { 1079cb2c7d1aSMickaël Salaün if (!landlock_get_current_domain()) 1080cb2c7d1aSMickaël Salaün return 0; 1081cb2c7d1aSMickaël Salaün return -EPERM; 1082cb2c7d1aSMickaël Salaün } 1083cb2c7d1aSMickaël Salaün 1084cb2c7d1aSMickaël Salaün /* 1085cb2c7d1aSMickaël Salaün * pivot_root(2), like mount(2), changes the current mount namespace. It must 1086cb2c7d1aSMickaël Salaün * then be forbidden for a landlocked process. 1087cb2c7d1aSMickaël Salaün * 1088cb2c7d1aSMickaël Salaün * However, chroot(2) may be allowed because it only changes the relative root 1089cb2c7d1aSMickaël Salaün * directory of the current process. Moreover, it can be used to restrict the 1090cb2c7d1aSMickaël Salaün * view of the filesystem. 
1091cb2c7d1aSMickaël Salaün */ 1092cb2c7d1aSMickaël Salaün static int hook_sb_pivotroot(const struct path *const old_path, 1093cb2c7d1aSMickaël Salaün const struct path *const new_path) 1094cb2c7d1aSMickaël Salaün { 1095cb2c7d1aSMickaël Salaün if (!landlock_get_current_domain()) 1096cb2c7d1aSMickaël Salaün return 0; 1097cb2c7d1aSMickaël Salaün return -EPERM; 1098cb2c7d1aSMickaël Salaün } 1099cb2c7d1aSMickaël Salaün 1100cb2c7d1aSMickaël Salaün /* Path hooks */ 1101cb2c7d1aSMickaël Salaün 1102cb2c7d1aSMickaël Salaün static int hook_path_link(struct dentry *const old_dentry, 1103cb2c7d1aSMickaël Salaün const struct path *const new_dir, 1104cb2c7d1aSMickaël Salaün struct dentry *const new_dentry) 1105cb2c7d1aSMickaël Salaün { 1106b91c3e4eSMickaël Salaün return current_check_refer_path(old_dentry, new_dir, new_dentry, false, 1107b91c3e4eSMickaël Salaün false); 1108cb2c7d1aSMickaël Salaün } 1109cb2c7d1aSMickaël Salaün 1110cb2c7d1aSMickaël Salaün static int hook_path_rename(const struct path *const old_dir, 1111cb2c7d1aSMickaël Salaün struct dentry *const old_dentry, 1112cb2c7d1aSMickaël Salaün const struct path *const new_dir, 1113100f59d9SMickaël Salaün struct dentry *const new_dentry, 1114100f59d9SMickaël Salaün const unsigned int flags) 1115cb2c7d1aSMickaël Salaün { 1116b91c3e4eSMickaël Salaün /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ 1117b91c3e4eSMickaël Salaün return current_check_refer_path(old_dentry, new_dir, new_dentry, true, 1118b91c3e4eSMickaël Salaün !!(flags & RENAME_EXCHANGE)); 1119cb2c7d1aSMickaël Salaün } 1120cb2c7d1aSMickaël Salaün 1121cb2c7d1aSMickaël Salaün static int hook_path_mkdir(const struct path *const dir, 1122cb2c7d1aSMickaël Salaün struct dentry *const dentry, const umode_t mode) 1123cb2c7d1aSMickaël Salaün { 1124cb2c7d1aSMickaël Salaün return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); 1125cb2c7d1aSMickaël Salaün } 1126cb2c7d1aSMickaël Salaün 1127cb2c7d1aSMickaël Salaün static int hook_path_mknod(const 
struct path *const dir, 1128cb2c7d1aSMickaël Salaün struct dentry *const dentry, const umode_t mode, 1129cb2c7d1aSMickaël Salaün const unsigned int dev) 1130cb2c7d1aSMickaël Salaün { 1131cb2c7d1aSMickaël Salaün const struct landlock_ruleset *const dom = 1132cb2c7d1aSMickaël Salaün landlock_get_current_domain(); 1133cb2c7d1aSMickaël Salaün 1134cb2c7d1aSMickaël Salaün if (!dom) 1135cb2c7d1aSMickaël Salaün return 0; 1136cb2c7d1aSMickaël Salaün return check_access_path(dom, dir, get_mode_access(mode)); 1137cb2c7d1aSMickaël Salaün } 1138cb2c7d1aSMickaël Salaün 1139cb2c7d1aSMickaël Salaün static int hook_path_symlink(const struct path *const dir, 114006a1c40aSMickaël Salaün struct dentry *const dentry, 114106a1c40aSMickaël Salaün const char *const old_name) 1142cb2c7d1aSMickaël Salaün { 1143cb2c7d1aSMickaël Salaün return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); 1144cb2c7d1aSMickaël Salaün } 1145cb2c7d1aSMickaël Salaün 1146cb2c7d1aSMickaël Salaün static int hook_path_unlink(const struct path *const dir, 1147cb2c7d1aSMickaël Salaün struct dentry *const dentry) 1148cb2c7d1aSMickaël Salaün { 1149cb2c7d1aSMickaël Salaün return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); 1150cb2c7d1aSMickaël Salaün } 1151cb2c7d1aSMickaël Salaün 1152cb2c7d1aSMickaël Salaün static int hook_path_rmdir(const struct path *const dir, 1153cb2c7d1aSMickaël Salaün struct dentry *const dentry) 1154cb2c7d1aSMickaël Salaün { 1155cb2c7d1aSMickaël Salaün return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); 1156cb2c7d1aSMickaël Salaün } 1157cb2c7d1aSMickaël Salaün 1158b9f5ce27SGünther Noack static int hook_path_truncate(const struct path *const path) 1159b9f5ce27SGünther Noack { 1160b9f5ce27SGünther Noack return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); 1161b9f5ce27SGünther Noack } 1162b9f5ce27SGünther Noack 1163cb2c7d1aSMickaël Salaün /* File hooks */ 1164cb2c7d1aSMickaël Salaün 1165b9f5ce27SGünther Noack /** 1166b9f5ce27SGünther 
Noack * get_required_file_open_access - Get access needed to open a file 1167b9f5ce27SGünther Noack * 1168b9f5ce27SGünther Noack * @file: File being opened. 1169b9f5ce27SGünther Noack * 1170b9f5ce27SGünther Noack * Returns the access rights that are required for opening the given file, 1171b9f5ce27SGünther Noack * depending on the file type and open mode. 1172b9f5ce27SGünther Noack */ 1173b9f5ce27SGünther Noack static inline access_mask_t 1174b9f5ce27SGünther Noack get_required_file_open_access(const struct file *const file) 1175cb2c7d1aSMickaël Salaün { 11765f2ff33eSMickaël Salaün access_mask_t access = 0; 1177cb2c7d1aSMickaël Salaün 1178cb2c7d1aSMickaël Salaün if (file->f_mode & FMODE_READ) { 1179cb2c7d1aSMickaël Salaün /* A directory can only be opened in read mode. */ 1180cb2c7d1aSMickaël Salaün if (S_ISDIR(file_inode(file)->i_mode)) 1181cb2c7d1aSMickaël Salaün return LANDLOCK_ACCESS_FS_READ_DIR; 1182cb2c7d1aSMickaël Salaün access = LANDLOCK_ACCESS_FS_READ_FILE; 1183cb2c7d1aSMickaël Salaün } 1184cb2c7d1aSMickaël Salaün if (file->f_mode & FMODE_WRITE) 1185cb2c7d1aSMickaël Salaün access |= LANDLOCK_ACCESS_FS_WRITE_FILE; 1186cb2c7d1aSMickaël Salaün /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ 1187cb2c7d1aSMickaël Salaün if (file->f_flags & __FMODE_EXEC) 1188cb2c7d1aSMickaël Salaün access |= LANDLOCK_ACCESS_FS_EXECUTE; 1189cb2c7d1aSMickaël Salaün return access; 1190cb2c7d1aSMickaël Salaün } 1191cb2c7d1aSMickaël Salaün 1192b9f5ce27SGünther Noack static int hook_file_alloc_security(struct file *const file) 1193b9f5ce27SGünther Noack { 1194b9f5ce27SGünther Noack /* 1195b9f5ce27SGünther Noack * Grants all access rights, even if most of them are not checked later 1196b9f5ce27SGünther Noack * on. It is more consistent. 
1197b9f5ce27SGünther Noack * 1198b9f5ce27SGünther Noack * Notably, file descriptors for regular files can also be acquired 1199b9f5ce27SGünther Noack * without going through the file_open hook, for example when using 1200b9f5ce27SGünther Noack * memfd_create(2). 1201b9f5ce27SGünther Noack */ 1202b9f5ce27SGünther Noack landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; 1203b9f5ce27SGünther Noack return 0; 1204b9f5ce27SGünther Noack } 1205b9f5ce27SGünther Noack 1206cb2c7d1aSMickaël Salaün static int hook_file_open(struct file *const file) 1207cb2c7d1aSMickaël Salaün { 1208b9f5ce27SGünther Noack layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; 1209b9f5ce27SGünther Noack access_mask_t open_access_request, full_access_request, allowed_access; 1210b9f5ce27SGünther Noack const access_mask_t optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; 1211cb2c7d1aSMickaël Salaün const struct landlock_ruleset *const dom = 1212cb2c7d1aSMickaël Salaün landlock_get_current_domain(); 1213cb2c7d1aSMickaël Salaün 1214cb2c7d1aSMickaël Salaün if (!dom) 1215cb2c7d1aSMickaël Salaün return 0; 1216b9f5ce27SGünther Noack 1217cb2c7d1aSMickaël Salaün /* 1218b9f5ce27SGünther Noack * Because a file may be opened with O_PATH, get_required_file_open_access() 1219b9f5ce27SGünther Noack * may return 0. This case will be handled with a future Landlock 1220cb2c7d1aSMickaël Salaün * evolution. 1221cb2c7d1aSMickaël Salaün */ 1222b9f5ce27SGünther Noack open_access_request = get_required_file_open_access(file); 1223b9f5ce27SGünther Noack 1224b9f5ce27SGünther Noack /* 1225b9f5ce27SGünther Noack * We look up more access than what we immediately need for open(), so 1226b9f5ce27SGünther Noack * that we can later authorize operations on opened files. 
1227b9f5ce27SGünther Noack */ 1228b9f5ce27SGünther Noack full_access_request = open_access_request | optional_access; 1229b9f5ce27SGünther Noack 1230b9f5ce27SGünther Noack if (is_access_to_paths_allowed( 1231b9f5ce27SGünther Noack dom, &file->f_path, 1232b9f5ce27SGünther Noack init_layer_masks(dom, full_access_request, &layer_masks), 1233b9f5ce27SGünther Noack &layer_masks, NULL, 0, NULL, NULL)) { 1234b9f5ce27SGünther Noack allowed_access = full_access_request; 1235b9f5ce27SGünther Noack } else { 1236b9f5ce27SGünther Noack unsigned long access_bit; 1237b9f5ce27SGünther Noack const unsigned long access_req = full_access_request; 1238b9f5ce27SGünther Noack 1239b9f5ce27SGünther Noack /* 1240b9f5ce27SGünther Noack * Calculate the actual allowed access rights from layer_masks. 1241b9f5ce27SGünther Noack * Add each access right to allowed_access which has not been 1242b9f5ce27SGünther Noack * vetoed by any layer. 1243b9f5ce27SGünther Noack */ 1244b9f5ce27SGünther Noack allowed_access = 0; 1245b9f5ce27SGünther Noack for_each_set_bit(access_bit, &access_req, 1246b9f5ce27SGünther Noack ARRAY_SIZE(layer_masks)) { 1247b9f5ce27SGünther Noack if (!layer_masks[access_bit]) 1248b9f5ce27SGünther Noack allowed_access |= BIT_ULL(access_bit); 1249b9f5ce27SGünther Noack } 1250b9f5ce27SGünther Noack } 1251b9f5ce27SGünther Noack 1252b9f5ce27SGünther Noack /* 1253b9f5ce27SGünther Noack * For operations on already opened files (i.e. ftruncate()), it is the 1254b9f5ce27SGünther Noack * access rights at the time of open() which decide whether the 1255b9f5ce27SGünther Noack * operation is permitted. Therefore, we record the relevant subset of 1256b9f5ce27SGünther Noack * file access rights in the opened struct file. 
1257b9f5ce27SGünther Noack */ 1258b9f5ce27SGünther Noack landlock_file(file)->allowed_access = allowed_access; 1259b9f5ce27SGünther Noack 1260b9f5ce27SGünther Noack if ((open_access_request & allowed_access) == open_access_request) 1261b9f5ce27SGünther Noack return 0; 1262b9f5ce27SGünther Noack 1263b9f5ce27SGünther Noack return -EACCES; 1264b9f5ce27SGünther Noack } 1265b9f5ce27SGünther Noack 1266b9f5ce27SGünther Noack static int hook_file_truncate(struct file *const file) 1267b9f5ce27SGünther Noack { 1268b9f5ce27SGünther Noack /* 1269b9f5ce27SGünther Noack * Allows truncation if the truncate right was available at the time of 1270b9f5ce27SGünther Noack * opening the file, to get a consistent access check as for read, write 1271b9f5ce27SGünther Noack * and execute operations. 1272b9f5ce27SGünther Noack * 1273b9f5ce27SGünther Noack * Note: For checks done based on the file's Landlock allowed access, we 1274b9f5ce27SGünther Noack * enforce them independently of whether the current thread is in a 1275b9f5ce27SGünther Noack * Landlock domain, so that open files passed between independent 1276b9f5ce27SGünther Noack * processes retain their behaviour. 
1277b9f5ce27SGünther Noack */ 1278b9f5ce27SGünther Noack if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) 1279b9f5ce27SGünther Noack return 0; 1280b9f5ce27SGünther Noack return -EACCES; 1281cb2c7d1aSMickaël Salaün } 1282cb2c7d1aSMickaël Salaün 1283*f22f9aafSPaul Moore static struct security_hook_list landlock_hooks[] __ro_after_init = { 1284cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(inode_free_security, hook_inode_free_security), 1285cb2c7d1aSMickaël Salaün 1286cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(sb_delete, hook_sb_delete), 1287cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(sb_mount, hook_sb_mount), 1288cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(move_mount, hook_move_mount), 1289cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(sb_umount, hook_sb_umount), 1290cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(sb_remount, hook_sb_remount), 1291cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), 1292cb2c7d1aSMickaël Salaün 1293cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_link, hook_path_link), 1294cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_rename, hook_path_rename), 1295cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), 1296cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_mknod, hook_path_mknod), 1297cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_symlink, hook_path_symlink), 1298cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_unlink, hook_path_unlink), 1299cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), 1300b9f5ce27SGünther Noack LSM_HOOK_INIT(path_truncate, hook_path_truncate), 1301cb2c7d1aSMickaël Salaün 1302b9f5ce27SGünther Noack LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), 1303cb2c7d1aSMickaël Salaün LSM_HOOK_INIT(file_open, hook_file_open), 1304b9f5ce27SGünther Noack LSM_HOOK_INIT(file_truncate, hook_file_truncate), 1305cb2c7d1aSMickaël Salaün }; 1306cb2c7d1aSMickaël Salaün 1307cb2c7d1aSMickaël Salaün __init void landlock_add_fs_hooks(void) 1308cb2c7d1aSMickaël Salaün { 1309cb2c7d1aSMickaël Salaün 
security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), 1310cb2c7d1aSMickaël Salaün LANDLOCK_NAME); 1311cb2c7d1aSMickaël Salaün } 1312