xref: /linux/mm/secretmem.c (revision 98001fd63d59d2f99c90db823d322de91ff7d771)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2021
 *
 * Author: Mike Rapoport <rppt@linux.ibm.com>
 */
71507f512SMike Rapoport 
81507f512SMike Rapoport #include <linux/mm.h>
91507f512SMike Rapoport #include <linux/fs.h>
101507f512SMike Rapoport #include <linux/swap.h>
111507f512SMike Rapoport #include <linux/mount.h>
121507f512SMike Rapoport #include <linux/memfd.h>
131507f512SMike Rapoport #include <linux/bitops.h>
141507f512SMike Rapoport #include <linux/printk.h>
151507f512SMike Rapoport #include <linux/pagemap.h>
161507f512SMike Rapoport #include <linux/syscalls.h>
171507f512SMike Rapoport #include <linux/pseudo_fs.h>
181507f512SMike Rapoport #include <linux/secretmem.h>
191507f512SMike Rapoport #include <linux/set_memory.h>
201507f512SMike Rapoport #include <linux/sched/signal.h>
211507f512SMike Rapoport 
221507f512SMike Rapoport #include <uapi/linux/magic.h>
231507f512SMike Rapoport 
241507f512SMike Rapoport #include <asm/tlbflush.h>
251507f512SMike Rapoport 
261507f512SMike Rapoport #include "internal.h"
271507f512SMike Rapoport 
/* Prefix every pr_*() message emitted from this file with "secretmem: ". */
#undef pr_fmt
#define pr_fmt(fmt) "secretmem: " fmt
301507f512SMike Rapoport 
/*
 * Masks used by memfd_secret() to validate its flags argument.
 *
 * No secretmem-specific mode bits are defined, so both masks are empty;
 * the system call additionally accepts O_CLOEXEC on top of
 * SECRETMEM_FLAGS_MASK.
 */
#define SECRETMEM_MODE_MASK	(0x0)
#define SECRETMEM_FLAGS_MASK	SECRETMEM_MODE_MASK
371507f512SMike Rapoport 
381507f512SMike Rapoport static bool secretmem_enable __ro_after_init;
391507f512SMike Rapoport module_param_named(enable, secretmem_enable, bool, 0400);
401507f512SMike Rapoport MODULE_PARM_DESC(secretmem_enable,
411507f512SMike Rapoport 		 "Enable secretmem and memfd_secret(2) system call");
421507f512SMike Rapoport 
4387066fddSLinus Torvalds static atomic_t secretmem_users;
449a436f8fSMike Rapoport 
459a436f8fSMike Rapoport bool secretmem_active(void)
469a436f8fSMike Rapoport {
4787066fddSLinus Torvalds 	return !!atomic_read(&secretmem_users);
489a436f8fSMike Rapoport }
499a436f8fSMike Rapoport 
501507f512SMike Rapoport static vm_fault_t secretmem_fault(struct vm_fault *vmf)
511507f512SMike Rapoport {
521507f512SMike Rapoport 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
531507f512SMike Rapoport 	struct inode *inode = file_inode(vmf->vma->vm_file);
541507f512SMike Rapoport 	pgoff_t offset = vmf->pgoff;
551507f512SMike Rapoport 	gfp_t gfp = vmf->gfp_mask;
561507f512SMike Rapoport 	unsigned long addr;
571507f512SMike Rapoport 	struct page *page;
5884ac0130SMike Rapoport 	vm_fault_t ret;
591507f512SMike Rapoport 	int err;
601507f512SMike Rapoport 
611507f512SMike Rapoport 	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
621507f512SMike Rapoport 		return vmf_error(-EINVAL);
631507f512SMike Rapoport 
6484ac0130SMike Rapoport 	filemap_invalidate_lock_shared(mapping);
6584ac0130SMike Rapoport 
661507f512SMike Rapoport retry:
671507f512SMike Rapoport 	page = find_lock_page(mapping, offset);
681507f512SMike Rapoport 	if (!page) {
691507f512SMike Rapoport 		page = alloc_page(gfp | __GFP_ZERO);
7084ac0130SMike Rapoport 		if (!page) {
7184ac0130SMike Rapoport 			ret = VM_FAULT_OOM;
7284ac0130SMike Rapoport 			goto out;
7384ac0130SMike Rapoport 		}
741507f512SMike Rapoport 
751507f512SMike Rapoport 		err = set_direct_map_invalid_noflush(page);
761507f512SMike Rapoport 		if (err) {
771507f512SMike Rapoport 			put_page(page);
7884ac0130SMike Rapoport 			ret = vmf_error(err);
7984ac0130SMike Rapoport 			goto out;
801507f512SMike Rapoport 		}
811507f512SMike Rapoport 
821507f512SMike Rapoport 		__SetPageUptodate(page);
831507f512SMike Rapoport 		err = add_to_page_cache_lru(page, mapping, offset, gfp);
841507f512SMike Rapoport 		if (unlikely(err)) {
851507f512SMike Rapoport 			put_page(page);
861507f512SMike Rapoport 			/*
871507f512SMike Rapoport 			 * If a split of large page was required, it
881507f512SMike Rapoport 			 * already happened when we marked the page invalid
891507f512SMike Rapoport 			 * which guarantees that this call won't fail
901507f512SMike Rapoport 			 */
911507f512SMike Rapoport 			set_direct_map_default_noflush(page);
921507f512SMike Rapoport 			if (err == -EEXIST)
931507f512SMike Rapoport 				goto retry;
941507f512SMike Rapoport 
9584ac0130SMike Rapoport 			ret = vmf_error(err);
9684ac0130SMike Rapoport 			goto out;
971507f512SMike Rapoport 		}
981507f512SMike Rapoport 
991507f512SMike Rapoport 		addr = (unsigned long)page_address(page);
1001507f512SMike Rapoport 		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1011507f512SMike Rapoport 	}
1021507f512SMike Rapoport 
1031507f512SMike Rapoport 	vmf->page = page;
10484ac0130SMike Rapoport 	ret = VM_FAULT_LOCKED;
10584ac0130SMike Rapoport 
10684ac0130SMike Rapoport out:
10784ac0130SMike Rapoport 	filemap_invalidate_unlock_shared(mapping);
10884ac0130SMike Rapoport 	return ret;
1091507f512SMike Rapoport }
1101507f512SMike Rapoport 
1111507f512SMike Rapoport static const struct vm_operations_struct secretmem_vm_ops = {
1121507f512SMike Rapoport 	.fault = secretmem_fault,
1131507f512SMike Rapoport };
1141507f512SMike Rapoport 
1159a436f8fSMike Rapoport static int secretmem_release(struct inode *inode, struct file *file)
1169a436f8fSMike Rapoport {
11787066fddSLinus Torvalds 	atomic_dec(&secretmem_users);
1189a436f8fSMike Rapoport 	return 0;
1199a436f8fSMike Rapoport }
1209a436f8fSMike Rapoport 
1211507f512SMike Rapoport static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
1221507f512SMike Rapoport {
1231507f512SMike Rapoport 	unsigned long len = vma->vm_end - vma->vm_start;
1241507f512SMike Rapoport 
1251507f512SMike Rapoport 	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
1261507f512SMike Rapoport 		return -EINVAL;
1271507f512SMike Rapoport 
1281507f512SMike Rapoport 	if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
1291507f512SMike Rapoport 		return -EAGAIN;
1301507f512SMike Rapoport 
1311507f512SMike Rapoport 	vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
1321507f512SMike Rapoport 	vma->vm_ops = &secretmem_vm_ops;
1331507f512SMike Rapoport 
1341507f512SMike Rapoport 	return 0;
1351507f512SMike Rapoport }
1361507f512SMike Rapoport 
1371507f512SMike Rapoport bool vma_is_secretmem(struct vm_area_struct *vma)
1381507f512SMike Rapoport {
1391507f512SMike Rapoport 	return vma->vm_ops == &secretmem_vm_ops;
1401507f512SMike Rapoport }
1411507f512SMike Rapoport 
1421507f512SMike Rapoport static const struct file_operations secretmem_fops = {
1439a436f8fSMike Rapoport 	.release	= secretmem_release,
1441507f512SMike Rapoport 	.mmap		= secretmem_mmap,
1451507f512SMike Rapoport };
1461507f512SMike Rapoport 
1475409548dSMatthew Wilcox (Oracle) static int secretmem_migrate_folio(struct address_space *mapping,
1485409548dSMatthew Wilcox (Oracle) 		struct folio *dst, struct folio *src, enum migrate_mode mode)
1491507f512SMike Rapoport {
1501507f512SMike Rapoport 	return -EBUSY;
1511507f512SMike Rapoport }
1521507f512SMike Rapoport 
1536612ed24SMatthew Wilcox (Oracle) static void secretmem_free_folio(struct folio *folio)
1541507f512SMike Rapoport {
1556612ed24SMatthew Wilcox (Oracle) 	set_direct_map_default_noflush(&folio->page);
1566612ed24SMatthew Wilcox (Oracle) 	folio_zero_segment(folio, 0, folio_size(folio));
1571507f512SMike Rapoport }
1581507f512SMike Rapoport 
1591507f512SMike Rapoport const struct address_space_operations secretmem_aops = {
16046de8b97SMatthew Wilcox (Oracle) 	.dirty_folio	= noop_dirty_folio,
1616612ed24SMatthew Wilcox (Oracle) 	.free_folio	= secretmem_free_folio,
1625409548dSMatthew Wilcox (Oracle) 	.migrate_folio	= secretmem_migrate_folio,
1631507f512SMike Rapoport };
1641507f512SMike Rapoport 
165f9b141f9SAxel Rasmussen static int secretmem_setattr(struct user_namespace *mnt_userns,
166f9b141f9SAxel Rasmussen 			     struct dentry *dentry, struct iattr *iattr)
167f9b141f9SAxel Rasmussen {
168f9b141f9SAxel Rasmussen 	struct inode *inode = d_inode(dentry);
16984ac0130SMike Rapoport 	struct address_space *mapping = inode->i_mapping;
170f9b141f9SAxel Rasmussen 	unsigned int ia_valid = iattr->ia_valid;
17184ac0130SMike Rapoport 	int ret;
17284ac0130SMike Rapoport 
17384ac0130SMike Rapoport 	filemap_invalidate_lock(mapping);
174f9b141f9SAxel Rasmussen 
175f9b141f9SAxel Rasmussen 	if ((ia_valid & ATTR_SIZE) && inode->i_size)
17684ac0130SMike Rapoport 		ret = -EINVAL;
17784ac0130SMike Rapoport 	else
17884ac0130SMike Rapoport 		ret = simple_setattr(mnt_userns, dentry, iattr);
179f9b141f9SAxel Rasmussen 
18084ac0130SMike Rapoport 	filemap_invalidate_unlock(mapping);
18184ac0130SMike Rapoport 
18284ac0130SMike Rapoport 	return ret;
183f9b141f9SAxel Rasmussen }
184f9b141f9SAxel Rasmussen 
185f9b141f9SAxel Rasmussen static const struct inode_operations secretmem_iops = {
186f9b141f9SAxel Rasmussen 	.setattr = secretmem_setattr,
187f9b141f9SAxel Rasmussen };
188f9b141f9SAxel Rasmussen 
1891507f512SMike Rapoport static struct vfsmount *secretmem_mnt;
1901507f512SMike Rapoport 
1911507f512SMike Rapoport static struct file *secretmem_file_create(unsigned long flags)
1921507f512SMike Rapoport {
193*98001fd6SColin Ian King 	struct file *file;
1941507f512SMike Rapoport 	struct inode *inode;
1952bfe15c5SChristian Göttsche 	const char *anon_name = "[secretmem]";
1962bfe15c5SChristian Göttsche 	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
1972bfe15c5SChristian Göttsche 	int err;
1981507f512SMike Rapoport 
1991507f512SMike Rapoport 	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
2001507f512SMike Rapoport 	if (IS_ERR(inode))
2011507f512SMike Rapoport 		return ERR_CAST(inode);
2021507f512SMike Rapoport 
2032bfe15c5SChristian Göttsche 	err = security_inode_init_security_anon(inode, &qname, NULL);
2042bfe15c5SChristian Göttsche 	if (err) {
2052bfe15c5SChristian Göttsche 		file = ERR_PTR(err);
2062bfe15c5SChristian Göttsche 		goto err_free_inode;
2072bfe15c5SChristian Göttsche 	}
2082bfe15c5SChristian Göttsche 
2091507f512SMike Rapoport 	file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
2101507f512SMike Rapoport 				 O_RDWR, &secretmem_fops);
2111507f512SMike Rapoport 	if (IS_ERR(file))
2121507f512SMike Rapoport 		goto err_free_inode;
2131507f512SMike Rapoport 
2141507f512SMike Rapoport 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
2151507f512SMike Rapoport 	mapping_set_unevictable(inode->i_mapping);
2161507f512SMike Rapoport 
217f9b141f9SAxel Rasmussen 	inode->i_op = &secretmem_iops;
2181507f512SMike Rapoport 	inode->i_mapping->a_ops = &secretmem_aops;
2191507f512SMike Rapoport 
2201507f512SMike Rapoport 	/* pretend we are a normal file with zero size */
2211507f512SMike Rapoport 	inode->i_mode |= S_IFREG;
2221507f512SMike Rapoport 	inode->i_size = 0;
2231507f512SMike Rapoport 
2241507f512SMike Rapoport 	return file;
2251507f512SMike Rapoport 
2261507f512SMike Rapoport err_free_inode:
2271507f512SMike Rapoport 	iput(inode);
2281507f512SMike Rapoport 	return file;
2291507f512SMike Rapoport }
2301507f512SMike Rapoport 
2311507f512SMike Rapoport SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
2321507f512SMike Rapoport {
2331507f512SMike Rapoport 	struct file *file;
2341507f512SMike Rapoport 	int fd, err;
2351507f512SMike Rapoport 
2361507f512SMike Rapoport 	/* make sure local flags do not confict with global fcntl.h */
2371507f512SMike Rapoport 	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
2381507f512SMike Rapoport 
2391507f512SMike Rapoport 	if (!secretmem_enable)
2401507f512SMike Rapoport 		return -ENOSYS;
2411507f512SMike Rapoport 
2421507f512SMike Rapoport 	if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
2431507f512SMike Rapoport 		return -EINVAL;
244cb685432SMatthew Wilcox (Oracle) 	if (atomic_read(&secretmem_users) < 0)
245cb685432SMatthew Wilcox (Oracle) 		return -ENFILE;
2461507f512SMike Rapoport 
2471507f512SMike Rapoport 	fd = get_unused_fd_flags(flags & O_CLOEXEC);
2481507f512SMike Rapoport 	if (fd < 0)
2491507f512SMike Rapoport 		return fd;
2501507f512SMike Rapoport 
2511507f512SMike Rapoport 	file = secretmem_file_create(flags);
2521507f512SMike Rapoport 	if (IS_ERR(file)) {
2531507f512SMike Rapoport 		err = PTR_ERR(file);
2541507f512SMike Rapoport 		goto err_put_fd;
2551507f512SMike Rapoport 	}
2561507f512SMike Rapoport 
2571507f512SMike Rapoport 	file->f_flags |= O_LARGEFILE;
2581507f512SMike Rapoport 
25987066fddSLinus Torvalds 	atomic_inc(&secretmem_users);
260855d4443SKees Cook 	fd_install(fd, file);
2611507f512SMike Rapoport 	return fd;
2621507f512SMike Rapoport 
2631507f512SMike Rapoport err_put_fd:
2641507f512SMike Rapoport 	put_unused_fd(fd);
2651507f512SMike Rapoport 	return err;
2661507f512SMike Rapoport }
2671507f512SMike Rapoport 
2681507f512SMike Rapoport static int secretmem_init_fs_context(struct fs_context *fc)
2691507f512SMike Rapoport {
2701507f512SMike Rapoport 	return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
2711507f512SMike Rapoport }
2721507f512SMike Rapoport 
2731507f512SMike Rapoport static struct file_system_type secretmem_fs = {
2741507f512SMike Rapoport 	.name		= "secretmem",
2751507f512SMike Rapoport 	.init_fs_context = secretmem_init_fs_context,
2761507f512SMike Rapoport 	.kill_sb	= kill_anon_super,
2771507f512SMike Rapoport };
2781507f512SMike Rapoport 
2791ea41595SXiu Jianfeng static int __init secretmem_init(void)
2801507f512SMike Rapoport {
2811507f512SMike Rapoport 	if (!secretmem_enable)
282f7c5b1aaSXiu Jianfeng 		return 0;
2831507f512SMike Rapoport 
2841507f512SMike Rapoport 	secretmem_mnt = kern_mount(&secretmem_fs);
2851507f512SMike Rapoport 	if (IS_ERR(secretmem_mnt))
2864eb5bbdeSBinyi Han 		return PTR_ERR(secretmem_mnt);
2871507f512SMike Rapoport 
2881507f512SMike Rapoport 	/* prevent secretmem mappings from ever getting PROT_EXEC */
2891507f512SMike Rapoport 	secretmem_mnt->mnt_flags |= MNT_NOEXEC;
2901507f512SMike Rapoport 
291f7c5b1aaSXiu Jianfeng 	return 0;
2921507f512SMike Rapoport }
2931507f512SMike Rapoport fs_initcall(secretmem_init);
294