xref: /linux/mm/secretmem.c (revision a1c613ae4c322ddd58d5a8539dbfba2a0380a8c0)
11507f512SMike Rapoport // SPDX-License-Identifier: GPL-2.0
21507f512SMike Rapoport /*
31507f512SMike Rapoport  * Copyright IBM Corporation, 2021
41507f512SMike Rapoport  *
51507f512SMike Rapoport  * Author: Mike Rapoport <rppt@linux.ibm.com>
61507f512SMike Rapoport  */
71507f512SMike Rapoport 
81507f512SMike Rapoport #include <linux/mm.h>
91507f512SMike Rapoport #include <linux/fs.h>
101507f512SMike Rapoport #include <linux/swap.h>
111507f512SMike Rapoport #include <linux/mount.h>
121507f512SMike Rapoport #include <linux/memfd.h>
131507f512SMike Rapoport #include <linux/bitops.h>
141507f512SMike Rapoport #include <linux/printk.h>
151507f512SMike Rapoport #include <linux/pagemap.h>
161507f512SMike Rapoport #include <linux/syscalls.h>
171507f512SMike Rapoport #include <linux/pseudo_fs.h>
181507f512SMike Rapoport #include <linux/secretmem.h>
191507f512SMike Rapoport #include <linux/set_memory.h>
201507f512SMike Rapoport #include <linux/sched/signal.h>
211507f512SMike Rapoport 
221507f512SMike Rapoport #include <uapi/linux/magic.h>
231507f512SMike Rapoport 
241507f512SMike Rapoport #include <asm/tlbflush.h>
251507f512SMike Rapoport 
261507f512SMike Rapoport #include "internal.h"
271507f512SMike Rapoport 
/* Prefix every pr_*() message emitted from this file with "secretmem: ". */
#undef pr_fmt
#define pr_fmt(fmt) "secretmem: " fmt
301507f512SMike Rapoport 
/*
 * Masks used to validate the argument of the memfd_secret() system call.
 * No mode bits are defined, so the mode mask is empty and the flags mask
 * simply aliases it.
 */
#define SECRETMEM_MODE_MASK	(0x0)
#define SECRETMEM_FLAGS_MASK	SECRETMEM_MODE_MASK
371507f512SMike Rapoport 
38b758fe6dSMike Rapoport (IBM) static bool secretmem_enable __ro_after_init = 1;
391507f512SMike Rapoport module_param_named(enable, secretmem_enable, bool, 0400);
401507f512SMike Rapoport MODULE_PARM_DESC(secretmem_enable,
411507f512SMike Rapoport 		 "Enable secretmem and memfd_secret(2) system call");
421507f512SMike Rapoport 
4387066fddSLinus Torvalds static atomic_t secretmem_users;
449a436f8fSMike Rapoport 
secretmem_active(void)459a436f8fSMike Rapoport bool secretmem_active(void)
469a436f8fSMike Rapoport {
4787066fddSLinus Torvalds 	return !!atomic_read(&secretmem_users);
489a436f8fSMike Rapoport }
499a436f8fSMike Rapoport 
secretmem_fault(struct vm_fault * vmf)501507f512SMike Rapoport static vm_fault_t secretmem_fault(struct vm_fault *vmf)
511507f512SMike Rapoport {
521507f512SMike Rapoport 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
531507f512SMike Rapoport 	struct inode *inode = file_inode(vmf->vma->vm_file);
541507f512SMike Rapoport 	pgoff_t offset = vmf->pgoff;
551507f512SMike Rapoport 	gfp_t gfp = vmf->gfp_mask;
561507f512SMike Rapoport 	unsigned long addr;
571507f512SMike Rapoport 	struct page *page;
58*7e2fca52SZhangPeng 	struct folio *folio;
5984ac0130SMike Rapoport 	vm_fault_t ret;
601507f512SMike Rapoport 	int err;
611507f512SMike Rapoport 
621507f512SMike Rapoport 	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
631507f512SMike Rapoport 		return vmf_error(-EINVAL);
641507f512SMike Rapoport 
6584ac0130SMike Rapoport 	filemap_invalidate_lock_shared(mapping);
6684ac0130SMike Rapoport 
671507f512SMike Rapoport retry:
681507f512SMike Rapoport 	page = find_lock_page(mapping, offset);
691507f512SMike Rapoport 	if (!page) {
70*7e2fca52SZhangPeng 		folio = folio_alloc(gfp | __GFP_ZERO, 0);
71*7e2fca52SZhangPeng 		if (!folio) {
7284ac0130SMike Rapoport 			ret = VM_FAULT_OOM;
7384ac0130SMike Rapoport 			goto out;
7484ac0130SMike Rapoport 		}
751507f512SMike Rapoport 
76*7e2fca52SZhangPeng 		page = &folio->page;
771507f512SMike Rapoport 		err = set_direct_map_invalid_noflush(page);
781507f512SMike Rapoport 		if (err) {
79*7e2fca52SZhangPeng 			folio_put(folio);
8084ac0130SMike Rapoport 			ret = vmf_error(err);
8184ac0130SMike Rapoport 			goto out;
821507f512SMike Rapoport 		}
831507f512SMike Rapoport 
84*7e2fca52SZhangPeng 		__folio_mark_uptodate(folio);
85*7e2fca52SZhangPeng 		err = filemap_add_folio(mapping, folio, offset, gfp);
861507f512SMike Rapoport 		if (unlikely(err)) {
87*7e2fca52SZhangPeng 			folio_put(folio);
881507f512SMike Rapoport 			/*
891507f512SMike Rapoport 			 * If a split of large page was required, it
901507f512SMike Rapoport 			 * already happened when we marked the page invalid
911507f512SMike Rapoport 			 * which guarantees that this call won't fail
921507f512SMike Rapoport 			 */
931507f512SMike Rapoport 			set_direct_map_default_noflush(page);
941507f512SMike Rapoport 			if (err == -EEXIST)
951507f512SMike Rapoport 				goto retry;
961507f512SMike Rapoport 
9784ac0130SMike Rapoport 			ret = vmf_error(err);
9884ac0130SMike Rapoport 			goto out;
991507f512SMike Rapoport 		}
1001507f512SMike Rapoport 
1011507f512SMike Rapoport 		addr = (unsigned long)page_address(page);
1021507f512SMike Rapoport 		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1031507f512SMike Rapoport 	}
1041507f512SMike Rapoport 
1051507f512SMike Rapoport 	vmf->page = page;
10684ac0130SMike Rapoport 	ret = VM_FAULT_LOCKED;
10784ac0130SMike Rapoport 
10884ac0130SMike Rapoport out:
10984ac0130SMike Rapoport 	filemap_invalidate_unlock_shared(mapping);
11084ac0130SMike Rapoport 	return ret;
1111507f512SMike Rapoport }
1121507f512SMike Rapoport 
1131507f512SMike Rapoport static const struct vm_operations_struct secretmem_vm_ops = {
1141507f512SMike Rapoport 	.fault = secretmem_fault,
1151507f512SMike Rapoport };
1161507f512SMike Rapoport 
secretmem_release(struct inode * inode,struct file * file)1179a436f8fSMike Rapoport static int secretmem_release(struct inode *inode, struct file *file)
1189a436f8fSMike Rapoport {
11987066fddSLinus Torvalds 	atomic_dec(&secretmem_users);
1209a436f8fSMike Rapoport 	return 0;
1219a436f8fSMike Rapoport }
1229a436f8fSMike Rapoport 
secretmem_mmap(struct file * file,struct vm_area_struct * vma)1231507f512SMike Rapoport static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
1241507f512SMike Rapoport {
1251507f512SMike Rapoport 	unsigned long len = vma->vm_end - vma->vm_start;
1261507f512SMike Rapoport 
1271507f512SMike Rapoport 	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
1281507f512SMike Rapoport 		return -EINVAL;
1291507f512SMike Rapoport 
130b0cc5e89SAndrew Morton 	if (!mlock_future_ok(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
1311507f512SMike Rapoport 		return -EAGAIN;
1321507f512SMike Rapoport 
1331c71222eSSuren Baghdasaryan 	vm_flags_set(vma, VM_LOCKED | VM_DONTDUMP);
1341507f512SMike Rapoport 	vma->vm_ops = &secretmem_vm_ops;
1351507f512SMike Rapoport 
1361507f512SMike Rapoport 	return 0;
1371507f512SMike Rapoport }
1381507f512SMike Rapoport 
vma_is_secretmem(struct vm_area_struct * vma)1391507f512SMike Rapoport bool vma_is_secretmem(struct vm_area_struct *vma)
1401507f512SMike Rapoport {
1411507f512SMike Rapoport 	return vma->vm_ops == &secretmem_vm_ops;
1421507f512SMike Rapoport }
1431507f512SMike Rapoport 
1441507f512SMike Rapoport static const struct file_operations secretmem_fops = {
1459a436f8fSMike Rapoport 	.release	= secretmem_release,
1461507f512SMike Rapoport 	.mmap		= secretmem_mmap,
1471507f512SMike Rapoport };
1481507f512SMike Rapoport 
secretmem_migrate_folio(struct address_space * mapping,struct folio * dst,struct folio * src,enum migrate_mode mode)1495409548dSMatthew Wilcox (Oracle) static int secretmem_migrate_folio(struct address_space *mapping,
1505409548dSMatthew Wilcox (Oracle) 		struct folio *dst, struct folio *src, enum migrate_mode mode)
1511507f512SMike Rapoport {
1521507f512SMike Rapoport 	return -EBUSY;
1531507f512SMike Rapoport }
1541507f512SMike Rapoport 
secretmem_free_folio(struct folio * folio)1556612ed24SMatthew Wilcox (Oracle) static void secretmem_free_folio(struct folio *folio)
1561507f512SMike Rapoport {
1576612ed24SMatthew Wilcox (Oracle) 	set_direct_map_default_noflush(&folio->page);
1586612ed24SMatthew Wilcox (Oracle) 	folio_zero_segment(folio, 0, folio_size(folio));
1591507f512SMike Rapoport }
1601507f512SMike Rapoport 
1611507f512SMike Rapoport const struct address_space_operations secretmem_aops = {
16246de8b97SMatthew Wilcox (Oracle) 	.dirty_folio	= noop_dirty_folio,
1636612ed24SMatthew Wilcox (Oracle) 	.free_folio	= secretmem_free_folio,
1645409548dSMatthew Wilcox (Oracle) 	.migrate_folio	= secretmem_migrate_folio,
1651507f512SMike Rapoport };
1661507f512SMike Rapoport 
secretmem_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * iattr)167c1632a0fSChristian Brauner static int secretmem_setattr(struct mnt_idmap *idmap,
168f9b141f9SAxel Rasmussen 			     struct dentry *dentry, struct iattr *iattr)
169f9b141f9SAxel Rasmussen {
170f9b141f9SAxel Rasmussen 	struct inode *inode = d_inode(dentry);
17184ac0130SMike Rapoport 	struct address_space *mapping = inode->i_mapping;
172f9b141f9SAxel Rasmussen 	unsigned int ia_valid = iattr->ia_valid;
17384ac0130SMike Rapoport 	int ret;
17484ac0130SMike Rapoport 
17584ac0130SMike Rapoport 	filemap_invalidate_lock(mapping);
176f9b141f9SAxel Rasmussen 
177f9b141f9SAxel Rasmussen 	if ((ia_valid & ATTR_SIZE) && inode->i_size)
17884ac0130SMike Rapoport 		ret = -EINVAL;
17984ac0130SMike Rapoport 	else
180c1632a0fSChristian Brauner 		ret = simple_setattr(idmap, dentry, iattr);
181f9b141f9SAxel Rasmussen 
18284ac0130SMike Rapoport 	filemap_invalidate_unlock(mapping);
18384ac0130SMike Rapoport 
18484ac0130SMike Rapoport 	return ret;
185f9b141f9SAxel Rasmussen }
186f9b141f9SAxel Rasmussen 
187f9b141f9SAxel Rasmussen static const struct inode_operations secretmem_iops = {
188f9b141f9SAxel Rasmussen 	.setattr = secretmem_setattr,
189f9b141f9SAxel Rasmussen };
190f9b141f9SAxel Rasmussen 
/* Internal mount of the secretmem pseudo filesystem, set by secretmem_init(). */
static struct vfsmount *secretmem_mnt;
1921507f512SMike Rapoport 
secretmem_file_create(unsigned long flags)1931507f512SMike Rapoport static struct file *secretmem_file_create(unsigned long flags)
1941507f512SMike Rapoport {
19598001fd6SColin Ian King 	struct file *file;
1961507f512SMike Rapoport 	struct inode *inode;
1972bfe15c5SChristian Göttsche 	const char *anon_name = "[secretmem]";
1982bfe15c5SChristian Göttsche 	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
1992bfe15c5SChristian Göttsche 	int err;
2001507f512SMike Rapoport 
2011507f512SMike Rapoport 	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
2021507f512SMike Rapoport 	if (IS_ERR(inode))
2031507f512SMike Rapoport 		return ERR_CAST(inode);
2041507f512SMike Rapoport 
2052bfe15c5SChristian Göttsche 	err = security_inode_init_security_anon(inode, &qname, NULL);
2062bfe15c5SChristian Göttsche 	if (err) {
2072bfe15c5SChristian Göttsche 		file = ERR_PTR(err);
2082bfe15c5SChristian Göttsche 		goto err_free_inode;
2092bfe15c5SChristian Göttsche 	}
2102bfe15c5SChristian Göttsche 
2111507f512SMike Rapoport 	file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
2121507f512SMike Rapoport 				 O_RDWR, &secretmem_fops);
2131507f512SMike Rapoport 	if (IS_ERR(file))
2141507f512SMike Rapoport 		goto err_free_inode;
2151507f512SMike Rapoport 
2161507f512SMike Rapoport 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
2171507f512SMike Rapoport 	mapping_set_unevictable(inode->i_mapping);
2181507f512SMike Rapoport 
219f9b141f9SAxel Rasmussen 	inode->i_op = &secretmem_iops;
2201507f512SMike Rapoport 	inode->i_mapping->a_ops = &secretmem_aops;
2211507f512SMike Rapoport 
2221507f512SMike Rapoport 	/* pretend we are a normal file with zero size */
2231507f512SMike Rapoport 	inode->i_mode |= S_IFREG;
2241507f512SMike Rapoport 	inode->i_size = 0;
2251507f512SMike Rapoport 
2261507f512SMike Rapoport 	return file;
2271507f512SMike Rapoport 
2281507f512SMike Rapoport err_free_inode:
2291507f512SMike Rapoport 	iput(inode);
2301507f512SMike Rapoport 	return file;
2311507f512SMike Rapoport }
2321507f512SMike Rapoport 
/*
 * memfd_secret() system call: create a secretmem file and return a new file
 * descriptor referring to it.
 *
 * @flags may contain O_CLOEXEC plus any bit in SECRETMEM_FLAGS_MASK.
 *
 * Returns the descriptor on success, or:
 *   -ENOSYS  secretmem is disabled,
 *   -EINVAL  unknown bits are set in @flags,
 *   -ENFILE  the secretmem user counter reads negative,
 *   or the error from descriptor allocation / file creation.
 */
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
	struct file *file;
	int fd;

	/* make sure local flags do not conflict with global fcntl.h */
	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);

	if (!secretmem_enable)
		return -ENOSYS;

	if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
		return -EINVAL;

	/* presumably guards against the counter having overflowed */
	if (atomic_read(&secretmem_users) < 0)
		return -ENFILE;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	file = secretmem_file_create(flags);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		return PTR_ERR(file);
	}

	file->f_flags |= O_LARGEFILE;

	/* Account the file before it becomes reachable through the fd table. */
	atomic_inc(&secretmem_users);
	fd_install(fd, file);

	return fd;
}
2691507f512SMike Rapoport 
secretmem_init_fs_context(struct fs_context * fc)2701507f512SMike Rapoport static int secretmem_init_fs_context(struct fs_context *fc)
2711507f512SMike Rapoport {
2721507f512SMike Rapoport 	return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
2731507f512SMike Rapoport }
2741507f512SMike Rapoport 
2751507f512SMike Rapoport static struct file_system_type secretmem_fs = {
2761507f512SMike Rapoport 	.name		= "secretmem",
2771507f512SMike Rapoport 	.init_fs_context = secretmem_init_fs_context,
2781507f512SMike Rapoport 	.kill_sb	= kill_anon_super,
2791507f512SMike Rapoport };
2801507f512SMike Rapoport 
secretmem_init(void)2811ea41595SXiu Jianfeng static int __init secretmem_init(void)
2821507f512SMike Rapoport {
2831507f512SMike Rapoport 	if (!secretmem_enable)
284f7c5b1aaSXiu Jianfeng 		return 0;
2851507f512SMike Rapoport 
2861507f512SMike Rapoport 	secretmem_mnt = kern_mount(&secretmem_fs);
2871507f512SMike Rapoport 	if (IS_ERR(secretmem_mnt))
2884eb5bbdeSBinyi Han 		return PTR_ERR(secretmem_mnt);
2891507f512SMike Rapoport 
2901507f512SMike Rapoport 	/* prevent secretmem mappings from ever getting PROT_EXEC */
2911507f512SMike Rapoport 	secretmem_mnt->mnt_flags |= MNT_NOEXEC;
2921507f512SMike Rapoport 
293f7c5b1aaSXiu Jianfeng 	return 0;
2941507f512SMike Rapoport }
2951507f512SMike Rapoport fs_initcall(secretmem_init);
296