xref: /linux/fs/bcachefs/fs-ioctl.c (revision 566ab427f827b0256d3e8ce0235d088e6a9c28bd)
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3 
4 #include "bcachefs.h"
5 #include "chardev.h"
6 #include "dirent.h"
7 #include "fs.h"
8 #include "fs-common.h"
9 #include "fs-ioctl.h"
10 #include "quota.h"
11 
12 #include <linux/compat.h>
13 #include <linux/fsnotify.h>
14 #include <linux/mount.h>
15 #include <linux/namei.h>
16 #include <linux/security.h>
17 #include <linux/writeback.h>
18 
/*
 * Shutdown ioctl: the __u32 argument selects one of the FSOP_GOING_FLAGS_*
 * modes below (see bch2_ioc_goingdown()).
 */
#define FS_IOC_GOINGDOWN		_IOR('X', 125, __u32)

#define FSOP_GOING_FLAGS_DEFAULT	0x0	/* going down */
#define FSOP_GOING_FLAGS_LOGFLUSH	0x1	/* flush log but not data */
#define FSOP_GOING_FLAGS_NOLOGFLUSH	0x2	/* don't flush log nor data */
23 
/*
 * Request handed (as the opaque pointer argument) to the inode update
 * callbacks bch2_inode_flags_set() and fssetxattr_inode_update_fn().
 */
struct flags_set {
	/* bits of bi_flags that this request replaces */
	unsigned int		mask;
	/* new values for the bits covered by @mask */
	unsigned int		flags;

	/* project ID to store in bi_project (only read by the fssetxattr path) */
	unsigned int		projid;

	/*
	 * When @set_projinherit is true, the Inode_opt_project bit of
	 * bi_fields_set is overwritten with @projinherit.
	 */
	bool			set_projinherit;
	bool			projinherit;
};
33 
34 static int bch2_inode_flags_set(struct btree_trans *trans,
35 				struct bch_inode_info *inode,
36 				struct bch_inode_unpacked *bi,
37 				void *p)
38 {
39 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
40 	/*
41 	 * We're relying on btree locking here for exclusion with other ioctl
42 	 * calls - use the flags in the btree (@bi), not inode->i_flags:
43 	 */
44 	struct flags_set *s = p;
45 	unsigned newflags = s->flags;
46 	unsigned oldflags = bi->bi_flags & s->mask;
47 
48 	if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
49 	    !capable(CAP_LINUX_IMMUTABLE))
50 		return -EPERM;
51 
52 	if (!S_ISREG(bi->bi_mode) &&
53 	    !S_ISDIR(bi->bi_mode) &&
54 	    (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
55 		return -EINVAL;
56 
57 	if (s->set_projinherit) {
58 		bi->bi_fields_set &= ~(1 << Inode_opt_project);
59 		bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
60 	}
61 
62 	bi->bi_flags &= ~s->mask;
63 	bi->bi_flags |= newflags;
64 
65 	bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
66 	return 0;
67 }
68 
69 static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
70 {
71 	unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
72 
73 	return put_user(flags, arg);
74 }
75 
76 static int bch2_ioc_setflags(struct bch_fs *c,
77 			     struct file *file,
78 			     struct bch_inode_info *inode,
79 			     void __user *arg)
80 {
81 	struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
82 	unsigned uflags;
83 	int ret;
84 
85 	if (get_user(uflags, (int __user *) arg))
86 		return -EFAULT;
87 
88 	s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
89 	if (uflags)
90 		return -EOPNOTSUPP;
91 
92 	ret = mnt_want_write_file(file);
93 	if (ret)
94 		return ret;
95 
96 	inode_lock(&inode->v);
97 	if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
98 		ret = -EACCES;
99 		goto setflags_out;
100 	}
101 
102 	mutex_lock(&inode->ei_update_lock);
103 	ret   = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
104 		bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
105 			       ATTR_CTIME);
106 	mutex_unlock(&inode->ei_update_lock);
107 
108 setflags_out:
109 	inode_unlock(&inode->v);
110 	mnt_drop_write_file(file);
111 	return ret;
112 }
113 
114 static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
115 			       struct fsxattr __user *arg)
116 {
117 	struct fsxattr fa = { 0 };
118 
119 	fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
120 
121 	if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
122 		fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
123 
124 	fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
125 
126 	if (copy_to_user(arg, &fa, sizeof(fa)))
127 		return -EFAULT;
128 
129 	return 0;
130 }
131 
132 static int fssetxattr_inode_update_fn(struct btree_trans *trans,
133 				      struct bch_inode_info *inode,
134 				      struct bch_inode_unpacked *bi,
135 				      void *p)
136 {
137 	struct flags_set *s = p;
138 
139 	if (s->projid != bi->bi_project) {
140 		bi->bi_fields_set |= 1U << Inode_opt_project;
141 		bi->bi_project = s->projid;
142 	}
143 
144 	return bch2_inode_flags_set(trans, inode, bi, p);
145 }
146 
147 static int bch2_ioc_fssetxattr(struct bch_fs *c,
148 			       struct file *file,
149 			       struct bch_inode_info *inode,
150 			       struct fsxattr __user *arg)
151 {
152 	struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
153 	struct fsxattr fa;
154 	int ret;
155 
156 	if (copy_from_user(&fa, arg, sizeof(fa)))
157 		return -EFAULT;
158 
159 	s.set_projinherit = true;
160 	s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
161 	fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
162 
163 	s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
164 	if (fa.fsx_xflags)
165 		return -EOPNOTSUPP;
166 
167 	if (fa.fsx_projid >= U32_MAX)
168 		return -EINVAL;
169 
170 	/*
171 	 * inode fields accessible via the xattr interface are stored with a +1
172 	 * bias, so that 0 means unset:
173 	 */
174 	s.projid = fa.fsx_projid + 1;
175 
176 	ret = mnt_want_write_file(file);
177 	if (ret)
178 		return ret;
179 
180 	inode_lock(&inode->v);
181 	if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
182 		ret = -EACCES;
183 		goto err;
184 	}
185 
186 	mutex_lock(&inode->ei_update_lock);
187 	ret   = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
188 		bch2_set_projid(c, inode, fa.fsx_projid) ?:
189 		bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
190 			       ATTR_CTIME);
191 	mutex_unlock(&inode->ei_update_lock);
192 err:
193 	inode_unlock(&inode->v);
194 	mnt_drop_write_file(file);
195 	return ret;
196 }
197 
198 static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
199 				   struct bch_inode_info *inode,
200 				   struct bch_inode_unpacked *bi,
201 				   void *p)
202 {
203 	struct bch_inode_info *dir = p;
204 
205 	return !bch2_reinherit_attrs(bi, &dir->ei_inode);
206 }
207 
208 static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
209 				    struct file *file,
210 				    struct bch_inode_info *src,
211 				    const char __user *name)
212 {
213 	struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode);
214 	struct bch_inode_info *dst;
215 	struct inode *vinode = NULL;
216 	char *kname = NULL;
217 	struct qstr qstr;
218 	int ret = 0;
219 	subvol_inum inum;
220 
221 	kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
222 	if (!kname)
223 		return -ENOMEM;
224 
225 	ret = strncpy_from_user(kname, name, BCH_NAME_MAX);
226 	if (unlikely(ret < 0))
227 		goto err1;
228 
229 	qstr.len	= ret;
230 	qstr.name	= kname;
231 
232 	ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum);
233 	if (ret)
234 		goto err1;
235 
236 	vinode = bch2_vfs_inode_get(c, inum);
237 	ret = PTR_ERR_OR_ZERO(vinode);
238 	if (ret)
239 		goto err1;
240 
241 	dst = to_bch_ei(vinode);
242 
243 	ret = mnt_want_write_file(file);
244 	if (ret)
245 		goto err2;
246 
247 	bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
248 
249 	if (inode_attr_changing(src, dst, Inode_opt_project)) {
250 		ret = bch2_fs_quota_transfer(c, dst,
251 					     src->ei_qid,
252 					     1 << QTYP_PRJ,
253 					     KEY_TYPE_QUOTA_PREALLOC);
254 		if (ret)
255 			goto err3;
256 	}
257 
258 	ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
259 err3:
260 	bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
261 
262 	/* return true if we did work */
263 	if (ret >= 0)
264 		ret = !ret;
265 
266 	mnt_drop_write_file(file);
267 err2:
268 	iput(vinode);
269 err1:
270 	kfree(kname);
271 
272 	return ret;
273 }
274 
275 static int bch2_ioc_getversion(struct bch_inode_info *inode, u32 __user *arg)
276 {
277 	return put_user(inode->v.i_generation, arg);
278 }
279 
280 static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label)
281 {
282 	int ret;
283 	size_t len;
284 	char label[BCH_SB_LABEL_SIZE];
285 
286 	BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX);
287 
288 	mutex_lock(&c->sb_lock);
289 	memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
290 	mutex_unlock(&c->sb_lock);
291 
292 	len = strnlen(label, BCH_SB_LABEL_SIZE);
293 	if (len == BCH_SB_LABEL_SIZE) {
294 		bch_warn(c,
295 			"label is too long, return the first %zu bytes",
296 			--len);
297 	}
298 
299 	ret = copy_to_user(user_label, label, len);
300 
301 	return ret ? -EFAULT : 0;
302 }
303 
304 static int bch2_ioc_setlabel(struct bch_fs *c,
305 			     struct file *file,
306 			     struct bch_inode_info *inode,
307 			     const char __user *user_label)
308 {
309 	int ret;
310 	char label[BCH_SB_LABEL_SIZE];
311 
312 	if (!capable(CAP_SYS_ADMIN))
313 		return -EPERM;
314 
315 	if (copy_from_user(label, user_label, sizeof(label)))
316 		return -EFAULT;
317 
318 	if (strnlen(label, BCH_SB_LABEL_SIZE) == BCH_SB_LABEL_SIZE) {
319 		bch_err(c,
320 			"unable to set label with more than %d bytes",
321 			BCH_SB_LABEL_SIZE - 1);
322 		return -EINVAL;
323 	}
324 
325 	ret = mnt_want_write_file(file);
326 	if (ret)
327 		return ret;
328 
329 	mutex_lock(&c->sb_lock);
330 	strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE);
331 	ret = bch2_write_super(c);
332 	mutex_unlock(&c->sb_lock);
333 
334 	mnt_drop_write_file(file);
335 	return ret;
336 }
337 
338 static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
339 {
340 	u32 flags;
341 	int ret = 0;
342 
343 	if (!capable(CAP_SYS_ADMIN))
344 		return -EPERM;
345 
346 	if (get_user(flags, arg))
347 		return -EFAULT;
348 
349 	bch_notice(c, "shutdown by ioctl type %u", flags);
350 
351 	switch (flags) {
352 	case FSOP_GOING_FLAGS_DEFAULT:
353 		ret = bdev_freeze(c->vfs_sb->s_bdev);
354 		if (ret)
355 			break;
356 		bch2_journal_flush(&c->journal);
357 		bch2_fs_emergency_read_only(c);
358 		bdev_thaw(c->vfs_sb->s_bdev);
359 		break;
360 	case FSOP_GOING_FLAGS_LOGFLUSH:
361 		bch2_journal_flush(&c->journal);
362 		fallthrough;
363 	case FSOP_GOING_FLAGS_NOLOGFLUSH:
364 		bch2_fs_emergency_read_only(c);
365 		break;
366 	default:
367 		ret = -EINVAL;
368 		break;
369 	}
370 
371 	return ret;
372 }
373 
374 static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
375 					struct bch_ioctl_subvolume arg)
376 {
377 	struct inode *dir;
378 	struct bch_inode_info *inode;
379 	struct user_namespace *s_user_ns;
380 	struct dentry *dst_dentry;
381 	struct path src_path, dst_path;
382 	int how = LOOKUP_FOLLOW;
383 	int error;
384 	subvol_inum snapshot_src = { 0 };
385 	unsigned lookup_flags = 0;
386 	unsigned create_flags = BCH_CREATE_SUBVOL;
387 
388 	if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
389 			  BCH_SUBVOL_SNAPSHOT_RO))
390 		return -EINVAL;
391 
392 	if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
393 	    (arg.src_ptr ||
394 	     (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
395 		return -EINVAL;
396 
397 	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
398 		create_flags |= BCH_CREATE_SNAPSHOT;
399 
400 	if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
401 		create_flags |= BCH_CREATE_SNAPSHOT_RO;
402 
403 	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) {
404 		/* sync_inodes_sb enforce s_umount is locked */
405 		down_read(&c->vfs_sb->s_umount);
406 		sync_inodes_sb(c->vfs_sb);
407 		up_read(&c->vfs_sb->s_umount);
408 	}
409 retry:
410 	if (arg.src_ptr) {
411 		error = user_path_at(arg.dirfd,
412 				(const char __user *)(unsigned long)arg.src_ptr,
413 				how, &src_path);
414 		if (error)
415 			goto err1;
416 
417 		if (src_path.dentry->d_sb->s_fs_info != c) {
418 			path_put(&src_path);
419 			error = -EXDEV;
420 			goto err1;
421 		}
422 
423 		snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
424 	}
425 
426 	dst_dentry = user_path_create(arg.dirfd,
427 			(const char __user *)(unsigned long)arg.dst_ptr,
428 			&dst_path, lookup_flags);
429 	error = PTR_ERR_OR_ZERO(dst_dentry);
430 	if (error)
431 		goto err2;
432 
433 	if (dst_dentry->d_sb->s_fs_info != c) {
434 		error = -EXDEV;
435 		goto err3;
436 	}
437 
438 	if (dst_dentry->d_inode) {
439 		error = -BCH_ERR_EEXIST_subvolume_create;
440 		goto err3;
441 	}
442 
443 	dir = dst_path.dentry->d_inode;
444 	if (IS_DEADDIR(dir)) {
445 		error = -BCH_ERR_ENOENT_directory_dead;
446 		goto err3;
447 	}
448 
449 	s_user_ns = dir->i_sb->s_user_ns;
450 	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
451 	    !kgid_has_mapping(s_user_ns, current_fsgid())) {
452 		error = -EOVERFLOW;
453 		goto err3;
454 	}
455 
456 	error = inode_permission(file_mnt_idmap(filp),
457 				 dir, MAY_WRITE | MAY_EXEC);
458 	if (error)
459 		goto err3;
460 
461 	if (!IS_POSIXACL(dir))
462 		arg.mode &= ~current_umask();
463 
464 	error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
465 	if (error)
466 		goto err3;
467 
468 	if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
469 	    !arg.src_ptr)
470 		snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
471 
472 	down_write(&c->snapshot_create_lock);
473 	inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
474 			      dst_dentry, arg.mode|S_IFDIR,
475 			      0, snapshot_src, create_flags);
476 	up_write(&c->snapshot_create_lock);
477 
478 	error = PTR_ERR_OR_ZERO(inode);
479 	if (error)
480 		goto err3;
481 
482 	d_instantiate(dst_dentry, &inode->v);
483 	fsnotify_mkdir(dir, dst_dentry);
484 err3:
485 	done_path_create(&dst_path, dst_dentry);
486 err2:
487 	if (arg.src_ptr)
488 		path_put(&src_path);
489 
490 	if (retry_estale(error, lookup_flags)) {
491 		lookup_flags |= LOOKUP_REVAL;
492 		goto retry;
493 	}
494 err1:
495 	return error;
496 }
497 
498 static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
499 				struct bch_ioctl_subvolume arg)
500 {
501 	const char __user *name = (void __user *)(unsigned long)arg.dst_ptr;
502 	struct path path;
503 	struct inode *dir;
504 	struct dentry *victim;
505 	int ret = 0;
506 
507 	if (arg.flags)
508 		return -EINVAL;
509 
510 	victim = user_path_locked_at(arg.dirfd, name, &path);
511 	if (IS_ERR(victim))
512 		return PTR_ERR(victim);
513 
514 	dir = d_inode(path.dentry);
515 	if (victim->d_sb->s_fs_info != c) {
516 		ret = -EXDEV;
517 		goto err;
518 	}
519 	if (!d_is_positive(victim)) {
520 		ret = -ENOENT;
521 		goto err;
522 	}
523 	ret = __bch2_unlink(dir, victim, true);
524 	if (!ret) {
525 		fsnotify_rmdir(dir, victim);
526 		d_delete(victim);
527 	}
528 err:
529 	inode_unlock(dir);
530 	dput(victim);
531 	path_put(&path);
532 	return ret;
533 }
534 
535 long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
536 {
537 	struct bch_inode_info *inode = file_bch_inode(file);
538 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
539 	long ret;
540 
541 	switch (cmd) {
542 	case FS_IOC_GETFLAGS:
543 		ret = bch2_ioc_getflags(inode, (int __user *) arg);
544 		break;
545 
546 	case FS_IOC_SETFLAGS:
547 		ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
548 		break;
549 
550 	case FS_IOC_FSGETXATTR:
551 		ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
552 		break;
553 
554 	case FS_IOC_FSSETXATTR:
555 		ret = bch2_ioc_fssetxattr(c, file, inode,
556 					  (void __user *) arg);
557 		break;
558 
559 	case BCHFS_IOC_REINHERIT_ATTRS:
560 		ret = bch2_ioc_reinherit_attrs(c, file, inode,
561 					       (void __user *) arg);
562 		break;
563 
564 	case FS_IOC_GETVERSION:
565 		ret = bch2_ioc_getversion(inode, (u32 __user *) arg);
566 		break;
567 
568 	case FS_IOC_SETVERSION:
569 		ret = -ENOTTY;
570 		break;
571 
572 	case FS_IOC_GETFSLABEL:
573 		ret = bch2_ioc_getlabel(c, (void __user *) arg);
574 		break;
575 
576 	case FS_IOC_SETFSLABEL:
577 		ret = bch2_ioc_setlabel(c, file, inode, (const void __user *) arg);
578 		break;
579 
580 	case FS_IOC_GOINGDOWN:
581 		ret = bch2_ioc_goingdown(c, (u32 __user *) arg);
582 		break;
583 
584 	case BCH_IOCTL_SUBVOLUME_CREATE: {
585 		struct bch_ioctl_subvolume i;
586 
587 		ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
588 			? -EFAULT
589 			: bch2_ioctl_subvolume_create(c, file, i);
590 		break;
591 	}
592 
593 	case BCH_IOCTL_SUBVOLUME_DESTROY: {
594 		struct bch_ioctl_subvolume i;
595 
596 		ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
597 			? -EFAULT
598 			: bch2_ioctl_subvolume_destroy(c, file, i);
599 		break;
600 	}
601 
602 	default:
603 		ret = bch2_fs_ioctl(c, cmd, (void __user *) arg);
604 		break;
605 	}
606 
607 	return bch2_err_class(ret);
608 }
609 
610 #ifdef CONFIG_COMPAT
611 long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
612 {
613 	/* These are just misnamed, they actually get/put from/to user an int */
614 	switch (cmd) {
615 	case FS_IOC32_GETFLAGS:
616 		cmd = FS_IOC_GETFLAGS;
617 		break;
618 	case FS_IOC32_SETFLAGS:
619 		cmd = FS_IOC_SETFLAGS;
620 		break;
621 	case FS_IOC32_GETVERSION:
622 		cmd = FS_IOC_GETVERSION;
623 		break;
624 	case FS_IOC_GETFSLABEL:
625 	case FS_IOC_SETFSLABEL:
626 		break;
627 	default:
628 		return -ENOIOCTLCMD;
629 	}
630 	return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
631 }
632 #endif
633 
634 #endif /* NO_BCACHEFS_FS */
635