// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/fileattr.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
#include "backref.h"
#include "rcu-string.h"
#include "send.h"
#include "dev-replace.h"
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
#include "tree-log.h"
#include "compression.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"

#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif

#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u64 reserved[4];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)
#endif

/* Mask out flags that are inappropriate for the given type of inode. */
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
		unsigned int flags)
{
	if (S_ISDIR(inode->i_mode))
		return flags;
	else if (S_ISREG(inode->i_mode))
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}
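/*
 * For example (illustrative): on a FIFO or a device node, a request for
 * FS_IMMUTABLE_FL is silently dropped by the mask above and only
 * FS_NODUMP_FL and FS_NOATIME_FL survive.
 */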
/*
 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
 * ioctl.
 */
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

	if (flags & BTRFS_INODE_NOCOMPRESS)
		iflags |= FS_NOCOMP_FL;
	else if (flags & BTRFS_INODE_COMPRESS)
		iflags |= FS_COMPR_FL;

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	unsigned int new_fl = 0;

	if (binode->flags & BTRFS_INODE_SYNC)
		new_fl |= S_SYNC;
	if (binode->flags & BTRFS_INODE_IMMUTABLE)
		new_fl |= S_IMMUTABLE;
	if (binode->flags & BTRFS_INODE_APPEND)
		new_fl |= S_APPEND;
	if (binode->flags & BTRFS_INODE_NOATIME)
		new_fl |= S_NOATIME;
	if (binode->flags & BTRFS_INODE_DIRSYNC)
		new_fl |= S_DIRSYNC;

	set_mask_bits(&inode->i_flags,
		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
		      new_fl);
}

/*
 * Check if @flags are a supported and valid set of FS_*_FL flags and that
 * the old and new flags are not conflicting.
 */
static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
		      FS_NOATIME_FL | FS_NODUMP_FL |
		      FS_SYNC_FL | FS_DIRSYNC_FL |
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	/* COMPR and NOCOMP on new/old are valid */
	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
		return -EINVAL;

	/* NOCOW and compression options are mutually exclusive */
	if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;
	if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;

	return 0;
}

static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
				    unsigned int flags)
{
	if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL))
		return -EPERM;

	return 0;
}
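/*
 * E.g. (illustrative): check_fsflags(FS_NOCOW_FL, FS_COMPR_FL) and
 * check_fsflags(0, FS_COMPR_FL | FS_NOCOMP_FL) both return -EINVAL, while
 * an unsupported bit such as FS_SECRM_FL yields -EOPNOTSUPP.
 */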
/*
 * Set flags/xflags from the internal inode flags. The remaining items of
 * fsxattr are zeroed.
 */
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));

	fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode->flags));
	return 0;
}
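/*
 * These two callbacks are hooked up via ->fileattr_get/->fileattr_set in
 * btrfs' inode_operations (wired up elsewhere, in inode.c), so the VFS
 * routes both FS_IOC_GETFLAGS/SETFLAGS and FS_IOC_FSGETXATTR/FSSETXATTR
 * through them.
 */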
int btrfs_fileattr_set(struct user_namespace *mnt_userns,
		       struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	unsigned int fsflags, old_fsflags;
	int ret;
	const char *comp = NULL;
	u32 binode_flags;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (fileattr_has_fsx(fa))
		return -EOPNOTSUPP;

	fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags);
	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
	ret = check_fsflags(old_fsflags, fsflags);
	if (ret)
		return ret;

	ret = check_fsflags_compatible(fs_info, fsflags);
	if (ret)
		return ret;

	binode_flags = binode->flags;
	if (fsflags & FS_SYNC_FL)
		binode_flags |= BTRFS_INODE_SYNC;
	else
		binode_flags &= ~BTRFS_INODE_SYNC;
	if (fsflags & FS_IMMUTABLE_FL)
		binode_flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode_flags &= ~BTRFS_INODE_IMMUTABLE;
	if (fsflags & FS_APPEND_FL)
		binode_flags |= BTRFS_INODE_APPEND;
	else
		binode_flags &= ~BTRFS_INODE_APPEND;
	if (fsflags & FS_NODUMP_FL)
		binode_flags |= BTRFS_INODE_NODUMP;
	else
		binode_flags &= ~BTRFS_INODE_NODUMP;
	if (fsflags & FS_NOATIME_FL)
		binode_flags |= BTRFS_INODE_NOATIME;
	else
		binode_flags &= ~BTRFS_INODE_NOATIME;

	/* If coming from FS_IOC_FSSETXATTR then skip unconverted flags */
	if (!fa->flags_valid) {
		/* 1 item for the inode */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		goto update_flags;
	}

	if (fsflags & FS_DIRSYNC_FL)
		binode_flags |= BTRFS_INODE_DIRSYNC;
	else
		binode_flags &= ~BTRFS_INODE_DIRSYNC;
	if (fsflags & FS_NOCOW_FL) {
		if (S_ISREG(inode->i_mode)) {
			/*
			 * It's safe to turn csums off here, no extents exist
			 * yet. Otherwise we want the flag to reflect the real
			 * COW status of the file and will not set it.
			 */
			if (inode->i_size == 0)
				binode_flags |= BTRFS_INODE_NODATACOW |
						BTRFS_INODE_NODATASUM;
		} else {
			binode_flags |= BTRFS_INODE_NODATACOW;
		}
	} else {
		/*
		 * Revert under the same assumptions as above.
		 */
		if (S_ISREG(inode->i_mode)) {
			if (inode->i_size == 0)
				binode_flags &= ~(BTRFS_INODE_NODATACOW |
						  BTRFS_INODE_NODATASUM);
		} else {
			binode_flags &= ~BTRFS_INODE_NODATACOW;
		}
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if the compression code won't make
	 * things smaller.
	 */
	if (fsflags & FS_NOCOMP_FL) {
		binode_flags &= ~BTRFS_INODE_COMPRESS;
		binode_flags |= BTRFS_INODE_NOCOMPRESS;
	} else if (fsflags & FS_COMPR_FL) {

		if (IS_SWAPFILE(inode))
			return -ETXTBSY;

		binode_flags |= BTRFS_INODE_COMPRESS;
		binode_flags &= ~BTRFS_INODE_NOCOMPRESS;

		comp = btrfs_compress_type2str(fs_info->compress_type);
		if (!comp || comp[0] == 0)
			comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
	} else {
		binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	/*
	 * 1 for inode item
	 * 2 for properties
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	if (comp) {
		ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
				     strlen(comp), 0);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_end_trans;
		}
	} else {
		ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
				     0, 0);
		if (ret && ret != -ENODATA) {
			btrfs_abort_transaction(trans, ret);
			goto out_end_trans;
		}
	}

update_flags:
	binode->flags = binode_flags;
	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));

out_end_trans:
	btrfs_end_transaction(trans);
	return ret;
}
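/*
 * E.g. (illustrative): "chattr +c file" arrives here with FS_COMPR_FL set
 * and stores the mount's compression algorithm (zlib if none is configured)
 * in the "btrfs.compression" property; "chattr -c" clears both compression
 * flags and deletes the property again.
 */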
/*
 * Start exclusive operation @type, return true on success.
 */
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
			enum btrfs_exclusive_operation type)
{
	bool ret = false;

	spin_lock(&fs_info->super_lock);
	if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
		fs_info->exclusive_operation = type;
		ret = true;
	}
	spin_unlock(&fs_info->super_lock);

	return ret;
}

/*
 * Conditionally allow entering the exclusive operation in case it's
 * compatible with the running one. This must be paired with
 * btrfs_exclop_start_unlock and btrfs_exclop_finish.
 *
 * Compatibility:
 * - the same type is already running
 * - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
 *   must check the condition first that would allow none -> @type
 */
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
				 enum btrfs_exclusive_operation type)
{
	spin_lock(&fs_info->super_lock);
	if (fs_info->exclusive_operation == type)
		return true;

	spin_unlock(&fs_info->super_lock);
	return false;
}

void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
{
	spin_unlock(&fs_info->super_lock);
}

void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
{
	spin_lock(&fs_info->super_lock);
	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
	spin_unlock(&fs_info->super_lock);
	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
}
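/*
 * Sketch of the expected pairing (illustrative; it mirrors the cancel path
 * in exclop_start_or_cancel_reloc() below). Note that a successful
 * try_lock returns with super_lock still held:
 *
 *	if (btrfs_exclop_start_try_lock(fs_info, type)) {
 *		... inspect or update state under super_lock ...
 *		btrfs_exclop_start_unlock(fs_info);
 *	}
 *	... and the owner of the operation eventually calls:
 *	btrfs_exclop_finish(fs_info);
 */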
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	struct inode *inode = file_inode(file);

	return put_user(inode->i_generation, arg);
}

static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
					void __user *arg)
{
	struct btrfs_device *device;
	struct request_queue *q;
	struct fstrim_range range;
	u64 minlen = ULLONG_MAX;
	u64 num_devices = 0;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * btrfs_trim_block_group() depends on the space cache, which is not
	 * available on a zoned filesystem. So, disallow fitrim on a zoned
	 * filesystem for now.
	 */
	if (btrfs_is_zoned(fs_info))
		return -EOPNOTSUPP;

	/*
	 * If the fs is mounted with nologreplay, which requires it to be
	 * mounted in RO mode as well, we cannot allow discard on free space
	 * inside block groups, because log trees refer to extents that are not
	 * pinned in a block group's free space cache (pinning the extents is
	 * precisely the first phase of replaying a log tree).
	 */
	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
		return -EROFS;

	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
				dev_list) {
		if (!device->bdev)
			continue;
		q = bdev_get_queue(device->bdev);
		if (blk_queue_discard(q)) {
			num_devices++;
			minlen = min_t(u64, q->limits.discard_granularity,
				       minlen);
		}
	}
	rcu_read_unlock();

	if (!num_devices)
		return -EOPNOTSUPP;
	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;

	/*
	 * NOTE: Don't truncate the range using super->total_bytes. The bytenr
	 * of a block group is in the logical address space, which can be any
	 * sectorsize-aligned bytenr in the range [0, U64_MAX].
	 */
	if (range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	range.minlen = max(range.minlen, minlen);
	ret = btrfs_trim_fs(fs_info, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(arg, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}
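/*
 * Illustrative userspace invocation (this is the generic FITRIM ioctl, the
 * same one fstrim(8) issues):
 *
 *	struct fstrim_range range = {
 *		.start = 0, .len = ULLONG_MAX, .minlen = 0,
 *	};
 *	ioctl(fd, FITRIM, &range);
 *	... on return, range.len holds the number of bytes trimmed.
 */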
int __pure btrfs_is_empty_uuid(u8 *uuid)
{
	int i;

	for (i = 0; i < BTRFS_UUID_SIZE; i++) {
		if (uuid[i])
			return 0;
	}
	return 1;
}

static noinline int create_subvol(struct inode *dir,
				  struct dentry *dentry,
				  const char *name, int namelen,
				  struct btrfs_qgroup_inherit *inherit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	struct btrfs_root_item *root_item;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *new_root;
	struct btrfs_block_rsv block_rsv;
	struct timespec64 cur_time = current_time(dir);
	struct inode *inode;
	int ret;
	int err;
	dev_t anon_dev = 0;
	u64 objectid;
	u64 index = 0;

	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
	if (!root_item)
		return -ENOMEM;

	ret = btrfs_get_free_objectid(fs_info->tree_root, &objectid);
	if (ret)
		goto fail_free;

	ret = get_anon_bdev(&anon_dev);
	if (ret < 0)
		goto fail_free;

	/*
	 * Don't create a subvolume whose level is not zero, or qgroup will be
	 * screwed up since it assumes a subvolume qgroup's level to be 0.
	 */
	if (btrfs_qgroup_level(objectid)) {
		ret = -ENOSPC;
		goto fail_free;
	}

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
	 */
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
	if (ret)
		goto fail_free;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		btrfs_subvolume_release_metadata(root, &block_rsv);
		goto fail_free;
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
	if (ret)
		goto fail;

	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0,
				      BTRFS_NESTING_NORMAL);
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}

	btrfs_mark_buffer_dirty(leaf);

	inode_item = &root_item->inode;
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
	btrfs_set_stack_inode_nbytes(inode_item,
				     fs_info->nodesize);
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);

	btrfs_set_root_flags(root_item, 0);
	btrfs_set_root_limit(root_item, 0);
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);

	btrfs_set_root_bytenr(root_item, leaf->start);
	btrfs_set_root_generation(root_item, trans->transid);
	btrfs_set_root_level(root_item, 0);
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_used(root_item, leaf->len);
	btrfs_set_root_last_snapshot(root_item, 0);

	btrfs_set_root_generation_v2(root_item,
			btrfs_root_generation(root_item));
	generate_random_guid(root_item->uuid);
	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
	root_item->ctime = root_item->otime;
	btrfs_set_root_ctransid(root_item, trans->transid);
	btrfs_set_root_otransid(root_item, trans->transid);

	btrfs_tree_unlock(leaf);

	btrfs_set_root_dirid(root_item, BTRFS_FIRST_FREE_OBJECTID);

	key.objectid = objectid;
	key.offset = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
				root_item);
	if (ret) {
		/*
		 * Since we don't abort the transaction in this case, free the
		 * tree block so that we don't leak space and leave the
		 * filesystem in an inconsistent state (an extent item in the
		 * extent tree without backreferences). Also no need to have
		 * the tree block locked since it is not in any tree at this
		 * point, so no other task can find it and use it.
		 */
		btrfs_free_tree_block(trans, root, leaf, 0, 1);
		free_extent_buffer(leaf);
		goto fail;
	}

	free_extent_buffer(leaf);
	leaf = NULL;

	key.offset = (u64)-1;
	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
	if (IS_ERR(new_root)) {
		free_anon_bdev(anon_dev);
		ret = PTR_ERR(new_root);
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}
	/* Freeing will be done in btrfs_put_root() of new_root */
	anon_dev = 0;

	ret = btrfs_record_root_in_trans(trans, new_root);
	if (ret) {
		btrfs_put_root(new_root);
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	ret = btrfs_create_subvol_root(trans, new_root, root);
	btrfs_put_root(new_root);
	if (ret) {
		/* We potentially lose an unused inode item here */
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	/*
	 * Insert the directory item.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
				    BTRFS_FT_DIR, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
				  BTRFS_UUID_KEY_SUBVOL, objectid);
	if (ret)
		btrfs_abort_transaction(trans, ret);

fail:
	kfree(root_item);
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	btrfs_subvolume_release_metadata(root, &block_rsv);

	err = btrfs_commit_transaction(trans);
	if (err && !ret)
		ret = err;

	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		d_instantiate(dentry, inode);
	}
	return ret;

fail_free:
	if (anon_dev)
		free_anon_bdev(anon_dev);
	kfree(root_item);
	return ret;
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
			   struct dentry *dentry, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct inode *inode;
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
	int ret;

	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		return -EINVAL;

	if (atomic_read(&root->nr_swapfiles)) {
		btrfs_warn(fs_info,
			   "cannot snapshot subvolume with active swapfile");
		return -ETXTBSY;
	}

	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
	if (!pending_snapshot)
		return -ENOMEM;

	ret = get_anon_bdev(&pending_snapshot->anon_dev);
	if (ret < 0)
		goto free_pending;
	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
			GFP_KERNEL);
	pending_snapshot->path = btrfs_alloc_path();
	if (!pending_snapshot->root_item || !pending_snapshot->path) {
		ret = -ENOMEM;
		goto free_pending;
	}

	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
			     BTRFS_BLOCK_RSV_TEMP);
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
	 * 1 - UUID item
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
					&pending_snapshot->block_rsv, 8,
					false);
	if (ret)
		goto free_pending;

	pending_snapshot->dentry = dentry;
	pending_snapshot->root = root;
	pending_snapshot->readonly = readonly;
	pending_snapshot->dir = dir;
	pending_snapshot->inherit = inherit;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

	spin_lock(&fs_info->trans_lock);
	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
	spin_unlock(&fs_info->trans_lock);

	ret = btrfs_commit_transaction(trans);
	if (ret)
		goto fail;

	ret = pending_snapshot->error;
	if (ret)
		goto fail;

	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}

	d_instantiate(dentry, inode);
	ret = 0;
	pending_snapshot->anon_dev = 0;
fail:
	/* Prevent double freeing of anon_dev */
	if (ret && pending_snapshot->snap)
		pending_snapshot->snap->anon_dev = 0;
	btrfs_put_root(pending_snapshot->snap);
	btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
free_pending:
	if (pending_snapshot->anon_dev)
		free_anon_bdev(pending_snapshot->anon_dev);
	kfree(pending_snapshot->root_item);
	btrfs_free_path(pending_snapshot->path);
	kfree(pending_snapshot);

	return ret;
}
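/*
 * The snapshot itself is materialized at transaction commit time
 * (create_pending_snapshots() runs inside btrfs_commit_transaction()),
 * which is why create_snapshot() only queues the work item on
 * ->pending_snapshots and reads the outcome back from
 * pending_snapshot->error afterwards.
 */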
/*
 * Copy of may_delete() in fs/namei.c.
 * Check whether we can remove a link victim from directory dir, and check
 * whether the type of victim is right.
 * 1. We can't do it if dir is read-only (done in permission())
 * 2. We should have write and exec permissions on dir
 * 3. We can't remove anything from append-only dir
 * 4. We can't do anything with immutable dir (done in permission())
 * 5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
 * 6. If the victim is append-only or immutable we can't do anything with
 *    links pointing to it.
 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 * 9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */

static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
{
	int error;

	if (d_really_is_negative(victim))
		return -ENOENT;

	BUG_ON(d_inode(victim->d_parent) != dir);
	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);

	error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;
	if (IS_APPEND(dir))
		return -EPERM;
	if (check_sticky(&init_user_ns, dir, d_inode(victim)) ||
	    IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
	    IS_SWAPFILE(d_inode(victim)))
		return -EPERM;
	if (isdir) {
		if (!d_is_dir(victim))
			return -ENOTDIR;
		if (IS_ROOT(victim))
			return -EBUSY;
	} else if (d_is_dir(victim))
		return -EISDIR;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
		return -EBUSY;
	return 0;
}

/* Copy of may_create() in fs/namei.c. */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
	if (d_really_is_positive(child))
		return -EEXIST;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
}

/*
 * Create a new subvolume below @parent. This is largely modeled after
 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 * inside this filesystem so it's quite a bit simpler.
 */
static noinline int btrfs_mksubvol(const struct path *parent,
				   const char *name, int namelen,
				   struct btrfs_root *snap_src,
				   bool readonly,
				   struct btrfs_qgroup_inherit *inherit)
{
	struct inode *dir = d_inode(parent->dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct dentry *dentry;
	int error;

	error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (error == -EINTR)
		return error;

	dentry = lookup_one_len(name, parent->dentry, namelen);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

	error = btrfs_may_create(dir, dentry);
	if (error)
		goto out_dput;

	/*
	 * Even if this name doesn't exist, we may get hash collisions.
	 * Check for them now when we can safely fail.
	 */
	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
					       dir->i_ino, name,
					       namelen);
	if (error)
		goto out_dput;

	down_read(&fs_info->subvol_sem);

	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
		goto out_up_read;

	if (snap_src)
		error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
	else
		error = create_subvol(dir, dentry, name, namelen, inherit);

	if (!error)
		fsnotify_mkdir(dir, dentry);
out_up_read:
	up_read(&fs_info->subvol_sem);
out_dput:
	dput(dentry);
out_unlock:
	btrfs_inode_unlock(dir, 0);
	return error;
}
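/*
 * Background on the collision case (illustrative): btrfs dir items are
 * keyed by a crc32c hash of the name, so two different names in the same
 * directory can map to the same key; btrfs_check_dir_item_collision()
 * catches that before we commit to creating the subvolume.
 */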
static noinline int btrfs_mksnapshot(const struct path *parent,
				     const char *name, int namelen,
				     struct btrfs_root *root,
				     bool readonly,
				     struct btrfs_qgroup_inherit *inherit)
{
	int ret;
	bool snapshot_force_cow = false;

	/*
	 * Force new buffered writes to reserve space even when NOCOW is
	 * possible. This is to avoid later writeback (running delalloc)
	 * falling back to COW mode and unexpectedly failing with ENOSPC.
	 */
	btrfs_drew_read_lock(&root->snapshot_lock);

	ret = btrfs_start_delalloc_snapshot(root, false);
	if (ret)
		goto out;

	/*
	 * All previous writes have started writeback in NOCOW mode, so now
	 * we force future writes to fall back to COW mode during snapshot
	 * creation.
	 */
	atomic_inc(&root->snapshot_force_cow);
	snapshot_force_cow = true;

	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);

	ret = btrfs_mksubvol(parent, name, namelen,
			     root, readonly, inherit);
out:
	if (snapshot_force_cow)
		atomic_dec(&root->snapshot_force_cow);
	btrfs_drew_read_unlock(&root->snapshot_lock);
	return ret;
}
/*
 * When we're defragging a range, we don't want to kick it off again
 * if it is really just waiting for delalloc to send it down.
 * If we find a nice big extent or delalloc range for the bytes in the
 * file you want to defrag, we return 0 to let you know to skip this
 * part of the file.
 */
static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 end;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
	read_unlock(&em_tree->lock);

	if (em) {
		end = extent_map_end(em);
		free_extent_map(em);
		if (end - offset > thresh)
			return 0;
	}
	/* if we already have a nice delalloc here, just stop */
	thresh /= 2;
	end = count_range_bits(io_tree, &offset, offset + thresh,
			       thresh, EXTENT_DELALLOC, 1);
	if (end >= thresh)
		return 0;
	return 1;
}
1011 * 1012 * This is used by the defragging code to find new and small 1013 * extents 1014 */ 1015 static int find_new_extents(struct btrfs_root *root, 1016 struct inode *inode, u64 newer_than, 1017 u64 *off, u32 thresh) 1018 { 1019 struct btrfs_path *path; 1020 struct btrfs_key min_key; 1021 struct extent_buffer *leaf; 1022 struct btrfs_file_extent_item *extent; 1023 int type; 1024 int ret; 1025 u64 ino = btrfs_ino(BTRFS_I(inode)); 1026 1027 path = btrfs_alloc_path(); 1028 if (!path) 1029 return -ENOMEM; 1030 1031 min_key.objectid = ino; 1032 min_key.type = BTRFS_EXTENT_DATA_KEY; 1033 min_key.offset = *off; 1034 1035 while (1) { 1036 ret = btrfs_search_forward(root, &min_key, path, newer_than); 1037 if (ret != 0) 1038 goto none; 1039 process_slot: 1040 if (min_key.objectid != ino) 1041 goto none; 1042 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 1043 goto none; 1044 1045 leaf = path->nodes[0]; 1046 extent = btrfs_item_ptr(leaf, path->slots[0], 1047 struct btrfs_file_extent_item); 1048 1049 type = btrfs_file_extent_type(leaf, extent); 1050 if (type == BTRFS_FILE_EXTENT_REG && 1051 btrfs_file_extent_num_bytes(leaf, extent) < thresh && 1052 check_defrag_in_cache(inode, min_key.offset, thresh)) { 1053 *off = min_key.offset; 1054 btrfs_free_path(path); 1055 return 0; 1056 } 1057 1058 path->slots[0]++; 1059 if (path->slots[0] < btrfs_header_nritems(leaf)) { 1060 btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); 1061 goto process_slot; 1062 } 1063 1064 if (min_key.offset == (u64)-1) 1065 goto none; 1066 1067 min_key.offset++; 1068 btrfs_release_path(path); 1069 } 1070 none: 1071 btrfs_free_path(path); 1072 return -ENOENT; 1073 } 1074 1075 static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) 1076 { 1077 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 1078 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1079 struct extent_map *em; 1080 u64 len = PAGE_SIZE; 1081 1082 /* 1083 * hopefully we have this extent in the tree already, try without 1084 * the full extent lock 1085 */ 1086 read_lock(&em_tree->lock); 1087 em = lookup_extent_mapping(em_tree, start, len); 1088 read_unlock(&em_tree->lock); 1089 1090 if (!em) { 1091 struct extent_state *cached = NULL; 1092 u64 end = start + len - 1; 1093 1094 /* get the big lock and read metadata off disk */ 1095 lock_extent_bits(io_tree, start, end, &cached); 1096 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); 1097 unlock_extent_cached(io_tree, start, end, &cached); 1098 1099 if (IS_ERR(em)) 1100 return NULL; 1101 } 1102 1103 return em; 1104 } 1105 1106 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) 1107 { 1108 struct extent_map *next; 1109 bool ret = true; 1110 1111 /* this is the last extent */ 1112 if (em->start + em->len >= i_size_read(inode)) 1113 return false; 1114 1115 next = defrag_lookup_extent(inode, em->start + em->len); 1116 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) 1117 ret = false; 1118 else if ((em->block_start + em->block_len == next->block_start) && 1119 (em->block_len > SZ_128K && next->block_len > SZ_128K)) 1120 ret = false; 1121 1122 free_extent_map(next); 1123 return ret; 1124 } 1125 1126 static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, 1127 u64 *last_len, u64 *skip, u64 *defrag_end, 1128 int compress) 1129 { 1130 struct extent_map *em; 1131 int ret = 1; 1132 bool next_mergeable = true; 1133 bool prev_mergeable = true; 1134 1135 /* 1136 * make sure that once we start defragging an extent, we keep on 
static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
			       u64 *last_len, u64 *skip, u64 *defrag_end,
			       int compress)
{
	struct extent_map *em;
	int ret = 1;
	bool next_mergeable = true;
	bool prev_mergeable = true;

	/*
	 * Make sure that once we start defragging an extent, we keep on
	 * defragging it.
	 */
	if (start < *defrag_end)
		return 1;

	*skip = 0;

	em = defrag_lookup_extent(inode, start);
	if (!em)
		return 0;

	/* this will cover holes, and inline extents */
	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
		ret = 0;
		goto out;
	}

	if (!*defrag_end)
		prev_mergeable = false;

	next_mergeable = defrag_check_next_extent(inode, em);
	/*
	 * We hit a real extent; if it is big or the next extent is not a
	 * real extent, don't bother defragging it.
	 */
	if (!compress && (*last_len == 0 || *last_len >= thresh) &&
	    (em->len >= thresh || (!next_mergeable && !prev_mergeable)))
		ret = 0;
out:
	/*
	 * last_len ends up being a counter of how many bytes we've defragged.
	 * Every time we choose not to defrag an extent, we reset *last_len
	 * so that the next tiny extent will force a defrag.
	 *
	 * The end result of this is that tiny extents before a single big
	 * extent will force at least part of that big extent to be defragged.
	 */
	if (ret) {
		*defrag_end = extent_map_end(em);
	} else {
		*last_len = 0;
		*skip = extent_map_end(em);
		*defrag_end = 0;
	}

	free_extent_map(em);
	return ret;
}
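/*
 * Concrete example (illustrative, with the default 256K threshold): a run
 * of 4K extents followed by a 1M extent keeps *last_len non-zero by the
 * time the 1M extent is examined, so its head is rewritten too and can be
 * merged with the preceding small extents.
 */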
/*
 * It doesn't do much good to defrag one or two pages
 * at a time. This pulls in a nice chunk of pages
 * to COW and defrag.
 *
 * It also makes sure the delalloc code has enough
 * dirty data to avoid making new small extents as part
 * of the defrag.
 *
 * It's a good idea to start RA on this range
 * before calling this.
 */
static int cluster_pages_for_defrag(struct inode *inode,
				    struct page **pages,
				    unsigned long start_index,
				    unsigned long num_pages)
{
	unsigned long file_end;
	u64 isize = i_size_read(inode);
	u64 page_start;
	u64 page_end;
	u64 page_cnt;
	u64 start = (u64)start_index << PAGE_SHIFT;
	u64 search_start;
	int ret;
	int i;
	int i_done;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_io_tree *tree;
	struct extent_changeset *data_reserved = NULL;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);

	file_end = (isize - 1) >> PAGE_SHIFT;
	if (!isize || start_index > file_end)
		return 0;

	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);

	ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
			start, page_cnt << PAGE_SHIFT);
	if (ret)
		return ret;
	i_done = 0;
	tree = &BTRFS_I(inode)->io_tree;

	/* step one, lock all the pages */
	for (i = 0; i < page_cnt; i++) {
		struct page *page;
again:
		page = find_or_create_page(inode->i_mapping,
					   start_index + i, mask);
		if (!page)
			break;

		ret = set_page_extent_mapped(page);
		if (ret < 0) {
			unlock_page(page);
			put_page(page);
			break;
		}

		page_start = page_offset(page);
		page_end = page_start + PAGE_SIZE - 1;
		while (1) {
			lock_extent_bits(tree, page_start, page_end,
					 &cached_state);
			ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
							      page_start);
			unlock_extent_cached(tree, page_start, page_end,
					     &cached_state);
			if (!ordered)
				break;

			unlock_page(page);
			btrfs_start_ordered_extent(ordered, 1);
			btrfs_put_ordered_extent(ordered);
			lock_page(page);
			/*
			 * We unlocked the page above, so we need to check if
			 * it was released or not.
			 */
			if (page->mapping != inode->i_mapping) {
				unlock_page(page);
				put_page(page);
				goto again;
			}
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				put_page(page);
				ret = -EIO;
				break;
			}
		}

		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			put_page(page);
			goto again;
		}

		pages[i] = page;
		i_done++;
	}
	if (!i_done || ret)
		goto out;

	if (!(inode->i_sb->s_flags & SB_ACTIVE))
		goto out;

	/*
	 * So now we have a nice long stream of locked and up-to-date pages,
	 * let's wait on them.
	 */
	for (i = 0; i < i_done; i++)
		wait_on_page_writeback(pages[i]);

	page_start = page_offset(pages[0]);
	page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;

	lock_extent_bits(&BTRFS_I(inode)->io_tree,
			 page_start, page_end - 1, &cached_state);

	/*
	 * When defragmenting we skip ranges that have holes or inline extents
	 * (see should_defrag_range()), to avoid unnecessary IO and wasting
	 * space. At btrfs_defrag_file(), we check if a range should be defragged
	 * before locking the inode and then, if it should, we trigger a sync
	 * page cache readahead - we lock the inode only after that to avoid
	 * blocking for too long other tasks that possibly want to operate on
	 * other file ranges. But before we were able to get the inode lock,
	 * some other task may have punched a hole in the range, or we may have
	 * now an inline extent, in which case we should not defrag. So check
	 * for that here, where we have the inode and the range locked, and bail
	 * out if that happened.
	 */
	search_start = page_start;
	while (search_start < page_end) {
		struct extent_map *em;

		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
				      page_end - search_start);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out_unlock_range;
		}
		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
			free_extent_map(em);
			/* Ok, 0 means we did not defrag anything */
			ret = 0;
			goto out_unlock_range;
		}
		search_start = extent_map_end(em);
		free_extent_map(em);
	}

	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
			 page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
			 EXTENT_DEFRAG, 0, 0, &cached_state);

	if (i_done != page_cnt) {
		spin_lock(&BTRFS_I(inode)->lock);
		btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
		spin_unlock(&BTRFS_I(inode)->lock);
		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
				start, (page_cnt - i_done) << PAGE_SHIFT, true);
	}

	set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
			  &cached_state);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
			     page_start, page_end - 1, &cached_state);

	for (i = 0; i < i_done; i++) {
		clear_page_dirty_for_io(pages[i]);
		ClearPageChecked(pages[i]);
		set_page_dirty(pages[i]);
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
	extent_changeset_free(data_reserved);
	return i_done;

out_unlock_range:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
			     page_start, page_end - 1, &cached_state);
out:
	for (i = 0; i < i_done; i++) {
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
			start, page_cnt << PAGE_SHIFT, true);
	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
	extent_changeset_free(data_reserved);
	return ret;
}
int btrfs_defrag_file(struct inode *inode, struct file *file,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_to_defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct file_ra_state *ra = NULL;
	unsigned long last_index;
	u64 isize = i_size_read(inode);
	u64 last_len = 0;
	u64 skip = 0;
	u64 defrag_end = 0;
	u64 newer_off = range->start;
	unsigned long i;
	unsigned long ra_index = 0;
	int ret;
	int defrag_count = 0;
	int compress_type = BTRFS_COMPRESS_ZLIB;
	u32 extent_thresh = range->extent_thresh;
	unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
	unsigned long cluster = max_cluster;
	u64 new_align = ~((u64)SZ_128K - 1);
	struct page **pages = NULL;
	bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;

	if (isize == 0)
		return 0;

	if (range->start >= isize)
		return -EINVAL;

	if (do_compress) {
		if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES)
			return -EINVAL;
		if (range->compress_type)
			compress_type = range->compress_type;
	}

	if (extent_thresh == 0)
		extent_thresh = SZ_256K;

	/*
	 * If we were not given a file, allocate a readahead context. As
	 * readahead is just an optimization, defrag will work without it so
	 * we don't error out.
	 */
	if (!file) {
		ra = kzalloc(sizeof(*ra), GFP_KERNEL);
		if (ra)
			file_ra_state_init(ra, inode->i_mapping);
	} else {
		ra = &file->f_ra;
	}

	pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		ret = -ENOMEM;
		goto out_ra;
	}

	/* find the last page to defrag */
	if (range->start + range->len > range->start) {
		last_index = min_t(u64, isize - 1,
			 range->start + range->len - 1) >> PAGE_SHIFT;
	} else {
		last_index = (isize - 1) >> PAGE_SHIFT;
	}

	if (newer_than) {
		ret = find_new_extents(root, inode, newer_than,
				       &newer_off, SZ_64K);
		if (!ret) {
			range->start = newer_off;
			/*
			 * We always align our defrag to help keep
			 * the extents in the file evenly spaced.
			 */
			i = (newer_off & new_align) >> PAGE_SHIFT;
		} else
			goto out_ra;
	} else {
		i = range->start >> PAGE_SHIFT;
	}
	if (!max_to_defrag)
		max_to_defrag = last_index - i + 1;

	/*
	 * Make writeback start from i, so the defrag range can be
	 * written sequentially.
	 */
	if (i < inode->i_mapping->writeback_index)
		inode->i_mapping->writeback_index = i;
	while (i <= last_index && defrag_count < max_to_defrag &&
	       (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
		/*
		 * make sure we stop running if someone unmounts
		 * the FS
		 */
		if (!(inode->i_sb->s_flags & SB_ACTIVE))
			break;

		if (btrfs_defrag_cancelled(fs_info)) {
			btrfs_debug(fs_info, "defrag_file cancelled");
			ret = -EAGAIN;
			goto error;
		}

		if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
					 extent_thresh, &last_len, &skip,
					 &defrag_end, do_compress)) {
			unsigned long next;
			/*
			 * The should_defrag function tells us how much to
			 * skip; bump our counter by the suggested amount.
			 */
			next = DIV_ROUND_UP(skip, PAGE_SIZE);
			i = max(i + 1, next);
			continue;
		}

		if (!newer_than) {
			cluster = (PAGE_ALIGN(defrag_end) >>
				   PAGE_SHIFT) - i;
			cluster = min(cluster, max_cluster);
		} else {
			cluster = max_cluster;
		}

		if (i + cluster > ra_index) {
			ra_index = max(i, ra_index);
			if (ra)
				page_cache_sync_readahead(inode->i_mapping, ra,
						file, ra_index, cluster);
			ra_index += cluster;
		}

		btrfs_inode_lock(inode, 0);
		if (IS_SWAPFILE(inode)) {
			ret = -ETXTBSY;
		} else {
			if (do_compress)
				BTRFS_I(inode)->defrag_compress = compress_type;
			ret = cluster_pages_for_defrag(inode, pages, i, cluster);
		}
		if (ret < 0) {
			btrfs_inode_unlock(inode, 0);
			goto out_ra;
		}

		defrag_count += ret;
		balance_dirty_pages_ratelimited(inode->i_mapping);
		btrfs_inode_unlock(inode, 0);

		if (newer_than) {
			if (newer_off == (u64)-1)
				break;

			if (ret > 0)
				i += ret;

			newer_off = max(newer_off + 1,
					(u64)i << PAGE_SHIFT);

			ret = find_new_extents(root, inode, newer_than,
					       &newer_off, SZ_64K);
			if (!ret) {
				range->start = newer_off;
				i = (newer_off & new_align) >> PAGE_SHIFT;
			} else {
				break;
			}
		} else {
			if (ret > 0) {
				i += ret;
				last_len += ret << PAGE_SHIFT;
			} else {
				i++;
				last_len = 0;
			}
		}
	}

	ret = defrag_count;
error:
	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
		filemap_flush(inode->i_mapping);
		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
			filemap_flush(inode->i_mapping);
	}

	if (range->compress_type == BTRFS_COMPRESS_LZO) {
		btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
	} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
		btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
	}

out_ra:
	if (do_compress) {
		btrfs_inode_lock(inode, 0);
		BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
		btrfs_inode_unlock(inode, 0);
	}
	if (!file)
		kfree(ra);
	kfree(pages);
	return ret;
}
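/*
 * Illustrative caller setup (assumes the UAPI definitions from
 * include/uapi/linux/btrfs.h; this mirrors what "btrfs filesystem defrag
 * -czstd" would request):
 *
 *	struct btrfs_ioctl_defrag_range_args args = {
 *		.start = 0,
 *		.len = (__u64)-1,
 *		.extent_thresh = 256 * 1024,
 *		.flags = BTRFS_DEFRAG_RANGE_COMPRESS,
 *		.compress_type = BTRFS_COMPRESS_ZSTD,
 *	};
 *	ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &args);
 */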
/*
 * Try to start exclusive operation @type or cancel it if it's running.
 *
 * Return:
 *   0         - normal mode, newly claimed op started
 *   >0        - normal mode, something else is running,
 *               return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS to user space
 *   ECANCELED - cancel mode, successful cancel
 *   ENOTCONN  - cancel mode, operation not running anymore
 */
static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info,
			enum btrfs_exclusive_operation type, bool cancel)
{
	if (!cancel) {
		/* Start normal op */
		if (!btrfs_exclop_start(fs_info, type))
			return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		/* Exclusive operation is now claimed */
		return 0;
	}

	/* Cancel running op */
	if (btrfs_exclop_start_try_lock(fs_info, type)) {
		/*
		 * This blocks any exclop finish from setting it to NONE, so we
		 * request cancellation. Either it runs and we will wait for it,
		 * or it has finished and no waiting will happen.
		 */
		atomic_inc(&fs_info->reloc_cancel_req);
		btrfs_exclop_start_unlock(fs_info);

		if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
			wait_on_bit(&fs_info->flags, BTRFS_FS_RELOC_RUNNING,
				    TASK_INTERRUPTIBLE);

		return -ECANCELED;
	}

	/* Something else is running or none */
	return -ENOTCONN;
}
static noinline int btrfs_ioctl_resize(struct file *file,
					void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 new_size;
	u64 old_size;
	u64 devid = 1;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_vol_args *vol_args;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device = NULL;
	char *sizestr;
	char *retptr;
	char *devstr = NULL;
	int ret = 0;
	int mod = 0;
	bool cancel;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	/*
	 * Read the arguments before checking exclusivity to be able to
	 * distinguish regular resize and cancel.
	 */
	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out_drop;
	}
	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	sizestr = vol_args->name;
	cancel = (strcmp("cancel", sizestr) == 0);
	ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_RESIZE, cancel);
	if (ret)
		goto out_free;
	/* Exclusive operation is now claimed */

	devstr = strchr(sizestr, ':');
	if (devstr) {
		sizestr = devstr + 1;
		*devstr = '\0';
		devstr = vol_args->name;
		ret = kstrtoull(devstr, 10, &devid);
		if (ret)
			goto out_finish;
		if (!devid) {
			ret = -EINVAL;
			goto out_finish;
		}
		btrfs_info(fs_info, "resizing devid %llu", devid);
	}

	device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
	if (!device) {
		btrfs_info(fs_info, "resizer unable to find device %llu",
			   devid);
		ret = -ENODEV;
		goto out_finish;
	}

	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		btrfs_info(fs_info,
			   "resizer unable to apply on readonly device %llu",
			   devid);
		ret = -EPERM;
		goto out_finish;
	}

	if (!strcmp(sizestr, "max"))
		new_size = device->bdev->bd_inode->i_size;
	else {
		if (sizestr[0] == '-') {
			mod = -1;
			sizestr++;
		} else if (sizestr[0] == '+') {
			mod = 1;
			sizestr++;
		}
		new_size = memparse(sizestr, &retptr);
		if (*retptr != '\0' || new_size == 0) {
			ret = -EINVAL;
			goto out_finish;
		}
	}

	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -EPERM;
		goto out_finish;
	}

	old_size = btrfs_device_get_total_bytes(device);

	if (mod < 0) {
		if (new_size > old_size) {
			ret = -EINVAL;
			goto out_finish;
		}
		new_size = old_size - new_size;
	} else if (mod > 0) {
		if (new_size > ULLONG_MAX - old_size) {
			ret = -ERANGE;
			goto out_finish;
		}
		new_size = old_size + new_size;
	}

	if (new_size < SZ_256M) {
		ret = -EINVAL;
		goto out_finish;
	}
	if (new_size > device->bdev->bd_inode->i_size) {
		ret = -EFBIG;
		goto out_finish;
	}

	new_size = round_down(new_size, fs_info->sectorsize);

	if (new_size > old_size) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out_finish;
		}
		ret = btrfs_grow_device(trans, device, new_size);
		btrfs_commit_transaction(trans);
	} else if (new_size < old_size) {
		ret = btrfs_shrink_device(device, new_size);
	} /* equal, nothing to do */

	if (ret == 0 && new_size != old_size)
		btrfs_info_in_rcu(fs_info,
			"resize device %s (devid %llu) from %llu to %llu",
			rcu_str_deref(device->name), device->devid,
			old_size, new_size);
out_finish:
	btrfs_exclop_finish(fs_info);
out_free:
	kfree(vol_args);
out_drop:
	mnt_drop_write_file(file);
	return ret;
}
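/*
 * Accepted vol_args->name formats for the resize ioctl (examples derived
 * from the parsing above): "max", "10g", "+2g" and "-1g" operate on devid 1
 * by default, a "<devid>:" prefix such as "3:max" or "3:-4g" selects
 * another device, and the literal string "cancel" aborts a resize already
 * in progress.
 */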
static noinline int __btrfs_ioctl_snap_create(struct file *file,
				const char *name, unsigned long fd, int subvol,
				bool readonly,
				struct btrfs_qgroup_inherit *inherit)
{
	int namelen;
	int ret = 0;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	ret = mnt_want_write_file(file);
	if (ret)
		goto out;

	namelen = strlen(name);
	if (strchr(name, '/')) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (name[0] == '.' &&
	    (namelen == 1 || (name[1] == '.' && namelen == 2))) {
		ret = -EEXIST;
		goto out_drop_write;
	}

	if (subvol) {
		ret = btrfs_mksubvol(&file->f_path, name, namelen,
				     NULL, readonly, inherit);
	} else {
		struct fd src = fdget(fd);
		struct inode *src_inode;
		if (!src.file) {
			ret = -EINVAL;
			goto out_drop_write;
		}

		src_inode = file_inode(src.file);
		if (src_inode->i_sb != file_inode(file)->i_sb) {
			btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
				   "Snapshot src from another FS");
			ret = -EXDEV;
		} else if (!inode_owner_or_capable(&init_user_ns, src_inode)) {
			/*
			 * Subvolume creation is not restricted, but snapshots
			 * are limited to own subvolumes only
			 */
			ret = -EPERM;
		} else {
			ret = btrfs_mksnapshot(&file->f_path, name, namelen,
					       BTRFS_I(src_inode)->root,
					       readonly, inherit);
		}
		fdput(src);
	}
out_drop_write:
	mnt_drop_write_file(file);
out:
	return ret;
}

static noinline int btrfs_ioctl_snap_create(struct file *file,
					    void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
					subvol, false, NULL);

	kfree(vol_args);
	return ret;
}

static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
					       void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args_v2 *vol_args;
	int ret;
	bool readonly = false;
	struct btrfs_qgroup_inherit *inherit = NULL;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';

	if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
		ret = -EOPNOTSUPP;
		goto free_args;
	}

	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
		readonly = true;
	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
		u64 nums;

		if (vol_args->size < sizeof(*inherit) ||
		    vol_args->size > PAGE_SIZE) {
			ret = -EINVAL;
			goto free_args;
		}
		inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
		if (IS_ERR(inherit)) {
			ret = PTR_ERR(inherit);
			goto free_args;
		}

		if (inherit->num_qgroups > PAGE_SIZE ||
		    inherit->num_ref_copies > PAGE_SIZE ||
		    inherit->num_excl_copies > PAGE_SIZE) {
			ret = -EINVAL;
			goto free_inherit;
		}

		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		if (vol_args->size != struct_size(inherit, qgroups, nums)) {
			ret = -EINVAL;
			goto free_inherit;
		}
	}

	ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
					subvol, readonly, inherit);
	if (ret)
		goto free_inherit;
free_inherit:
	kfree(inherit);
free_args:
	kfree(vol_args);
	return ret;
}
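/*
 * Illustrative use of the v2 interface (assumes the UAPI in
 * include/uapi/linux/btrfs.h): creating a read-only snapshot of the
 * subvolume open at src_fd, placed in the directory open at dst_dirfd:
 *
 *	struct btrfs_ioctl_vol_args_v2 args = {
 *		.fd = src_fd,
 *		.flags = BTRFS_SUBVOL_RDONLY,
 *	};
 *	strcpy(args.name, "snap0");
 *	ioctl(dst_dirfd, BTRFS_IOC_SNAP_CREATE_V2, &args);
 */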
static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
						void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	u64 flags = 0;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
		return -EINVAL;

	down_read(&fs_info->subvol_sem);
	if (btrfs_root_readonly(root))
		flags |= BTRFS_SUBVOL_RDONLY;
	up_read(&fs_info->subvol_sem);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		ret = -EFAULT;

	return ret;
}

static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
						void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	u64 root_flags;
	u64 flags;
	int ret = 0;

	if (!inode_owner_or_capable(&init_user_ns, inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		goto out;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (copy_from_user(&flags, arg, sizeof(flags))) {
		ret = -EFAULT;
		goto out_drop_write;
	}

	if (flags & ~BTRFS_SUBVOL_RDONLY) {
		ret = -EOPNOTSUPP;
		goto out_drop_write;
	}

	down_write(&fs_info->subvol_sem);

	/* nothing to do */
	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
		goto out_drop_sem;

	root_flags = btrfs_root_flags(&root->root_item);
	if (flags & BTRFS_SUBVOL_RDONLY) {
		btrfs_set_root_flags(&root->root_item,
				     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
	} else {
		/*
		 * Block RO -> RW transition if this subvolume is involved in
		 * send
		 */
		spin_lock(&root->root_item_lock);
		if (root->send_in_progress == 0) {
			btrfs_set_root_flags(&root->root_item,
				     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
			spin_unlock(&root->root_item_lock);
		} else {
			spin_unlock(&root->root_item_lock);
			btrfs_warn(fs_info,
				   "Attempt to set subvolume %llu read-write during send",
				   root->root_key.objectid);
			ret = -EPERM;
			goto out_drop_sem;
		}
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_reset;
	}

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&root->root_key, &root->root_item);
	if (ret < 0) {
		btrfs_end_transaction(trans);
		goto out_reset;
	}

	ret = btrfs_commit_transaction(trans);

out_reset:
	if (ret)
		btrfs_set_root_flags(&root->root_item, root_flags);
out_drop_sem:
	up_write(&fs_info->subvol_sem);
out_drop_write:
	mnt_drop_write_file(file);
out:
	return ret;
}

static noinline int key_in_sk(struct btrfs_key *key,
			      struct btrfs_ioctl_search_key *sk)
{
	struct btrfs_key test;
	int ret;

	test.objectid = sk->min_objectid;
	test.type = sk->min_type;
	test.offset = sk->min_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret < 0)
		return 0;

	test.objectid = sk->max_objectid;
	test.type = sk->max_type;
	test.offset = sk->max_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret > 0)
		return 0;
	return 1;
}
found_transid; 2085 struct extent_buffer *leaf; 2086 struct btrfs_ioctl_search_header sh; 2087 struct btrfs_key test; 2088 unsigned long item_off; 2089 unsigned long item_len; 2090 int nritems; 2091 int i; 2092 int slot; 2093 int ret = 0; 2094 2095 leaf = path->nodes[0]; 2096 slot = path->slots[0]; 2097 nritems = btrfs_header_nritems(leaf); 2098 2099 if (btrfs_header_generation(leaf) > sk->max_transid) { 2100 i = nritems; 2101 goto advance_key; 2102 } 2103 found_transid = btrfs_header_generation(leaf); 2104 2105 for (i = slot; i < nritems; i++) { 2106 item_off = btrfs_item_ptr_offset(leaf, i); 2107 item_len = btrfs_item_size_nr(leaf, i); 2108 2109 btrfs_item_key_to_cpu(leaf, key, i); 2110 if (!key_in_sk(key, sk)) 2111 continue; 2112 2113 if (sizeof(sh) + item_len > *buf_size) { 2114 if (*num_found) { 2115 ret = 1; 2116 goto out; 2117 } 2118 2119 /* 2120 * return one empty item back for v1, which does not 2121 * handle -EOVERFLOW 2122 */ 2123 2124 *buf_size = sizeof(sh) + item_len; 2125 item_len = 0; 2126 ret = -EOVERFLOW; 2127 } 2128 2129 if (sizeof(sh) + item_len + *sk_offset > *buf_size) { 2130 ret = 1; 2131 goto out; 2132 } 2133 2134 sh.objectid = key->objectid; 2135 sh.offset = key->offset; 2136 sh.type = key->type; 2137 sh.len = item_len; 2138 sh.transid = found_transid; 2139 2140 /* 2141 * Copy search result header. If we fault then loop again so we 2142 * can fault in the pages and -EFAULT there if there's a 2143 * problem. Otherwise we'll fault and then copy the buffer in 2144 * properly this next time through 2145 */ 2146 if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) { 2147 ret = 0; 2148 goto out; 2149 } 2150 2151 *sk_offset += sizeof(sh); 2152 2153 if (item_len) { 2154 char __user *up = ubuf + *sk_offset; 2155 /* 2156 * Copy the item, same behavior as above, but reset the 2157 * * sk_offset so we copy the full thing again. 
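 *
 * To illustrate the buffer layout being built (a sketch, not part of
 * the UAPI documentation; offsets are relative to the start of ubuf):
 *
 *	struct btrfs_ioctl_search_header sh;   // header at *sk_offset
 *	char item[sh.len];                     // item bytes follow it
 *	// next header at *sk_offset + sizeof(sh) + sh.len, and so on
 *
 * Returning 0 instead of -EFAULT makes the fault-in retry loop in
 * search_ioctl() below run again, so header and item always land in
 * the buffer as one unit.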
2158 */ 2159 if (read_extent_buffer_to_user_nofault(leaf, up, 2160 item_off, item_len)) { 2161 ret = 0; 2162 *sk_offset -= sizeof(sh); 2163 goto out; 2164 } 2165 2166 *sk_offset += item_len; 2167 } 2168 (*num_found)++; 2169 2170 if (ret) /* -EOVERFLOW from above */ 2171 goto out; 2172 2173 if (*num_found >= sk->nr_items) { 2174 ret = 1; 2175 goto out; 2176 } 2177 } 2178 advance_key: 2179 ret = 0; 2180 test.objectid = sk->max_objectid; 2181 test.type = sk->max_type; 2182 test.offset = sk->max_offset; 2183 if (btrfs_comp_cpu_keys(key, &test) >= 0) 2184 ret = 1; 2185 else if (key->offset < (u64)-1) 2186 key->offset++; 2187 else if (key->type < (u8)-1) { 2188 key->offset = 0; 2189 key->type++; 2190 } else if (key->objectid < (u64)-1) { 2191 key->offset = 0; 2192 key->type = 0; 2193 key->objectid++; 2194 } else 2195 ret = 1; 2196 out: 2197 /* 2198 * 0: all items from this leaf copied, continue with next 2199 * 1: * more items can be copied, but unused buffer is too small 2200 * * all items were found 2201 * Either way, it stops the loop which iterates to the next 2202 * leaf 2203 * -EOVERFLOW: item was too large for buffer 2204 * -EFAULT: could not copy extent buffer back to userspace 2205 */ 2206 return ret; 2207 } 2208 2209 static noinline int search_ioctl(struct inode *inode, 2210 struct btrfs_ioctl_search_key *sk, 2211 size_t *buf_size, 2212 char __user *ubuf) 2213 { 2214 struct btrfs_fs_info *info = btrfs_sb(inode->i_sb); 2215 struct btrfs_root *root; 2216 struct btrfs_key key; 2217 struct btrfs_path *path; 2218 int ret; 2219 int num_found = 0; 2220 unsigned long sk_offset = 0; 2221 2222 if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { 2223 *buf_size = sizeof(struct btrfs_ioctl_search_header); 2224 return -EOVERFLOW; 2225 } 2226 2227 path = btrfs_alloc_path(); 2228 if (!path) 2229 return -ENOMEM; 2230 2231 if (sk->tree_id == 0) { 2232 /* search the root of the inode that was passed */ 2233 root = btrfs_grab_root(BTRFS_I(inode)->root); 2234 } else { 2235 root = btrfs_get_fs_root(info, sk->tree_id, true); 2236 if (IS_ERR(root)) { 2237 btrfs_free_path(path); 2238 return PTR_ERR(root); 2239 } 2240 } 2241 2242 key.objectid = sk->min_objectid; 2243 key.type = sk->min_type; 2244 key.offset = sk->min_offset; 2245 2246 while (1) { 2247 ret = fault_in_pages_writeable(ubuf + sk_offset, 2248 *buf_size - sk_offset); 2249 if (ret) 2250 break; 2251 2252 ret = btrfs_search_forward(root, &key, path, sk->min_transid); 2253 if (ret != 0) { 2254 if (ret > 0) 2255 ret = 0; 2256 goto err; 2257 } 2258 ret = copy_to_sk(path, &key, sk, buf_size, ubuf, 2259 &sk_offset, &num_found); 2260 btrfs_release_path(path); 2261 if (ret) 2262 break; 2263 2264 } 2265 if (ret > 0) 2266 ret = 0; 2267 err: 2268 sk->nr_items = num_found; 2269 btrfs_put_root(root); 2270 btrfs_free_path(path); 2271 return ret; 2272 } 2273 2274 static noinline int btrfs_ioctl_tree_search(struct file *file, 2275 void __user *argp) 2276 { 2277 struct btrfs_ioctl_search_args __user *uargs; 2278 struct btrfs_ioctl_search_key sk; 2279 struct inode *inode; 2280 int ret; 2281 size_t buf_size; 2282 2283 if (!capable(CAP_SYS_ADMIN)) 2284 return -EPERM; 2285 2286 uargs = (struct btrfs_ioctl_search_args __user *)argp; 2287 2288 if (copy_from_user(&sk, &uargs->key, sizeof(sk))) 2289 return -EFAULT; 2290 2291 buf_size = sizeof(uargs->buf); 2292 2293 inode = file_inode(file); 2294 ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); 2295 2296 /* 2297 * In the original implementation an overflow is handled by returning a 2298 * search header with a
len of zero, so reset ret. 2299 */ 2300 if (ret == -EOVERFLOW) 2301 ret = 0; 2302 2303 if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) 2304 ret = -EFAULT; 2305 return ret; 2306 } 2307 2308 static noinline int btrfs_ioctl_tree_search_v2(struct file *file, 2309 void __user *argp) 2310 { 2311 struct btrfs_ioctl_search_args_v2 __user *uarg; 2312 struct btrfs_ioctl_search_args_v2 args; 2313 struct inode *inode; 2314 int ret; 2315 size_t buf_size; 2316 const size_t buf_limit = SZ_16M; 2317 2318 if (!capable(CAP_SYS_ADMIN)) 2319 return -EPERM; 2320 2321 /* copy search header and buffer size */ 2322 uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; 2323 if (copy_from_user(&args, uarg, sizeof(args))) 2324 return -EFAULT; 2325 2326 buf_size = args.buf_size; 2327 2328 /* limit result size to 16MB */ 2329 if (buf_size > buf_limit) 2330 buf_size = buf_limit; 2331 2332 inode = file_inode(file); 2333 ret = search_ioctl(inode, &args.key, &buf_size, 2334 (char __user *)(&uarg->buf[0])); 2335 if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) 2336 ret = -EFAULT; 2337 else if (ret == -EOVERFLOW && 2338 copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) 2339 ret = -EFAULT; 2340 2341 return ret; 2342 } 2343 2344 /* 2345 * Search INODE_REFs to identify the path name of the 'dirid' directory 2346 * in a 'tree_id' tree and set the path name in 'name'. 2347 */ 2348 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 2349 u64 tree_id, u64 dirid, char *name) 2350 { 2351 struct btrfs_root *root; 2352 struct btrfs_key key; 2353 char *ptr; 2354 int ret = -1; 2355 int slot; 2356 int len; 2357 int total_len = 0; 2358 struct btrfs_inode_ref *iref; 2359 struct extent_buffer *l; 2360 struct btrfs_path *path; 2361 2362 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 2363 name[0] = '\0'; 2364 return 0; 2365 } 2366 2367 path = btrfs_alloc_path(); 2368 if (!path) 2369 return -ENOMEM; 2370 2371 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1]; 2372 2373 root = btrfs_get_fs_root(info, tree_id, true); 2374 if (IS_ERR(root)) { 2375 ret = PTR_ERR(root); 2376 root = NULL; 2377 goto out; 2378 } 2379 2380 key.objectid = dirid; 2381 key.type = BTRFS_INODE_REF_KEY; 2382 key.offset = (u64)-1; 2383 2384 while (1) { 2385 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2386 if (ret < 0) 2387 goto out; 2388 else if (ret > 0) { 2389 ret = btrfs_previous_item(root, path, dirid, 2390 BTRFS_INODE_REF_KEY); 2391 if (ret < 0) 2392 goto out; 2393 else if (ret > 0) { 2394 ret = -ENOENT; 2395 goto out; 2396 } 2397 } 2398 2399 l = path->nodes[0]; 2400 slot = path->slots[0]; 2401 btrfs_item_key_to_cpu(l, &key, slot); 2402 2403 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2404 len = btrfs_inode_ref_name_len(l, iref); 2405 ptr -= len + 1; 2406 total_len += len + 1; 2407 if (ptr < name) { 2408 ret = -ENAMETOOLONG; 2409 goto out; 2410 } 2411 2412 *(ptr + len) = '/'; 2413 read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); 2414 2415 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2416 break; 2417 2418 btrfs_release_path(path); 2419 key.objectid = key.offset; 2420 key.offset = (u64)-1; 2421 dirid = key.objectid; 2422 } 2423 memmove(name, ptr, total_len); 2424 name[total_len] = '\0'; 2425 ret = 0; 2426 out: 2427 btrfs_put_root(root); 2428 btrfs_free_path(path); 2429 return ret; 2430 } 2431 2432 static int btrfs_search_path_in_tree_user(struct inode *inode, 2433 struct btrfs_ioctl_ino_lookup_user_args *args) 2434 { 2435 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; 2436 struct
super_block *sb = inode->i_sb; 2437 struct btrfs_key upper_limit = BTRFS_I(inode)->location; 2438 u64 treeid = BTRFS_I(inode)->root->root_key.objectid; 2439 u64 dirid = args->dirid; 2440 unsigned long item_off; 2441 unsigned long item_len; 2442 struct btrfs_inode_ref *iref; 2443 struct btrfs_root_ref *rref; 2444 struct btrfs_root *root = NULL; 2445 struct btrfs_path *path; 2446 struct btrfs_key key, key2; 2447 struct extent_buffer *leaf; 2448 struct inode *temp_inode; 2449 char *ptr; 2450 int slot; 2451 int len; 2452 int total_len = 0; 2453 int ret; 2454 2455 path = btrfs_alloc_path(); 2456 if (!path) 2457 return -ENOMEM; 2458 2459 /* 2460 * If the bottom subvolume does not exist directly under upper_limit, 2461 * construct the path in from the bottom up. 2462 */ 2463 if (dirid != upper_limit.objectid) { 2464 ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; 2465 2466 root = btrfs_get_fs_root(fs_info, treeid, true); 2467 if (IS_ERR(root)) { 2468 ret = PTR_ERR(root); 2469 goto out; 2470 } 2471 2472 key.objectid = dirid; 2473 key.type = BTRFS_INODE_REF_KEY; 2474 key.offset = (u64)-1; 2475 while (1) { 2476 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2477 if (ret < 0) { 2478 goto out_put; 2479 } else if (ret > 0) { 2480 ret = btrfs_previous_item(root, path, dirid, 2481 BTRFS_INODE_REF_KEY); 2482 if (ret < 0) { 2483 goto out_put; 2484 } else if (ret > 0) { 2485 ret = -ENOENT; 2486 goto out_put; 2487 } 2488 } 2489 2490 leaf = path->nodes[0]; 2491 slot = path->slots[0]; 2492 btrfs_item_key_to_cpu(leaf, &key, slot); 2493 2494 iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref); 2495 len = btrfs_inode_ref_name_len(leaf, iref); 2496 ptr -= len + 1; 2497 total_len += len + 1; 2498 if (ptr < args->path) { 2499 ret = -ENAMETOOLONG; 2500 goto out_put; 2501 } 2502 2503 *(ptr + len) = '/'; 2504 read_extent_buffer(leaf, ptr, 2505 (unsigned long)(iref + 1), len); 2506 2507 /* Check the read+exec permission of this directory */ 2508 ret = btrfs_previous_item(root, path, dirid, 2509 BTRFS_INODE_ITEM_KEY); 2510 if (ret < 0) { 2511 goto out_put; 2512 } else if (ret > 0) { 2513 ret = -ENOENT; 2514 goto out_put; 2515 } 2516 2517 leaf = path->nodes[0]; 2518 slot = path->slots[0]; 2519 btrfs_item_key_to_cpu(leaf, &key2, slot); 2520 if (key2.objectid != dirid) { 2521 ret = -ENOENT; 2522 goto out_put; 2523 } 2524 2525 temp_inode = btrfs_iget(sb, key2.objectid, root); 2526 if (IS_ERR(temp_inode)) { 2527 ret = PTR_ERR(temp_inode); 2528 goto out_put; 2529 } 2530 ret = inode_permission(&init_user_ns, temp_inode, 2531 MAY_READ | MAY_EXEC); 2532 iput(temp_inode); 2533 if (ret) { 2534 ret = -EACCES; 2535 goto out_put; 2536 } 2537 2538 if (key.offset == upper_limit.objectid) 2539 break; 2540 if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) { 2541 ret = -EACCES; 2542 goto out_put; 2543 } 2544 2545 btrfs_release_path(path); 2546 key.objectid = key.offset; 2547 key.offset = (u64)-1; 2548 dirid = key.objectid; 2549 } 2550 2551 memmove(args->path, ptr, total_len); 2552 args->path[total_len] = '\0'; 2553 btrfs_put_root(root); 2554 root = NULL; 2555 btrfs_release_path(path); 2556 } 2557 2558 /* Get the bottom subvolume's name from ROOT_REF */ 2559 key.objectid = treeid; 2560 key.type = BTRFS_ROOT_REF_KEY; 2561 key.offset = args->treeid; 2562 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 2563 if (ret < 0) { 2564 goto out; 2565 } else if (ret > 0) { 2566 ret = -ENOENT; 2567 goto out; 2568 } 2569 2570 leaf = path->nodes[0]; 2571 slot = path->slots[0]; 2572 btrfs_item_key_to_cpu(leaf, &key, 
slot); 2573 2574 item_off = btrfs_item_ptr_offset(leaf, slot); 2575 item_len = btrfs_item_size_nr(leaf, slot); 2576 /* Check if dirid in ROOT_REF corresponds to passed dirid */ 2577 rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); 2578 if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) { 2579 ret = -EINVAL; 2580 goto out; 2581 } 2582 2583 /* Copy subvolume's name */ 2584 item_off += sizeof(struct btrfs_root_ref); 2585 item_len -= sizeof(struct btrfs_root_ref); 2586 read_extent_buffer(leaf, args->name, item_off, item_len); 2587 args->name[item_len] = 0; 2588 2589 out_put: 2590 btrfs_put_root(root); 2591 out: 2592 btrfs_free_path(path); 2593 return ret; 2594 } 2595 2596 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2597 void __user *argp) 2598 { 2599 struct btrfs_ioctl_ino_lookup_args *args; 2600 struct inode *inode; 2601 int ret = 0; 2602 2603 args = memdup_user(argp, sizeof(*args)); 2604 if (IS_ERR(args)) 2605 return PTR_ERR(args); 2606 2607 inode = file_inode(file); 2608 2609 /* 2610 * Unprivileged query to obtain the containing subvolume root id. The 2611 * path is reset so it's consistent with btrfs_search_path_in_tree. 2612 */ 2613 if (args->treeid == 0) 2614 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2615 2616 if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) { 2617 args->name[0] = 0; 2618 goto out; 2619 } 2620 2621 if (!capable(CAP_SYS_ADMIN)) { 2622 ret = -EPERM; 2623 goto out; 2624 } 2625 2626 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 2627 args->treeid, args->objectid, 2628 args->name); 2629 2630 out: 2631 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2632 ret = -EFAULT; 2633 2634 kfree(args); 2635 return ret; 2636 } 2637 2638 /* 2639 * Version of ino_lookup ioctl (unprivileged) 2640 * 2641 * The main differences from ino_lookup ioctl are: 2642 * 2643 * 1. Read + Exec permission will be checked using inode_permission() during 2644 * path construction. -EACCES will be returned in case of failure. 2645 * 2. Path construction will be stopped at the inode number which corresponds 2646 * to the fd with which this ioctl is called. If constructed path does not 2647 * exist under fd's inode, -EACCES will be returned. 2648 * 3. The name of bottom subvolume is also searched and filled. 
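 *
 * A minimal userspace sketch (illustrative only; error handling is
 * omitted, and 'fd', 'dirid' and 'subvolid' are assumed to come from
 * the caller, e.g. from a prior BTRFS_IOC_GET_SUBVOL_ROOTREF loop):
 *
 *	struct btrfs_ioctl_ino_lookup_user_args args = { 0 };
 *
 *	args.dirid = dirid;      // directory containing the subvolume
 *	args.treeid = subvolid;  // id of the bottom subvolume
 *	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP_USER, &args) == 0)
 *		printf("%s%s\n", args.path, args.name);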
2649 */ 2650 static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp) 2651 { 2652 struct btrfs_ioctl_ino_lookup_user_args *args; 2653 struct inode *inode; 2654 int ret; 2655 2656 args = memdup_user(argp, sizeof(*args)); 2657 if (IS_ERR(args)) 2658 return PTR_ERR(args); 2659 2660 inode = file_inode(file); 2661 2662 if (args->dirid == BTRFS_FIRST_FREE_OBJECTID && 2663 BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) { 2664 /* 2665 * The subvolume does not exist under fd with which this is 2666 * called 2667 */ 2668 kfree(args); 2669 return -EACCES; 2670 } 2671 2672 ret = btrfs_search_path_in_tree_user(inode, args); 2673 2674 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2675 ret = -EFAULT; 2676 2677 kfree(args); 2678 return ret; 2679 } 2680 2681 /* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */ 2682 static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp) 2683 { 2684 struct btrfs_ioctl_get_subvol_info_args *subvol_info; 2685 struct btrfs_fs_info *fs_info; 2686 struct btrfs_root *root; 2687 struct btrfs_path *path; 2688 struct btrfs_key key; 2689 struct btrfs_root_item *root_item; 2690 struct btrfs_root_ref *rref; 2691 struct extent_buffer *leaf; 2692 unsigned long item_off; 2693 unsigned long item_len; 2694 struct inode *inode; 2695 int slot; 2696 int ret = 0; 2697 2698 path = btrfs_alloc_path(); 2699 if (!path) 2700 return -ENOMEM; 2701 2702 subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL); 2703 if (!subvol_info) { 2704 btrfs_free_path(path); 2705 return -ENOMEM; 2706 } 2707 2708 inode = file_inode(file); 2709 fs_info = BTRFS_I(inode)->root->fs_info; 2710 2711 /* Get root_item of inode's subvolume */ 2712 key.objectid = BTRFS_I(inode)->root->root_key.objectid; 2713 root = btrfs_get_fs_root(fs_info, key.objectid, true); 2714 if (IS_ERR(root)) { 2715 ret = PTR_ERR(root); 2716 goto out_free; 2717 } 2718 root_item = &root->root_item; 2719 2720 subvol_info->treeid = key.objectid; 2721 2722 subvol_info->generation = btrfs_root_generation(root_item); 2723 subvol_info->flags = btrfs_root_flags(root_item); 2724 2725 memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE); 2726 memcpy(subvol_info->parent_uuid, root_item->parent_uuid, 2727 BTRFS_UUID_SIZE); 2728 memcpy(subvol_info->received_uuid, root_item->received_uuid, 2729 BTRFS_UUID_SIZE); 2730 2731 subvol_info->ctransid = btrfs_root_ctransid(root_item); 2732 subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime); 2733 subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime); 2734 2735 subvol_info->otransid = btrfs_root_otransid(root_item); 2736 subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime); 2737 subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime); 2738 2739 subvol_info->stransid = btrfs_root_stransid(root_item); 2740 subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime); 2741 subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime); 2742 2743 subvol_info->rtransid = btrfs_root_rtransid(root_item); 2744 subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime); 2745 subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime); 2746 2747 if (key.objectid != BTRFS_FS_TREE_OBJECTID) { 2748 /* Search root tree for ROOT_BACKREF of this subvolume */ 2749 key.type = BTRFS_ROOT_BACKREF_KEY; 2750 key.offset = 0; 2751 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 2752 if (ret < 0) { 2753 goto out; 2754 } else if 
(path->slots[0] >= 2755 btrfs_header_nritems(path->nodes[0])) { 2756 ret = btrfs_next_leaf(fs_info->tree_root, path); 2757 if (ret < 0) { 2758 goto out; 2759 } else if (ret > 0) { 2760 ret = -EUCLEAN; 2761 goto out; 2762 } 2763 } 2764 2765 leaf = path->nodes[0]; 2766 slot = path->slots[0]; 2767 btrfs_item_key_to_cpu(leaf, &key, slot); 2768 if (key.objectid == subvol_info->treeid && 2769 key.type == BTRFS_ROOT_BACKREF_KEY) { 2770 subvol_info->parent_id = key.offset; 2771 2772 rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); 2773 subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref); 2774 2775 item_off = btrfs_item_ptr_offset(leaf, slot) 2776 + sizeof(struct btrfs_root_ref); 2777 item_len = btrfs_item_size_nr(leaf, slot) 2778 - sizeof(struct btrfs_root_ref); 2779 read_extent_buffer(leaf, subvol_info->name, 2780 item_off, item_len); 2781 } else { 2782 ret = -ENOENT; 2783 goto out; 2784 } 2785 } 2786 2787 if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) 2788 ret = -EFAULT; 2789 2790 out: 2791 btrfs_put_root(root); 2792 out_free: 2793 btrfs_free_path(path); 2794 kfree(subvol_info); 2795 return ret; 2796 } 2797 2798 /* 2799 * Return ROOT_REF information of the subvolume containing this inode 2800 * except the subvolume name. 2801 */ 2802 static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp) 2803 { 2804 struct btrfs_ioctl_get_subvol_rootref_args *rootrefs; 2805 struct btrfs_root_ref *rref; 2806 struct btrfs_root *root; 2807 struct btrfs_path *path; 2808 struct btrfs_key key; 2809 struct extent_buffer *leaf; 2810 struct inode *inode; 2811 u64 objectid; 2812 int slot; 2813 int ret; 2814 u8 found; 2815 2816 path = btrfs_alloc_path(); 2817 if (!path) 2818 return -ENOMEM; 2819 2820 rootrefs = memdup_user(argp, sizeof(*rootrefs)); 2821 if (IS_ERR(rootrefs)) { 2822 btrfs_free_path(path); 2823 return PTR_ERR(rootrefs); 2824 } 2825 2826 inode = file_inode(file); 2827 root = BTRFS_I(inode)->root->fs_info->tree_root; 2828 objectid = BTRFS_I(inode)->root->root_key.objectid; 2829 2830 key.objectid = objectid; 2831 key.type = BTRFS_ROOT_REF_KEY; 2832 key.offset = rootrefs->min_treeid; 2833 found = 0; 2834 2835 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2836 if (ret < 0) { 2837 goto out; 2838 } else if (path->slots[0] >= 2839 btrfs_header_nritems(path->nodes[0])) { 2840 ret = btrfs_next_leaf(root, path); 2841 if (ret < 0) { 2842 goto out; 2843 } else if (ret > 0) { 2844 ret = -EUCLEAN; 2845 goto out; 2846 } 2847 } 2848 while (1) { 2849 leaf = path->nodes[0]; 2850 slot = path->slots[0]; 2851 2852 btrfs_item_key_to_cpu(leaf, &key, slot); 2853 if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) { 2854 ret = 0; 2855 goto out; 2856 } 2857 2858 if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) { 2859 ret = -EOVERFLOW; 2860 goto out; 2861 } 2862 2863 rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); 2864 rootrefs->rootref[found].treeid = key.offset; 2865 rootrefs->rootref[found].dirid = 2866 btrfs_root_ref_dirid(leaf, rref); 2867 found++; 2868 2869 ret = btrfs_next_item(root, path); 2870 if (ret < 0) { 2871 goto out; 2872 } else if (ret > 0) { 2873 ret = -EUCLEAN; 2874 goto out; 2875 } 2876 } 2877 2878 out: 2879 if (!ret || ret == -EOVERFLOW) { 2880 rootrefs->num_items = found; 2881 /* update min_treeid for next search */ 2882 if (found) 2883 rootrefs->min_treeid = 2884 rootrefs->rootref[found - 1].treeid + 1; 2885 if (copy_to_user(argp, rootrefs, sizeof(*rootrefs))) 2886 ret = -EFAULT; 2887 } 2888 2889 kfree(rootrefs); 2890 btrfs_free_path(path); 
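	/*
	 * A hedged userspace sketch of the pagination contract above
	 * (illustrative only; record handling is elided):
	 *
	 *	struct btrfs_ioctl_get_subvol_rootref_args arg = { 0 };
	 *
	 *	do {
	 *		ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &arg);
	 *		// consume arg.rootref[0 .. arg.num_items - 1];
	 *		// arg.min_treeid was already advanced above
	 *	} while (ret < 0 && errno == EOVERFLOW);
	 */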
2891 2892 return ret; 2893 } 2894 2895 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 2896 void __user *arg, 2897 bool destroy_v2) 2898 { 2899 struct dentry *parent = file->f_path.dentry; 2900 struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb); 2901 struct dentry *dentry; 2902 struct inode *dir = d_inode(parent); 2903 struct inode *inode; 2904 struct btrfs_root *root = BTRFS_I(dir)->root; 2905 struct btrfs_root *dest = NULL; 2906 struct btrfs_ioctl_vol_args *vol_args = NULL; 2907 struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL; 2908 char *subvol_name, *subvol_name_ptr = NULL; 2909 int subvol_namelen; 2910 int err = 0; 2911 bool destroy_parent = false; 2912 2913 if (destroy_v2) { 2914 vol_args2 = memdup_user(arg, sizeof(*vol_args2)); 2915 if (IS_ERR(vol_args2)) 2916 return PTR_ERR(vol_args2); 2917 2918 if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) { 2919 err = -EOPNOTSUPP; 2920 goto out; 2921 } 2922 2923 /* 2924 * If SPEC_BY_ID is not set, we are looking for the subvolume by 2925 * name, same as v1 currently does. 2926 */ 2927 if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) { 2928 vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0; 2929 subvol_name = vol_args2->name; 2930 2931 err = mnt_want_write_file(file); 2932 if (err) 2933 goto out; 2934 } else { 2935 if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) { 2936 err = -EINVAL; 2937 goto out; 2938 } 2939 2940 err = mnt_want_write_file(file); 2941 if (err) 2942 goto out; 2943 2944 dentry = btrfs_get_dentry(fs_info->sb, 2945 BTRFS_FIRST_FREE_OBJECTID, 2946 vol_args2->subvolid, 0, 0); 2947 if (IS_ERR(dentry)) { 2948 err = PTR_ERR(dentry); 2949 goto out_drop_write; 2950 } 2951 2952 /* 2953 * Change the default parent since the subvolume being 2954 * deleted can be outside of the current mount point. 2955 */ 2956 parent = btrfs_get_parent(dentry); 2957 2958 /* 2959 * At this point dentry->d_name can point to '/' if the 2960 * subvolume we want to destroy is outside of the 2961 * current mount point, so we need to release the 2962 * current dentry and execute the lookup to return a new 2963 * one with ->d_name pointing to the 2964 * <mount point>/subvol_name. 2965 */ 2966 dput(dentry); 2967 if (IS_ERR(parent)) { 2968 err = PTR_ERR(parent); 2969 goto out_drop_write; 2970 } 2971 dir = d_inode(parent); 2972 2973 /* 2974 * If v2 was used with SPEC_BY_ID, a new parent was 2975 * allocated since the subvolume can be outside of the 2976 * current mount point. Later on we need to release this 2977 * new parent dentry.
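 *
 * For reference, a hedged sketch of the matching userspace call
 * for this SPEC_BY_ID case (illustrative only; 'fd' is a directory
 * descriptor on the filesystem, 'subvolid' comes from the caller):
 *
 *	struct btrfs_ioctl_vol_args_v2 args = { 0 };
 *
 *	args.flags = BTRFS_SUBVOL_SPEC_BY_ID;
 *	args.subvolid = subvolid;	// >= BTRFS_FIRST_FREE_OBJECTID
 *	ioctl(fd, BTRFS_IOC_SNAP_DESTROY_V2, &args);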
2978 */ 2979 destroy_parent = true; 2980 2981 subvol_name_ptr = btrfs_get_subvol_name_from_objectid( 2982 fs_info, vol_args2->subvolid); 2983 if (IS_ERR(subvol_name_ptr)) { 2984 err = PTR_ERR(subvol_name_ptr); 2985 goto free_parent; 2986 } 2987 /* subvol_name_ptr is already nul terminated */ 2988 subvol_name = (char *)kbasename(subvol_name_ptr); 2989 } 2990 } else { 2991 vol_args = memdup_user(arg, sizeof(*vol_args)); 2992 if (IS_ERR(vol_args)) 2993 return PTR_ERR(vol_args); 2994 2995 vol_args->name[BTRFS_PATH_NAME_MAX] = 0; 2996 subvol_name = vol_args->name; 2997 2998 err = mnt_want_write_file(file); 2999 if (err) 3000 goto out; 3001 } 3002 3003 subvol_namelen = strlen(subvol_name); 3004 3005 if (strchr(subvol_name, '/') || 3006 strncmp(subvol_name, "..", subvol_namelen) == 0) { 3007 err = -EINVAL; 3008 goto free_subvol_name; 3009 } 3010 3011 if (!S_ISDIR(dir->i_mode)) { 3012 err = -ENOTDIR; 3013 goto free_subvol_name; 3014 } 3015 3016 err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); 3017 if (err == -EINTR) 3018 goto free_subvol_name; 3019 dentry = lookup_one_len(subvol_name, parent, subvol_namelen); 3020 if (IS_ERR(dentry)) { 3021 err = PTR_ERR(dentry); 3022 goto out_unlock_dir; 3023 } 3024 3025 if (d_really_is_negative(dentry)) { 3026 err = -ENOENT; 3027 goto out_dput; 3028 } 3029 3030 inode = d_inode(dentry); 3031 dest = BTRFS_I(inode)->root; 3032 if (!capable(CAP_SYS_ADMIN)) { 3033 /* 3034 * Regular user. Only allow this with a special mount 3035 * option, when the user has write+exec access to the 3036 * subvol root, and when rmdir(2) would have been 3037 * allowed. 3038 * 3039 * Note that this is _not_ a check that the subvol is 3040 * empty or doesn't contain data that we wouldn't 3041 * otherwise be able to delete. 3042 * 3043 * Users who want to delete empty subvols should try 3044 * rmdir(2). 3045 */ 3046 err = -EPERM; 3047 if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED)) 3048 goto out_dput; 3049 3050 /* 3051 * Do not allow deletion if the parent dir is the same 3052 * as the dir to be deleted. That means the ioctl 3053 * must be called on the dentry referencing the root 3054 * of the subvol, not a random directory contained 3055 * within it.
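 *
 * For example (hypothetical paths): subvolume <mnt>/snap is
 * deleted by issuing the ioctl on <mnt> with name "snap";
 * issuing it from inside the subvolume, where parent and
 * victim share the same root, fails the root == dest check
 * below with -EINVAL.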
3056 */ 3057 err = -EINVAL; 3058 if (root == dest) 3059 goto out_dput; 3060 3061 err = inode_permission(&init_user_ns, inode, 3062 MAY_WRITE | MAY_EXEC); 3063 if (err) 3064 goto out_dput; 3065 } 3066 3067 /* check if subvolume may be deleted by a user */ 3068 err = btrfs_may_delete(dir, dentry, 1); 3069 if (err) 3070 goto out_dput; 3071 3072 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { 3073 err = -EINVAL; 3074 goto out_dput; 3075 } 3076 3077 btrfs_inode_lock(inode, 0); 3078 err = btrfs_delete_subvolume(dir, dentry); 3079 btrfs_inode_unlock(inode, 0); 3080 if (!err) { 3081 fsnotify_rmdir(dir, dentry); 3082 d_delete(dentry); 3083 } 3084 3085 out_dput: 3086 dput(dentry); 3087 out_unlock_dir: 3088 btrfs_inode_unlock(dir, 0); 3089 free_subvol_name: 3090 kfree(subvol_name_ptr); 3091 free_parent: 3092 if (destroy_parent) 3093 dput(parent); 3094 out_drop_write: 3095 mnt_drop_write_file(file); 3096 out: 3097 kfree(vol_args2); 3098 kfree(vol_args); 3099 return err; 3100 } 3101 3102 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 3103 { 3104 struct inode *inode = file_inode(file); 3105 struct btrfs_root *root = BTRFS_I(inode)->root; 3106 struct btrfs_ioctl_defrag_range_args *range; 3107 int ret; 3108 3109 ret = mnt_want_write_file(file); 3110 if (ret) 3111 return ret; 3112 3113 if (btrfs_root_readonly(root)) { 3114 ret = -EROFS; 3115 goto out; 3116 } 3117 3118 switch (inode->i_mode & S_IFMT) { 3119 case S_IFDIR: 3120 if (!capable(CAP_SYS_ADMIN)) { 3121 ret = -EPERM; 3122 goto out; 3123 } 3124 ret = btrfs_defrag_root(root); 3125 break; 3126 case S_IFREG: 3127 /* 3128 * Note that this does not check the file descriptor for write 3129 * access. This prevents defragmenting executables that are 3130 * running and allows defrag on files open in read-only mode. 
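 *
 * A minimal userspace sketch (illustrative only; per the note
 * above, 'fd' may even be open O_RDONLY):
 *
 *	struct btrfs_ioctl_defrag_range_args range = { 0 };
 *
 *	range.len = (u64)-1;	// whole file, same default as below
 *	ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range);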
3131 */ 3132 if (!capable(CAP_SYS_ADMIN) && 3133 inode_permission(&init_user_ns, inode, MAY_WRITE)) { 3134 ret = -EPERM; 3135 goto out; 3136 } 3137 3138 range = kzalloc(sizeof(*range), GFP_KERNEL); 3139 if (!range) { 3140 ret = -ENOMEM; 3141 goto out; 3142 } 3143 3144 if (argp) { 3145 if (copy_from_user(range, argp, 3146 sizeof(*range))) { 3147 ret = -EFAULT; 3148 kfree(range); 3149 goto out; 3150 } 3151 /* compression requires us to start the IO */ 3152 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 3153 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 3154 range->extent_thresh = (u32)-1; 3155 } 3156 } else { 3157 /* the rest are all set to zero by kzalloc */ 3158 range->len = (u64)-1; 3159 } 3160 ret = btrfs_defrag_file(file_inode(file), file, 3161 range, BTRFS_OLDEST_GENERATION, 0); 3162 if (ret > 0) 3163 ret = 0; 3164 kfree(range); 3165 break; 3166 default: 3167 ret = -EINVAL; 3168 } 3169 out: 3170 mnt_drop_write_file(file); 3171 return ret; 3172 } 3173 3174 static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) 3175 { 3176 struct btrfs_ioctl_vol_args *vol_args; 3177 int ret; 3178 3179 if (!capable(CAP_SYS_ADMIN)) 3180 return -EPERM; 3181 3182 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) 3183 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 3184 3185 vol_args = memdup_user(arg, sizeof(*vol_args)); 3186 if (IS_ERR(vol_args)) { 3187 ret = PTR_ERR(vol_args); 3188 goto out; 3189 } 3190 3191 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 3192 ret = btrfs_init_new_device(fs_info, vol_args->name); 3193 3194 if (!ret) 3195 btrfs_info(fs_info, "disk added %s", vol_args->name); 3196 3197 kfree(vol_args); 3198 out: 3199 btrfs_exclop_finish(fs_info); 3200 return ret; 3201 } 3202 3203 static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) 3204 { 3205 struct inode *inode = file_inode(file); 3206 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3207 struct btrfs_ioctl_vol_args_v2 *vol_args; 3208 int ret; 3209 bool cancel = false; 3210 3211 if (!capable(CAP_SYS_ADMIN)) 3212 return -EPERM; 3213 3214 ret = mnt_want_write_file(file); 3215 if (ret) 3216 return ret; 3217 3218 vol_args = memdup_user(arg, sizeof(*vol_args)); 3219 if (IS_ERR(vol_args)) { 3220 ret = PTR_ERR(vol_args); 3221 goto err_drop; 3222 } 3223 3224 if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { 3225 ret = -EOPNOTSUPP; 3226 goto out; 3227 } 3228 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 3229 if (!(vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) && 3230 strcmp("cancel", vol_args->name) == 0) 3231 cancel = true; 3232 3233 ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, 3234 cancel); 3235 if (ret) 3236 goto out; 3237 /* Exclusive operation is now claimed */ 3238 3239 if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) 3240 ret = btrfs_rm_device(fs_info, NULL, vol_args->devid); 3241 else 3242 ret = btrfs_rm_device(fs_info, vol_args->name, 0); 3243 3244 btrfs_exclop_finish(fs_info); 3245 3246 if (!ret) { 3247 if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) 3248 btrfs_info(fs_info, "device deleted: id %llu", 3249 vol_args->devid); 3250 else 3251 btrfs_info(fs_info, "device deleted: %s", 3252 vol_args->name); 3253 } 3254 out: 3255 kfree(vol_args); 3256 err_drop: 3257 mnt_drop_write_file(file); 3258 return ret; 3259 } 3260 3261 static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 3262 { 3263 struct inode *inode = file_inode(file); 3264 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3265 struct btrfs_ioctl_vol_args *vol_args; 3266 int ret; 3267 bool 
cancel; 3268 3269 if (!capable(CAP_SYS_ADMIN)) 3270 return -EPERM; 3271 3272 ret = mnt_want_write_file(file); 3273 if (ret) 3274 return ret; 3275 3276 vol_args = memdup_user(arg, sizeof(*vol_args)); 3277 if (IS_ERR(vol_args)) { 3278 ret = PTR_ERR(vol_args); 3279 goto out_drop_write; 3280 } 3281 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 3282 cancel = (strcmp("cancel", vol_args->name) == 0); 3283 3284 ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, 3285 cancel); 3286 if (ret == 0) { 3287 ret = btrfs_rm_device(fs_info, vol_args->name, 0); 3288 if (!ret) 3289 btrfs_info(fs_info, "disk deleted %s", vol_args->name); 3290 btrfs_exclop_finish(fs_info); 3291 } 3292 3293 kfree(vol_args); 3294 out_drop_write: 3295 mnt_drop_write_file(file); 3296 3297 return ret; 3298 } 3299 3300 static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, 3301 void __user *arg) 3302 { 3303 struct btrfs_ioctl_fs_info_args *fi_args; 3304 struct btrfs_device *device; 3305 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 3306 u64 flags_in; 3307 int ret = 0; 3308 3309 fi_args = memdup_user(arg, sizeof(*fi_args)); 3310 if (IS_ERR(fi_args)) 3311 return PTR_ERR(fi_args); 3312 3313 flags_in = fi_args->flags; 3314 memset(fi_args, 0, sizeof(*fi_args)); 3315 3316 rcu_read_lock(); 3317 fi_args->num_devices = fs_devices->num_devices; 3318 3319 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { 3320 if (device->devid > fi_args->max_id) 3321 fi_args->max_id = device->devid; 3322 } 3323 rcu_read_unlock(); 3324 3325 memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid)); 3326 fi_args->nodesize = fs_info->nodesize; 3327 fi_args->sectorsize = fs_info->sectorsize; 3328 fi_args->clone_alignment = fs_info->sectorsize; 3329 3330 if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) { 3331 fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy); 3332 fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy); 3333 fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO; 3334 } 3335 3336 if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) { 3337 fi_args->generation = fs_info->generation; 3338 fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION; 3339 } 3340 3341 if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) { 3342 memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid, 3343 sizeof(fi_args->metadata_uuid)); 3344 fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID; 3345 } 3346 3347 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 3348 ret = -EFAULT; 3349 3350 kfree(fi_args); 3351 return ret; 3352 } 3353 3354 static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, 3355 void __user *arg) 3356 { 3357 struct btrfs_ioctl_dev_info_args *di_args; 3358 struct btrfs_device *dev; 3359 int ret = 0; 3360 char *s_uuid = NULL; 3361 3362 di_args = memdup_user(arg, sizeof(*di_args)); 3363 if (IS_ERR(di_args)) 3364 return PTR_ERR(di_args); 3365 3366 if (!btrfs_is_empty_uuid(di_args->uuid)) 3367 s_uuid = di_args->uuid; 3368 3369 rcu_read_lock(); 3370 dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid, 3371 NULL); 3372 3373 if (!dev) { 3374 ret = -ENODEV; 3375 goto out; 3376 } 3377 3378 di_args->devid = dev->devid; 3379 di_args->bytes_used = btrfs_device_get_bytes_used(dev); 3380 di_args->total_bytes = btrfs_device_get_total_bytes(dev); 3381 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 3382 if (dev->name) { 3383 strncpy(di_args->path, rcu_str_deref(dev->name), 3384 sizeof(di_args->path) - 1); 3385 di_args->path[sizeof(di_args->path) - 1] = 0; 3386 } else { 3387 di_args->path[0] 
= '\0'; 3388 } 3389 3390 out: 3391 rcu_read_unlock(); 3392 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 3393 ret = -EFAULT; 3394 3395 kfree(di_args); 3396 return ret; 3397 } 3398 3399 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 3400 { 3401 struct inode *inode = file_inode(file); 3402 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3403 struct btrfs_root *root = BTRFS_I(inode)->root; 3404 struct btrfs_root *new_root; 3405 struct btrfs_dir_item *di; 3406 struct btrfs_trans_handle *trans; 3407 struct btrfs_path *path = NULL; 3408 struct btrfs_disk_key disk_key; 3409 u64 objectid = 0; 3410 u64 dir_id; 3411 int ret; 3412 3413 if (!capable(CAP_SYS_ADMIN)) 3414 return -EPERM; 3415 3416 ret = mnt_want_write_file(file); 3417 if (ret) 3418 return ret; 3419 3420 if (copy_from_user(&objectid, argp, sizeof(objectid))) { 3421 ret = -EFAULT; 3422 goto out; 3423 } 3424 3425 if (!objectid) 3426 objectid = BTRFS_FS_TREE_OBJECTID; 3427 3428 new_root = btrfs_get_fs_root(fs_info, objectid, true); 3429 if (IS_ERR(new_root)) { 3430 ret = PTR_ERR(new_root); 3431 goto out; 3432 } 3433 if (!is_fstree(new_root->root_key.objectid)) { 3434 ret = -ENOENT; 3435 goto out_free; 3436 } 3437 3438 path = btrfs_alloc_path(); 3439 if (!path) { 3440 ret = -ENOMEM; 3441 goto out_free; 3442 } 3443 3444 trans = btrfs_start_transaction(root, 1); 3445 if (IS_ERR(trans)) { 3446 ret = PTR_ERR(trans); 3447 goto out_free; 3448 } 3449 3450 dir_id = btrfs_super_root_dir(fs_info->super_copy); 3451 di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, 3452 dir_id, "default", 7, 1); 3453 if (IS_ERR_OR_NULL(di)) { 3454 btrfs_release_path(path); 3455 btrfs_end_transaction(trans); 3456 btrfs_err(fs_info, 3457 "Umm, you don't have the default diritem, this isn't going to work"); 3458 ret = -ENOENT; 3459 goto out_free; 3460 } 3461 3462 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 3463 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 3464 btrfs_mark_buffer_dirty(path->nodes[0]); 3465 btrfs_release_path(path); 3466 3467 btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL); 3468 btrfs_end_transaction(trans); 3469 out_free: 3470 btrfs_put_root(new_root); 3471 btrfs_free_path(path); 3472 out: 3473 mnt_drop_write_file(file); 3474 return ret; 3475 } 3476 3477 static void get_block_group_info(struct list_head *groups_list, 3478 struct btrfs_ioctl_space_info *space) 3479 { 3480 struct btrfs_block_group *block_group; 3481 3482 space->total_bytes = 0; 3483 space->used_bytes = 0; 3484 space->flags = 0; 3485 list_for_each_entry(block_group, groups_list, list) { 3486 space->flags = block_group->flags; 3487 space->total_bytes += block_group->length; 3488 space->used_bytes += block_group->used; 3489 } 3490 } 3491 3492 static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, 3493 void __user *arg) 3494 { 3495 struct btrfs_ioctl_space_args space_args; 3496 struct btrfs_ioctl_space_info space; 3497 struct btrfs_ioctl_space_info *dest; 3498 struct btrfs_ioctl_space_info *dest_orig; 3499 struct btrfs_ioctl_space_info __user *user_dest; 3500 struct btrfs_space_info *info; 3501 static const u64 types[] = { 3502 BTRFS_BLOCK_GROUP_DATA, 3503 BTRFS_BLOCK_GROUP_SYSTEM, 3504 BTRFS_BLOCK_GROUP_METADATA, 3505 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA 3506 }; 3507 int num_types = 4; 3508 int alloc_size; 3509 int ret = 0; 3510 u64 slot_count = 0; 3511 int i, c; 3512 3513 if (copy_from_user(&space_args, 3514 (struct btrfs_ioctl_space_args __user *)arg, 3515 sizeof(space_args))) 3516 
return -EFAULT; 3517 3518 for (i = 0; i < num_types; i++) { 3519 struct btrfs_space_info *tmp; 3520 3521 info = NULL; 3522 list_for_each_entry(tmp, &fs_info->space_info, list) { 3523 if (tmp->flags == types[i]) { 3524 info = tmp; 3525 break; 3526 } 3527 } 3528 3529 if (!info) 3530 continue; 3531 3532 down_read(&info->groups_sem); 3533 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3534 if (!list_empty(&info->block_groups[c])) 3535 slot_count++; 3536 } 3537 up_read(&info->groups_sem); 3538 } 3539 3540 /* 3541 * Global block reserve, exported as a space_info 3542 */ 3543 slot_count++; 3544 3545 /* space_slots == 0 means they are asking for a count */ 3546 if (space_args.space_slots == 0) { 3547 space_args.total_spaces = slot_count; 3548 goto out; 3549 } 3550 3551 slot_count = min_t(u64, space_args.space_slots, slot_count); 3552 3553 alloc_size = sizeof(*dest) * slot_count; 3554 3555 /* we generally have at most 6 or so space infos, one for each raid 3556 * level. So, a whole page should be more than enough for everyone 3557 */ 3558 if (alloc_size > PAGE_SIZE) 3559 return -ENOMEM; 3560 3561 space_args.total_spaces = 0; 3562 dest = kmalloc(alloc_size, GFP_KERNEL); 3563 if (!dest) 3564 return -ENOMEM; 3565 dest_orig = dest; 3566 3567 /* now we have a buffer to copy into */ 3568 for (i = 0; i < num_types; i++) { 3569 struct btrfs_space_info *tmp; 3570 3571 if (!slot_count) 3572 break; 3573 3574 info = NULL; 3575 list_for_each_entry(tmp, &fs_info->space_info, list) { 3576 if (tmp->flags == types[i]) { 3577 info = tmp; 3578 break; 3579 } 3580 } 3581 3582 if (!info) 3583 continue; 3584 down_read(&info->groups_sem); 3585 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3586 if (!list_empty(&info->block_groups[c])) { 3587 get_block_group_info(&info->block_groups[c], 3588 &space); 3589 memcpy(dest, &space, sizeof(space)); 3590 dest++; 3591 space_args.total_spaces++; 3592 slot_count--; 3593 } 3594 if (!slot_count) 3595 break; 3596 } 3597 up_read(&info->groups_sem); 3598 } 3599 3600 /* 3601 * Add global block reserve 3602 */ 3603 if (slot_count) { 3604 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 3605 3606 spin_lock(&block_rsv->lock); 3607 space.total_bytes = block_rsv->size; 3608 space.used_bytes = block_rsv->size - block_rsv->reserved; 3609 spin_unlock(&block_rsv->lock); 3610 space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; 3611 memcpy(dest, &space, sizeof(space)); 3612 space_args.total_spaces++; 3613 } 3614 3615 user_dest = (struct btrfs_ioctl_space_info __user *) 3616 (arg + sizeof(struct btrfs_ioctl_space_args)); 3617 3618 if (copy_to_user(user_dest, dest_orig, alloc_size)) 3619 ret = -EFAULT; 3620 3621 kfree(dest_orig); 3622 out: 3623 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 3624 ret = -EFAULT; 3625 3626 return ret; 3627 } 3628 3629 static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, 3630 void __user *argp) 3631 { 3632 struct btrfs_trans_handle *trans; 3633 u64 transid; 3634 int ret; 3635 3636 trans = btrfs_attach_transaction_barrier(root); 3637 if (IS_ERR(trans)) { 3638 if (PTR_ERR(trans) != -ENOENT) 3639 return PTR_ERR(trans); 3640 3641 /* No running transaction, don't bother */ 3642 transid = root->fs_info->last_trans_committed; 3643 goto out; 3644 } 3645 transid = trans->transid; 3646 ret = btrfs_commit_transaction_async(trans); 3647 if (ret) { 3648 btrfs_end_transaction(trans); 3649 return ret; 3650 } 3651 out: 3652 if (argp) 3653 if (copy_to_user(argp, &transid, sizeof(transid))) 3654 return -EFAULT; 3655 return 0; 3656 } 3657 3658 static 
noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info, 3659 void __user *argp) 3660 { 3661 u64 transid; 3662 3663 if (argp) { 3664 if (copy_from_user(&transid, argp, sizeof(transid))) 3665 return -EFAULT; 3666 } else { 3667 transid = 0; /* current trans */ 3668 } 3669 return btrfs_wait_for_commit(fs_info, transid); 3670 } 3671 3672 static long btrfs_ioctl_scrub(struct file *file, void __user *arg) 3673 { 3674 struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb); 3675 struct btrfs_ioctl_scrub_args *sa; 3676 int ret; 3677 3678 if (!capable(CAP_SYS_ADMIN)) 3679 return -EPERM; 3680 3681 sa = memdup_user(arg, sizeof(*sa)); 3682 if (IS_ERR(sa)) 3683 return PTR_ERR(sa); 3684 3685 if (!(sa->flags & BTRFS_SCRUB_READONLY)) { 3686 ret = mnt_want_write_file(file); 3687 if (ret) 3688 goto out; 3689 } 3690 3691 ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end, 3692 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 3693 0); 3694 3695 /* 3696 * Copy scrub args to user space even if btrfs_scrub_dev() returned an 3697 * error. This is important as it allows user space to know how much 3698 * progress scrub has done. For example, if scrub is canceled we get 3699 * -ECANCELED from btrfs_scrub_dev() and return that error back to user 3700 * space. Later user space can inspect the progress from the structure 3701 * btrfs_ioctl_scrub_args and resume scrub from where it left off 3702 * previously (btrfs-progs does this). 3703 * If we fail to copy the btrfs_ioctl_scrub_args structure to user space 3704 * then return -EFAULT to signal the structure was not copied or it may 3705 * be corrupt and unreliable due to a partial copy. 3706 */ 3707 if (copy_to_user(arg, sa, sizeof(*sa))) 3708 ret = -EFAULT; 3709 3710 if (!(sa->flags & BTRFS_SCRUB_READONLY)) 3711 mnt_drop_write_file(file); 3712 out: 3713 kfree(sa); 3714 return ret; 3715 } 3716 3717 static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info) 3718 { 3719 if (!capable(CAP_SYS_ADMIN)) 3720 return -EPERM; 3721 3722 return btrfs_scrub_cancel(fs_info); 3723 } 3724 3725 static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info, 3726 void __user *arg) 3727 { 3728 struct btrfs_ioctl_scrub_args *sa; 3729 int ret; 3730 3731 if (!capable(CAP_SYS_ADMIN)) 3732 return -EPERM; 3733 3734 sa = memdup_user(arg, sizeof(*sa)); 3735 if (IS_ERR(sa)) 3736 return PTR_ERR(sa); 3737 3738 ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress); 3739 3740 if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) 3741 ret = -EFAULT; 3742 3743 kfree(sa); 3744 return ret; 3745 } 3746 3747 static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info, 3748 void __user *arg) 3749 { 3750 struct btrfs_ioctl_get_dev_stats *sa; 3751 int ret; 3752 3753 sa = memdup_user(arg, sizeof(*sa)); 3754 if (IS_ERR(sa)) 3755 return PTR_ERR(sa); 3756 3757 if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { 3758 kfree(sa); 3759 return -EPERM; 3760 } 3761 3762 ret = btrfs_get_dev_stats(fs_info, sa); 3763 3764 if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) 3765 ret = -EFAULT; 3766 3767 kfree(sa); 3768 return ret; 3769 } 3770 3771 static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info, 3772 void __user *arg) 3773 { 3774 struct btrfs_ioctl_dev_replace_args *p; 3775 int ret; 3776 3777 if (!capable(CAP_SYS_ADMIN)) 3778 return -EPERM; 3779 3780 p = memdup_user(arg, sizeof(*p)); 3781 if (IS_ERR(p)) 3782 return PTR_ERR(p); 3783 3784 switch (p->cmd) { 3785 case BTRFS_IOCTL_DEV_REPLACE_CMD_START: 3786 if (sb_rdonly(fs_info->sb)) 
{ 3787 ret = -EROFS; 3788 goto out; 3789 } 3790 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) { 3791 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 3792 } else { 3793 ret = btrfs_dev_replace_by_ioctl(fs_info, p); 3794 btrfs_exclop_finish(fs_info); 3795 } 3796 break; 3797 case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: 3798 btrfs_dev_replace_status(fs_info, p); 3799 ret = 0; 3800 break; 3801 case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: 3802 p->result = btrfs_dev_replace_cancel(fs_info); 3803 ret = 0; 3804 break; 3805 default: 3806 ret = -EINVAL; 3807 break; 3808 } 3809 3810 if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p))) 3811 ret = -EFAULT; 3812 out: 3813 kfree(p); 3814 return ret; 3815 } 3816 3817 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 3818 { 3819 int ret = 0; 3820 int i; 3821 u64 rel_ptr; 3822 int size; 3823 struct btrfs_ioctl_ino_path_args *ipa = NULL; 3824 struct inode_fs_paths *ipath = NULL; 3825 struct btrfs_path *path; 3826 3827 if (!capable(CAP_DAC_READ_SEARCH)) 3828 return -EPERM; 3829 3830 path = btrfs_alloc_path(); 3831 if (!path) { 3832 ret = -ENOMEM; 3833 goto out; 3834 } 3835 3836 ipa = memdup_user(arg, sizeof(*ipa)); 3837 if (IS_ERR(ipa)) { 3838 ret = PTR_ERR(ipa); 3839 ipa = NULL; 3840 goto out; 3841 } 3842 3843 size = min_t(u32, ipa->size, 4096); 3844 ipath = init_ipath(size, root, path); 3845 if (IS_ERR(ipath)) { 3846 ret = PTR_ERR(ipath); 3847 ipath = NULL; 3848 goto out; 3849 } 3850 3851 ret = paths_from_inode(ipa->inum, ipath); 3852 if (ret < 0) 3853 goto out; 3854 3855 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 3856 rel_ptr = ipath->fspath->val[i] - 3857 (u64)(unsigned long)ipath->fspath->val; 3858 ipath->fspath->val[i] = rel_ptr; 3859 } 3860 3861 ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, 3862 ipath->fspath, size); 3863 if (ret) { 3864 ret = -EFAULT; 3865 goto out; 3866 } 3867 3868 out: 3869 btrfs_free_path(path); 3870 free_ipath(ipath); 3871 kfree(ipa); 3872 3873 return ret; 3874 } 3875 3876 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 3877 { 3878 struct btrfs_data_container *inodes = ctx; 3879 const size_t c = 3 * sizeof(u64); 3880 3881 if (inodes->bytes_left >= c) { 3882 inodes->bytes_left -= c; 3883 inodes->val[inodes->elem_cnt] = inum; 3884 inodes->val[inodes->elem_cnt + 1] = offset; 3885 inodes->val[inodes->elem_cnt + 2] = root; 3886 inodes->elem_cnt += 3; 3887 } else { 3888 inodes->bytes_missing += c - inodes->bytes_left; 3889 inodes->bytes_left = 0; 3890 inodes->elem_missed += 3; 3891 } 3892 3893 return 0; 3894 } 3895 3896 static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, 3897 void __user *arg, int version) 3898 { 3899 int ret = 0; 3900 int size; 3901 struct btrfs_ioctl_logical_ino_args *loi; 3902 struct btrfs_data_container *inodes = NULL; 3903 struct btrfs_path *path = NULL; 3904 bool ignore_offset; 3905 3906 if (!capable(CAP_SYS_ADMIN)) 3907 return -EPERM; 3908 3909 loi = memdup_user(arg, sizeof(*loi)); 3910 if (IS_ERR(loi)) 3911 return PTR_ERR(loi); 3912 3913 if (version == 1) { 3914 ignore_offset = false; 3915 size = min_t(u32, loi->size, SZ_64K); 3916 } else { 3917 /* All reserved bits must be 0 for now */ 3918 if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) { 3919 ret = -EINVAL; 3920 goto out_loi; 3921 } 3922 /* Only accept flags we have defined so far */ 3923 if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { 3924 ret = -EINVAL; 3925 goto out_loi; 3926 } 3927 ignore_offset = loi->flags & 
BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; 3928 size = min_t(u32, loi->size, SZ_16M); 3929 } 3930 3931 path = btrfs_alloc_path(); 3932 if (!path) { 3933 ret = -ENOMEM; 3934 goto out; 3935 } 3936 3937 inodes = init_data_container(size); 3938 if (IS_ERR(inodes)) { 3939 ret = PTR_ERR(inodes); 3940 inodes = NULL; 3941 goto out; 3942 } 3943 3944 ret = iterate_inodes_from_logical(loi->logical, fs_info, path, 3945 build_ino_list, inodes, ignore_offset); 3946 if (ret == -EINVAL) 3947 ret = -ENOENT; 3948 if (ret < 0) 3949 goto out; 3950 3951 ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes, 3952 size); 3953 if (ret) 3954 ret = -EFAULT; 3955 3956 out: 3957 btrfs_free_path(path); 3958 kvfree(inodes); 3959 out_loi: 3960 kfree(loi); 3961 3962 return ret; 3963 } 3964 3965 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, 3966 struct btrfs_ioctl_balance_args *bargs) 3967 { 3968 struct btrfs_balance_control *bctl = fs_info->balance_ctl; 3969 3970 bargs->flags = bctl->flags; 3971 3972 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) 3973 bargs->state |= BTRFS_BALANCE_STATE_RUNNING; 3974 if (atomic_read(&fs_info->balance_pause_req)) 3975 bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; 3976 if (atomic_read(&fs_info->balance_cancel_req)) 3977 bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; 3978 3979 memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); 3980 memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); 3981 memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); 3982 3983 spin_lock(&fs_info->balance_lock); 3984 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 3985 spin_unlock(&fs_info->balance_lock); 3986 } 3987 3988 static long btrfs_ioctl_balance(struct file *file, void __user *arg) 3989 { 3990 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 3991 struct btrfs_fs_info *fs_info = root->fs_info; 3992 struct btrfs_ioctl_balance_args *bargs; 3993 struct btrfs_balance_control *bctl; 3994 bool need_unlock; /* for mut. excl. ops lock */ 3995 int ret; 3996 3997 if (!capable(CAP_SYS_ADMIN)) 3998 return -EPERM; 3999 4000 ret = mnt_want_write_file(file); 4001 if (ret) 4002 return ret; 4003 4004 again: 4005 if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { 4006 mutex_lock(&fs_info->balance_mutex); 4007 need_unlock = true; 4008 goto locked; 4009 } 4010 4011 /* 4012 * mut. excl. ops lock is locked. Three possibilities: 4013 * (1) some other op is running 4014 * (2) balance is running 4015 * (3) balance is paused -- special case (think resume) 4016 */ 4017 mutex_lock(&fs_info->balance_mutex); 4018 if (fs_info->balance_ctl) { 4019 /* this is either (2) or (3) */ 4020 if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { 4021 mutex_unlock(&fs_info->balance_mutex); 4022 /* 4023 * Lock released to allow other waiters to continue, 4024 * we'll reexamine the status again. 
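 *
 * After re-taking the mutex the state may have changed, hence
 * the re-check below. In short:
 *
 *	balance_ctl && !RUNNING  -> paused balance, take it over (3)
 *	anything else            -> drop the mutex and restart from
 *	                            the exclop attempt ("again")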
4025 */ 4026 mutex_lock(&fs_info->balance_mutex); 4027 4028 if (fs_info->balance_ctl && 4029 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { 4030 /* this is (3) */ 4031 need_unlock = false; 4032 goto locked; 4033 } 4034 4035 mutex_unlock(&fs_info->balance_mutex); 4036 goto again; 4037 } else { 4038 /* this is (2) */ 4039 mutex_unlock(&fs_info->balance_mutex); 4040 ret = -EINPROGRESS; 4041 goto out; 4042 } 4043 } else { 4044 /* this is (1) */ 4045 mutex_unlock(&fs_info->balance_mutex); 4046 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 4047 goto out; 4048 } 4049 4050 locked: 4051 4052 if (arg) { 4053 bargs = memdup_user(arg, sizeof(*bargs)); 4054 if (IS_ERR(bargs)) { 4055 ret = PTR_ERR(bargs); 4056 goto out_unlock; 4057 } 4058 4059 if (bargs->flags & BTRFS_BALANCE_RESUME) { 4060 if (!fs_info->balance_ctl) { 4061 ret = -ENOTCONN; 4062 goto out_bargs; 4063 } 4064 4065 bctl = fs_info->balance_ctl; 4066 spin_lock(&fs_info->balance_lock); 4067 bctl->flags |= BTRFS_BALANCE_RESUME; 4068 spin_unlock(&fs_info->balance_lock); 4069 4070 goto do_balance; 4071 } 4072 } else { 4073 bargs = NULL; 4074 } 4075 4076 if (fs_info->balance_ctl) { 4077 ret = -EINPROGRESS; 4078 goto out_bargs; 4079 } 4080 4081 bctl = kzalloc(sizeof(*bctl), GFP_KERNEL); 4082 if (!bctl) { 4083 ret = -ENOMEM; 4084 goto out_bargs; 4085 } 4086 4087 if (arg) { 4088 memcpy(&bctl->data, &bargs->data, sizeof(bctl->data)); 4089 memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); 4090 memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); 4091 4092 bctl->flags = bargs->flags; 4093 } else { 4094 /* balance everything - no filters */ 4095 bctl->flags |= BTRFS_BALANCE_TYPE_MASK; 4096 } 4097 4098 if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { 4099 ret = -EINVAL; 4100 goto out_bctl; 4101 } 4102 4103 do_balance: 4104 /* 4105 * Ownership of bctl and exclusive operation goes to btrfs_balance. 4106 * bctl is freed in reset_balance_state, or, if restriper was paused 4107 * all the way until unmount, in free_fs_info. The flag should be 4108 * cleared after reset_balance_state. 
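 *
 * Accordingly, bctl is set to NULL right after the call to
 * btrfs_balance() below; the out_bctl label then only frees it
 * on the early flags-validation failure, never after ownership
 * has been handed over.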
4109 */ 4110 need_unlock = false; 4111 4112 ret = btrfs_balance(fs_info, bctl, bargs); 4113 bctl = NULL; 4114 4115 if ((ret == 0 || ret == -ECANCELED) && arg) { 4116 if (copy_to_user(arg, bargs, sizeof(*bargs))) 4117 ret = -EFAULT; 4118 } 4119 4120 out_bctl: 4121 kfree(bctl); 4122 out_bargs: 4123 kfree(bargs); 4124 out_unlock: 4125 mutex_unlock(&fs_info->balance_mutex); 4126 if (need_unlock) 4127 btrfs_exclop_finish(fs_info); 4128 out: 4129 mnt_drop_write_file(file); 4130 return ret; 4131 } 4132 4133 static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd) 4134 { 4135 if (!capable(CAP_SYS_ADMIN)) 4136 return -EPERM; 4137 4138 switch (cmd) { 4139 case BTRFS_BALANCE_CTL_PAUSE: 4140 return btrfs_pause_balance(fs_info); 4141 case BTRFS_BALANCE_CTL_CANCEL: 4142 return btrfs_cancel_balance(fs_info); 4143 } 4144 4145 return -EINVAL; 4146 } 4147 4148 static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info, 4149 void __user *arg) 4150 { 4151 struct btrfs_ioctl_balance_args *bargs; 4152 int ret = 0; 4153 4154 if (!capable(CAP_SYS_ADMIN)) 4155 return -EPERM; 4156 4157 mutex_lock(&fs_info->balance_mutex); 4158 if (!fs_info->balance_ctl) { 4159 ret = -ENOTCONN; 4160 goto out; 4161 } 4162 4163 bargs = kzalloc(sizeof(*bargs), GFP_KERNEL); 4164 if (!bargs) { 4165 ret = -ENOMEM; 4166 goto out; 4167 } 4168 4169 btrfs_update_ioctl_balance_args(fs_info, bargs); 4170 4171 if (copy_to_user(arg, bargs, sizeof(*bargs))) 4172 ret = -EFAULT; 4173 4174 kfree(bargs); 4175 out: 4176 mutex_unlock(&fs_info->balance_mutex); 4177 return ret; 4178 } 4179 4180 static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) 4181 { 4182 struct inode *inode = file_inode(file); 4183 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4184 struct btrfs_ioctl_quota_ctl_args *sa; 4185 int ret; 4186 4187 if (!capable(CAP_SYS_ADMIN)) 4188 return -EPERM; 4189 4190 ret = mnt_want_write_file(file); 4191 if (ret) 4192 return ret; 4193 4194 sa = memdup_user(arg, sizeof(*sa)); 4195 if (IS_ERR(sa)) { 4196 ret = PTR_ERR(sa); 4197 goto drop_write; 4198 } 4199 4200 down_write(&fs_info->subvol_sem); 4201 4202 switch (sa->cmd) { 4203 case BTRFS_QUOTA_CTL_ENABLE: 4204 ret = btrfs_quota_enable(fs_info); 4205 break; 4206 case BTRFS_QUOTA_CTL_DISABLE: 4207 ret = btrfs_quota_disable(fs_info); 4208 break; 4209 default: 4210 ret = -EINVAL; 4211 break; 4212 } 4213 4214 kfree(sa); 4215 up_write(&fs_info->subvol_sem); 4216 drop_write: 4217 mnt_drop_write_file(file); 4218 return ret; 4219 } 4220 4221 static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) 4222 { 4223 struct inode *inode = file_inode(file); 4224 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4225 struct btrfs_root *root = BTRFS_I(inode)->root; 4226 struct btrfs_ioctl_qgroup_assign_args *sa; 4227 struct btrfs_trans_handle *trans; 4228 int ret; 4229 int err; 4230 4231 if (!capable(CAP_SYS_ADMIN)) 4232 return -EPERM; 4233 4234 ret = mnt_want_write_file(file); 4235 if (ret) 4236 return ret; 4237 4238 sa = memdup_user(arg, sizeof(*sa)); 4239 if (IS_ERR(sa)) { 4240 ret = PTR_ERR(sa); 4241 goto drop_write; 4242 } 4243 4244 trans = btrfs_join_transaction(root); 4245 if (IS_ERR(trans)) { 4246 ret = PTR_ERR(trans); 4247 goto out; 4248 } 4249 4250 if (sa->assign) { 4251 ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst); 4252 } else { 4253 ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst); 4254 } 4255 4256 /* update qgroup status and info */ 4257 err = btrfs_run_qgroups(trans); 4258 if (err < 0) 4259 

static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_assign_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	if (sa->assign) {
		ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
	} else {
		ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
	}

	/* update qgroup status and info */
	err = btrfs_run_qgroups(trans);
	if (err < 0)
		btrfs_handle_fs_error(fs_info, err,
				      "failed to update qgroup status and info");
	err = btrfs_end_transaction(trans);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_create_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	if (!sa->qgroupid) {
		ret = -EINVAL;
		goto out;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	if (sa->create) {
		ret = btrfs_create_qgroup(trans, sa->qgroupid);
	} else {
		ret = btrfs_remove_qgroup(trans, sa->qgroupid);
	}

	err = btrfs_end_transaction(trans);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_limit_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;
	u64 qgroupid;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	qgroupid = sa->qgroupid;
	if (!qgroupid) {
		/* take the current subvol as qgroup */
		qgroupid = root->root_key.objectid;
	}

	ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);

	err = btrfs_end_transaction(trans);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}
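
/*
 * Example: capping the referenced space of a qgroup (illustrative,
 * untested sketch).  Passing qgroupid == 0 makes the handler above fall
 * back to the qgroup of the subvolume that the fd lives in.
 *
 *	static int limit_qgroup(int fd, __u64 qgroupid, __u64 bytes)
 *	{
 *		struct btrfs_ioctl_qgroup_limit_args args = { 0 };
 *
 *		args.qgroupid = qgroupid;
 *		args.lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER;
 *		args.lim.max_rfer = bytes;
 *		return ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args);
 *	}
 */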

static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_quota_rescan_args *qsa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	qsa = memdup_user(arg, sizeof(*qsa));
	if (IS_ERR(qsa)) {
		ret = PTR_ERR(qsa);
		goto drop_write;
	}

	if (qsa->flags) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_qgroup_rescan(fs_info);

out:
	kfree(qsa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
					    void __user *arg)
{
	struct btrfs_ioctl_quota_rescan_args *qsa;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
	if (!qsa)
		return -ENOMEM;

	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		qsa->flags = 1;
		qsa->progress = fs_info->qgroup_rescan_progress.objectid;
	}

	if (copy_to_user(arg, qsa, sizeof(*qsa)))
		ret = -EFAULT;

	kfree(qsa);
	return ret;
}

static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
					  void __user *arg)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btrfs_qgroup_wait_for_completion(fs_info, true);
}
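
/*
 * Example: kicking off a rescan and blocking until it completes
 * (illustrative, untested sketch).  ->flags must be zero on entry, as
 * enforced above, and the WAIT ioctl takes no argument.
 *
 *	static int rescan_and_wait(int fd)
 *	{
 *		struct btrfs_ioctl_quota_rescan_args args = { 0 };
 *		int ret;
 *
 *		ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args);
 *		if (ret)
 *			return ret;
 *		return ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, NULL);
 *	}
 */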

static long _btrfs_ioctl_set_received_subvol(struct file *file,
			struct btrfs_ioctl_received_subvol_args *sa)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct btrfs_trans_handle *trans;
	struct timespec64 ct = current_time(inode);
	int ret = 0;
	int received_uuid_changed;

	if (!inode_owner_or_capable(&init_user_ns, inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret < 0)
		return ret;

	down_write(&fs_info->subvol_sem);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out;
	}

	if (btrfs_root_readonly(root)) {
		ret = -EROFS;
		goto out;
	}

	/*
	 * 1 - root item
	 * 2 - uuid items (received uuid + subvol uuid)
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	sa->rtransid = trans->transid;
	sa->rtime.sec = ct.tv_sec;
	sa->rtime.nsec = ct.tv_nsec;

	received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
				       BTRFS_UUID_SIZE);
	if (received_uuid_changed &&
	    !btrfs_is_empty_uuid(root_item->received_uuid)) {
		ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
					     BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					     root->root_key.objectid);
		if (ret && ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}
	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
	btrfs_set_root_stransid(root_item, sa->stransid);
	btrfs_set_root_rtransid(root_item, sa->rtransid);
	btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
	btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
	btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
	btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&root->root_key, &root->root_item);
	if (ret < 0) {
		btrfs_end_transaction(trans);
		goto out;
	}
	if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
		ret = btrfs_uuid_tree_add(trans, sa->uuid,
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  root->root_key.objectid);
		if (ret < 0 && ret != -EEXIST) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}
	ret = btrfs_commit_transaction(trans);
out:
	up_write(&fs_info->subvol_sem);
	mnt_drop_write_file(file);
	return ret;
}

#ifdef CONFIG_64BIT
static long btrfs_ioctl_set_received_subvol_32(struct file *file,
						void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
	struct btrfs_ioctl_received_subvol_args *args64 = NULL;
	int ret = 0;

	args32 = memdup_user(arg, sizeof(*args32));
	if (IS_ERR(args32))
		return PTR_ERR(args32);

	args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
	if (!args64) {
		ret = -ENOMEM;
		goto out;
	}

	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
	args64->stransid = args32->stransid;
	args64->rtransid = args32->rtransid;
	args64->stime.sec = args32->stime.sec;
	args64->stime.nsec = args32->stime.nsec;
	args64->rtime.sec = args32->rtime.sec;
	args64->rtime.nsec = args32->rtime.nsec;
	args64->flags = args32->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, args64);
	if (ret)
		goto out;

	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
	args32->stransid = args64->stransid;
	args32->rtransid = args64->rtransid;
	args32->stime.sec = args64->stime.sec;
	args32->stime.nsec = args64->stime.nsec;
	args32->rtime.sec = args64->rtime.sec;
	args32->rtime.nsec = args64->rtime.nsec;
	args32->flags = args64->flags;

	ret = copy_to_user(arg, args32, sizeof(*args32));
	if (ret)
		ret = -EFAULT;

out:
	kfree(args32);
	kfree(args64);
	return ret;
}
#endif

static long btrfs_ioctl_set_received_subvol(struct file *file,
					    void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args *sa = NULL;
	int ret = 0;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	ret = _btrfs_ioctl_set_received_subvol(file, sa);
	if (ret)
		goto out;

	ret = copy_to_user(arg, sa, sizeof(*sa));
	if (ret)
		ret = -EFAULT;

out:
	kfree(sa);
	return ret;
}

static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
				   void __user *arg)
{
	size_t len;
	int ret;
	char label[BTRFS_LABEL_SIZE];

	spin_lock(&fs_info->super_lock);
	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
	spin_unlock(&fs_info->super_lock);

	len = strnlen(label, BTRFS_LABEL_SIZE);

	if (len == BTRFS_LABEL_SIZE) {
		btrfs_warn(fs_info,
			   "label is too long, return the first %zu bytes",
			   --len);
	}

	ret = copy_to_user(arg, label, len);

	return ret ? -EFAULT : 0;
}
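
/*
 * Example: reading the label through the generic FS_IOC_GETFSLABEL
 * interface (illustrative, untested sketch; FSLABEL_MAX comes from
 * <linux/fs.h>).  The handler above copies only strnlen() bytes, so the
 * caller must zero the buffer beforehand to get a NUL-terminated string.
 *
 *	static int get_label(int fd, char label[FSLABEL_MAX])
 *	{
 *		memset(label, 0, FSLABEL_MAX);
 *		return ioctl(fd, FS_IOC_GETFSLABEL, label);
 *	}
 */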

static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_super_block *super_block = fs_info->super_copy;
	struct btrfs_trans_handle *trans;
	char label[BTRFS_LABEL_SIZE];
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(label, arg, sizeof(label)))
		return -EFAULT;

	if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
		btrfs_err(fs_info,
			  "unable to set label with more than %d bytes",
			  BTRFS_LABEL_SIZE - 1);
		return -EINVAL;
	}

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	spin_lock(&fs_info->super_lock);
	strcpy(super_block->label, label);
	spin_unlock(&fs_info->super_lock);
	ret = btrfs_commit_transaction(trans);

out_unlock:
	mnt_drop_write_file(file);
	return ret;
}

#define INIT_FEATURE_FLAGS(suffix) \
	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
	  .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
	  .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }

int btrfs_ioctl_get_supported_features(void __user *arg)
{
	static const struct btrfs_ioctl_feature_flags features[3] = {
		INIT_FEATURE_FLAGS(SUPP),
		INIT_FEATURE_FLAGS(SAFE_SET),
		INIT_FEATURE_FLAGS(SAFE_CLEAR)
	};

	if (copy_to_user(arg, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
				    void __user *arg)
{
	struct btrfs_super_block *super_block = fs_info->super_copy;
	struct btrfs_ioctl_feature_flags features;

	features.compat_flags = btrfs_super_compat_flags(super_block);
	features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
	features.incompat_flags = btrfs_super_incompat_flags(super_block);

	if (copy_to_user(arg, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}
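
/*
 * Example: querying the three feature masks of a mounted filesystem
 * (illustrative, untested sketch).  The SUPPORTED_FEATURES variant does
 * not need a mounted filesystem and, in mainline, is also accepted on
 * the btrfs control device.
 *
 *	static int get_features(int fd, struct btrfs_ioctl_feature_flags *f)
 *	{
 *		return ioctl(fd, BTRFS_IOC_GET_FEATURES, f);
 *	}
 */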
"s" : ""); 4738 kfree(names); 4739 } else 4740 btrfs_warn(fs_info, 4741 "can't set %s bits 0x%llx while mounted", 4742 type, disallowed); 4743 return -EPERM; 4744 } 4745 4746 disallowed = clear_mask & ~safe_clear; 4747 if (disallowed) { 4748 names = btrfs_printable_features(set, disallowed); 4749 if (names) { 4750 btrfs_warn(fs_info, 4751 "can't clear the %s feature bit%s while mounted", 4752 names, strchr(names, ',') ? "s" : ""); 4753 kfree(names); 4754 } else 4755 btrfs_warn(fs_info, 4756 "can't clear %s bits 0x%llx while mounted", 4757 type, disallowed); 4758 return -EPERM; 4759 } 4760 4761 return 0; 4762 } 4763 4764 #define check_feature(fs_info, change_mask, flags, mask_base) \ 4765 check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \ 4766 BTRFS_FEATURE_ ## mask_base ## _SUPP, \ 4767 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ 4768 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) 4769 4770 static int btrfs_ioctl_set_features(struct file *file, void __user *arg) 4771 { 4772 struct inode *inode = file_inode(file); 4773 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4774 struct btrfs_root *root = BTRFS_I(inode)->root; 4775 struct btrfs_super_block *super_block = fs_info->super_copy; 4776 struct btrfs_ioctl_feature_flags flags[2]; 4777 struct btrfs_trans_handle *trans; 4778 u64 newflags; 4779 int ret; 4780 4781 if (!capable(CAP_SYS_ADMIN)) 4782 return -EPERM; 4783 4784 if (copy_from_user(flags, arg, sizeof(flags))) 4785 return -EFAULT; 4786 4787 /* Nothing to do */ 4788 if (!flags[0].compat_flags && !flags[0].compat_ro_flags && 4789 !flags[0].incompat_flags) 4790 return 0; 4791 4792 ret = check_feature(fs_info, flags[0].compat_flags, 4793 flags[1].compat_flags, COMPAT); 4794 if (ret) 4795 return ret; 4796 4797 ret = check_feature(fs_info, flags[0].compat_ro_flags, 4798 flags[1].compat_ro_flags, COMPAT_RO); 4799 if (ret) 4800 return ret; 4801 4802 ret = check_feature(fs_info, flags[0].incompat_flags, 4803 flags[1].incompat_flags, INCOMPAT); 4804 if (ret) 4805 return ret; 4806 4807 ret = mnt_want_write_file(file); 4808 if (ret) 4809 return ret; 4810 4811 trans = btrfs_start_transaction(root, 0); 4812 if (IS_ERR(trans)) { 4813 ret = PTR_ERR(trans); 4814 goto out_drop_write; 4815 } 4816 4817 spin_lock(&fs_info->super_lock); 4818 newflags = btrfs_super_compat_flags(super_block); 4819 newflags |= flags[0].compat_flags & flags[1].compat_flags; 4820 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); 4821 btrfs_set_super_compat_flags(super_block, newflags); 4822 4823 newflags = btrfs_super_compat_ro_flags(super_block); 4824 newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags; 4825 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); 4826 btrfs_set_super_compat_ro_flags(super_block, newflags); 4827 4828 newflags = btrfs_super_incompat_flags(super_block); 4829 newflags |= flags[0].incompat_flags & flags[1].incompat_flags; 4830 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); 4831 btrfs_set_super_incompat_flags(super_block, newflags); 4832 spin_unlock(&fs_info->super_lock); 4833 4834 ret = btrfs_commit_transaction(trans); 4835 out_drop_write: 4836 mnt_drop_write_file(file); 4837 4838 return ret; 4839 } 4840 4841 static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat) 4842 { 4843 struct btrfs_ioctl_send_args *arg; 4844 int ret; 4845 4846 if (compat) { 4847 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 4848 struct btrfs_ioctl_send_args_32 args32; 4849 4850 ret = copy_from_user(&args32, argp, 

static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
{
	struct btrfs_ioctl_send_args *arg;
	int ret;

	if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
		struct btrfs_ioctl_send_args_32 args32;

		ret = copy_from_user(&args32, argp, sizeof(args32));
		if (ret)
			return -EFAULT;
		arg = kzalloc(sizeof(*arg), GFP_KERNEL);
		if (!arg)
			return -ENOMEM;
		arg->send_fd = args32.send_fd;
		arg->clone_sources_count = args32.clone_sources_count;
		arg->clone_sources = compat_ptr(args32.clone_sources);
		arg->parent_root = args32.parent_root;
		arg->flags = args32.flags;
		memcpy(arg->reserved, args32.reserved,
		       sizeof(args32.reserved));
#else
		return -ENOTTY;
#endif
	} else {
		arg = memdup_user(argp, sizeof(*arg));
		if (IS_ERR(arg))
			return PTR_ERR(arg);
	}
	ret = btrfs_ioctl_send(file, arg);
	kfree(arg);
	return ret;
}
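
/*
 * Example: dumping a send stream of a read-only subvolume into an
 * already-open file or pipe (illustrative, untested sketch).  With no
 * parent root and no clone sources this produces a full stream.
 *
 *	static int send_subvol(int subvol_fd, int out_fd)
 *	{
 *		struct btrfs_ioctl_send_args args = { 0 };
 *
 *		args.send_fd = out_fd;
 *		return ioctl(subvol_fd, BTRFS_IOC_SEND, &args);
 *	}
 */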

long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case FS_IOC_GETVERSION:
		return btrfs_ioctl_getversion(file, argp);
	case FS_IOC_GETFSLABEL:
		return btrfs_ioctl_get_fslabel(fs_info, argp);
	case FS_IOC_SETFSLABEL:
		return btrfs_ioctl_set_fslabel(file, argp);
	case FITRIM:
		return btrfs_ioctl_fitrim(fs_info, argp);
	case BTRFS_IOC_SNAP_CREATE:
		return btrfs_ioctl_snap_create(file, argp, 0);
	case BTRFS_IOC_SNAP_CREATE_V2:
		return btrfs_ioctl_snap_create_v2(file, argp, 0);
	case BTRFS_IOC_SUBVOL_CREATE:
		return btrfs_ioctl_snap_create(file, argp, 1);
	case BTRFS_IOC_SUBVOL_CREATE_V2:
		return btrfs_ioctl_snap_create_v2(file, argp, 1);
	case BTRFS_IOC_SNAP_DESTROY:
		return btrfs_ioctl_snap_destroy(file, argp, false);
	case BTRFS_IOC_SNAP_DESTROY_V2:
		return btrfs_ioctl_snap_destroy(file, argp, true);
	case BTRFS_IOC_SUBVOL_GETFLAGS:
		return btrfs_ioctl_subvol_getflags(file, argp);
	case BTRFS_IOC_SUBVOL_SETFLAGS:
		return btrfs_ioctl_subvol_setflags(file, argp);
	case BTRFS_IOC_DEFAULT_SUBVOL:
		return btrfs_ioctl_default_subvol(file, argp);
	case BTRFS_IOC_DEFRAG:
		return btrfs_ioctl_defrag(file, NULL);
	case BTRFS_IOC_DEFRAG_RANGE:
		return btrfs_ioctl_defrag(file, argp);
	case BTRFS_IOC_RESIZE:
		return btrfs_ioctl_resize(file, argp);
	case BTRFS_IOC_ADD_DEV:
		return btrfs_ioctl_add_dev(fs_info, argp);
	case BTRFS_IOC_RM_DEV:
		return btrfs_ioctl_rm_dev(file, argp);
	case BTRFS_IOC_RM_DEV_V2:
		return btrfs_ioctl_rm_dev_v2(file, argp);
	case BTRFS_IOC_FS_INFO:
		return btrfs_ioctl_fs_info(fs_info, argp);
	case BTRFS_IOC_DEV_INFO:
		return btrfs_ioctl_dev_info(fs_info, argp);
	case BTRFS_IOC_BALANCE:
		return btrfs_ioctl_balance(file, NULL);
	case BTRFS_IOC_TREE_SEARCH:
		return btrfs_ioctl_tree_search(file, argp);
	case BTRFS_IOC_TREE_SEARCH_V2:
		return btrfs_ioctl_tree_search_v2(file, argp);
	case BTRFS_IOC_INO_LOOKUP:
		return btrfs_ioctl_ino_lookup(file, argp);
	case BTRFS_IOC_INO_PATHS:
		return btrfs_ioctl_ino_to_path(root, argp);
	case BTRFS_IOC_LOGICAL_INO:
		return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
	case BTRFS_IOC_LOGICAL_INO_V2:
		return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
	case BTRFS_IOC_SPACE_INFO:
		return btrfs_ioctl_space_info(fs_info, argp);
	case BTRFS_IOC_SYNC: {
		int ret;

		ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false);
		if (ret)
			return ret;
		ret = btrfs_sync_fs(inode->i_sb, 1);
		/*
		 * The transaction thread may want to do more work,
		 * namely it pokes the cleaner kthread that will start
		 * processing uncleaned subvols.
		 */
		wake_up_process(fs_info->transaction_kthread);
		return ret;
	}
	case BTRFS_IOC_START_SYNC:
		return btrfs_ioctl_start_sync(root, argp);
	case BTRFS_IOC_WAIT_SYNC:
		return btrfs_ioctl_wait_sync(fs_info, argp);
	case BTRFS_IOC_SCRUB:
		return btrfs_ioctl_scrub(file, argp);
	case BTRFS_IOC_SCRUB_CANCEL:
		return btrfs_ioctl_scrub_cancel(fs_info);
	case BTRFS_IOC_SCRUB_PROGRESS:
		return btrfs_ioctl_scrub_progress(fs_info, argp);
	case BTRFS_IOC_BALANCE_V2:
		return btrfs_ioctl_balance(file, argp);
	case BTRFS_IOC_BALANCE_CTL:
		return btrfs_ioctl_balance_ctl(fs_info, arg);
	case BTRFS_IOC_BALANCE_PROGRESS:
		return btrfs_ioctl_balance_progress(fs_info, argp);
	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
		return btrfs_ioctl_set_received_subvol(file, argp);
#ifdef CONFIG_64BIT
	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
		return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
	case BTRFS_IOC_SEND:
		return _btrfs_ioctl_send(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
	case BTRFS_IOC_SEND_32:
		return _btrfs_ioctl_send(file, argp, true);
#endif
	case BTRFS_IOC_GET_DEV_STATS:
		return btrfs_ioctl_get_dev_stats(fs_info, argp);
	case BTRFS_IOC_QUOTA_CTL:
		return btrfs_ioctl_quota_ctl(file, argp);
	case BTRFS_IOC_QGROUP_ASSIGN:
		return btrfs_ioctl_qgroup_assign(file, argp);
	case BTRFS_IOC_QGROUP_CREATE:
		return btrfs_ioctl_qgroup_create(file, argp);
	case BTRFS_IOC_QGROUP_LIMIT:
		return btrfs_ioctl_qgroup_limit(file, argp);
	case BTRFS_IOC_QUOTA_RESCAN:
		return btrfs_ioctl_quota_rescan(file, argp);
	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
		return btrfs_ioctl_quota_rescan_status(fs_info, argp);
	case BTRFS_IOC_QUOTA_RESCAN_WAIT:
		return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
	case BTRFS_IOC_DEV_REPLACE:
		return btrfs_ioctl_dev_replace(fs_info, argp);
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
		return btrfs_ioctl_get_supported_features(argp);
	case BTRFS_IOC_GET_FEATURES:
		return btrfs_ioctl_get_features(fs_info, argp);
	case BTRFS_IOC_SET_FEATURES:
		return btrfs_ioctl_set_features(file, argp);
	case BTRFS_IOC_GET_SUBVOL_INFO:
		return btrfs_ioctl_get_subvol_info(file, argp);
	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
		return btrfs_ioctl_get_subvol_rootref(file, argp);
	case BTRFS_IOC_INO_LOOKUP_USER:
		return btrfs_ioctl_ino_lookup_user(file, argp);
	}

	return -ENOTTY;
}

#ifdef CONFIG_COMPAT
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/*
	 * These all access 32-bit values anyway so no further
	 * handling is necessary.
	 */
	switch (cmd) {
	case FS_IOC32_GETVERSION:
		cmd = FS_IOC_GETVERSION;
		break;
	}

	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif