1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/buffer_head.h> 22 #include <linux/file.h> 23 #include <linux/fs.h> 24 #include <linux/fsnotify.h> 25 #include <linux/pagemap.h> 26 #include <linux/highmem.h> 27 #include <linux/time.h> 28 #include <linux/init.h> 29 #include <linux/string.h> 30 #include <linux/backing-dev.h> 31 #include <linux/mount.h> 32 #include <linux/mpage.h> 33 #include <linux/namei.h> 34 #include <linux/swap.h> 35 #include <linux/writeback.h> 36 #include <linux/statfs.h> 37 #include <linux/compat.h> 38 #include <linux/bit_spinlock.h> 39 #include <linux/security.h> 40 #include <linux/xattr.h> 41 #include <linux/vmalloc.h> 42 #include <linux/slab.h> 43 #include <linux/blkdev.h> 44 #include <linux/uuid.h> 45 #include "compat.h" 46 #include "ctree.h" 47 #include "disk-io.h" 48 #include "transaction.h" 49 #include "btrfs_inode.h" 50 #include "ioctl.h" 51 #include "print-tree.h" 52 #include "volumes.h" 53 #include "locking.h" 54 #include "inode-map.h" 55 #include "backref.h" 56 #include "rcu-string.h" 57 #include "send.h" 58 59 /* Mask out flags that are inappropriate for the given type of inode. */ 60 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 61 { 62 if (S_ISDIR(mode)) 63 return flags; 64 else if (S_ISREG(mode)) 65 return flags & ~FS_DIRSYNC_FL; 66 else 67 return flags & (FS_NODUMP_FL | FS_NOATIME_FL); 68 } 69 70 /* 71 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. 72 */ 73 static unsigned int btrfs_flags_to_ioctl(unsigned int flags) 74 { 75 unsigned int iflags = 0; 76 77 if (flags & BTRFS_INODE_SYNC) 78 iflags |= FS_SYNC_FL; 79 if (flags & BTRFS_INODE_IMMUTABLE) 80 iflags |= FS_IMMUTABLE_FL; 81 if (flags & BTRFS_INODE_APPEND) 82 iflags |= FS_APPEND_FL; 83 if (flags & BTRFS_INODE_NODUMP) 84 iflags |= FS_NODUMP_FL; 85 if (flags & BTRFS_INODE_NOATIME) 86 iflags |= FS_NOATIME_FL; 87 if (flags & BTRFS_INODE_DIRSYNC) 88 iflags |= FS_DIRSYNC_FL; 89 if (flags & BTRFS_INODE_NODATACOW) 90 iflags |= FS_NOCOW_FL; 91 92 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) 93 iflags |= FS_COMPR_FL; 94 else if (flags & BTRFS_INODE_NOCOMPRESS) 95 iflags |= FS_NOCOMP_FL; 96 97 return iflags; 98 } 99 100 /* 101 * Update inode->i_flags based on the btrfs internal flags. 102 */ 103 void btrfs_update_iflags(struct inode *inode) 104 { 105 struct btrfs_inode *ip = BTRFS_I(inode); 106 107 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 108 109 if (ip->flags & BTRFS_INODE_SYNC) 110 inode->i_flags |= S_SYNC; 111 if (ip->flags & BTRFS_INODE_IMMUTABLE) 112 inode->i_flags |= S_IMMUTABLE; 113 if (ip->flags & BTRFS_INODE_APPEND) 114 inode->i_flags |= S_APPEND; 115 if (ip->flags & BTRFS_INODE_NOATIME) 116 inode->i_flags |= S_NOATIME; 117 if (ip->flags & BTRFS_INODE_DIRSYNC) 118 inode->i_flags |= S_DIRSYNC; 119 } 120 121 /* 122 * Inherit flags from the parent inode. 123 * 124 * Currently only the compression flags and the cow flags are inherited. 125 */ 126 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) 127 { 128 unsigned int flags; 129 130 if (!dir) 131 return; 132 133 flags = BTRFS_I(dir)->flags; 134 135 if (flags & BTRFS_INODE_NOCOMPRESS) { 136 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 137 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 138 } else if (flags & BTRFS_INODE_COMPRESS) { 139 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 140 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 141 } 142 143 if (flags & BTRFS_INODE_NODATACOW) 144 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 145 146 btrfs_update_iflags(inode); 147 } 148 149 static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 150 { 151 struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); 152 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 153 154 if (copy_to_user(arg, &flags, sizeof(flags))) 155 return -EFAULT; 156 return 0; 157 } 158 159 static int check_flags(unsigned int flags) 160 { 161 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 162 FS_NOATIME_FL | FS_NODUMP_FL | \ 163 FS_SYNC_FL | FS_DIRSYNC_FL | \ 164 FS_NOCOMP_FL | FS_COMPR_FL | 165 FS_NOCOW_FL)) 166 return -EOPNOTSUPP; 167 168 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 169 return -EINVAL; 170 171 return 0; 172 } 173 174 static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 175 { 176 struct inode *inode = file->f_path.dentry->d_inode; 177 struct btrfs_inode *ip = BTRFS_I(inode); 178 struct btrfs_root *root = ip->root; 179 struct btrfs_trans_handle *trans; 180 unsigned int flags, oldflags; 181 int ret; 182 u64 ip_oldflags; 183 unsigned int i_oldflags; 184 185 if (btrfs_root_readonly(root)) 186 return -EROFS; 187 188 if (copy_from_user(&flags, arg, sizeof(flags))) 189 return -EFAULT; 190 191 ret = check_flags(flags); 192 if (ret) 193 return ret; 194 195 if (!inode_owner_or_capable(inode)) 196 return -EACCES; 197 198 ret = mnt_want_write_file(file); 199 if (ret) 200 return ret; 201 202 mutex_lock(&inode->i_mutex); 203 204 ip_oldflags = ip->flags; 205 i_oldflags = inode->i_flags; 206 207 flags = btrfs_mask_flags(inode->i_mode, flags); 208 oldflags = btrfs_flags_to_ioctl(ip->flags); 209 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 210 if (!capable(CAP_LINUX_IMMUTABLE)) { 211 ret = -EPERM; 212 goto out_unlock; 213 } 214 } 215 216 if (flags & FS_SYNC_FL) 217 ip->flags |= BTRFS_INODE_SYNC; 218 else 219 ip->flags &= ~BTRFS_INODE_SYNC; 220 if (flags & FS_IMMUTABLE_FL) 221 ip->flags |= BTRFS_INODE_IMMUTABLE; 222 else 223 ip->flags &= ~BTRFS_INODE_IMMUTABLE; 224 if (flags & FS_APPEND_FL) 225 ip->flags |= BTRFS_INODE_APPEND; 226 else 227 ip->flags &= ~BTRFS_INODE_APPEND; 228 if (flags & FS_NODUMP_FL) 229 ip->flags |= BTRFS_INODE_NODUMP; 230 else 231 ip->flags &= ~BTRFS_INODE_NODUMP; 232 if (flags & FS_NOATIME_FL) 233 ip->flags |= BTRFS_INODE_NOATIME; 234 else 235 ip->flags &= ~BTRFS_INODE_NOATIME; 236 if (flags & FS_DIRSYNC_FL) 237 ip->flags |= BTRFS_INODE_DIRSYNC; 238 else 239 ip->flags &= ~BTRFS_INODE_DIRSYNC; 240 if (flags & FS_NOCOW_FL) 241 ip->flags |= BTRFS_INODE_NODATACOW; 242 else 243 ip->flags &= ~BTRFS_INODE_NODATACOW; 244 245 /* 246 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 247 * flag may be changed automatically if compression code won't make 248 * things smaller. 249 */ 250 if (flags & FS_NOCOMP_FL) { 251 ip->flags &= ~BTRFS_INODE_COMPRESS; 252 ip->flags |= BTRFS_INODE_NOCOMPRESS; 253 } else if (flags & FS_COMPR_FL) { 254 ip->flags |= BTRFS_INODE_COMPRESS; 255 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 256 } else { 257 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 258 } 259 260 trans = btrfs_start_transaction(root, 1); 261 if (IS_ERR(trans)) { 262 ret = PTR_ERR(trans); 263 goto out_drop; 264 } 265 266 btrfs_update_iflags(inode); 267 inode_inc_iversion(inode); 268 inode->i_ctime = CURRENT_TIME; 269 ret = btrfs_update_inode(trans, root, inode); 270 271 btrfs_end_transaction(trans, root); 272 out_drop: 273 if (ret) { 274 ip->flags = ip_oldflags; 275 inode->i_flags = i_oldflags; 276 } 277 278 out_unlock: 279 mutex_unlock(&inode->i_mutex); 280 mnt_drop_write_file(file); 281 return ret; 282 } 283 284 static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 285 { 286 struct inode *inode = file->f_path.dentry->d_inode; 287 288 return put_user(inode->i_generation, arg); 289 } 290 291 static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) 292 { 293 struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); 294 struct btrfs_device *device; 295 struct request_queue *q; 296 struct fstrim_range range; 297 u64 minlen = ULLONG_MAX; 298 u64 num_devices = 0; 299 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); 300 int ret; 301 302 if (!capable(CAP_SYS_ADMIN)) 303 return -EPERM; 304 305 rcu_read_lock(); 306 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, 307 dev_list) { 308 if (!device->bdev) 309 continue; 310 q = bdev_get_queue(device->bdev); 311 if (blk_queue_discard(q)) { 312 num_devices++; 313 minlen = min((u64)q->limits.discard_granularity, 314 minlen); 315 } 316 } 317 rcu_read_unlock(); 318 319 if (!num_devices) 320 return -EOPNOTSUPP; 321 if (copy_from_user(&range, arg, sizeof(range))) 322 return -EFAULT; 323 if (range.start > total_bytes) 324 return -EINVAL; 325 326 range.len = min(range.len, total_bytes - range.start); 327 range.minlen = max(range.minlen, minlen); 328 ret = btrfs_trim_fs(fs_info->tree_root, &range); 329 if (ret < 0) 330 return ret; 331 332 if (copy_to_user(arg, &range, sizeof(range))) 333 return -EFAULT; 334 335 return 0; 336 } 337 338 static noinline int create_subvol(struct btrfs_root *root, 339 struct dentry *dentry, 340 char *name, int namelen, 341 u64 *async_transid, 342 struct btrfs_qgroup_inherit **inherit) 343 { 344 struct btrfs_trans_handle *trans; 345 struct btrfs_key key; 346 struct btrfs_root_item root_item; 347 struct btrfs_inode_item *inode_item; 348 struct extent_buffer *leaf; 349 struct btrfs_root *new_root; 350 struct dentry *parent = dentry->d_parent; 351 struct inode *dir; 352 struct timespec cur_time = CURRENT_TIME; 353 int ret; 354 int err; 355 u64 objectid; 356 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 357 u64 index = 0; 358 uuid_le new_uuid; 359 360 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); 361 if (ret) 362 return ret; 363 364 dir = parent->d_inode; 365 366 /* 367 * 1 - inode item 368 * 2 - refs 369 * 1 - root item 370 * 2 - dir items 371 */ 372 trans = btrfs_start_transaction(root, 6); 373 if (IS_ERR(trans)) 374 return PTR_ERR(trans); 375 376 ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, 377 inherit ? *inherit : NULL); 378 if (ret) 379 goto fail; 380 381 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 382 0, objectid, NULL, 0, 0, 0); 383 if (IS_ERR(leaf)) { 384 ret = PTR_ERR(leaf); 385 goto fail; 386 } 387 388 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 389 btrfs_set_header_bytenr(leaf, leaf->start); 390 btrfs_set_header_generation(leaf, trans->transid); 391 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 392 btrfs_set_header_owner(leaf, objectid); 393 394 write_extent_buffer(leaf, root->fs_info->fsid, 395 (unsigned long)btrfs_header_fsid(leaf), 396 BTRFS_FSID_SIZE); 397 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 398 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 399 BTRFS_UUID_SIZE); 400 btrfs_mark_buffer_dirty(leaf); 401 402 memset(&root_item, 0, sizeof(root_item)); 403 404 inode_item = &root_item.inode; 405 inode_item->generation = cpu_to_le64(1); 406 inode_item->size = cpu_to_le64(3); 407 inode_item->nlink = cpu_to_le32(1); 408 inode_item->nbytes = cpu_to_le64(root->leafsize); 409 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 410 411 root_item.flags = 0; 412 root_item.byte_limit = 0; 413 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); 414 415 btrfs_set_root_bytenr(&root_item, leaf->start); 416 btrfs_set_root_generation(&root_item, trans->transid); 417 btrfs_set_root_level(&root_item, 0); 418 btrfs_set_root_refs(&root_item, 1); 419 btrfs_set_root_used(&root_item, leaf->len); 420 btrfs_set_root_last_snapshot(&root_item, 0); 421 422 btrfs_set_root_generation_v2(&root_item, 423 btrfs_root_generation(&root_item)); 424 uuid_le_gen(&new_uuid); 425 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 426 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); 427 root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); 428 root_item.ctime = root_item.otime; 429 btrfs_set_root_ctransid(&root_item, trans->transid); 430 btrfs_set_root_otransid(&root_item, trans->transid); 431 432 btrfs_tree_unlock(leaf); 433 free_extent_buffer(leaf); 434 leaf = NULL; 435 436 btrfs_set_root_dirid(&root_item, new_dirid); 437 438 key.objectid = objectid; 439 key.offset = 0; 440 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 441 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 442 &root_item); 443 if (ret) 444 goto fail; 445 446 key.offset = (u64)-1; 447 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); 448 if (IS_ERR(new_root)) { 449 btrfs_abort_transaction(trans, root, PTR_ERR(new_root)); 450 ret = PTR_ERR(new_root); 451 goto fail; 452 } 453 454 btrfs_record_root_in_trans(trans, new_root); 455 456 ret = btrfs_create_subvol_root(trans, new_root, new_dirid); 457 if (ret) { 458 /* We potentially lose an unused inode item here */ 459 btrfs_abort_transaction(trans, root, ret); 460 goto fail; 461 } 462 463 /* 464 * insert the directory item 465 */ 466 ret = btrfs_set_inode_index(dir, &index); 467 if (ret) { 468 btrfs_abort_transaction(trans, root, ret); 469 goto fail; 470 } 471 472 ret = btrfs_insert_dir_item(trans, root, 473 name, namelen, dir, &key, 474 BTRFS_FT_DIR, index); 475 if (ret) { 476 btrfs_abort_transaction(trans, root, ret); 477 goto fail; 478 } 479 480 btrfs_i_size_write(dir, dir->i_size + namelen * 2); 481 ret = btrfs_update_inode(trans, root, dir); 482 BUG_ON(ret); 483 484 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 485 objectid, root->root_key.objectid, 486 btrfs_ino(dir), index, name, namelen); 487 488 BUG_ON(ret); 489 490 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 491 fail: 492 if (async_transid) { 493 *async_transid = trans->transid; 494 err = btrfs_commit_transaction_async(trans, root, 1); 495 } else { 496 err = btrfs_commit_transaction(trans, root); 497 } 498 if (err && !ret) 499 ret = err; 500 return ret; 501 } 502 503 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 504 char *name, int namelen, u64 *async_transid, 505 bool readonly, struct btrfs_qgroup_inherit **inherit) 506 { 507 struct inode *inode; 508 struct btrfs_pending_snapshot *pending_snapshot; 509 struct btrfs_trans_handle *trans; 510 int ret; 511 512 if (!root->ref_cows) 513 return -EINVAL; 514 515 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 516 if (!pending_snapshot) 517 return -ENOMEM; 518 519 btrfs_init_block_rsv(&pending_snapshot->block_rsv); 520 pending_snapshot->dentry = dentry; 521 pending_snapshot->root = root; 522 pending_snapshot->readonly = readonly; 523 if (inherit) { 524 pending_snapshot->inherit = *inherit; 525 *inherit = NULL; /* take responsibility to free it */ 526 } 527 528 trans = btrfs_start_transaction(root->fs_info->extent_root, 5); 529 if (IS_ERR(trans)) { 530 ret = PTR_ERR(trans); 531 goto fail; 532 } 533 534 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); 535 BUG_ON(ret); 536 537 spin_lock(&root->fs_info->trans_lock); 538 list_add(&pending_snapshot->list, 539 &trans->transaction->pending_snapshots); 540 spin_unlock(&root->fs_info->trans_lock); 541 if (async_transid) { 542 *async_transid = trans->transid; 543 ret = btrfs_commit_transaction_async(trans, 544 root->fs_info->extent_root, 1); 545 } else { 546 ret = btrfs_commit_transaction(trans, 547 root->fs_info->extent_root); 548 } 549 BUG_ON(ret); 550 551 ret = pending_snapshot->error; 552 if (ret) 553 goto fail; 554 555 ret = btrfs_orphan_cleanup(pending_snapshot->snap); 556 if (ret) 557 goto fail; 558 559 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 560 if (IS_ERR(inode)) { 561 ret = PTR_ERR(inode); 562 goto fail; 563 } 564 BUG_ON(!inode); 565 d_instantiate(dentry, inode); 566 ret = 0; 567 fail: 568 kfree(pending_snapshot); 569 return ret; 570 } 571 572 /* copy of check_sticky in fs/namei.c() 573 * It's inline, so penalty for filesystems that don't use sticky bit is 574 * minimal. 575 */ 576 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 577 { 578 uid_t fsuid = current_fsuid(); 579 580 if (!(dir->i_mode & S_ISVTX)) 581 return 0; 582 if (inode->i_uid == fsuid) 583 return 0; 584 if (dir->i_uid == fsuid) 585 return 0; 586 return !capable(CAP_FOWNER); 587 } 588 589 /* copy of may_delete in fs/namei.c() 590 * Check whether we can remove a link victim from directory dir, check 591 * whether the type of victim is right. 592 * 1. We can't do it if dir is read-only (done in permission()) 593 * 2. We should have write and exec permissions on dir 594 * 3. We can't remove anything from append-only dir 595 * 4. We can't do anything with immutable dir (done in permission()) 596 * 5. If the sticky bit on dir is set we should either 597 * a. be owner of dir, or 598 * b. be owner of victim, or 599 * c. have CAP_FOWNER capability 600 * 6. If the victim is append-only or immutable we can't do antyhing with 601 * links pointing to it. 602 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 603 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 604 * 9. We can't remove a root or mountpoint. 605 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 606 * nfs_async_unlink(). 607 */ 608 609 static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) 610 { 611 int error; 612 613 if (!victim->d_inode) 614 return -ENOENT; 615 616 BUG_ON(victim->d_parent->d_inode != dir); 617 audit_inode_child(victim, dir); 618 619 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 620 if (error) 621 return error; 622 if (IS_APPEND(dir)) 623 return -EPERM; 624 if (btrfs_check_sticky(dir, victim->d_inode)|| 625 IS_APPEND(victim->d_inode)|| 626 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 627 return -EPERM; 628 if (isdir) { 629 if (!S_ISDIR(victim->d_inode->i_mode)) 630 return -ENOTDIR; 631 if (IS_ROOT(victim)) 632 return -EBUSY; 633 } else if (S_ISDIR(victim->d_inode->i_mode)) 634 return -EISDIR; 635 if (IS_DEADDIR(dir)) 636 return -ENOENT; 637 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 638 return -EBUSY; 639 return 0; 640 } 641 642 /* copy of may_create in fs/namei.c() */ 643 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 644 { 645 if (child->d_inode) 646 return -EEXIST; 647 if (IS_DEADDIR(dir)) 648 return -ENOENT; 649 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 650 } 651 652 /* 653 * Create a new subvolume below @parent. This is largely modeled after 654 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 655 * inside this filesystem so it's quite a bit simpler. 656 */ 657 static noinline int btrfs_mksubvol(struct path *parent, 658 char *name, int namelen, 659 struct btrfs_root *snap_src, 660 u64 *async_transid, bool readonly, 661 struct btrfs_qgroup_inherit **inherit) 662 { 663 struct inode *dir = parent->dentry->d_inode; 664 struct dentry *dentry; 665 int error; 666 667 error = mnt_want_write(parent->mnt); 668 if (error) 669 return error; 670 671 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 672 673 dentry = lookup_one_len(name, parent->dentry, namelen); 674 error = PTR_ERR(dentry); 675 if (IS_ERR(dentry)) 676 goto out_unlock; 677 678 error = -EEXIST; 679 if (dentry->d_inode) 680 goto out_dput; 681 682 error = btrfs_may_create(dir, dentry); 683 if (error) 684 goto out_dput; 685 686 down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 687 688 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) 689 goto out_up_read; 690 691 if (snap_src) { 692 error = create_snapshot(snap_src, dentry, name, namelen, 693 async_transid, readonly, inherit); 694 } else { 695 error = create_subvol(BTRFS_I(dir)->root, dentry, 696 name, namelen, async_transid, inherit); 697 } 698 if (!error) 699 fsnotify_mkdir(dir, dentry); 700 out_up_read: 701 up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 702 out_dput: 703 dput(dentry); 704 out_unlock: 705 mutex_unlock(&dir->i_mutex); 706 mnt_drop_write(parent->mnt); 707 return error; 708 } 709 710 /* 711 * When we're defragging a range, we don't want to kick it off again 712 * if it is really just waiting for delalloc to send it down. 713 * If we find a nice big extent or delalloc range for the bytes in the 714 * file you want to defrag, we return 0 to let you know to skip this 715 * part of the file 716 */ 717 static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) 718 { 719 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 720 struct extent_map *em = NULL; 721 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 722 u64 end; 723 724 read_lock(&em_tree->lock); 725 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); 726 read_unlock(&em_tree->lock); 727 728 if (em) { 729 end = extent_map_end(em); 730 free_extent_map(em); 731 if (end - offset > thresh) 732 return 0; 733 } 734 /* if we already have a nice delalloc here, just stop */ 735 thresh /= 2; 736 end = count_range_bits(io_tree, &offset, offset + thresh, 737 thresh, EXTENT_DELALLOC, 1); 738 if (end >= thresh) 739 return 0; 740 return 1; 741 } 742 743 /* 744 * helper function to walk through a file and find extents 745 * newer than a specific transid, and smaller than thresh. 746 * 747 * This is used by the defragging code to find new and small 748 * extents 749 */ 750 static int find_new_extents(struct btrfs_root *root, 751 struct inode *inode, u64 newer_than, 752 u64 *off, int thresh) 753 { 754 struct btrfs_path *path; 755 struct btrfs_key min_key; 756 struct btrfs_key max_key; 757 struct extent_buffer *leaf; 758 struct btrfs_file_extent_item *extent; 759 int type; 760 int ret; 761 u64 ino = btrfs_ino(inode); 762 763 path = btrfs_alloc_path(); 764 if (!path) 765 return -ENOMEM; 766 767 min_key.objectid = ino; 768 min_key.type = BTRFS_EXTENT_DATA_KEY; 769 min_key.offset = *off; 770 771 max_key.objectid = ino; 772 max_key.type = (u8)-1; 773 max_key.offset = (u64)-1; 774 775 path->keep_locks = 1; 776 777 while(1) { 778 ret = btrfs_search_forward(root, &min_key, &max_key, 779 path, 0, newer_than); 780 if (ret != 0) 781 goto none; 782 if (min_key.objectid != ino) 783 goto none; 784 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 785 goto none; 786 787 leaf = path->nodes[0]; 788 extent = btrfs_item_ptr(leaf, path->slots[0], 789 struct btrfs_file_extent_item); 790 791 type = btrfs_file_extent_type(leaf, extent); 792 if (type == BTRFS_FILE_EXTENT_REG && 793 btrfs_file_extent_num_bytes(leaf, extent) < thresh && 794 check_defrag_in_cache(inode, min_key.offset, thresh)) { 795 *off = min_key.offset; 796 btrfs_free_path(path); 797 return 0; 798 } 799 800 if (min_key.offset == (u64)-1) 801 goto none; 802 803 min_key.offset++; 804 btrfs_release_path(path); 805 } 806 none: 807 btrfs_free_path(path); 808 return -ENOENT; 809 } 810 811 static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) 812 { 813 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 814 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 815 struct extent_map *em; 816 u64 len = PAGE_CACHE_SIZE; 817 818 /* 819 * hopefully we have this extent in the tree already, try without 820 * the full extent lock 821 */ 822 read_lock(&em_tree->lock); 823 em = lookup_extent_mapping(em_tree, start, len); 824 read_unlock(&em_tree->lock); 825 826 if (!em) { 827 /* get the big lock and read metadata off disk */ 828 lock_extent(io_tree, start, start + len - 1); 829 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 830 unlock_extent(io_tree, start, start + len - 1); 831 832 if (IS_ERR(em)) 833 return NULL; 834 } 835 836 return em; 837 } 838 839 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) 840 { 841 struct extent_map *next; 842 bool ret = true; 843 844 /* this is the last extent */ 845 if (em->start + em->len >= i_size_read(inode)) 846 return false; 847 848 next = defrag_lookup_extent(inode, em->start + em->len); 849 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) 850 ret = false; 851 852 free_extent_map(next); 853 return ret; 854 } 855 856 static int should_defrag_range(struct inode *inode, u64 start, int thresh, 857 u64 *last_len, u64 *skip, u64 *defrag_end, 858 int compress) 859 { 860 struct extent_map *em; 861 int ret = 1; 862 bool next_mergeable = true; 863 864 /* 865 * make sure that once we start defragging an extent, we keep on 866 * defragging it 867 */ 868 if (start < *defrag_end) 869 return 1; 870 871 *skip = 0; 872 873 em = defrag_lookup_extent(inode, start); 874 if (!em) 875 return 0; 876 877 /* this will cover holes, and inline extents */ 878 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 879 ret = 0; 880 goto out; 881 } 882 883 next_mergeable = defrag_check_next_extent(inode, em); 884 885 /* 886 * we hit a real extent, if it is big or the next extent is not a 887 * real extent, don't bother defragging it 888 */ 889 if (!compress && (*last_len == 0 || *last_len >= thresh) && 890 (em->len >= thresh || !next_mergeable)) 891 ret = 0; 892 out: 893 /* 894 * last_len ends up being a counter of how many bytes we've defragged. 895 * every time we choose not to defrag an extent, we reset *last_len 896 * so that the next tiny extent will force a defrag. 897 * 898 * The end result of this is that tiny extents before a single big 899 * extent will force at least part of that big extent to be defragged. 900 */ 901 if (ret) { 902 *defrag_end = extent_map_end(em); 903 } else { 904 *last_len = 0; 905 *skip = extent_map_end(em); 906 *defrag_end = 0; 907 } 908 909 free_extent_map(em); 910 return ret; 911 } 912 913 /* 914 * it doesn't do much good to defrag one or two pages 915 * at a time. This pulls in a nice chunk of pages 916 * to COW and defrag. 917 * 918 * It also makes sure the delalloc code has enough 919 * dirty data to avoid making new small extents as part 920 * of the defrag 921 * 922 * It's a good idea to start RA on this range 923 * before calling this. 924 */ 925 static int cluster_pages_for_defrag(struct inode *inode, 926 struct page **pages, 927 unsigned long start_index, 928 int num_pages) 929 { 930 unsigned long file_end; 931 u64 isize = i_size_read(inode); 932 u64 page_start; 933 u64 page_end; 934 u64 page_cnt; 935 int ret; 936 int i; 937 int i_done; 938 struct btrfs_ordered_extent *ordered; 939 struct extent_state *cached_state = NULL; 940 struct extent_io_tree *tree; 941 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 942 943 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 944 if (!isize || start_index > file_end) 945 return 0; 946 947 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); 948 949 ret = btrfs_delalloc_reserve_space(inode, 950 page_cnt << PAGE_CACHE_SHIFT); 951 if (ret) 952 return ret; 953 i_done = 0; 954 tree = &BTRFS_I(inode)->io_tree; 955 956 /* step one, lock all the pages */ 957 for (i = 0; i < page_cnt; i++) { 958 struct page *page; 959 again: 960 page = find_or_create_page(inode->i_mapping, 961 start_index + i, mask); 962 if (!page) 963 break; 964 965 page_start = page_offset(page); 966 page_end = page_start + PAGE_CACHE_SIZE - 1; 967 while (1) { 968 lock_extent(tree, page_start, page_end); 969 ordered = btrfs_lookup_ordered_extent(inode, 970 page_start); 971 unlock_extent(tree, page_start, page_end); 972 if (!ordered) 973 break; 974 975 unlock_page(page); 976 btrfs_start_ordered_extent(inode, ordered, 1); 977 btrfs_put_ordered_extent(ordered); 978 lock_page(page); 979 /* 980 * we unlocked the page above, so we need check if 981 * it was released or not. 982 */ 983 if (page->mapping != inode->i_mapping) { 984 unlock_page(page); 985 page_cache_release(page); 986 goto again; 987 } 988 } 989 990 if (!PageUptodate(page)) { 991 btrfs_readpage(NULL, page); 992 lock_page(page); 993 if (!PageUptodate(page)) { 994 unlock_page(page); 995 page_cache_release(page); 996 ret = -EIO; 997 break; 998 } 999 } 1000 1001 if (page->mapping != inode->i_mapping) { 1002 unlock_page(page); 1003 page_cache_release(page); 1004 goto again; 1005 } 1006 1007 pages[i] = page; 1008 i_done++; 1009 } 1010 if (!i_done || ret) 1011 goto out; 1012 1013 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1014 goto out; 1015 1016 /* 1017 * so now we have a nice long stream of locked 1018 * and up to date pages, lets wait on them 1019 */ 1020 for (i = 0; i < i_done; i++) 1021 wait_on_page_writeback(pages[i]); 1022 1023 page_start = page_offset(pages[0]); 1024 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; 1025 1026 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1027 page_start, page_end - 1, 0, &cached_state); 1028 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 1029 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1030 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, 1031 GFP_NOFS); 1032 1033 if (i_done != page_cnt) { 1034 spin_lock(&BTRFS_I(inode)->lock); 1035 BTRFS_I(inode)->outstanding_extents++; 1036 spin_unlock(&BTRFS_I(inode)->lock); 1037 btrfs_delalloc_release_space(inode, 1038 (page_cnt - i_done) << PAGE_CACHE_SHIFT); 1039 } 1040 1041 1042 btrfs_set_extent_delalloc(inode, page_start, page_end - 1, 1043 &cached_state); 1044 1045 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1046 page_start, page_end - 1, &cached_state, 1047 GFP_NOFS); 1048 1049 for (i = 0; i < i_done; i++) { 1050 clear_page_dirty_for_io(pages[i]); 1051 ClearPageChecked(pages[i]); 1052 set_page_extent_mapped(pages[i]); 1053 set_page_dirty(pages[i]); 1054 unlock_page(pages[i]); 1055 page_cache_release(pages[i]); 1056 } 1057 return i_done; 1058 out: 1059 for (i = 0; i < i_done; i++) { 1060 unlock_page(pages[i]); 1061 page_cache_release(pages[i]); 1062 } 1063 btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT); 1064 return ret; 1065 1066 } 1067 1068 int btrfs_defrag_file(struct inode *inode, struct file *file, 1069 struct btrfs_ioctl_defrag_range_args *range, 1070 u64 newer_than, unsigned long max_to_defrag) 1071 { 1072 struct btrfs_root *root = BTRFS_I(inode)->root; 1073 struct file_ra_state *ra = NULL; 1074 unsigned long last_index; 1075 u64 isize = i_size_read(inode); 1076 u64 last_len = 0; 1077 u64 skip = 0; 1078 u64 defrag_end = 0; 1079 u64 newer_off = range->start; 1080 unsigned long i; 1081 unsigned long ra_index = 0; 1082 int ret; 1083 int defrag_count = 0; 1084 int compress_type = BTRFS_COMPRESS_ZLIB; 1085 int extent_thresh = range->extent_thresh; 1086 int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; 1087 int cluster = max_cluster; 1088 u64 new_align = ~((u64)128 * 1024 - 1); 1089 struct page **pages = NULL; 1090 1091 if (extent_thresh == 0) 1092 extent_thresh = 256 * 1024; 1093 1094 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1095 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1096 return -EINVAL; 1097 if (range->compress_type) 1098 compress_type = range->compress_type; 1099 } 1100 1101 if (isize == 0) 1102 return 0; 1103 1104 /* 1105 * if we were not given a file, allocate a readahead 1106 * context 1107 */ 1108 if (!file) { 1109 ra = kzalloc(sizeof(*ra), GFP_NOFS); 1110 if (!ra) 1111 return -ENOMEM; 1112 file_ra_state_init(ra, inode->i_mapping); 1113 } else { 1114 ra = &file->f_ra; 1115 } 1116 1117 pages = kmalloc(sizeof(struct page *) * max_cluster, 1118 GFP_NOFS); 1119 if (!pages) { 1120 ret = -ENOMEM; 1121 goto out_ra; 1122 } 1123 1124 /* find the last page to defrag */ 1125 if (range->start + range->len > range->start) { 1126 last_index = min_t(u64, isize - 1, 1127 range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 1128 } else { 1129 last_index = (isize - 1) >> PAGE_CACHE_SHIFT; 1130 } 1131 1132 if (newer_than) { 1133 ret = find_new_extents(root, inode, newer_than, 1134 &newer_off, 64 * 1024); 1135 if (!ret) { 1136 range->start = newer_off; 1137 /* 1138 * we always align our defrag to help keep 1139 * the extents in the file evenly spaced 1140 */ 1141 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 1142 } else 1143 goto out_ra; 1144 } else { 1145 i = range->start >> PAGE_CACHE_SHIFT; 1146 } 1147 if (!max_to_defrag) 1148 max_to_defrag = last_index + 1; 1149 1150 /* 1151 * make writeback starts from i, so the defrag range can be 1152 * written sequentially. 1153 */ 1154 if (i < inode->i_mapping->writeback_index) 1155 inode->i_mapping->writeback_index = i; 1156 1157 while (i <= last_index && defrag_count < max_to_defrag && 1158 (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 1159 PAGE_CACHE_SHIFT)) { 1160 /* 1161 * make sure we stop running if someone unmounts 1162 * the FS 1163 */ 1164 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1165 break; 1166 1167 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1168 extent_thresh, &last_len, &skip, 1169 &defrag_end, range->flags & 1170 BTRFS_DEFRAG_RANGE_COMPRESS)) { 1171 unsigned long next; 1172 /* 1173 * the should_defrag function tells us how much to skip 1174 * bump our counter by the suggested amount 1175 */ 1176 next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1177 i = max(i + 1, next); 1178 continue; 1179 } 1180 1181 if (!newer_than) { 1182 cluster = (PAGE_CACHE_ALIGN(defrag_end) >> 1183 PAGE_CACHE_SHIFT) - i; 1184 cluster = min(cluster, max_cluster); 1185 } else { 1186 cluster = max_cluster; 1187 } 1188 1189 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1190 BTRFS_I(inode)->force_compress = compress_type; 1191 1192 if (i + cluster > ra_index) { 1193 ra_index = max(i, ra_index); 1194 btrfs_force_ra(inode->i_mapping, ra, file, ra_index, 1195 cluster); 1196 ra_index += max_cluster; 1197 } 1198 1199 mutex_lock(&inode->i_mutex); 1200 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1201 if (ret < 0) { 1202 mutex_unlock(&inode->i_mutex); 1203 goto out_ra; 1204 } 1205 1206 defrag_count += ret; 1207 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); 1208 mutex_unlock(&inode->i_mutex); 1209 1210 if (newer_than) { 1211 if (newer_off == (u64)-1) 1212 break; 1213 1214 if (ret > 0) 1215 i += ret; 1216 1217 newer_off = max(newer_off + 1, 1218 (u64)i << PAGE_CACHE_SHIFT); 1219 1220 ret = find_new_extents(root, inode, 1221 newer_than, &newer_off, 1222 64 * 1024); 1223 if (!ret) { 1224 range->start = newer_off; 1225 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 1226 } else { 1227 break; 1228 } 1229 } else { 1230 if (ret > 0) { 1231 i += ret; 1232 last_len += ret << PAGE_CACHE_SHIFT; 1233 } else { 1234 i++; 1235 last_len = 0; 1236 } 1237 } 1238 } 1239 1240 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) 1241 filemap_flush(inode->i_mapping); 1242 1243 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 1244 /* the filemap_flush will queue IO into the worker threads, but 1245 * we have to make sure the IO is actually started and that 1246 * ordered extents get created before we return 1247 */ 1248 atomic_inc(&root->fs_info->async_submit_draining); 1249 while (atomic_read(&root->fs_info->nr_async_submits) || 1250 atomic_read(&root->fs_info->async_delalloc_pages)) { 1251 wait_event(root->fs_info->async_submit_wait, 1252 (atomic_read(&root->fs_info->nr_async_submits) == 0 && 1253 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 1254 } 1255 atomic_dec(&root->fs_info->async_submit_draining); 1256 1257 mutex_lock(&inode->i_mutex); 1258 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 1259 mutex_unlock(&inode->i_mutex); 1260 } 1261 1262 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1263 btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); 1264 } 1265 1266 ret = defrag_count; 1267 1268 out_ra: 1269 if (!file) 1270 kfree(ra); 1271 kfree(pages); 1272 return ret; 1273 } 1274 1275 static noinline int btrfs_ioctl_resize(struct btrfs_root *root, 1276 void __user *arg) 1277 { 1278 u64 new_size; 1279 u64 old_size; 1280 u64 devid = 1; 1281 struct btrfs_ioctl_vol_args *vol_args; 1282 struct btrfs_trans_handle *trans; 1283 struct btrfs_device *device = NULL; 1284 char *sizestr; 1285 char *devstr = NULL; 1286 int ret = 0; 1287 int mod = 0; 1288 1289 if (root->fs_info->sb->s_flags & MS_RDONLY) 1290 return -EROFS; 1291 1292 if (!capable(CAP_SYS_ADMIN)) 1293 return -EPERM; 1294 1295 mutex_lock(&root->fs_info->volume_mutex); 1296 if (root->fs_info->balance_ctl) { 1297 printk(KERN_INFO "btrfs: balance in progress\n"); 1298 ret = -EINVAL; 1299 goto out; 1300 } 1301 1302 vol_args = memdup_user(arg, sizeof(*vol_args)); 1303 if (IS_ERR(vol_args)) { 1304 ret = PTR_ERR(vol_args); 1305 goto out; 1306 } 1307 1308 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1309 1310 sizestr = vol_args->name; 1311 devstr = strchr(sizestr, ':'); 1312 if (devstr) { 1313 char *end; 1314 sizestr = devstr + 1; 1315 *devstr = '\0'; 1316 devstr = vol_args->name; 1317 devid = simple_strtoull(devstr, &end, 10); 1318 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1319 (unsigned long long)devid); 1320 } 1321 device = btrfs_find_device(root, devid, NULL, NULL); 1322 if (!device) { 1323 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1324 (unsigned long long)devid); 1325 ret = -EINVAL; 1326 goto out_free; 1327 } 1328 if (device->fs_devices && device->fs_devices->seeding) { 1329 printk(KERN_INFO "btrfs: resizer unable to apply on " 1330 "seeding device %llu\n", 1331 (unsigned long long)devid); 1332 ret = -EINVAL; 1333 goto out_free; 1334 } 1335 1336 if (!strcmp(sizestr, "max")) 1337 new_size = device->bdev->bd_inode->i_size; 1338 else { 1339 if (sizestr[0] == '-') { 1340 mod = -1; 1341 sizestr++; 1342 } else if (sizestr[0] == '+') { 1343 mod = 1; 1344 sizestr++; 1345 } 1346 new_size = memparse(sizestr, NULL); 1347 if (new_size == 0) { 1348 ret = -EINVAL; 1349 goto out_free; 1350 } 1351 } 1352 1353 old_size = device->total_bytes; 1354 1355 if (mod < 0) { 1356 if (new_size > old_size) { 1357 ret = -EINVAL; 1358 goto out_free; 1359 } 1360 new_size = old_size - new_size; 1361 } else if (mod > 0) { 1362 new_size = old_size + new_size; 1363 } 1364 1365 if (new_size < 256 * 1024 * 1024) { 1366 ret = -EINVAL; 1367 goto out_free; 1368 } 1369 if (new_size > device->bdev->bd_inode->i_size) { 1370 ret = -EFBIG; 1371 goto out_free; 1372 } 1373 1374 do_div(new_size, root->sectorsize); 1375 new_size *= root->sectorsize; 1376 1377 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1378 rcu_str_deref(device->name), 1379 (unsigned long long)new_size); 1380 1381 if (new_size > old_size) { 1382 trans = btrfs_start_transaction(root, 0); 1383 if (IS_ERR(trans)) { 1384 ret = PTR_ERR(trans); 1385 goto out_free; 1386 } 1387 ret = btrfs_grow_device(trans, device, new_size); 1388 btrfs_commit_transaction(trans, root); 1389 } else if (new_size < old_size) { 1390 ret = btrfs_shrink_device(device, new_size); 1391 } 1392 1393 out_free: 1394 kfree(vol_args); 1395 out: 1396 mutex_unlock(&root->fs_info->volume_mutex); 1397 return ret; 1398 } 1399 1400 static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1401 char *name, unsigned long fd, int subvol, 1402 u64 *transid, bool readonly, 1403 struct btrfs_qgroup_inherit **inherit) 1404 { 1405 struct file *src_file; 1406 int namelen; 1407 int ret = 0; 1408 1409 ret = mnt_want_write_file(file); 1410 if (ret) 1411 goto out; 1412 1413 namelen = strlen(name); 1414 if (strchr(name, '/')) { 1415 ret = -EINVAL; 1416 goto out_drop_write; 1417 } 1418 1419 if (name[0] == '.' && 1420 (namelen == 1 || (name[1] == '.' && namelen == 2))) { 1421 ret = -EEXIST; 1422 goto out_drop_write; 1423 } 1424 1425 if (subvol) { 1426 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1427 NULL, transid, readonly, inherit); 1428 } else { 1429 struct inode *src_inode; 1430 src_file = fget(fd); 1431 if (!src_file) { 1432 ret = -EINVAL; 1433 goto out_drop_write; 1434 } 1435 1436 src_inode = src_file->f_path.dentry->d_inode; 1437 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1438 printk(KERN_INFO "btrfs: Snapshot src from " 1439 "another FS\n"); 1440 ret = -EINVAL; 1441 fput(src_file); 1442 goto out_drop_write; 1443 } 1444 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1445 BTRFS_I(src_inode)->root, 1446 transid, readonly, inherit); 1447 fput(src_file); 1448 } 1449 out_drop_write: 1450 mnt_drop_write_file(file); 1451 out: 1452 return ret; 1453 } 1454 1455 static noinline int btrfs_ioctl_snap_create(struct file *file, 1456 void __user *arg, int subvol) 1457 { 1458 struct btrfs_ioctl_vol_args *vol_args; 1459 int ret; 1460 1461 vol_args = memdup_user(arg, sizeof(*vol_args)); 1462 if (IS_ERR(vol_args)) 1463 return PTR_ERR(vol_args); 1464 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1465 1466 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1467 vol_args->fd, subvol, 1468 NULL, false, NULL); 1469 1470 kfree(vol_args); 1471 return ret; 1472 } 1473 1474 static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 1475 void __user *arg, int subvol) 1476 { 1477 struct btrfs_ioctl_vol_args_v2 *vol_args; 1478 int ret; 1479 u64 transid = 0; 1480 u64 *ptr = NULL; 1481 bool readonly = false; 1482 struct btrfs_qgroup_inherit *inherit = NULL; 1483 1484 vol_args = memdup_user(arg, sizeof(*vol_args)); 1485 if (IS_ERR(vol_args)) 1486 return PTR_ERR(vol_args); 1487 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1488 1489 if (vol_args->flags & 1490 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | 1491 BTRFS_SUBVOL_QGROUP_INHERIT)) { 1492 ret = -EOPNOTSUPP; 1493 goto out; 1494 } 1495 1496 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 1497 ptr = &transid; 1498 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1499 readonly = true; 1500 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { 1501 if (vol_args->size > PAGE_CACHE_SIZE) { 1502 ret = -EINVAL; 1503 goto out; 1504 } 1505 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); 1506 if (IS_ERR(inherit)) { 1507 ret = PTR_ERR(inherit); 1508 goto out; 1509 } 1510 } 1511 1512 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1513 vol_args->fd, subvol, ptr, 1514 readonly, &inherit); 1515 1516 if (ret == 0 && ptr && 1517 copy_to_user(arg + 1518 offsetof(struct btrfs_ioctl_vol_args_v2, 1519 transid), ptr, sizeof(*ptr))) 1520 ret = -EFAULT; 1521 out: 1522 kfree(vol_args); 1523 kfree(inherit); 1524 return ret; 1525 } 1526 1527 static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1528 void __user *arg) 1529 { 1530 struct inode *inode = fdentry(file)->d_inode; 1531 struct btrfs_root *root = BTRFS_I(inode)->root; 1532 int ret = 0; 1533 u64 flags = 0; 1534 1535 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) 1536 return -EINVAL; 1537 1538 down_read(&root->fs_info->subvol_sem); 1539 if (btrfs_root_readonly(root)) 1540 flags |= BTRFS_SUBVOL_RDONLY; 1541 up_read(&root->fs_info->subvol_sem); 1542 1543 if (copy_to_user(arg, &flags, sizeof(flags))) 1544 ret = -EFAULT; 1545 1546 return ret; 1547 } 1548 1549 static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1550 void __user *arg) 1551 { 1552 struct inode *inode = fdentry(file)->d_inode; 1553 struct btrfs_root *root = BTRFS_I(inode)->root; 1554 struct btrfs_trans_handle *trans; 1555 u64 root_flags; 1556 u64 flags; 1557 int ret = 0; 1558 1559 ret = mnt_want_write_file(file); 1560 if (ret) 1561 goto out; 1562 1563 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 1564 ret = -EINVAL; 1565 goto out_drop_write; 1566 } 1567 1568 if (copy_from_user(&flags, arg, sizeof(flags))) { 1569 ret = -EFAULT; 1570 goto out_drop_write; 1571 } 1572 1573 if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { 1574 ret = -EINVAL; 1575 goto out_drop_write; 1576 } 1577 1578 if (flags & ~BTRFS_SUBVOL_RDONLY) { 1579 ret = -EOPNOTSUPP; 1580 goto out_drop_write; 1581 } 1582 1583 if (!inode_owner_or_capable(inode)) { 1584 ret = -EACCES; 1585 goto out_drop_write; 1586 } 1587 1588 down_write(&root->fs_info->subvol_sem); 1589 1590 /* nothing to do */ 1591 if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) 1592 goto out_drop_sem; 1593 1594 root_flags = btrfs_root_flags(&root->root_item); 1595 if (flags & BTRFS_SUBVOL_RDONLY) 1596 btrfs_set_root_flags(&root->root_item, 1597 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1598 else 1599 btrfs_set_root_flags(&root->root_item, 1600 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1601 1602 trans = btrfs_start_transaction(root, 1); 1603 if (IS_ERR(trans)) { 1604 ret = PTR_ERR(trans); 1605 goto out_reset; 1606 } 1607 1608 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1609 &root->root_key, &root->root_item); 1610 1611 btrfs_commit_transaction(trans, root); 1612 out_reset: 1613 if (ret) 1614 btrfs_set_root_flags(&root->root_item, root_flags); 1615 out_drop_sem: 1616 up_write(&root->fs_info->subvol_sem); 1617 out_drop_write: 1618 mnt_drop_write_file(file); 1619 out: 1620 return ret; 1621 } 1622 1623 /* 1624 * helper to check if the subvolume references other subvolumes 1625 */ 1626 static noinline int may_destroy_subvol(struct btrfs_root *root) 1627 { 1628 struct btrfs_path *path; 1629 struct btrfs_key key; 1630 int ret; 1631 1632 path = btrfs_alloc_path(); 1633 if (!path) 1634 return -ENOMEM; 1635 1636 key.objectid = root->root_key.objectid; 1637 key.type = BTRFS_ROOT_REF_KEY; 1638 key.offset = (u64)-1; 1639 1640 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, 1641 &key, path, 0, 0); 1642 if (ret < 0) 1643 goto out; 1644 BUG_ON(ret == 0); 1645 1646 ret = 0; 1647 if (path->slots[0] > 0) { 1648 path->slots[0]--; 1649 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1650 if (key.objectid == root->root_key.objectid && 1651 key.type == BTRFS_ROOT_REF_KEY) 1652 ret = -ENOTEMPTY; 1653 } 1654 out: 1655 btrfs_free_path(path); 1656 return ret; 1657 } 1658 1659 static noinline int key_in_sk(struct btrfs_key *key, 1660 struct btrfs_ioctl_search_key *sk) 1661 { 1662 struct btrfs_key test; 1663 int ret; 1664 1665 test.objectid = sk->min_objectid; 1666 test.type = sk->min_type; 1667 test.offset = sk->min_offset; 1668 1669 ret = btrfs_comp_cpu_keys(key, &test); 1670 if (ret < 0) 1671 return 0; 1672 1673 test.objectid = sk->max_objectid; 1674 test.type = sk->max_type; 1675 test.offset = sk->max_offset; 1676 1677 ret = btrfs_comp_cpu_keys(key, &test); 1678 if (ret > 0) 1679 return 0; 1680 return 1; 1681 } 1682 1683 static noinline int copy_to_sk(struct btrfs_root *root, 1684 struct btrfs_path *path, 1685 struct btrfs_key *key, 1686 struct btrfs_ioctl_search_key *sk, 1687 char *buf, 1688 unsigned long *sk_offset, 1689 int *num_found) 1690 { 1691 u64 found_transid; 1692 struct extent_buffer *leaf; 1693 struct btrfs_ioctl_search_header sh; 1694 unsigned long item_off; 1695 unsigned long item_len; 1696 int nritems; 1697 int i; 1698 int slot; 1699 int ret = 0; 1700 1701 leaf = path->nodes[0]; 1702 slot = path->slots[0]; 1703 nritems = btrfs_header_nritems(leaf); 1704 1705 if (btrfs_header_generation(leaf) > sk->max_transid) { 1706 i = nritems; 1707 goto advance_key; 1708 } 1709 found_transid = btrfs_header_generation(leaf); 1710 1711 for (i = slot; i < nritems; i++) { 1712 item_off = btrfs_item_ptr_offset(leaf, i); 1713 item_len = btrfs_item_size_nr(leaf, i); 1714 1715 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1716 item_len = 0; 1717 1718 if (sizeof(sh) + item_len + *sk_offset > 1719 BTRFS_SEARCH_ARGS_BUFSIZE) { 1720 ret = 1; 1721 goto overflow; 1722 } 1723 1724 btrfs_item_key_to_cpu(leaf, key, i); 1725 if (!key_in_sk(key, sk)) 1726 continue; 1727 1728 sh.objectid = key->objectid; 1729 sh.offset = key->offset; 1730 sh.type = key->type; 1731 sh.len = item_len; 1732 sh.transid = found_transid; 1733 1734 /* copy search result header */ 1735 memcpy(buf + *sk_offset, &sh, sizeof(sh)); 1736 *sk_offset += sizeof(sh); 1737 1738 if (item_len) { 1739 char *p = buf + *sk_offset; 1740 /* copy the item */ 1741 read_extent_buffer(leaf, p, 1742 item_off, item_len); 1743 *sk_offset += item_len; 1744 } 1745 (*num_found)++; 1746 1747 if (*num_found >= sk->nr_items) 1748 break; 1749 } 1750 advance_key: 1751 ret = 0; 1752 if (key->offset < (u64)-1 && key->offset < sk->max_offset) 1753 key->offset++; 1754 else if (key->type < (u8)-1 && key->type < sk->max_type) { 1755 key->offset = 0; 1756 key->type++; 1757 } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { 1758 key->offset = 0; 1759 key->type = 0; 1760 key->objectid++; 1761 } else 1762 ret = 1; 1763 overflow: 1764 return ret; 1765 } 1766 1767 static noinline int search_ioctl(struct inode *inode, 1768 struct btrfs_ioctl_search_args *args) 1769 { 1770 struct btrfs_root *root; 1771 struct btrfs_key key; 1772 struct btrfs_key max_key; 1773 struct btrfs_path *path; 1774 struct btrfs_ioctl_search_key *sk = &args->key; 1775 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 1776 int ret; 1777 int num_found = 0; 1778 unsigned long sk_offset = 0; 1779 1780 path = btrfs_alloc_path(); 1781 if (!path) 1782 return -ENOMEM; 1783 1784 if (sk->tree_id == 0) { 1785 /* search the root of the inode that was passed */ 1786 root = BTRFS_I(inode)->root; 1787 } else { 1788 key.objectid = sk->tree_id; 1789 key.type = BTRFS_ROOT_ITEM_KEY; 1790 key.offset = (u64)-1; 1791 root = btrfs_read_fs_root_no_name(info, &key); 1792 if (IS_ERR(root)) { 1793 printk(KERN_ERR "could not find root %llu\n", 1794 sk->tree_id); 1795 btrfs_free_path(path); 1796 return -ENOENT; 1797 } 1798 } 1799 1800 key.objectid = sk->min_objectid; 1801 key.type = sk->min_type; 1802 key.offset = sk->min_offset; 1803 1804 max_key.objectid = sk->max_objectid; 1805 max_key.type = sk->max_type; 1806 max_key.offset = sk->max_offset; 1807 1808 path->keep_locks = 1; 1809 1810 while(1) { 1811 ret = btrfs_search_forward(root, &key, &max_key, path, 0, 1812 sk->min_transid); 1813 if (ret != 0) { 1814 if (ret > 0) 1815 ret = 0; 1816 goto err; 1817 } 1818 ret = copy_to_sk(root, path, &key, sk, args->buf, 1819 &sk_offset, &num_found); 1820 btrfs_release_path(path); 1821 if (ret || num_found >= sk->nr_items) 1822 break; 1823 1824 } 1825 ret = 0; 1826 err: 1827 sk->nr_items = num_found; 1828 btrfs_free_path(path); 1829 return ret; 1830 } 1831 1832 static noinline int btrfs_ioctl_tree_search(struct file *file, 1833 void __user *argp) 1834 { 1835 struct btrfs_ioctl_search_args *args; 1836 struct inode *inode; 1837 int ret; 1838 1839 if (!capable(CAP_SYS_ADMIN)) 1840 return -EPERM; 1841 1842 args = memdup_user(argp, sizeof(*args)); 1843 if (IS_ERR(args)) 1844 return PTR_ERR(args); 1845 1846 inode = fdentry(file)->d_inode; 1847 ret = search_ioctl(inode, args); 1848 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1849 ret = -EFAULT; 1850 kfree(args); 1851 return ret; 1852 } 1853 1854 /* 1855 * Search INODE_REFs to identify path name of 'dirid' directory 1856 * in a 'tree_id' tree. and sets path name to 'name'. 1857 */ 1858 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 1859 u64 tree_id, u64 dirid, char *name) 1860 { 1861 struct btrfs_root *root; 1862 struct btrfs_key key; 1863 char *ptr; 1864 int ret = -1; 1865 int slot; 1866 int len; 1867 int total_len = 0; 1868 struct btrfs_inode_ref *iref; 1869 struct extent_buffer *l; 1870 struct btrfs_path *path; 1871 1872 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 1873 name[0]='\0'; 1874 return 0; 1875 } 1876 1877 path = btrfs_alloc_path(); 1878 if (!path) 1879 return -ENOMEM; 1880 1881 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; 1882 1883 key.objectid = tree_id; 1884 key.type = BTRFS_ROOT_ITEM_KEY; 1885 key.offset = (u64)-1; 1886 root = btrfs_read_fs_root_no_name(info, &key); 1887 if (IS_ERR(root)) { 1888 printk(KERN_ERR "could not find root %llu\n", tree_id); 1889 ret = -ENOENT; 1890 goto out; 1891 } 1892 1893 key.objectid = dirid; 1894 key.type = BTRFS_INODE_REF_KEY; 1895 key.offset = (u64)-1; 1896 1897 while(1) { 1898 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1899 if (ret < 0) 1900 goto out; 1901 1902 l = path->nodes[0]; 1903 slot = path->slots[0]; 1904 if (ret > 0 && slot > 0) 1905 slot--; 1906 btrfs_item_key_to_cpu(l, &key, slot); 1907 1908 if (ret > 0 && (key.objectid != dirid || 1909 key.type != BTRFS_INODE_REF_KEY)) { 1910 ret = -ENOENT; 1911 goto out; 1912 } 1913 1914 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 1915 len = btrfs_inode_ref_name_len(l, iref); 1916 ptr -= len + 1; 1917 total_len += len + 1; 1918 if (ptr < name) 1919 goto out; 1920 1921 *(ptr + len) = '/'; 1922 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); 1923 1924 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 1925 break; 1926 1927 btrfs_release_path(path); 1928 key.objectid = key.offset; 1929 key.offset = (u64)-1; 1930 dirid = key.objectid; 1931 } 1932 if (ptr < name) 1933 goto out; 1934 memmove(name, ptr, total_len); 1935 name[total_len]='\0'; 1936 ret = 0; 1937 out: 1938 btrfs_free_path(path); 1939 return ret; 1940 } 1941 1942 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 1943 void __user *argp) 1944 { 1945 struct btrfs_ioctl_ino_lookup_args *args; 1946 struct inode *inode; 1947 int ret; 1948 1949 if (!capable(CAP_SYS_ADMIN)) 1950 return -EPERM; 1951 1952 args = memdup_user(argp, sizeof(*args)); 1953 if (IS_ERR(args)) 1954 return PTR_ERR(args); 1955 1956 inode = fdentry(file)->d_inode; 1957 1958 if (args->treeid == 0) 1959 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 1960 1961 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 1962 args->treeid, args->objectid, 1963 args->name); 1964 1965 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1966 ret = -EFAULT; 1967 1968 kfree(args); 1969 return ret; 1970 } 1971 1972 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 1973 void __user *arg) 1974 { 1975 struct dentry *parent = fdentry(file); 1976 struct dentry *dentry; 1977 struct inode *dir = parent->d_inode; 1978 struct inode *inode; 1979 struct btrfs_root *root = BTRFS_I(dir)->root; 1980 struct btrfs_root *dest = NULL; 1981 struct btrfs_ioctl_vol_args *vol_args; 1982 struct btrfs_trans_handle *trans; 1983 int namelen; 1984 int ret; 1985 int err = 0; 1986 1987 vol_args = memdup_user(arg, sizeof(*vol_args)); 1988 if (IS_ERR(vol_args)) 1989 return PTR_ERR(vol_args); 1990 1991 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1992 namelen = strlen(vol_args->name); 1993 if (strchr(vol_args->name, '/') || 1994 strncmp(vol_args->name, "..", namelen) == 0) { 1995 err = -EINVAL; 1996 goto out; 1997 } 1998 1999 err = mnt_want_write_file(file); 2000 if (err) 2001 goto out; 2002 2003 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 2004 dentry = lookup_one_len(vol_args->name, parent, namelen); 2005 if (IS_ERR(dentry)) { 2006 err = PTR_ERR(dentry); 2007 goto out_unlock_dir; 2008 } 2009 2010 if (!dentry->d_inode) { 2011 err = -ENOENT; 2012 goto out_dput; 2013 } 2014 2015 inode = dentry->d_inode; 2016 dest = BTRFS_I(inode)->root; 2017 if (!capable(CAP_SYS_ADMIN)){ 2018 /* 2019 * Regular user. Only allow this with a special mount 2020 * option, when the user has write+exec access to the 2021 * subvol root, and when rmdir(2) would have been 2022 * allowed. 2023 * 2024 * Note that this is _not_ check that the subvol is 2025 * empty or doesn't contain data that we wouldn't 2026 * otherwise be able to delete. 2027 * 2028 * Users who want to delete empty subvols should try 2029 * rmdir(2). 2030 */ 2031 err = -EPERM; 2032 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 2033 goto out_dput; 2034 2035 /* 2036 * Do not allow deletion if the parent dir is the same 2037 * as the dir to be deleted. That means the ioctl 2038 * must be called on the dentry referencing the root 2039 * of the subvol, not a random directory contained 2040 * within it. 2041 */ 2042 err = -EINVAL; 2043 if (root == dest) 2044 goto out_dput; 2045 2046 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2047 if (err) 2048 goto out_dput; 2049 2050 /* check if subvolume may be deleted by a non-root user */ 2051 err = btrfs_may_delete(dir, dentry, 1); 2052 if (err) 2053 goto out_dput; 2054 } 2055 2056 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2057 err = -EINVAL; 2058 goto out_dput; 2059 } 2060 2061 mutex_lock(&inode->i_mutex); 2062 err = d_invalidate(dentry); 2063 if (err) 2064 goto out_unlock; 2065 2066 down_write(&root->fs_info->subvol_sem); 2067 2068 err = may_destroy_subvol(dest); 2069 if (err) 2070 goto out_up_write; 2071 2072 trans = btrfs_start_transaction(root, 0); 2073 if (IS_ERR(trans)) { 2074 err = PTR_ERR(trans); 2075 goto out_up_write; 2076 } 2077 trans->block_rsv = &root->fs_info->global_block_rsv; 2078 2079 ret = btrfs_unlink_subvol(trans, root, dir, 2080 dest->root_key.objectid, 2081 dentry->d_name.name, 2082 dentry->d_name.len); 2083 if (ret) { 2084 err = ret; 2085 btrfs_abort_transaction(trans, root, ret); 2086 goto out_end_trans; 2087 } 2088 2089 btrfs_record_root_in_trans(trans, dest); 2090 2091 memset(&dest->root_item.drop_progress, 0, 2092 sizeof(dest->root_item.drop_progress)); 2093 dest->root_item.drop_level = 0; 2094 btrfs_set_root_refs(&dest->root_item, 0); 2095 2096 if (!xchg(&dest->orphan_item_inserted, 1)) { 2097 ret = btrfs_insert_orphan_item(trans, 2098 root->fs_info->tree_root, 2099 dest->root_key.objectid); 2100 if (ret) { 2101 btrfs_abort_transaction(trans, root, ret); 2102 err = ret; 2103 goto out_end_trans; 2104 } 2105 } 2106 out_end_trans: 2107 ret = btrfs_end_transaction(trans, root); 2108 if (ret && !err) 2109 err = ret; 2110 inode->i_flags |= S_DEAD; 2111 out_up_write: 2112 up_write(&root->fs_info->subvol_sem); 2113 out_unlock: 2114 mutex_unlock(&inode->i_mutex); 2115 if (!err) { 2116 shrink_dcache_sb(root->fs_info->sb); 2117 btrfs_invalidate_inodes(dest); 2118 d_delete(dentry); 2119 } 2120 out_dput: 2121 dput(dentry); 2122 out_unlock_dir: 2123 mutex_unlock(&dir->i_mutex); 2124 mnt_drop_write_file(file); 2125 out: 2126 kfree(vol_args); 2127 return err; 2128 } 2129 2130 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2131 { 2132 struct inode *inode = fdentry(file)->d_inode; 2133 struct btrfs_root *root = BTRFS_I(inode)->root; 2134 struct btrfs_ioctl_defrag_range_args *range; 2135 int ret; 2136 2137 if (btrfs_root_readonly(root)) 2138 return -EROFS; 2139 2140 ret = mnt_want_write_file(file); 2141 if (ret) 2142 return ret; 2143 2144 switch (inode->i_mode & S_IFMT) { 2145 case S_IFDIR: 2146 if (!capable(CAP_SYS_ADMIN)) { 2147 ret = -EPERM; 2148 goto out; 2149 } 2150 ret = btrfs_defrag_root(root, 0); 2151 if (ret) 2152 goto out; 2153 ret = btrfs_defrag_root(root->fs_info->extent_root, 0); 2154 break; 2155 case S_IFREG: 2156 if (!(file->f_mode & FMODE_WRITE)) { 2157 ret = -EINVAL; 2158 goto out; 2159 } 2160 2161 range = kzalloc(sizeof(*range), GFP_KERNEL); 2162 if (!range) { 2163 ret = -ENOMEM; 2164 goto out; 2165 } 2166 2167 if (argp) { 2168 if (copy_from_user(range, argp, 2169 sizeof(*range))) { 2170 ret = -EFAULT; 2171 kfree(range); 2172 goto out; 2173 } 2174 /* compression requires us to start the IO */ 2175 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 2176 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 2177 range->extent_thresh = (u32)-1; 2178 } 2179 } else { 2180 /* the rest are all set to zero by kzalloc */ 2181 range->len = (u64)-1; 2182 } 2183 ret = btrfs_defrag_file(fdentry(file)->d_inode, file, 2184 range, 0, 0); 2185 if (ret > 0) 2186 ret = 0; 2187 kfree(range); 2188 break; 2189 default: 2190 ret = -EINVAL; 2191 } 2192 out: 2193 mnt_drop_write_file(file); 2194 return ret; 2195 } 2196 2197 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) 2198 { 2199 struct btrfs_ioctl_vol_args *vol_args; 2200 int ret; 2201 2202 if (!capable(CAP_SYS_ADMIN)) 2203 return -EPERM; 2204 2205 mutex_lock(&root->fs_info->volume_mutex); 2206 if (root->fs_info->balance_ctl) { 2207 printk(KERN_INFO "btrfs: balance in progress\n"); 2208 ret = -EINVAL; 2209 goto out; 2210 } 2211 2212 vol_args = memdup_user(arg, sizeof(*vol_args)); 2213 if (IS_ERR(vol_args)) { 2214 ret = PTR_ERR(vol_args); 2215 goto out; 2216 } 2217 2218 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2219 ret = btrfs_init_new_device(root, vol_args->name); 2220 2221 kfree(vol_args); 2222 out: 2223 mutex_unlock(&root->fs_info->volume_mutex); 2224 return ret; 2225 } 2226 2227 static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) 2228 { 2229 struct btrfs_ioctl_vol_args *vol_args; 2230 int ret; 2231 2232 if (!capable(CAP_SYS_ADMIN)) 2233 return -EPERM; 2234 2235 if (root->fs_info->sb->s_flags & MS_RDONLY) 2236 return -EROFS; 2237 2238 mutex_lock(&root->fs_info->volume_mutex); 2239 if (root->fs_info->balance_ctl) { 2240 printk(KERN_INFO "btrfs: balance in progress\n"); 2241 ret = -EINVAL; 2242 goto out; 2243 } 2244 2245 vol_args = memdup_user(arg, sizeof(*vol_args)); 2246 if (IS_ERR(vol_args)) { 2247 ret = PTR_ERR(vol_args); 2248 goto out; 2249 } 2250 2251 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2252 ret = btrfs_rm_device(root, vol_args->name); 2253 2254 kfree(vol_args); 2255 out: 2256 mutex_unlock(&root->fs_info->volume_mutex); 2257 return ret; 2258 } 2259 2260 static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2261 { 2262 struct btrfs_ioctl_fs_info_args *fi_args; 2263 struct btrfs_device *device; 2264 struct btrfs_device *next; 2265 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2266 int ret = 0; 2267 2268 if (!capable(CAP_SYS_ADMIN)) 2269 return -EPERM; 2270 2271 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); 2272 if (!fi_args) 2273 return -ENOMEM; 2274 2275 fi_args->num_devices = fs_devices->num_devices; 2276 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); 2277 2278 mutex_lock(&fs_devices->device_list_mutex); 2279 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2280 if (device->devid > fi_args->max_id) 2281 fi_args->max_id = device->devid; 2282 } 2283 mutex_unlock(&fs_devices->device_list_mutex); 2284 2285 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2286 ret = -EFAULT; 2287 2288 kfree(fi_args); 2289 return ret; 2290 } 2291 2292 static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2293 { 2294 struct btrfs_ioctl_dev_info_args *di_args; 2295 struct btrfs_device *dev; 2296 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2297 int ret = 0; 2298 char *s_uuid = NULL; 2299 char empty_uuid[BTRFS_UUID_SIZE] = {0}; 2300 2301 if (!capable(CAP_SYS_ADMIN)) 2302 return -EPERM; 2303 2304 di_args = memdup_user(arg, sizeof(*di_args)); 2305 if (IS_ERR(di_args)) 2306 return PTR_ERR(di_args); 2307 2308 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) 2309 s_uuid = di_args->uuid; 2310 2311 mutex_lock(&fs_devices->device_list_mutex); 2312 dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL); 2313 mutex_unlock(&fs_devices->device_list_mutex); 2314 2315 if (!dev) { 2316 ret = -ENODEV; 2317 goto out; 2318 } 2319 2320 di_args->devid = dev->devid; 2321 di_args->bytes_used = dev->bytes_used; 2322 di_args->total_bytes = dev->total_bytes; 2323 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2324 if (dev->name) { 2325 struct rcu_string *name; 2326 2327 rcu_read_lock(); 2328 name = rcu_dereference(dev->name); 2329 strncpy(di_args->path, name->str, sizeof(di_args->path)); 2330 rcu_read_unlock(); 2331 di_args->path[sizeof(di_args->path) - 1] = 0; 2332 } else { 2333 di_args->path[0] = '\0'; 2334 } 2335 2336 out: 2337 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2338 ret = -EFAULT; 2339 2340 kfree(di_args); 2341 return ret; 2342 } 2343 2344 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2345 u64 off, u64 olen, u64 destoff) 2346 { 2347 struct inode *inode = fdentry(file)->d_inode; 2348 struct btrfs_root *root = BTRFS_I(inode)->root; 2349 struct file *src_file; 2350 struct inode *src; 2351 struct btrfs_trans_handle *trans; 2352 struct btrfs_path *path; 2353 struct extent_buffer *leaf; 2354 char *buf; 2355 struct btrfs_key key; 2356 u32 nritems; 2357 int slot; 2358 int ret; 2359 u64 len = olen; 2360 u64 bs = root->fs_info->sb->s_blocksize; 2361 u64 hint_byte; 2362 2363 /* 2364 * TODO: 2365 * - split compressed inline extents. annoying: we need to 2366 * decompress into destination's address_space (the file offset 2367 * may change, so source mapping won't do), then recompress (or 2368 * otherwise reinsert) a subrange. 2369 * - allow ranges within the same file to be cloned (provided 2370 * they don't overlap)? 2371 */ 2372 2373 /* the destination must be opened for writing */ 2374 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 2375 return -EINVAL; 2376 2377 if (btrfs_root_readonly(root)) 2378 return -EROFS; 2379 2380 ret = mnt_want_write_file(file); 2381 if (ret) 2382 return ret; 2383 2384 src_file = fget(srcfd); 2385 if (!src_file) { 2386 ret = -EBADF; 2387 goto out_drop_write; 2388 } 2389 2390 ret = -EXDEV; 2391 if (src_file->f_path.mnt != file->f_path.mnt) 2392 goto out_fput; 2393 2394 src = src_file->f_dentry->d_inode; 2395 2396 ret = -EINVAL; 2397 if (src == inode) 2398 goto out_fput; 2399 2400 /* the src must be open for reading */ 2401 if (!(src_file->f_mode & FMODE_READ)) 2402 goto out_fput; 2403 2404 /* don't make the dst file partly checksummed */ 2405 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 2406 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 2407 goto out_fput; 2408 2409 ret = -EISDIR; 2410 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 2411 goto out_fput; 2412 2413 ret = -EXDEV; 2414 if (src->i_sb != inode->i_sb) 2415 goto out_fput; 2416 2417 ret = -ENOMEM; 2418 buf = vmalloc(btrfs_level_size(root, 0)); 2419 if (!buf) 2420 goto out_fput; 2421 2422 path = btrfs_alloc_path(); 2423 if (!path) { 2424 vfree(buf); 2425 goto out_fput; 2426 } 2427 path->reada = 2; 2428 2429 if (inode < src) { 2430 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 2431 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 2432 } else { 2433 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 2434 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 2435 } 2436 2437 /* determine range to clone */ 2438 ret = -EINVAL; 2439 if (off + len > src->i_size || off + len < off) 2440 goto out_unlock; 2441 if (len == 0) 2442 olen = len = src->i_size - off; 2443 /* if we extend to eof, continue to block boundary */ 2444 if (off + len == src->i_size) 2445 len = ALIGN(src->i_size, bs) - off; 2446 2447 /* verify the end result is block aligned */ 2448 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 2449 !IS_ALIGNED(destoff, bs)) 2450 goto out_unlock; 2451 2452 if (destoff > inode->i_size) { 2453 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 2454 if (ret) 2455 goto out_unlock; 2456 } 2457 2458 /* truncate page cache pages from target inode range */ 2459 truncate_inode_pages_range(&inode->i_data, destoff, 2460 PAGE_CACHE_ALIGN(destoff + len) - 1); 2461 2462 /* do any pending delalloc/csum calc on src, one way or 2463 another, and lock file content */ 2464 while (1) { 2465 struct btrfs_ordered_extent *ordered; 2466 lock_extent(&BTRFS_I(src)->io_tree, off, off+len); 2467 ordered = btrfs_lookup_first_ordered_extent(src, off+len); 2468 if (!ordered && 2469 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, 2470 EXTENT_DELALLOC, 0, NULL)) 2471 break; 2472 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); 2473 if (ordered) 2474 btrfs_put_ordered_extent(ordered); 2475 btrfs_wait_ordered_range(src, off, len); 2476 } 2477 2478 /* clone data */ 2479 key.objectid = btrfs_ino(src); 2480 key.type = BTRFS_EXTENT_DATA_KEY; 2481 key.offset = 0; 2482 2483 while (1) { 2484 /* 2485 * note the key will change type as we walk through the 2486 * tree. 2487 */ 2488 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 2489 0, 0); 2490 if (ret < 0) 2491 goto out; 2492 2493 nritems = btrfs_header_nritems(path->nodes[0]); 2494 if (path->slots[0] >= nritems) { 2495 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 2496 if (ret < 0) 2497 goto out; 2498 if (ret > 0) 2499 break; 2500 nritems = btrfs_header_nritems(path->nodes[0]); 2501 } 2502 leaf = path->nodes[0]; 2503 slot = path->slots[0]; 2504 2505 btrfs_item_key_to_cpu(leaf, &key, slot); 2506 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2507 key.objectid != btrfs_ino(src)) 2508 break; 2509 2510 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2511 struct btrfs_file_extent_item *extent; 2512 int type; 2513 u32 size; 2514 struct btrfs_key new_key; 2515 u64 disko = 0, diskl = 0; 2516 u64 datao = 0, datal = 0; 2517 u8 comp; 2518 u64 endoff; 2519 2520 size = btrfs_item_size_nr(leaf, slot); 2521 read_extent_buffer(leaf, buf, 2522 btrfs_item_ptr_offset(leaf, slot), 2523 size); 2524 2525 extent = btrfs_item_ptr(leaf, slot, 2526 struct btrfs_file_extent_item); 2527 comp = btrfs_file_extent_compression(leaf, extent); 2528 type = btrfs_file_extent_type(leaf, extent); 2529 if (type == BTRFS_FILE_EXTENT_REG || 2530 type == BTRFS_FILE_EXTENT_PREALLOC) { 2531 disko = btrfs_file_extent_disk_bytenr(leaf, 2532 extent); 2533 diskl = btrfs_file_extent_disk_num_bytes(leaf, 2534 extent); 2535 datao = btrfs_file_extent_offset(leaf, extent); 2536 datal = btrfs_file_extent_num_bytes(leaf, 2537 extent); 2538 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 2539 /* take upper bound, may be compressed */ 2540 datal = btrfs_file_extent_ram_bytes(leaf, 2541 extent); 2542 } 2543 btrfs_release_path(path); 2544 2545 if (key.offset + datal <= off || 2546 key.offset >= off+len) 2547 goto next; 2548 2549 memcpy(&new_key, &key, sizeof(new_key)); 2550 new_key.objectid = btrfs_ino(inode); 2551 if (off <= key.offset) 2552 new_key.offset = key.offset + destoff - off; 2553 else 2554 new_key.offset = destoff; 2555 2556 /* 2557 * 1 - adjusting old extent (we may have to split it) 2558 * 1 - add new extent 2559 * 1 - inode update 2560 */ 2561 trans = btrfs_start_transaction(root, 3); 2562 if (IS_ERR(trans)) { 2563 ret = PTR_ERR(trans); 2564 goto out; 2565 } 2566 2567 if (type == BTRFS_FILE_EXTENT_REG || 2568 type == BTRFS_FILE_EXTENT_PREALLOC) { 2569 /* 2570 * a | --- range to clone ---| b 2571 * | ------------- extent ------------- | 2572 */ 2573 2574 /* substract range b */ 2575 if (key.offset + datal > off + len) 2576 datal = off + len - key.offset; 2577 2578 /* substract range a */ 2579 if (off > key.offset) { 2580 datao += off - key.offset; 2581 datal -= off - key.offset; 2582 } 2583 2584 ret = btrfs_drop_extents(trans, inode, 2585 new_key.offset, 2586 new_key.offset + datal, 2587 &hint_byte, 1); 2588 if (ret) { 2589 btrfs_abort_transaction(trans, root, 2590 ret); 2591 btrfs_end_transaction(trans, root); 2592 goto out; 2593 } 2594 2595 ret = btrfs_insert_empty_item(trans, root, path, 2596 &new_key, size); 2597 if (ret) { 2598 btrfs_abort_transaction(trans, root, 2599 ret); 2600 btrfs_end_transaction(trans, root); 2601 goto out; 2602 } 2603 2604 leaf = path->nodes[0]; 2605 slot = path->slots[0]; 2606 write_extent_buffer(leaf, buf, 2607 btrfs_item_ptr_offset(leaf, slot), 2608 size); 2609 2610 extent = btrfs_item_ptr(leaf, slot, 2611 struct btrfs_file_extent_item); 2612 2613 /* disko == 0 means it's a hole */ 2614 if (!disko) 2615 datao = 0; 2616 2617 btrfs_set_file_extent_offset(leaf, extent, 2618 datao); 2619 btrfs_set_file_extent_num_bytes(leaf, extent, 2620 datal); 2621 if (disko) { 2622 inode_add_bytes(inode, datal); 2623 ret = btrfs_inc_extent_ref(trans, root, 2624 disko, diskl, 0, 2625 root->root_key.objectid, 2626 btrfs_ino(inode), 2627 new_key.offset - datao, 2628 0); 2629 if (ret) { 2630 btrfs_abort_transaction(trans, 2631 root, 2632 ret); 2633 btrfs_end_transaction(trans, 2634 root); 2635 goto out; 2636 2637 } 2638 } 2639 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 2640 u64 skip = 0; 2641 u64 trim = 0; 2642 if (off > key.offset) { 2643 skip = off - key.offset; 2644 new_key.offset += skip; 2645 } 2646 2647 if (key.offset + datal > off+len) 2648 trim = key.offset + datal - (off+len); 2649 2650 if (comp && (skip || trim)) { 2651 ret = -EINVAL; 2652 btrfs_end_transaction(trans, root); 2653 goto out; 2654 } 2655 size -= skip + trim; 2656 datal -= skip + trim; 2657 2658 ret = btrfs_drop_extents(trans, inode, 2659 new_key.offset, 2660 new_key.offset + datal, 2661 &hint_byte, 1); 2662 if (ret) { 2663 btrfs_abort_transaction(trans, root, 2664 ret); 2665 btrfs_end_transaction(trans, root); 2666 goto out; 2667 } 2668 2669 ret = btrfs_insert_empty_item(trans, root, path, 2670 &new_key, size); 2671 if (ret) { 2672 btrfs_abort_transaction(trans, root, 2673 ret); 2674 btrfs_end_transaction(trans, root); 2675 goto out; 2676 } 2677 2678 if (skip) { 2679 u32 start = 2680 btrfs_file_extent_calc_inline_size(0); 2681 memmove(buf+start, buf+start+skip, 2682 datal); 2683 } 2684 2685 leaf = path->nodes[0]; 2686 slot = path->slots[0]; 2687 write_extent_buffer(leaf, buf, 2688 btrfs_item_ptr_offset(leaf, slot), 2689 size); 2690 inode_add_bytes(inode, datal); 2691 } 2692 2693 btrfs_mark_buffer_dirty(leaf); 2694 btrfs_release_path(path); 2695 2696 inode_inc_iversion(inode); 2697 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2698 2699 /* 2700 * we round up to the block size at eof when 2701 * determining which extents to clone above, 2702 * but shouldn't round up the file size 2703 */ 2704 endoff = new_key.offset + datal; 2705 if (endoff > destoff+olen) 2706 endoff = destoff+olen; 2707 if (endoff > inode->i_size) 2708 btrfs_i_size_write(inode, endoff); 2709 2710 ret = btrfs_update_inode(trans, root, inode); 2711 if (ret) { 2712 btrfs_abort_transaction(trans, root, ret); 2713 btrfs_end_transaction(trans, root); 2714 goto out; 2715 } 2716 ret = btrfs_end_transaction(trans, root); 2717 } 2718 next: 2719 btrfs_release_path(path); 2720 key.offset++; 2721 } 2722 ret = 0; 2723 out: 2724 btrfs_release_path(path); 2725 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); 2726 out_unlock: 2727 mutex_unlock(&src->i_mutex); 2728 mutex_unlock(&inode->i_mutex); 2729 vfree(buf); 2730 btrfs_free_path(path); 2731 out_fput: 2732 fput(src_file); 2733 out_drop_write: 2734 mnt_drop_write_file(file); 2735 return ret; 2736 } 2737 2738 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 2739 { 2740 struct btrfs_ioctl_clone_range_args args; 2741 2742 if (copy_from_user(&args, argp, sizeof(args))) 2743 return -EFAULT; 2744 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 2745 args.src_length, args.dest_offset); 2746 } 2747 2748 /* 2749 * there are many ways the trans_start and trans_end ioctls can lead 2750 * to deadlocks. They should only be used by applications that 2751 * basically own the machine, and have a very in depth understanding 2752 * of all the possible deadlocks and enospc problems. 2753 */ 2754 static long btrfs_ioctl_trans_start(struct file *file) 2755 { 2756 struct inode *inode = fdentry(file)->d_inode; 2757 struct btrfs_root *root = BTRFS_I(inode)->root; 2758 struct btrfs_trans_handle *trans; 2759 int ret; 2760 2761 ret = -EPERM; 2762 if (!capable(CAP_SYS_ADMIN)) 2763 goto out; 2764 2765 ret = -EINPROGRESS; 2766 if (file->private_data) 2767 goto out; 2768 2769 ret = -EROFS; 2770 if (btrfs_root_readonly(root)) 2771 goto out; 2772 2773 ret = mnt_want_write_file(file); 2774 if (ret) 2775 goto out; 2776 2777 atomic_inc(&root->fs_info->open_ioctl_trans); 2778 2779 ret = -ENOMEM; 2780 trans = btrfs_start_ioctl_transaction(root); 2781 if (IS_ERR(trans)) 2782 goto out_drop; 2783 2784 file->private_data = trans; 2785 return 0; 2786 2787 out_drop: 2788 atomic_dec(&root->fs_info->open_ioctl_trans); 2789 mnt_drop_write_file(file); 2790 out: 2791 return ret; 2792 } 2793 2794 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 2795 { 2796 struct inode *inode = fdentry(file)->d_inode; 2797 struct btrfs_root *root = BTRFS_I(inode)->root; 2798 struct btrfs_root *new_root; 2799 struct btrfs_dir_item *di; 2800 struct btrfs_trans_handle *trans; 2801 struct btrfs_path *path; 2802 struct btrfs_key location; 2803 struct btrfs_disk_key disk_key; 2804 u64 objectid = 0; 2805 u64 dir_id; 2806 2807 if (!capable(CAP_SYS_ADMIN)) 2808 return -EPERM; 2809 2810 if (copy_from_user(&objectid, argp, sizeof(objectid))) 2811 return -EFAULT; 2812 2813 if (!objectid) 2814 objectid = root->root_key.objectid; 2815 2816 location.objectid = objectid; 2817 location.type = BTRFS_ROOT_ITEM_KEY; 2818 location.offset = (u64)-1; 2819 2820 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 2821 if (IS_ERR(new_root)) 2822 return PTR_ERR(new_root); 2823 2824 if (btrfs_root_refs(&new_root->root_item) == 0) 2825 return -ENOENT; 2826 2827 path = btrfs_alloc_path(); 2828 if (!path) 2829 return -ENOMEM; 2830 path->leave_spinning = 1; 2831 2832 trans = btrfs_start_transaction(root, 1); 2833 if (IS_ERR(trans)) { 2834 btrfs_free_path(path); 2835 return PTR_ERR(trans); 2836 } 2837 2838 dir_id = btrfs_super_root_dir(root->fs_info->super_copy); 2839 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, 2840 dir_id, "default", 7, 1); 2841 if (IS_ERR_OR_NULL(di)) { 2842 btrfs_free_path(path); 2843 btrfs_end_transaction(trans, root); 2844 printk(KERN_ERR "Umm, you don't have the default dir item, " 2845 "this isn't going to work\n"); 2846 return -ENOENT; 2847 } 2848 2849 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 2850 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 2851 btrfs_mark_buffer_dirty(path->nodes[0]); 2852 btrfs_free_path(path); 2853 2854 btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); 2855 btrfs_end_transaction(trans, root); 2856 2857 return 0; 2858 } 2859 2860 static void get_block_group_info(struct list_head *groups_list, 2861 struct btrfs_ioctl_space_info *space) 2862 { 2863 struct btrfs_block_group_cache *block_group; 2864 2865 space->total_bytes = 0; 2866 space->used_bytes = 0; 2867 space->flags = 0; 2868 list_for_each_entry(block_group, groups_list, list) { 2869 space->flags = block_group->flags; 2870 space->total_bytes += block_group->key.offset; 2871 space->used_bytes += 2872 btrfs_block_group_used(&block_group->item); 2873 } 2874 } 2875 2876 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 2877 { 2878 struct btrfs_ioctl_space_args space_args; 2879 struct btrfs_ioctl_space_info space; 2880 struct btrfs_ioctl_space_info *dest; 2881 struct btrfs_ioctl_space_info *dest_orig; 2882 struct btrfs_ioctl_space_info __user *user_dest; 2883 struct btrfs_space_info *info; 2884 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2885 BTRFS_BLOCK_GROUP_SYSTEM, 2886 BTRFS_BLOCK_GROUP_METADATA, 2887 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; 2888 int num_types = 4; 2889 int alloc_size; 2890 int ret = 0; 2891 u64 slot_count = 0; 2892 int i, c; 2893 2894 if (copy_from_user(&space_args, 2895 (struct btrfs_ioctl_space_args __user *)arg, 2896 sizeof(space_args))) 2897 return -EFAULT; 2898 2899 for (i = 0; i < num_types; i++) { 2900 struct btrfs_space_info *tmp; 2901 2902 info = NULL; 2903 rcu_read_lock(); 2904 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2905 list) { 2906 if (tmp->flags == types[i]) { 2907 info = tmp; 2908 break; 2909 } 2910 } 2911 rcu_read_unlock(); 2912 2913 if (!info) 2914 continue; 2915 2916 down_read(&info->groups_sem); 2917 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 2918 if (!list_empty(&info->block_groups[c])) 2919 slot_count++; 2920 } 2921 up_read(&info->groups_sem); 2922 } 2923 2924 /* space_slots == 0 means they are asking for a count */ 2925 if (space_args.space_slots == 0) { 2926 space_args.total_spaces = slot_count; 2927 goto out; 2928 } 2929 2930 slot_count = min_t(u64, space_args.space_slots, slot_count); 2931 2932 alloc_size = sizeof(*dest) * slot_count; 2933 2934 /* we generally have at most 6 or so space infos, one for each raid 2935 * level. So, a whole page should be more than enough for everyone 2936 */ 2937 if (alloc_size > PAGE_CACHE_SIZE) 2938 return -ENOMEM; 2939 2940 space_args.total_spaces = 0; 2941 dest = kmalloc(alloc_size, GFP_NOFS); 2942 if (!dest) 2943 return -ENOMEM; 2944 dest_orig = dest; 2945 2946 /* now we have a buffer to copy into */ 2947 for (i = 0; i < num_types; i++) { 2948 struct btrfs_space_info *tmp; 2949 2950 if (!slot_count) 2951 break; 2952 2953 info = NULL; 2954 rcu_read_lock(); 2955 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2956 list) { 2957 if (tmp->flags == types[i]) { 2958 info = tmp; 2959 break; 2960 } 2961 } 2962 rcu_read_unlock(); 2963 2964 if (!info) 2965 continue; 2966 down_read(&info->groups_sem); 2967 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 2968 if (!list_empty(&info->block_groups[c])) { 2969 get_block_group_info(&info->block_groups[c], 2970 &space); 2971 memcpy(dest, &space, sizeof(space)); 2972 dest++; 2973 space_args.total_spaces++; 2974 slot_count--; 2975 } 2976 if (!slot_count) 2977 break; 2978 } 2979 up_read(&info->groups_sem); 2980 } 2981 2982 user_dest = (struct btrfs_ioctl_space_info __user *) 2983 (arg + sizeof(struct btrfs_ioctl_space_args)); 2984 2985 if (copy_to_user(user_dest, dest_orig, alloc_size)) 2986 ret = -EFAULT; 2987 2988 kfree(dest_orig); 2989 out: 2990 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 2991 ret = -EFAULT; 2992 2993 return ret; 2994 } 2995 2996 /* 2997 * there are many ways the trans_start and trans_end ioctls can lead 2998 * to deadlocks. They should only be used by applications that 2999 * basically own the machine, and have a very in depth understanding 3000 * of all the possible deadlocks and enospc problems. 3001 */ 3002 long btrfs_ioctl_trans_end(struct file *file) 3003 { 3004 struct inode *inode = fdentry(file)->d_inode; 3005 struct btrfs_root *root = BTRFS_I(inode)->root; 3006 struct btrfs_trans_handle *trans; 3007 3008 trans = file->private_data; 3009 if (!trans) 3010 return -EINVAL; 3011 file->private_data = NULL; 3012 3013 btrfs_end_transaction(trans, root); 3014 3015 atomic_dec(&root->fs_info->open_ioctl_trans); 3016 3017 mnt_drop_write_file(file); 3018 return 0; 3019 } 3020 3021 static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) 3022 { 3023 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 3024 struct btrfs_trans_handle *trans; 3025 u64 transid; 3026 int ret; 3027 3028 trans = btrfs_start_transaction(root, 0); 3029 if (IS_ERR(trans)) 3030 return PTR_ERR(trans); 3031 transid = trans->transid; 3032 ret = btrfs_commit_transaction_async(trans, root, 0); 3033 if (ret) { 3034 btrfs_end_transaction(trans, root); 3035 return ret; 3036 } 3037 3038 if (argp) 3039 if (copy_to_user(argp, &transid, sizeof(transid))) 3040 return -EFAULT; 3041 return 0; 3042 } 3043 3044 static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) 3045 { 3046 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 3047 u64 transid; 3048 3049 if (argp) { 3050 if (copy_from_user(&transid, argp, sizeof(transid))) 3051 return -EFAULT; 3052 } else { 3053 transid = 0; /* current trans */ 3054 } 3055 return btrfs_wait_for_commit(root, transid); 3056 } 3057 3058 static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) 3059 { 3060 int ret; 3061 struct btrfs_ioctl_scrub_args *sa; 3062 3063 if (!capable(CAP_SYS_ADMIN)) 3064 return -EPERM; 3065 3066 sa = memdup_user(arg, sizeof(*sa)); 3067 if (IS_ERR(sa)) 3068 return PTR_ERR(sa); 3069 3070 ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end, 3071 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); 3072 3073 if (copy_to_user(arg, sa, sizeof(*sa))) 3074 ret = -EFAULT; 3075 3076 kfree(sa); 3077 return ret; 3078 } 3079 3080 static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) 3081 { 3082 if (!capable(CAP_SYS_ADMIN)) 3083 return -EPERM; 3084 3085 return btrfs_scrub_cancel(root); 3086 } 3087 3088 static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, 3089 void __user *arg) 3090 { 3091 struct btrfs_ioctl_scrub_args *sa; 3092 int ret; 3093 3094 if (!capable(CAP_SYS_ADMIN)) 3095 return -EPERM; 3096 3097 sa = memdup_user(arg, sizeof(*sa)); 3098 if (IS_ERR(sa)) 3099 return PTR_ERR(sa); 3100 3101 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress); 3102 3103 if (copy_to_user(arg, sa, sizeof(*sa))) 3104 ret = -EFAULT; 3105 3106 kfree(sa); 3107 return ret; 3108 } 3109 3110 static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, 3111 void __user *arg) 3112 { 3113 struct btrfs_ioctl_get_dev_stats *sa; 3114 int ret; 3115 3116 sa = memdup_user(arg, sizeof(*sa)); 3117 if (IS_ERR(sa)) 3118 return PTR_ERR(sa); 3119 3120 if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { 3121 kfree(sa); 3122 return -EPERM; 3123 } 3124 3125 ret = btrfs_get_dev_stats(root, sa); 3126 3127 if (copy_to_user(arg, sa, sizeof(*sa))) 3128 ret = -EFAULT; 3129 3130 kfree(sa); 3131 return ret; 3132 } 3133 3134 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 3135 { 3136 int ret = 0; 3137 int i; 3138 u64 rel_ptr; 3139 int size; 3140 struct btrfs_ioctl_ino_path_args *ipa = NULL; 3141 struct inode_fs_paths *ipath = NULL; 3142 struct btrfs_path *path; 3143 3144 if (!capable(CAP_SYS_ADMIN)) 3145 return -EPERM; 3146 3147 path = btrfs_alloc_path(); 3148 if (!path) { 3149 ret = -ENOMEM; 3150 goto out; 3151 } 3152 3153 ipa = memdup_user(arg, sizeof(*ipa)); 3154 if (IS_ERR(ipa)) { 3155 ret = PTR_ERR(ipa); 3156 ipa = NULL; 3157 goto out; 3158 } 3159 3160 size = min_t(u32, ipa->size, 4096); 3161 ipath = init_ipath(size, root, path); 3162 if (IS_ERR(ipath)) { 3163 ret = PTR_ERR(ipath); 3164 ipath = NULL; 3165 goto out; 3166 } 3167 3168 ret = paths_from_inode(ipa->inum, ipath); 3169 if (ret < 0) 3170 goto out; 3171 3172 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 3173 rel_ptr = ipath->fspath->val[i] - 3174 (u64)(unsigned long)ipath->fspath->val; 3175 ipath->fspath->val[i] = rel_ptr; 3176 } 3177 3178 ret = copy_to_user((void *)(unsigned long)ipa->fspath, 3179 (void *)(unsigned long)ipath->fspath, size); 3180 if (ret) { 3181 ret = -EFAULT; 3182 goto out; 3183 } 3184 3185 out: 3186 btrfs_free_path(path); 3187 free_ipath(ipath); 3188 kfree(ipa); 3189 3190 return ret; 3191 } 3192 3193 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 3194 { 3195 struct btrfs_data_container *inodes = ctx; 3196 const size_t c = 3 * sizeof(u64); 3197 3198 if (inodes->bytes_left >= c) { 3199 inodes->bytes_left -= c; 3200 inodes->val[inodes->elem_cnt] = inum; 3201 inodes->val[inodes->elem_cnt + 1] = offset; 3202 inodes->val[inodes->elem_cnt + 2] = root; 3203 inodes->elem_cnt += 3; 3204 } else { 3205 inodes->bytes_missing += c - inodes->bytes_left; 3206 inodes->bytes_left = 0; 3207 inodes->elem_missed += 3; 3208 } 3209 3210 return 0; 3211 } 3212 3213 static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, 3214 void __user *arg) 3215 { 3216 int ret = 0; 3217 int size; 3218 u64 extent_item_pos; 3219 struct btrfs_ioctl_logical_ino_args *loi; 3220 struct btrfs_data_container *inodes = NULL; 3221 struct btrfs_path *path = NULL; 3222 struct btrfs_key key; 3223 3224 if (!capable(CAP_SYS_ADMIN)) 3225 return -EPERM; 3226 3227 loi = memdup_user(arg, sizeof(*loi)); 3228 if (IS_ERR(loi)) { 3229 ret = PTR_ERR(loi); 3230 loi = NULL; 3231 goto out; 3232 } 3233 3234 path = btrfs_alloc_path(); 3235 if (!path) { 3236 ret = -ENOMEM; 3237 goto out; 3238 } 3239 3240 size = min_t(u32, loi->size, 4096); 3241 inodes = init_data_container(size); 3242 if (IS_ERR(inodes)) { 3243 ret = PTR_ERR(inodes); 3244 inodes = NULL; 3245 goto out; 3246 } 3247 3248 ret = extent_from_logical(root->fs_info, loi->logical, path, &key); 3249 btrfs_release_path(path); 3250 3251 if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) 3252 ret = -ENOENT; 3253 if (ret < 0) 3254 goto out; 3255 3256 extent_item_pos = loi->logical - key.objectid; 3257 ret = iterate_extent_inodes(root->fs_info, key.objectid, 3258 extent_item_pos, 0, build_ino_list, 3259 inodes); 3260 3261 if (ret < 0) 3262 goto out; 3263 3264 ret = copy_to_user((void *)(unsigned long)loi->inodes, 3265 (void *)(unsigned long)inodes, size); 3266 if (ret) 3267 ret = -EFAULT; 3268 3269 out: 3270 btrfs_free_path(path); 3271 kfree(inodes); 3272 kfree(loi); 3273 3274 return ret; 3275 } 3276 3277 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 3278 struct btrfs_ioctl_balance_args *bargs) 3279 { 3280 struct btrfs_balance_control *bctl = fs_info->balance_ctl; 3281 3282 bargs->flags = bctl->flags; 3283 3284 if (atomic_read(&fs_info->balance_running)) 3285 bargs->state |= BTRFS_BALANCE_STATE_RUNNING; 3286 if (atomic_read(&fs_info->balance_pause_req)) 3287 bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; 3288 if (atomic_read(&fs_info->balance_cancel_req)) 3289 bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; 3290 3291 memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); 3292 memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); 3293 memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); 3294 3295 if (lock) { 3296 spin_lock(&fs_info->balance_lock); 3297 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 3298 spin_unlock(&fs_info->balance_lock); 3299 } else { 3300 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 3301 } 3302 } 3303 3304 static long btrfs_ioctl_balance(struct file *file, void __user *arg) 3305 { 3306 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3307 struct btrfs_fs_info *fs_info = root->fs_info; 3308 struct btrfs_ioctl_balance_args *bargs; 3309 struct btrfs_balance_control *bctl; 3310 int ret; 3311 3312 if (!capable(CAP_SYS_ADMIN)) 3313 return -EPERM; 3314 3315 ret = mnt_want_write_file(file); 3316 if (ret) 3317 return ret; 3318 3319 mutex_lock(&fs_info->volume_mutex); 3320 mutex_lock(&fs_info->balance_mutex); 3321 3322 if (arg) { 3323 bargs = memdup_user(arg, sizeof(*bargs)); 3324 if (IS_ERR(bargs)) { 3325 ret = PTR_ERR(bargs); 3326 goto out; 3327 } 3328 3329 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3330 if (!fs_info->balance_ctl) { 3331 ret = -ENOTCONN; 3332 goto out_bargs; 3333 } 3334 3335 bctl = fs_info->balance_ctl; 3336 spin_lock(&fs_info->balance_lock); 3337 bctl->flags |= BTRFS_BALANCE_RESUME; 3338 spin_unlock(&fs_info->balance_lock); 3339 3340 goto do_balance; 3341 } 3342 } else { 3343 bargs = NULL; 3344 } 3345 3346 if (fs_info->balance_ctl) { 3347 ret = -EINPROGRESS; 3348 goto out_bargs; 3349 } 3350 3351 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3352 if (!bctl) { 3353 ret = -ENOMEM; 3354 goto out_bargs; 3355 } 3356 3357 bctl->fs_info = fs_info; 3358 if (arg) { 3359 memcpy(&bctl->data, &bargs->data, sizeof(bctl->data)); 3360 memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); 3361 memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); 3362 3363 bctl->flags = bargs->flags; 3364 } else { 3365 /* balance everything - no filters */ 3366 bctl->flags |= BTRFS_BALANCE_TYPE_MASK; 3367 } 3368 3369 do_balance: 3370 ret = btrfs_balance(bctl, bargs); 3371 /* 3372 * bctl is freed in __cancel_balance or in free_fs_info if 3373 * restriper was paused all the way until unmount 3374 */ 3375 if (arg) { 3376 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3377 ret = -EFAULT; 3378 } 3379 3380 out_bargs: 3381 kfree(bargs); 3382 out: 3383 mutex_unlock(&fs_info->balance_mutex); 3384 mutex_unlock(&fs_info->volume_mutex); 3385 mnt_drop_write_file(file); 3386 return ret; 3387 } 3388 3389 static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd) 3390 { 3391 if (!capable(CAP_SYS_ADMIN)) 3392 return -EPERM; 3393 3394 switch (cmd) { 3395 case BTRFS_BALANCE_CTL_PAUSE: 3396 return btrfs_pause_balance(root->fs_info); 3397 case BTRFS_BALANCE_CTL_CANCEL: 3398 return btrfs_cancel_balance(root->fs_info); 3399 } 3400 3401 return -EINVAL; 3402 } 3403 3404 static long btrfs_ioctl_balance_progress(struct btrfs_root *root, 3405 void __user *arg) 3406 { 3407 struct btrfs_fs_info *fs_info = root->fs_info; 3408 struct btrfs_ioctl_balance_args *bargs; 3409 int ret = 0; 3410 3411 if (!capable(CAP_SYS_ADMIN)) 3412 return -EPERM; 3413 3414 mutex_lock(&fs_info->balance_mutex); 3415 if (!fs_info->balance_ctl) { 3416 ret = -ENOTCONN; 3417 goto out; 3418 } 3419 3420 bargs = kzalloc(sizeof(*bargs), GFP_NOFS); 3421 if (!bargs) { 3422 ret = -ENOMEM; 3423 goto out; 3424 } 3425 3426 update_ioctl_balance_args(fs_info, 1, bargs); 3427 3428 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3429 ret = -EFAULT; 3430 3431 kfree(bargs); 3432 out: 3433 mutex_unlock(&fs_info->balance_mutex); 3434 return ret; 3435 } 3436 3437 static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) 3438 { 3439 struct btrfs_ioctl_quota_ctl_args *sa; 3440 struct btrfs_trans_handle *trans = NULL; 3441 int ret; 3442 int err; 3443 3444 if (!capable(CAP_SYS_ADMIN)) 3445 return -EPERM; 3446 3447 if (root->fs_info->sb->s_flags & MS_RDONLY) 3448 return -EROFS; 3449 3450 sa = memdup_user(arg, sizeof(*sa)); 3451 if (IS_ERR(sa)) 3452 return PTR_ERR(sa); 3453 3454 if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { 3455 trans = btrfs_start_transaction(root, 2); 3456 if (IS_ERR(trans)) { 3457 ret = PTR_ERR(trans); 3458 goto out; 3459 } 3460 } 3461 3462 switch (sa->cmd) { 3463 case BTRFS_QUOTA_CTL_ENABLE: 3464 ret = btrfs_quota_enable(trans, root->fs_info); 3465 break; 3466 case BTRFS_QUOTA_CTL_DISABLE: 3467 ret = btrfs_quota_disable(trans, root->fs_info); 3468 break; 3469 case BTRFS_QUOTA_CTL_RESCAN: 3470 ret = btrfs_quota_rescan(root->fs_info); 3471 break; 3472 default: 3473 ret = -EINVAL; 3474 break; 3475 } 3476 3477 if (copy_to_user(arg, sa, sizeof(*sa))) 3478 ret = -EFAULT; 3479 3480 if (trans) { 3481 err = btrfs_commit_transaction(trans, root); 3482 if (err && !ret) 3483 ret = err; 3484 } 3485 3486 out: 3487 kfree(sa); 3488 return ret; 3489 } 3490 3491 static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) 3492 { 3493 struct btrfs_ioctl_qgroup_assign_args *sa; 3494 struct btrfs_trans_handle *trans; 3495 int ret; 3496 int err; 3497 3498 if (!capable(CAP_SYS_ADMIN)) 3499 return -EPERM; 3500 3501 if (root->fs_info->sb->s_flags & MS_RDONLY) 3502 return -EROFS; 3503 3504 sa = memdup_user(arg, sizeof(*sa)); 3505 if (IS_ERR(sa)) 3506 return PTR_ERR(sa); 3507 3508 trans = btrfs_join_transaction(root); 3509 if (IS_ERR(trans)) { 3510 ret = PTR_ERR(trans); 3511 goto out; 3512 } 3513 3514 /* FIXME: check if the IDs really exist */ 3515 if (sa->assign) { 3516 ret = btrfs_add_qgroup_relation(trans, root->fs_info, 3517 sa->src, sa->dst); 3518 } else { 3519 ret = btrfs_del_qgroup_relation(trans, root->fs_info, 3520 sa->src, sa->dst); 3521 } 3522 3523 err = btrfs_end_transaction(trans, root); 3524 if (err && !ret) 3525 ret = err; 3526 3527 out: 3528 kfree(sa); 3529 return ret; 3530 } 3531 3532 static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) 3533 { 3534 struct btrfs_ioctl_qgroup_create_args *sa; 3535 struct btrfs_trans_handle *trans; 3536 int ret; 3537 int err; 3538 3539 if (!capable(CAP_SYS_ADMIN)) 3540 return -EPERM; 3541 3542 if (root->fs_info->sb->s_flags & MS_RDONLY) 3543 return -EROFS; 3544 3545 sa = memdup_user(arg, sizeof(*sa)); 3546 if (IS_ERR(sa)) 3547 return PTR_ERR(sa); 3548 3549 trans = btrfs_join_transaction(root); 3550 if (IS_ERR(trans)) { 3551 ret = PTR_ERR(trans); 3552 goto out; 3553 } 3554 3555 /* FIXME: check if the IDs really exist */ 3556 if (sa->create) { 3557 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, 3558 NULL); 3559 } else { 3560 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); 3561 } 3562 3563 err = btrfs_end_transaction(trans, root); 3564 if (err && !ret) 3565 ret = err; 3566 3567 out: 3568 kfree(sa); 3569 return ret; 3570 } 3571 3572 static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) 3573 { 3574 struct btrfs_ioctl_qgroup_limit_args *sa; 3575 struct btrfs_trans_handle *trans; 3576 int ret; 3577 int err; 3578 u64 qgroupid; 3579 3580 if (!capable(CAP_SYS_ADMIN)) 3581 return -EPERM; 3582 3583 if (root->fs_info->sb->s_flags & MS_RDONLY) 3584 return -EROFS; 3585 3586 sa = memdup_user(arg, sizeof(*sa)); 3587 if (IS_ERR(sa)) 3588 return PTR_ERR(sa); 3589 3590 trans = btrfs_join_transaction(root); 3591 if (IS_ERR(trans)) { 3592 ret = PTR_ERR(trans); 3593 goto out; 3594 } 3595 3596 qgroupid = sa->qgroupid; 3597 if (!qgroupid) { 3598 /* take the current subvol as qgroup */ 3599 qgroupid = root->root_key.objectid; 3600 } 3601 3602 /* FIXME: check if the IDs really exist */ 3603 ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); 3604 3605 err = btrfs_end_transaction(trans, root); 3606 if (err && !ret) 3607 ret = err; 3608 3609 out: 3610 kfree(sa); 3611 return ret; 3612 } 3613 3614 static long btrfs_ioctl_set_received_subvol(struct file *file, 3615 void __user *arg) 3616 { 3617 struct btrfs_ioctl_received_subvol_args *sa = NULL; 3618 struct inode *inode = fdentry(file)->d_inode; 3619 struct btrfs_root *root = BTRFS_I(inode)->root; 3620 struct btrfs_root_item *root_item = &root->root_item; 3621 struct btrfs_trans_handle *trans; 3622 struct timespec ct = CURRENT_TIME; 3623 int ret = 0; 3624 3625 ret = mnt_want_write_file(file); 3626 if (ret < 0) 3627 return ret; 3628 3629 down_write(&root->fs_info->subvol_sem); 3630 3631 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 3632 ret = -EINVAL; 3633 goto out; 3634 } 3635 3636 if (btrfs_root_readonly(root)) { 3637 ret = -EROFS; 3638 goto out; 3639 } 3640 3641 if (!inode_owner_or_capable(inode)) { 3642 ret = -EACCES; 3643 goto out; 3644 } 3645 3646 sa = memdup_user(arg, sizeof(*sa)); 3647 if (IS_ERR(sa)) { 3648 ret = PTR_ERR(sa); 3649 sa = NULL; 3650 goto out; 3651 } 3652 3653 trans = btrfs_start_transaction(root, 1); 3654 if (IS_ERR(trans)) { 3655 ret = PTR_ERR(trans); 3656 trans = NULL; 3657 goto out; 3658 } 3659 3660 sa->rtransid = trans->transid; 3661 sa->rtime.sec = ct.tv_sec; 3662 sa->rtime.nsec = ct.tv_nsec; 3663 3664 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 3665 btrfs_set_root_stransid(root_item, sa->stransid); 3666 btrfs_set_root_rtransid(root_item, sa->rtransid); 3667 root_item->stime.sec = cpu_to_le64(sa->stime.sec); 3668 root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); 3669 root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); 3670 root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); 3671 3672 ret = btrfs_update_root(trans, root->fs_info->tree_root, 3673 &root->root_key, &root->root_item); 3674 if (ret < 0) { 3675 btrfs_end_transaction(trans, root); 3676 trans = NULL; 3677 goto out; 3678 } else { 3679 ret = btrfs_commit_transaction(trans, root); 3680 if (ret < 0) 3681 goto out; 3682 } 3683 3684 ret = copy_to_user(arg, sa, sizeof(*sa)); 3685 if (ret) 3686 ret = -EFAULT; 3687 3688 out: 3689 kfree(sa); 3690 up_write(&root->fs_info->subvol_sem); 3691 mnt_drop_write_file(file); 3692 return ret; 3693 } 3694 3695 long btrfs_ioctl(struct file *file, unsigned int 3696 cmd, unsigned long arg) 3697 { 3698 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3699 void __user *argp = (void __user *)arg; 3700 3701 switch (cmd) { 3702 case FS_IOC_GETFLAGS: 3703 return btrfs_ioctl_getflags(file, argp); 3704 case FS_IOC_SETFLAGS: 3705 return btrfs_ioctl_setflags(file, argp); 3706 case FS_IOC_GETVERSION: 3707 return btrfs_ioctl_getversion(file, argp); 3708 case FITRIM: 3709 return btrfs_ioctl_fitrim(file, argp); 3710 case BTRFS_IOC_SNAP_CREATE: 3711 return btrfs_ioctl_snap_create(file, argp, 0); 3712 case BTRFS_IOC_SNAP_CREATE_V2: 3713 return btrfs_ioctl_snap_create_v2(file, argp, 0); 3714 case BTRFS_IOC_SUBVOL_CREATE: 3715 return btrfs_ioctl_snap_create(file, argp, 1); 3716 case BTRFS_IOC_SUBVOL_CREATE_V2: 3717 return btrfs_ioctl_snap_create_v2(file, argp, 1); 3718 case BTRFS_IOC_SNAP_DESTROY: 3719 return btrfs_ioctl_snap_destroy(file, argp); 3720 case BTRFS_IOC_SUBVOL_GETFLAGS: 3721 return btrfs_ioctl_subvol_getflags(file, argp); 3722 case BTRFS_IOC_SUBVOL_SETFLAGS: 3723 return btrfs_ioctl_subvol_setflags(file, argp); 3724 case BTRFS_IOC_DEFAULT_SUBVOL: 3725 return btrfs_ioctl_default_subvol(file, argp); 3726 case BTRFS_IOC_DEFRAG: 3727 return btrfs_ioctl_defrag(file, NULL); 3728 case BTRFS_IOC_DEFRAG_RANGE: 3729 return btrfs_ioctl_defrag(file, argp); 3730 case BTRFS_IOC_RESIZE: 3731 return btrfs_ioctl_resize(root, argp); 3732 case BTRFS_IOC_ADD_DEV: 3733 return btrfs_ioctl_add_dev(root, argp); 3734 case BTRFS_IOC_RM_DEV: 3735 return btrfs_ioctl_rm_dev(root, argp); 3736 case BTRFS_IOC_FS_INFO: 3737 return btrfs_ioctl_fs_info(root, argp); 3738 case BTRFS_IOC_DEV_INFO: 3739 return btrfs_ioctl_dev_info(root, argp); 3740 case BTRFS_IOC_BALANCE: 3741 return btrfs_ioctl_balance(file, NULL); 3742 case BTRFS_IOC_CLONE: 3743 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 3744 case BTRFS_IOC_CLONE_RANGE: 3745 return btrfs_ioctl_clone_range(file, argp); 3746 case BTRFS_IOC_TRANS_START: 3747 return btrfs_ioctl_trans_start(file); 3748 case BTRFS_IOC_TRANS_END: 3749 return btrfs_ioctl_trans_end(file); 3750 case BTRFS_IOC_TREE_SEARCH: 3751 return btrfs_ioctl_tree_search(file, argp); 3752 case BTRFS_IOC_INO_LOOKUP: 3753 return btrfs_ioctl_ino_lookup(file, argp); 3754 case BTRFS_IOC_INO_PATHS: 3755 return btrfs_ioctl_ino_to_path(root, argp); 3756 case BTRFS_IOC_LOGICAL_INO: 3757 return btrfs_ioctl_logical_to_ino(root, argp); 3758 case BTRFS_IOC_SPACE_INFO: 3759 return btrfs_ioctl_space_info(root, argp); 3760 case BTRFS_IOC_SYNC: 3761 btrfs_sync_fs(file->f_dentry->d_sb, 1); 3762 return 0; 3763 case BTRFS_IOC_START_SYNC: 3764 return btrfs_ioctl_start_sync(file, argp); 3765 case BTRFS_IOC_WAIT_SYNC: 3766 return btrfs_ioctl_wait_sync(file, argp); 3767 case BTRFS_IOC_SCRUB: 3768 return btrfs_ioctl_scrub(root, argp); 3769 case BTRFS_IOC_SCRUB_CANCEL: 3770 return btrfs_ioctl_scrub_cancel(root, argp); 3771 case BTRFS_IOC_SCRUB_PROGRESS: 3772 return btrfs_ioctl_scrub_progress(root, argp); 3773 case BTRFS_IOC_BALANCE_V2: 3774 return btrfs_ioctl_balance(file, argp); 3775 case BTRFS_IOC_BALANCE_CTL: 3776 return btrfs_ioctl_balance_ctl(root, arg); 3777 case BTRFS_IOC_BALANCE_PROGRESS: 3778 return btrfs_ioctl_balance_progress(root, argp); 3779 case BTRFS_IOC_SET_RECEIVED_SUBVOL: 3780 return btrfs_ioctl_set_received_subvol(file, argp); 3781 case BTRFS_IOC_SEND: 3782 return btrfs_ioctl_send(file, argp); 3783 case BTRFS_IOC_GET_DEV_STATS: 3784 return btrfs_ioctl_get_dev_stats(root, argp); 3785 case BTRFS_IOC_QUOTA_CTL: 3786 return btrfs_ioctl_quota_ctl(root, argp); 3787 case BTRFS_IOC_QGROUP_ASSIGN: 3788 return btrfs_ioctl_qgroup_assign(root, argp); 3789 case BTRFS_IOC_QGROUP_CREATE: 3790 return btrfs_ioctl_qgroup_create(root, argp); 3791 case BTRFS_IOC_QGROUP_LIMIT: 3792 return btrfs_ioctl_qgroup_limit(root, argp); 3793 } 3794 3795 return -ENOTTY; 3796 } 3797