1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/buffer_head.h> 22 #include <linux/file.h> 23 #include <linux/fs.h> 24 #include <linux/fsnotify.h> 25 #include <linux/pagemap.h> 26 #include <linux/highmem.h> 27 #include <linux/time.h> 28 #include <linux/init.h> 29 #include <linux/string.h> 30 #include <linux/backing-dev.h> 31 #include <linux/mount.h> 32 #include <linux/mpage.h> 33 #include <linux/namei.h> 34 #include <linux/swap.h> 35 #include <linux/writeback.h> 36 #include <linux/statfs.h> 37 #include <linux/compat.h> 38 #include <linux/bit_spinlock.h> 39 #include <linux/security.h> 40 #include <linux/xattr.h> 41 #include <linux/vmalloc.h> 42 #include <linux/slab.h> 43 #include <linux/blkdev.h> 44 #include "compat.h" 45 #include "ctree.h" 46 #include "disk-io.h" 47 #include "transaction.h" 48 #include "btrfs_inode.h" 49 #include "ioctl.h" 50 #include "print-tree.h" 51 #include "volumes.h" 52 #include "locking.h" 53 54 /* Mask out flags that are inappropriate for the given type of inode. */ 55 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 56 { 57 if (S_ISDIR(mode)) 58 return flags; 59 else if (S_ISREG(mode)) 60 return flags & ~FS_DIRSYNC_FL; 61 else 62 return flags & (FS_NODUMP_FL | FS_NOATIME_FL); 63 } 64 65 /* 66 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. 67 */ 68 static unsigned int btrfs_flags_to_ioctl(unsigned int flags) 69 { 70 unsigned int iflags = 0; 71 72 if (flags & BTRFS_INODE_SYNC) 73 iflags |= FS_SYNC_FL; 74 if (flags & BTRFS_INODE_IMMUTABLE) 75 iflags |= FS_IMMUTABLE_FL; 76 if (flags & BTRFS_INODE_APPEND) 77 iflags |= FS_APPEND_FL; 78 if (flags & BTRFS_INODE_NODUMP) 79 iflags |= FS_NODUMP_FL; 80 if (flags & BTRFS_INODE_NOATIME) 81 iflags |= FS_NOATIME_FL; 82 if (flags & BTRFS_INODE_DIRSYNC) 83 iflags |= FS_DIRSYNC_FL; 84 85 return iflags; 86 } 87 88 /* 89 * Update inode->i_flags based on the btrfs internal flags. 90 */ 91 void btrfs_update_iflags(struct inode *inode) 92 { 93 struct btrfs_inode *ip = BTRFS_I(inode); 94 95 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 96 97 if (ip->flags & BTRFS_INODE_SYNC) 98 inode->i_flags |= S_SYNC; 99 if (ip->flags & BTRFS_INODE_IMMUTABLE) 100 inode->i_flags |= S_IMMUTABLE; 101 if (ip->flags & BTRFS_INODE_APPEND) 102 inode->i_flags |= S_APPEND; 103 if (ip->flags & BTRFS_INODE_NOATIME) 104 inode->i_flags |= S_NOATIME; 105 if (ip->flags & BTRFS_INODE_DIRSYNC) 106 inode->i_flags |= S_DIRSYNC; 107 } 108 109 /* 110 * Inherit flags from the parent inode. 111 * 112 * Unlike extN we don't have any flags we don't want to inherit currently. 113 */ 114 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) 115 { 116 unsigned int flags; 117 118 if (!dir) 119 return; 120 121 flags = BTRFS_I(dir)->flags; 122 123 if (S_ISREG(inode->i_mode)) 124 flags &= ~BTRFS_INODE_DIRSYNC; 125 else if (!S_ISDIR(inode->i_mode)) 126 flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); 127 128 BTRFS_I(inode)->flags = flags; 129 btrfs_update_iflags(inode); 130 } 131 132 static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 133 { 134 struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); 135 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 136 137 if (copy_to_user(arg, &flags, sizeof(flags))) 138 return -EFAULT; 139 return 0; 140 } 141 142 static int check_flags(unsigned int flags) 143 { 144 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 145 FS_NOATIME_FL | FS_NODUMP_FL | \ 146 FS_SYNC_FL | FS_DIRSYNC_FL | \ 147 FS_NOCOMP_FL | FS_COMPR_FL | \ 148 FS_NOCOW_FL | FS_COW_FL)) 149 return -EOPNOTSUPP; 150 151 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 152 return -EINVAL; 153 154 if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) 155 return -EINVAL; 156 157 return 0; 158 } 159 160 static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 161 { 162 struct inode *inode = file->f_path.dentry->d_inode; 163 struct btrfs_inode *ip = BTRFS_I(inode); 164 struct btrfs_root *root = ip->root; 165 struct btrfs_trans_handle *trans; 166 unsigned int flags, oldflags; 167 int ret; 168 169 if (btrfs_root_readonly(root)) 170 return -EROFS; 171 172 if (copy_from_user(&flags, arg, sizeof(flags))) 173 return -EFAULT; 174 175 ret = check_flags(flags); 176 if (ret) 177 return ret; 178 179 if (!inode_owner_or_capable(inode)) 180 return -EACCES; 181 182 mutex_lock(&inode->i_mutex); 183 184 flags = btrfs_mask_flags(inode->i_mode, flags); 185 oldflags = btrfs_flags_to_ioctl(ip->flags); 186 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 187 if (!capable(CAP_LINUX_IMMUTABLE)) { 188 ret = -EPERM; 189 goto out_unlock; 190 } 191 } 192 193 ret = mnt_want_write(file->f_path.mnt); 194 if (ret) 195 goto out_unlock; 196 197 if (flags & FS_SYNC_FL) 198 ip->flags |= BTRFS_INODE_SYNC; 199 else 200 ip->flags &= ~BTRFS_INODE_SYNC; 201 if (flags & FS_IMMUTABLE_FL) 202 ip->flags |= BTRFS_INODE_IMMUTABLE; 203 else 204 ip->flags &= ~BTRFS_INODE_IMMUTABLE; 205 if (flags & FS_APPEND_FL) 206 ip->flags |= BTRFS_INODE_APPEND; 207 else 208 ip->flags &= ~BTRFS_INODE_APPEND; 209 if (flags & FS_NODUMP_FL) 210 ip->flags |= BTRFS_INODE_NODUMP; 211 else 212 ip->flags &= ~BTRFS_INODE_NODUMP; 213 if (flags & FS_NOATIME_FL) 214 ip->flags |= BTRFS_INODE_NOATIME; 215 else 216 ip->flags &= ~BTRFS_INODE_NOATIME; 217 if (flags & FS_DIRSYNC_FL) 218 ip->flags |= BTRFS_INODE_DIRSYNC; 219 else 220 ip->flags &= ~BTRFS_INODE_DIRSYNC; 221 222 /* 223 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 224 * flag may be changed automatically if compression code won't make 225 * things smaller. 226 */ 227 if (flags & FS_NOCOMP_FL) { 228 ip->flags &= ~BTRFS_INODE_COMPRESS; 229 ip->flags |= BTRFS_INODE_NOCOMPRESS; 230 } else if (flags & FS_COMPR_FL) { 231 ip->flags |= BTRFS_INODE_COMPRESS; 232 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 233 } 234 if (flags & FS_NOCOW_FL) 235 ip->flags |= BTRFS_INODE_NODATACOW; 236 else if (flags & FS_COW_FL) 237 ip->flags &= ~BTRFS_INODE_NODATACOW; 238 239 trans = btrfs_join_transaction(root, 1); 240 BUG_ON(IS_ERR(trans)); 241 242 ret = btrfs_update_inode(trans, root, inode); 243 BUG_ON(ret); 244 245 btrfs_update_iflags(inode); 246 inode->i_ctime = CURRENT_TIME; 247 btrfs_end_transaction(trans, root); 248 249 mnt_drop_write(file->f_path.mnt); 250 251 ret = 0; 252 out_unlock: 253 mutex_unlock(&inode->i_mutex); 254 return ret; 255 } 256 257 static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 258 { 259 struct inode *inode = file->f_path.dentry->d_inode; 260 261 return put_user(inode->i_generation, arg); 262 } 263 264 static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) 265 { 266 struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; 267 struct btrfs_fs_info *fs_info = root->fs_info; 268 struct btrfs_device *device; 269 struct request_queue *q; 270 struct fstrim_range range; 271 u64 minlen = ULLONG_MAX; 272 u64 num_devices = 0; 273 int ret; 274 275 if (!capable(CAP_SYS_ADMIN)) 276 return -EPERM; 277 278 mutex_lock(&fs_info->fs_devices->device_list_mutex); 279 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { 280 if (!device->bdev) 281 continue; 282 q = bdev_get_queue(device->bdev); 283 if (blk_queue_discard(q)) { 284 num_devices++; 285 minlen = min((u64)q->limits.discard_granularity, 286 minlen); 287 } 288 } 289 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 290 if (!num_devices) 291 return -EOPNOTSUPP; 292 293 if (copy_from_user(&range, arg, sizeof(range))) 294 return -EFAULT; 295 296 range.minlen = max(range.minlen, minlen); 297 ret = btrfs_trim_fs(root, &range); 298 if (ret < 0) 299 return ret; 300 301 if (copy_to_user(arg, &range, sizeof(range))) 302 return -EFAULT; 303 304 return 0; 305 } 306 307 static noinline int create_subvol(struct btrfs_root *root, 308 struct dentry *dentry, 309 char *name, int namelen, 310 u64 *async_transid) 311 { 312 struct btrfs_trans_handle *trans; 313 struct btrfs_key key; 314 struct btrfs_root_item root_item; 315 struct btrfs_inode_item *inode_item; 316 struct extent_buffer *leaf; 317 struct btrfs_root *new_root; 318 struct dentry *parent = dget_parent(dentry); 319 struct inode *dir; 320 int ret; 321 int err; 322 u64 objectid; 323 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 324 u64 index = 0; 325 326 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 327 0, &objectid); 328 if (ret) { 329 dput(parent); 330 return ret; 331 } 332 333 dir = parent->d_inode; 334 335 /* 336 * 1 - inode item 337 * 2 - refs 338 * 1 - root item 339 * 2 - dir items 340 */ 341 trans = btrfs_start_transaction(root, 6); 342 if (IS_ERR(trans)) { 343 dput(parent); 344 return PTR_ERR(trans); 345 } 346 347 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 348 0, objectid, NULL, 0, 0, 0); 349 if (IS_ERR(leaf)) { 350 ret = PTR_ERR(leaf); 351 goto fail; 352 } 353 354 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 355 btrfs_set_header_bytenr(leaf, leaf->start); 356 btrfs_set_header_generation(leaf, trans->transid); 357 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 358 btrfs_set_header_owner(leaf, objectid); 359 360 write_extent_buffer(leaf, root->fs_info->fsid, 361 (unsigned long)btrfs_header_fsid(leaf), 362 BTRFS_FSID_SIZE); 363 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 364 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 365 BTRFS_UUID_SIZE); 366 btrfs_mark_buffer_dirty(leaf); 367 368 inode_item = &root_item.inode; 369 memset(inode_item, 0, sizeof(*inode_item)); 370 inode_item->generation = cpu_to_le64(1); 371 inode_item->size = cpu_to_le64(3); 372 inode_item->nlink = cpu_to_le32(1); 373 inode_item->nbytes = cpu_to_le64(root->leafsize); 374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 375 376 root_item.flags = 0; 377 root_item.byte_limit = 0; 378 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); 379 380 btrfs_set_root_bytenr(&root_item, leaf->start); 381 btrfs_set_root_generation(&root_item, trans->transid); 382 btrfs_set_root_level(&root_item, 0); 383 btrfs_set_root_refs(&root_item, 1); 384 btrfs_set_root_used(&root_item, leaf->len); 385 btrfs_set_root_last_snapshot(&root_item, 0); 386 387 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 388 root_item.drop_level = 0; 389 390 btrfs_tree_unlock(leaf); 391 free_extent_buffer(leaf); 392 leaf = NULL; 393 394 btrfs_set_root_dirid(&root_item, new_dirid); 395 396 key.objectid = objectid; 397 key.offset = 0; 398 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 399 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 400 &root_item); 401 if (ret) 402 goto fail; 403 404 key.offset = (u64)-1; 405 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); 406 BUG_ON(IS_ERR(new_root)); 407 408 btrfs_record_root_in_trans(trans, new_root); 409 410 ret = btrfs_create_subvol_root(trans, new_root, new_dirid, 411 BTRFS_I(dir)->block_group); 412 /* 413 * insert the directory item 414 */ 415 ret = btrfs_set_inode_index(dir, &index); 416 BUG_ON(ret); 417 418 ret = btrfs_insert_dir_item(trans, root, 419 name, namelen, dir->i_ino, &key, 420 BTRFS_FT_DIR, index); 421 if (ret) 422 goto fail; 423 424 btrfs_i_size_write(dir, dir->i_size + namelen * 2); 425 ret = btrfs_update_inode(trans, root, dir); 426 BUG_ON(ret); 427 428 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 429 objectid, root->root_key.objectid, 430 dir->i_ino, index, name, namelen); 431 432 BUG_ON(ret); 433 434 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 435 fail: 436 dput(parent); 437 if (async_transid) { 438 *async_transid = trans->transid; 439 err = btrfs_commit_transaction_async(trans, root, 1); 440 } else { 441 err = btrfs_commit_transaction(trans, root); 442 } 443 if (err && !ret) 444 ret = err; 445 return ret; 446 } 447 448 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 449 char *name, int namelen, u64 *async_transid, 450 bool readonly) 451 { 452 struct inode *inode; 453 struct dentry *parent; 454 struct btrfs_pending_snapshot *pending_snapshot; 455 struct btrfs_trans_handle *trans; 456 int ret; 457 458 if (!root->ref_cows) 459 return -EINVAL; 460 461 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 462 if (!pending_snapshot) 463 return -ENOMEM; 464 465 btrfs_init_block_rsv(&pending_snapshot->block_rsv); 466 pending_snapshot->dentry = dentry; 467 pending_snapshot->root = root; 468 pending_snapshot->readonly = readonly; 469 470 trans = btrfs_start_transaction(root->fs_info->extent_root, 5); 471 if (IS_ERR(trans)) { 472 ret = PTR_ERR(trans); 473 goto fail; 474 } 475 476 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); 477 BUG_ON(ret); 478 479 list_add(&pending_snapshot->list, 480 &trans->transaction->pending_snapshots); 481 if (async_transid) { 482 *async_transid = trans->transid; 483 ret = btrfs_commit_transaction_async(trans, 484 root->fs_info->extent_root, 1); 485 } else { 486 ret = btrfs_commit_transaction(trans, 487 root->fs_info->extent_root); 488 } 489 BUG_ON(ret); 490 491 ret = pending_snapshot->error; 492 if (ret) 493 goto fail; 494 495 ret = btrfs_orphan_cleanup(pending_snapshot->snap); 496 if (ret) 497 goto fail; 498 499 parent = dget_parent(dentry); 500 inode = btrfs_lookup_dentry(parent->d_inode, dentry); 501 dput(parent); 502 if (IS_ERR(inode)) { 503 ret = PTR_ERR(inode); 504 goto fail; 505 } 506 BUG_ON(!inode); 507 d_instantiate(dentry, inode); 508 ret = 0; 509 fail: 510 kfree(pending_snapshot); 511 return ret; 512 } 513 514 /* copy of check_sticky in fs/namei.c() 515 * It's inline, so penalty for filesystems that don't use sticky bit is 516 * minimal. 517 */ 518 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 519 { 520 uid_t fsuid = current_fsuid(); 521 522 if (!(dir->i_mode & S_ISVTX)) 523 return 0; 524 if (inode->i_uid == fsuid) 525 return 0; 526 if (dir->i_uid == fsuid) 527 return 0; 528 return !capable(CAP_FOWNER); 529 } 530 531 /* copy of may_delete in fs/namei.c() 532 * Check whether we can remove a link victim from directory dir, check 533 * whether the type of victim is right. 534 * 1. We can't do it if dir is read-only (done in permission()) 535 * 2. We should have write and exec permissions on dir 536 * 3. We can't remove anything from append-only dir 537 * 4. We can't do anything with immutable dir (done in permission()) 538 * 5. If the sticky bit on dir is set we should either 539 * a. be owner of dir, or 540 * b. be owner of victim, or 541 * c. have CAP_FOWNER capability 542 * 6. If the victim is append-only or immutable we can't do antyhing with 543 * links pointing to it. 544 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 545 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 546 * 9. We can't remove a root or mountpoint. 547 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 548 * nfs_async_unlink(). 549 */ 550 551 static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) 552 { 553 int error; 554 555 if (!victim->d_inode) 556 return -ENOENT; 557 558 BUG_ON(victim->d_parent->d_inode != dir); 559 audit_inode_child(victim, dir); 560 561 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 562 if (error) 563 return error; 564 if (IS_APPEND(dir)) 565 return -EPERM; 566 if (btrfs_check_sticky(dir, victim->d_inode)|| 567 IS_APPEND(victim->d_inode)|| 568 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 569 return -EPERM; 570 if (isdir) { 571 if (!S_ISDIR(victim->d_inode->i_mode)) 572 return -ENOTDIR; 573 if (IS_ROOT(victim)) 574 return -EBUSY; 575 } else if (S_ISDIR(victim->d_inode->i_mode)) 576 return -EISDIR; 577 if (IS_DEADDIR(dir)) 578 return -ENOENT; 579 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 580 return -EBUSY; 581 return 0; 582 } 583 584 /* copy of may_create in fs/namei.c() */ 585 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 586 { 587 if (child->d_inode) 588 return -EEXIST; 589 if (IS_DEADDIR(dir)) 590 return -ENOENT; 591 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 592 } 593 594 /* 595 * Create a new subvolume below @parent. This is largely modeled after 596 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 597 * inside this filesystem so it's quite a bit simpler. 598 */ 599 static noinline int btrfs_mksubvol(struct path *parent, 600 char *name, int namelen, 601 struct btrfs_root *snap_src, 602 u64 *async_transid, bool readonly) 603 { 604 struct inode *dir = parent->dentry->d_inode; 605 struct dentry *dentry; 606 int error; 607 608 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 609 610 dentry = lookup_one_len(name, parent->dentry, namelen); 611 error = PTR_ERR(dentry); 612 if (IS_ERR(dentry)) 613 goto out_unlock; 614 615 error = -EEXIST; 616 if (dentry->d_inode) 617 goto out_dput; 618 619 error = mnt_want_write(parent->mnt); 620 if (error) 621 goto out_dput; 622 623 error = btrfs_may_create(dir, dentry); 624 if (error) 625 goto out_drop_write; 626 627 down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 628 629 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) 630 goto out_up_read; 631 632 if (snap_src) { 633 error = create_snapshot(snap_src, dentry, 634 name, namelen, async_transid, readonly); 635 } else { 636 error = create_subvol(BTRFS_I(dir)->root, dentry, 637 name, namelen, async_transid); 638 } 639 if (!error) 640 fsnotify_mkdir(dir, dentry); 641 out_up_read: 642 up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 643 out_drop_write: 644 mnt_drop_write(parent->mnt); 645 out_dput: 646 dput(dentry); 647 out_unlock: 648 mutex_unlock(&dir->i_mutex); 649 return error; 650 } 651 652 static int should_defrag_range(struct inode *inode, u64 start, u64 len, 653 int thresh, u64 *last_len, u64 *skip, 654 u64 *defrag_end) 655 { 656 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 657 struct extent_map *em = NULL; 658 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 659 int ret = 1; 660 661 662 if (thresh == 0) 663 thresh = 256 * 1024; 664 665 /* 666 * make sure that once we start defragging and extent, we keep on 667 * defragging it 668 */ 669 if (start < *defrag_end) 670 return 1; 671 672 *skip = 0; 673 674 /* 675 * hopefully we have this extent in the tree already, try without 676 * the full extent lock 677 */ 678 read_lock(&em_tree->lock); 679 em = lookup_extent_mapping(em_tree, start, len); 680 read_unlock(&em_tree->lock); 681 682 if (!em) { 683 /* get the big lock and read metadata off disk */ 684 lock_extent(io_tree, start, start + len - 1, GFP_NOFS); 685 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 686 unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); 687 688 if (IS_ERR(em)) 689 return 0; 690 } 691 692 /* this will cover holes, and inline extents */ 693 if (em->block_start >= EXTENT_MAP_LAST_BYTE) 694 ret = 0; 695 696 /* 697 * we hit a real extent, if it is big don't bother defragging it again 698 */ 699 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 700 ret = 0; 701 702 /* 703 * last_len ends up being a counter of how many bytes we've defragged. 704 * every time we choose not to defrag an extent, we reset *last_len 705 * so that the next tiny extent will force a defrag. 706 * 707 * The end result of this is that tiny extents before a single big 708 * extent will force at least part of that big extent to be defragged. 709 */ 710 if (ret) { 711 *last_len += len; 712 *defrag_end = extent_map_end(em); 713 } else { 714 *last_len = 0; 715 *skip = extent_map_end(em); 716 *defrag_end = 0; 717 } 718 719 free_extent_map(em); 720 return ret; 721 } 722 723 static int btrfs_defrag_file(struct file *file, 724 struct btrfs_ioctl_defrag_range_args *range) 725 { 726 struct inode *inode = fdentry(file)->d_inode; 727 struct btrfs_root *root = BTRFS_I(inode)->root; 728 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 729 struct btrfs_ordered_extent *ordered; 730 struct page *page; 731 struct btrfs_super_block *disk_super; 732 unsigned long last_index; 733 unsigned long ra_pages = root->fs_info->bdi.ra_pages; 734 unsigned long total_read = 0; 735 u64 features; 736 u64 page_start; 737 u64 page_end; 738 u64 last_len = 0; 739 u64 skip = 0; 740 u64 defrag_end = 0; 741 unsigned long i; 742 int ret; 743 int compress_type = BTRFS_COMPRESS_ZLIB; 744 745 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 746 if (range->compress_type > BTRFS_COMPRESS_TYPES) 747 return -EINVAL; 748 if (range->compress_type) 749 compress_type = range->compress_type; 750 } 751 752 if (inode->i_size == 0) 753 return 0; 754 755 if (range->start + range->len > range->start) { 756 last_index = min_t(u64, inode->i_size - 1, 757 range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 758 } else { 759 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; 760 } 761 762 i = range->start >> PAGE_CACHE_SHIFT; 763 while (i <= last_index) { 764 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 765 PAGE_CACHE_SIZE, 766 range->extent_thresh, 767 &last_len, &skip, 768 &defrag_end)) { 769 unsigned long next; 770 /* 771 * the should_defrag function tells us how much to skip 772 * bump our counter by the suggested amount 773 */ 774 next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 775 i = max(i + 1, next); 776 continue; 777 } 778 779 if (total_read % ra_pages == 0) { 780 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, 781 min(last_index, i + ra_pages - 1)); 782 } 783 total_read++; 784 mutex_lock(&inode->i_mutex); 785 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 786 BTRFS_I(inode)->force_compress = compress_type; 787 788 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 789 if (ret) 790 goto err_unlock; 791 again: 792 if (inode->i_size == 0 || 793 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { 794 ret = 0; 795 goto err_reservations; 796 } 797 798 page = grab_cache_page(inode->i_mapping, i); 799 if (!page) { 800 ret = -ENOMEM; 801 goto err_reservations; 802 } 803 804 if (!PageUptodate(page)) { 805 btrfs_readpage(NULL, page); 806 lock_page(page); 807 if (!PageUptodate(page)) { 808 unlock_page(page); 809 page_cache_release(page); 810 ret = -EIO; 811 goto err_reservations; 812 } 813 } 814 815 if (page->mapping != inode->i_mapping) { 816 unlock_page(page); 817 page_cache_release(page); 818 goto again; 819 } 820 821 wait_on_page_writeback(page); 822 823 if (PageDirty(page)) { 824 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 825 goto loop_unlock; 826 } 827 828 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 829 page_end = page_start + PAGE_CACHE_SIZE - 1; 830 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 831 832 ordered = btrfs_lookup_ordered_extent(inode, page_start); 833 if (ordered) { 834 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 835 unlock_page(page); 836 page_cache_release(page); 837 btrfs_start_ordered_extent(inode, ordered, 1); 838 btrfs_put_ordered_extent(ordered); 839 goto again; 840 } 841 set_page_extent_mapped(page); 842 843 /* 844 * this makes sure page_mkwrite is called on the 845 * page if it is dirtied again later 846 */ 847 clear_page_dirty_for_io(page); 848 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, 849 page_end, EXTENT_DIRTY | EXTENT_DELALLOC | 850 EXTENT_DO_ACCOUNTING, GFP_NOFS); 851 852 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); 853 ClearPageChecked(page); 854 set_page_dirty(page); 855 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 856 857 loop_unlock: 858 unlock_page(page); 859 page_cache_release(page); 860 mutex_unlock(&inode->i_mutex); 861 862 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 863 i++; 864 } 865 866 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) 867 filemap_flush(inode->i_mapping); 868 869 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 870 /* the filemap_flush will queue IO into the worker threads, but 871 * we have to make sure the IO is actually started and that 872 * ordered extents get created before we return 873 */ 874 atomic_inc(&root->fs_info->async_submit_draining); 875 while (atomic_read(&root->fs_info->nr_async_submits) || 876 atomic_read(&root->fs_info->async_delalloc_pages)) { 877 wait_event(root->fs_info->async_submit_wait, 878 (atomic_read(&root->fs_info->nr_async_submits) == 0 && 879 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 880 } 881 atomic_dec(&root->fs_info->async_submit_draining); 882 883 mutex_lock(&inode->i_mutex); 884 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 885 mutex_unlock(&inode->i_mutex); 886 } 887 888 disk_super = &root->fs_info->super_copy; 889 features = btrfs_super_incompat_flags(disk_super); 890 if (range->compress_type == BTRFS_COMPRESS_LZO) { 891 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 892 btrfs_set_super_incompat_flags(disk_super, features); 893 } 894 895 return 0; 896 897 err_reservations: 898 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 899 err_unlock: 900 mutex_unlock(&inode->i_mutex); 901 return ret; 902 } 903 904 static noinline int btrfs_ioctl_resize(struct btrfs_root *root, 905 void __user *arg) 906 { 907 u64 new_size; 908 u64 old_size; 909 u64 devid = 1; 910 struct btrfs_ioctl_vol_args *vol_args; 911 struct btrfs_trans_handle *trans; 912 struct btrfs_device *device = NULL; 913 char *sizestr; 914 char *devstr = NULL; 915 int ret = 0; 916 int mod = 0; 917 918 if (root->fs_info->sb->s_flags & MS_RDONLY) 919 return -EROFS; 920 921 if (!capable(CAP_SYS_ADMIN)) 922 return -EPERM; 923 924 vol_args = memdup_user(arg, sizeof(*vol_args)); 925 if (IS_ERR(vol_args)) 926 return PTR_ERR(vol_args); 927 928 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 929 930 mutex_lock(&root->fs_info->volume_mutex); 931 sizestr = vol_args->name; 932 devstr = strchr(sizestr, ':'); 933 if (devstr) { 934 char *end; 935 sizestr = devstr + 1; 936 *devstr = '\0'; 937 devstr = vol_args->name; 938 devid = simple_strtoull(devstr, &end, 10); 939 printk(KERN_INFO "resizing devid %llu\n", 940 (unsigned long long)devid); 941 } 942 device = btrfs_find_device(root, devid, NULL, NULL); 943 if (!device) { 944 printk(KERN_INFO "resizer unable to find device %llu\n", 945 (unsigned long long)devid); 946 ret = -EINVAL; 947 goto out_unlock; 948 } 949 if (!strcmp(sizestr, "max")) 950 new_size = device->bdev->bd_inode->i_size; 951 else { 952 if (sizestr[0] == '-') { 953 mod = -1; 954 sizestr++; 955 } else if (sizestr[0] == '+') { 956 mod = 1; 957 sizestr++; 958 } 959 new_size = memparse(sizestr, NULL); 960 if (new_size == 0) { 961 ret = -EINVAL; 962 goto out_unlock; 963 } 964 } 965 966 old_size = device->total_bytes; 967 968 if (mod < 0) { 969 if (new_size > old_size) { 970 ret = -EINVAL; 971 goto out_unlock; 972 } 973 new_size = old_size - new_size; 974 } else if (mod > 0) { 975 new_size = old_size + new_size; 976 } 977 978 if (new_size < 256 * 1024 * 1024) { 979 ret = -EINVAL; 980 goto out_unlock; 981 } 982 if (new_size > device->bdev->bd_inode->i_size) { 983 ret = -EFBIG; 984 goto out_unlock; 985 } 986 987 do_div(new_size, root->sectorsize); 988 new_size *= root->sectorsize; 989 990 printk(KERN_INFO "new size for %s is %llu\n", 991 device->name, (unsigned long long)new_size); 992 993 if (new_size > old_size) { 994 trans = btrfs_start_transaction(root, 0); 995 if (IS_ERR(trans)) { 996 ret = PTR_ERR(trans); 997 goto out_unlock; 998 } 999 ret = btrfs_grow_device(trans, device, new_size); 1000 btrfs_commit_transaction(trans, root); 1001 } else { 1002 ret = btrfs_shrink_device(device, new_size); 1003 } 1004 1005 out_unlock: 1006 mutex_unlock(&root->fs_info->volume_mutex); 1007 kfree(vol_args); 1008 return ret; 1009 } 1010 1011 static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1012 char *name, 1013 unsigned long fd, 1014 int subvol, 1015 u64 *transid, 1016 bool readonly) 1017 { 1018 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 1019 struct file *src_file; 1020 int namelen; 1021 int ret = 0; 1022 1023 if (root->fs_info->sb->s_flags & MS_RDONLY) 1024 return -EROFS; 1025 1026 namelen = strlen(name); 1027 if (strchr(name, '/')) { 1028 ret = -EINVAL; 1029 goto out; 1030 } 1031 1032 if (subvol) { 1033 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1034 NULL, transid, readonly); 1035 } else { 1036 struct inode *src_inode; 1037 src_file = fget(fd); 1038 if (!src_file) { 1039 ret = -EINVAL; 1040 goto out; 1041 } 1042 1043 src_inode = src_file->f_path.dentry->d_inode; 1044 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1045 printk(KERN_INFO "btrfs: Snapshot src from " 1046 "another FS\n"); 1047 ret = -EINVAL; 1048 fput(src_file); 1049 goto out; 1050 } 1051 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1052 BTRFS_I(src_inode)->root, 1053 transid, readonly); 1054 fput(src_file); 1055 } 1056 out: 1057 return ret; 1058 } 1059 1060 static noinline int btrfs_ioctl_snap_create(struct file *file, 1061 void __user *arg, int subvol) 1062 { 1063 struct btrfs_ioctl_vol_args *vol_args; 1064 int ret; 1065 1066 vol_args = memdup_user(arg, sizeof(*vol_args)); 1067 if (IS_ERR(vol_args)) 1068 return PTR_ERR(vol_args); 1069 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1070 1071 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1072 vol_args->fd, subvol, 1073 NULL, false); 1074 1075 kfree(vol_args); 1076 return ret; 1077 } 1078 1079 static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 1080 void __user *arg, int subvol) 1081 { 1082 struct btrfs_ioctl_vol_args_v2 *vol_args; 1083 int ret; 1084 u64 transid = 0; 1085 u64 *ptr = NULL; 1086 bool readonly = false; 1087 1088 vol_args = memdup_user(arg, sizeof(*vol_args)); 1089 if (IS_ERR(vol_args)) 1090 return PTR_ERR(vol_args); 1091 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1092 1093 if (vol_args->flags & 1094 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { 1095 ret = -EOPNOTSUPP; 1096 goto out; 1097 } 1098 1099 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 1100 ptr = &transid; 1101 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1102 readonly = true; 1103 1104 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1105 vol_args->fd, subvol, 1106 ptr, readonly); 1107 1108 if (ret == 0 && ptr && 1109 copy_to_user(arg + 1110 offsetof(struct btrfs_ioctl_vol_args_v2, 1111 transid), ptr, sizeof(*ptr))) 1112 ret = -EFAULT; 1113 out: 1114 kfree(vol_args); 1115 return ret; 1116 } 1117 1118 static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1119 void __user *arg) 1120 { 1121 struct inode *inode = fdentry(file)->d_inode; 1122 struct btrfs_root *root = BTRFS_I(inode)->root; 1123 int ret = 0; 1124 u64 flags = 0; 1125 1126 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1127 return -EINVAL; 1128 1129 down_read(&root->fs_info->subvol_sem); 1130 if (btrfs_root_readonly(root)) 1131 flags |= BTRFS_SUBVOL_RDONLY; 1132 up_read(&root->fs_info->subvol_sem); 1133 1134 if (copy_to_user(arg, &flags, sizeof(flags))) 1135 ret = -EFAULT; 1136 1137 return ret; 1138 } 1139 1140 static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1141 void __user *arg) 1142 { 1143 struct inode *inode = fdentry(file)->d_inode; 1144 struct btrfs_root *root = BTRFS_I(inode)->root; 1145 struct btrfs_trans_handle *trans; 1146 u64 root_flags; 1147 u64 flags; 1148 int ret = 0; 1149 1150 if (root->fs_info->sb->s_flags & MS_RDONLY) 1151 return -EROFS; 1152 1153 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1154 return -EINVAL; 1155 1156 if (copy_from_user(&flags, arg, sizeof(flags))) 1157 return -EFAULT; 1158 1159 if (flags & BTRFS_SUBVOL_CREATE_ASYNC) 1160 return -EINVAL; 1161 1162 if (flags & ~BTRFS_SUBVOL_RDONLY) 1163 return -EOPNOTSUPP; 1164 1165 if (!inode_owner_or_capable(inode)) 1166 return -EACCES; 1167 1168 down_write(&root->fs_info->subvol_sem); 1169 1170 /* nothing to do */ 1171 if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) 1172 goto out; 1173 1174 root_flags = btrfs_root_flags(&root->root_item); 1175 if (flags & BTRFS_SUBVOL_RDONLY) 1176 btrfs_set_root_flags(&root->root_item, 1177 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1178 else 1179 btrfs_set_root_flags(&root->root_item, 1180 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1181 1182 trans = btrfs_start_transaction(root, 1); 1183 if (IS_ERR(trans)) { 1184 ret = PTR_ERR(trans); 1185 goto out_reset; 1186 } 1187 1188 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1189 &root->root_key, &root->root_item); 1190 1191 btrfs_commit_transaction(trans, root); 1192 out_reset: 1193 if (ret) 1194 btrfs_set_root_flags(&root->root_item, root_flags); 1195 out: 1196 up_write(&root->fs_info->subvol_sem); 1197 return ret; 1198 } 1199 1200 /* 1201 * helper to check if the subvolume references other subvolumes 1202 */ 1203 static noinline int may_destroy_subvol(struct btrfs_root *root) 1204 { 1205 struct btrfs_path *path; 1206 struct btrfs_key key; 1207 int ret; 1208 1209 path = btrfs_alloc_path(); 1210 if (!path) 1211 return -ENOMEM; 1212 1213 key.objectid = root->root_key.objectid; 1214 key.type = BTRFS_ROOT_REF_KEY; 1215 key.offset = (u64)-1; 1216 1217 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, 1218 &key, path, 0, 0); 1219 if (ret < 0) 1220 goto out; 1221 BUG_ON(ret == 0); 1222 1223 ret = 0; 1224 if (path->slots[0] > 0) { 1225 path->slots[0]--; 1226 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1227 if (key.objectid == root->root_key.objectid && 1228 key.type == BTRFS_ROOT_REF_KEY) 1229 ret = -ENOTEMPTY; 1230 } 1231 out: 1232 btrfs_free_path(path); 1233 return ret; 1234 } 1235 1236 static noinline int key_in_sk(struct btrfs_key *key, 1237 struct btrfs_ioctl_search_key *sk) 1238 { 1239 struct btrfs_key test; 1240 int ret; 1241 1242 test.objectid = sk->min_objectid; 1243 test.type = sk->min_type; 1244 test.offset = sk->min_offset; 1245 1246 ret = btrfs_comp_cpu_keys(key, &test); 1247 if (ret < 0) 1248 return 0; 1249 1250 test.objectid = sk->max_objectid; 1251 test.type = sk->max_type; 1252 test.offset = sk->max_offset; 1253 1254 ret = btrfs_comp_cpu_keys(key, &test); 1255 if (ret > 0) 1256 return 0; 1257 return 1; 1258 } 1259 1260 static noinline int copy_to_sk(struct btrfs_root *root, 1261 struct btrfs_path *path, 1262 struct btrfs_key *key, 1263 struct btrfs_ioctl_search_key *sk, 1264 char *buf, 1265 unsigned long *sk_offset, 1266 int *num_found) 1267 { 1268 u64 found_transid; 1269 struct extent_buffer *leaf; 1270 struct btrfs_ioctl_search_header sh; 1271 unsigned long item_off; 1272 unsigned long item_len; 1273 int nritems; 1274 int i; 1275 int slot; 1276 int found = 0; 1277 int ret = 0; 1278 1279 leaf = path->nodes[0]; 1280 slot = path->slots[0]; 1281 nritems = btrfs_header_nritems(leaf); 1282 1283 if (btrfs_header_generation(leaf) > sk->max_transid) { 1284 i = nritems; 1285 goto advance_key; 1286 } 1287 found_transid = btrfs_header_generation(leaf); 1288 1289 for (i = slot; i < nritems; i++) { 1290 item_off = btrfs_item_ptr_offset(leaf, i); 1291 item_len = btrfs_item_size_nr(leaf, i); 1292 1293 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1294 item_len = 0; 1295 1296 if (sizeof(sh) + item_len + *sk_offset > 1297 BTRFS_SEARCH_ARGS_BUFSIZE) { 1298 ret = 1; 1299 goto overflow; 1300 } 1301 1302 btrfs_item_key_to_cpu(leaf, key, i); 1303 if (!key_in_sk(key, sk)) 1304 continue; 1305 1306 sh.objectid = key->objectid; 1307 sh.offset = key->offset; 1308 sh.type = key->type; 1309 sh.len = item_len; 1310 sh.transid = found_transid; 1311 1312 /* copy search result header */ 1313 memcpy(buf + *sk_offset, &sh, sizeof(sh)); 1314 *sk_offset += sizeof(sh); 1315 1316 if (item_len) { 1317 char *p = buf + *sk_offset; 1318 /* copy the item */ 1319 read_extent_buffer(leaf, p, 1320 item_off, item_len); 1321 *sk_offset += item_len; 1322 } 1323 found++; 1324 1325 if (*num_found >= sk->nr_items) 1326 break; 1327 } 1328 advance_key: 1329 ret = 0; 1330 if (key->offset < (u64)-1 && key->offset < sk->max_offset) 1331 key->offset++; 1332 else if (key->type < (u8)-1 && key->type < sk->max_type) { 1333 key->offset = 0; 1334 key->type++; 1335 } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { 1336 key->offset = 0; 1337 key->type = 0; 1338 key->objectid++; 1339 } else 1340 ret = 1; 1341 overflow: 1342 *num_found += found; 1343 return ret; 1344 } 1345 1346 static noinline int search_ioctl(struct inode *inode, 1347 struct btrfs_ioctl_search_args *args) 1348 { 1349 struct btrfs_root *root; 1350 struct btrfs_key key; 1351 struct btrfs_key max_key; 1352 struct btrfs_path *path; 1353 struct btrfs_ioctl_search_key *sk = &args->key; 1354 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 1355 int ret; 1356 int num_found = 0; 1357 unsigned long sk_offset = 0; 1358 1359 path = btrfs_alloc_path(); 1360 if (!path) 1361 return -ENOMEM; 1362 1363 if (sk->tree_id == 0) { 1364 /* search the root of the inode that was passed */ 1365 root = BTRFS_I(inode)->root; 1366 } else { 1367 key.objectid = sk->tree_id; 1368 key.type = BTRFS_ROOT_ITEM_KEY; 1369 key.offset = (u64)-1; 1370 root = btrfs_read_fs_root_no_name(info, &key); 1371 if (IS_ERR(root)) { 1372 printk(KERN_ERR "could not find root %llu\n", 1373 sk->tree_id); 1374 btrfs_free_path(path); 1375 return -ENOENT; 1376 } 1377 } 1378 1379 key.objectid = sk->min_objectid; 1380 key.type = sk->min_type; 1381 key.offset = sk->min_offset; 1382 1383 max_key.objectid = sk->max_objectid; 1384 max_key.type = sk->max_type; 1385 max_key.offset = sk->max_offset; 1386 1387 path->keep_locks = 1; 1388 1389 while(1) { 1390 ret = btrfs_search_forward(root, &key, &max_key, path, 0, 1391 sk->min_transid); 1392 if (ret != 0) { 1393 if (ret > 0) 1394 ret = 0; 1395 goto err; 1396 } 1397 ret = copy_to_sk(root, path, &key, sk, args->buf, 1398 &sk_offset, &num_found); 1399 btrfs_release_path(root, path); 1400 if (ret || num_found >= sk->nr_items) 1401 break; 1402 1403 } 1404 ret = 0; 1405 err: 1406 sk->nr_items = num_found; 1407 btrfs_free_path(path); 1408 return ret; 1409 } 1410 1411 static noinline int btrfs_ioctl_tree_search(struct file *file, 1412 void __user *argp) 1413 { 1414 struct btrfs_ioctl_search_args *args; 1415 struct inode *inode; 1416 int ret; 1417 1418 if (!capable(CAP_SYS_ADMIN)) 1419 return -EPERM; 1420 1421 args = memdup_user(argp, sizeof(*args)); 1422 if (IS_ERR(args)) 1423 return PTR_ERR(args); 1424 1425 inode = fdentry(file)->d_inode; 1426 ret = search_ioctl(inode, args); 1427 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1428 ret = -EFAULT; 1429 kfree(args); 1430 return ret; 1431 } 1432 1433 /* 1434 * Search INODE_REFs to identify path name of 'dirid' directory 1435 * in a 'tree_id' tree. and sets path name to 'name'. 1436 */ 1437 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 1438 u64 tree_id, u64 dirid, char *name) 1439 { 1440 struct btrfs_root *root; 1441 struct btrfs_key key; 1442 char *ptr; 1443 int ret = -1; 1444 int slot; 1445 int len; 1446 int total_len = 0; 1447 struct btrfs_inode_ref *iref; 1448 struct extent_buffer *l; 1449 struct btrfs_path *path; 1450 1451 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 1452 name[0]='\0'; 1453 return 0; 1454 } 1455 1456 path = btrfs_alloc_path(); 1457 if (!path) 1458 return -ENOMEM; 1459 1460 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; 1461 1462 key.objectid = tree_id; 1463 key.type = BTRFS_ROOT_ITEM_KEY; 1464 key.offset = (u64)-1; 1465 root = btrfs_read_fs_root_no_name(info, &key); 1466 if (IS_ERR(root)) { 1467 printk(KERN_ERR "could not find root %llu\n", tree_id); 1468 ret = -ENOENT; 1469 goto out; 1470 } 1471 1472 key.objectid = dirid; 1473 key.type = BTRFS_INODE_REF_KEY; 1474 key.offset = (u64)-1; 1475 1476 while(1) { 1477 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1478 if (ret < 0) 1479 goto out; 1480 1481 l = path->nodes[0]; 1482 slot = path->slots[0]; 1483 if (ret > 0 && slot > 0) 1484 slot--; 1485 btrfs_item_key_to_cpu(l, &key, slot); 1486 1487 if (ret > 0 && (key.objectid != dirid || 1488 key.type != BTRFS_INODE_REF_KEY)) { 1489 ret = -ENOENT; 1490 goto out; 1491 } 1492 1493 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 1494 len = btrfs_inode_ref_name_len(l, iref); 1495 ptr -= len + 1; 1496 total_len += len + 1; 1497 if (ptr < name) 1498 goto out; 1499 1500 *(ptr + len) = '/'; 1501 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); 1502 1503 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 1504 break; 1505 1506 btrfs_release_path(root, path); 1507 key.objectid = key.offset; 1508 key.offset = (u64)-1; 1509 dirid = key.objectid; 1510 1511 } 1512 if (ptr < name) 1513 goto out; 1514 memcpy(name, ptr, total_len); 1515 name[total_len]='\0'; 1516 ret = 0; 1517 out: 1518 btrfs_free_path(path); 1519 return ret; 1520 } 1521 1522 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 1523 void __user *argp) 1524 { 1525 struct btrfs_ioctl_ino_lookup_args *args; 1526 struct inode *inode; 1527 int ret; 1528 1529 if (!capable(CAP_SYS_ADMIN)) 1530 return -EPERM; 1531 1532 args = memdup_user(argp, sizeof(*args)); 1533 if (IS_ERR(args)) 1534 return PTR_ERR(args); 1535 1536 inode = fdentry(file)->d_inode; 1537 1538 if (args->treeid == 0) 1539 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 1540 1541 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 1542 args->treeid, args->objectid, 1543 args->name); 1544 1545 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1546 ret = -EFAULT; 1547 1548 kfree(args); 1549 return ret; 1550 } 1551 1552 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 1553 void __user *arg) 1554 { 1555 struct dentry *parent = fdentry(file); 1556 struct dentry *dentry; 1557 struct inode *dir = parent->d_inode; 1558 struct inode *inode; 1559 struct btrfs_root *root = BTRFS_I(dir)->root; 1560 struct btrfs_root *dest = NULL; 1561 struct btrfs_ioctl_vol_args *vol_args; 1562 struct btrfs_trans_handle *trans; 1563 int namelen; 1564 int ret; 1565 int err = 0; 1566 1567 vol_args = memdup_user(arg, sizeof(*vol_args)); 1568 if (IS_ERR(vol_args)) 1569 return PTR_ERR(vol_args); 1570 1571 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1572 namelen = strlen(vol_args->name); 1573 if (strchr(vol_args->name, '/') || 1574 strncmp(vol_args->name, "..", namelen) == 0) { 1575 err = -EINVAL; 1576 goto out; 1577 } 1578 1579 err = mnt_want_write(file->f_path.mnt); 1580 if (err) 1581 goto out; 1582 1583 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 1584 dentry = lookup_one_len(vol_args->name, parent, namelen); 1585 if (IS_ERR(dentry)) { 1586 err = PTR_ERR(dentry); 1587 goto out_unlock_dir; 1588 } 1589 1590 if (!dentry->d_inode) { 1591 err = -ENOENT; 1592 goto out_dput; 1593 } 1594 1595 inode = dentry->d_inode; 1596 dest = BTRFS_I(inode)->root; 1597 if (!capable(CAP_SYS_ADMIN)){ 1598 /* 1599 * Regular user. Only allow this with a special mount 1600 * option, when the user has write+exec access to the 1601 * subvol root, and when rmdir(2) would have been 1602 * allowed. 1603 * 1604 * Note that this is _not_ check that the subvol is 1605 * empty or doesn't contain data that we wouldn't 1606 * otherwise be able to delete. 1607 * 1608 * Users who want to delete empty subvols should try 1609 * rmdir(2). 1610 */ 1611 err = -EPERM; 1612 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 1613 goto out_dput; 1614 1615 /* 1616 * Do not allow deletion if the parent dir is the same 1617 * as the dir to be deleted. That means the ioctl 1618 * must be called on the dentry referencing the root 1619 * of the subvol, not a random directory contained 1620 * within it. 1621 */ 1622 err = -EINVAL; 1623 if (root == dest) 1624 goto out_dput; 1625 1626 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 1627 if (err) 1628 goto out_dput; 1629 1630 /* check if subvolume may be deleted by a non-root user */ 1631 err = btrfs_may_delete(dir, dentry, 1); 1632 if (err) 1633 goto out_dput; 1634 } 1635 1636 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1637 err = -EINVAL; 1638 goto out_dput; 1639 } 1640 1641 mutex_lock(&inode->i_mutex); 1642 err = d_invalidate(dentry); 1643 if (err) 1644 goto out_unlock; 1645 1646 down_write(&root->fs_info->subvol_sem); 1647 1648 err = may_destroy_subvol(dest); 1649 if (err) 1650 goto out_up_write; 1651 1652 trans = btrfs_start_transaction(root, 0); 1653 if (IS_ERR(trans)) { 1654 err = PTR_ERR(trans); 1655 goto out_up_write; 1656 } 1657 trans->block_rsv = &root->fs_info->global_block_rsv; 1658 1659 ret = btrfs_unlink_subvol(trans, root, dir, 1660 dest->root_key.objectid, 1661 dentry->d_name.name, 1662 dentry->d_name.len); 1663 BUG_ON(ret); 1664 1665 btrfs_record_root_in_trans(trans, dest); 1666 1667 memset(&dest->root_item.drop_progress, 0, 1668 sizeof(dest->root_item.drop_progress)); 1669 dest->root_item.drop_level = 0; 1670 btrfs_set_root_refs(&dest->root_item, 0); 1671 1672 if (!xchg(&dest->orphan_item_inserted, 1)) { 1673 ret = btrfs_insert_orphan_item(trans, 1674 root->fs_info->tree_root, 1675 dest->root_key.objectid); 1676 BUG_ON(ret); 1677 } 1678 1679 ret = btrfs_end_transaction(trans, root); 1680 BUG_ON(ret); 1681 inode->i_flags |= S_DEAD; 1682 out_up_write: 1683 up_write(&root->fs_info->subvol_sem); 1684 out_unlock: 1685 mutex_unlock(&inode->i_mutex); 1686 if (!err) { 1687 shrink_dcache_sb(root->fs_info->sb); 1688 btrfs_invalidate_inodes(dest); 1689 d_delete(dentry); 1690 } 1691 out_dput: 1692 dput(dentry); 1693 out_unlock_dir: 1694 mutex_unlock(&dir->i_mutex); 1695 mnt_drop_write(file->f_path.mnt); 1696 out: 1697 kfree(vol_args); 1698 return err; 1699 } 1700 1701 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 1702 { 1703 struct inode *inode = fdentry(file)->d_inode; 1704 struct btrfs_root *root = BTRFS_I(inode)->root; 1705 struct btrfs_ioctl_defrag_range_args *range; 1706 int ret; 1707 1708 if (btrfs_root_readonly(root)) 1709 return -EROFS; 1710 1711 ret = mnt_want_write(file->f_path.mnt); 1712 if (ret) 1713 return ret; 1714 1715 switch (inode->i_mode & S_IFMT) { 1716 case S_IFDIR: 1717 if (!capable(CAP_SYS_ADMIN)) { 1718 ret = -EPERM; 1719 goto out; 1720 } 1721 ret = btrfs_defrag_root(root, 0); 1722 if (ret) 1723 goto out; 1724 ret = btrfs_defrag_root(root->fs_info->extent_root, 0); 1725 break; 1726 case S_IFREG: 1727 if (!(file->f_mode & FMODE_WRITE)) { 1728 ret = -EINVAL; 1729 goto out; 1730 } 1731 1732 range = kzalloc(sizeof(*range), GFP_KERNEL); 1733 if (!range) { 1734 ret = -ENOMEM; 1735 goto out; 1736 } 1737 1738 if (argp) { 1739 if (copy_from_user(range, argp, 1740 sizeof(*range))) { 1741 ret = -EFAULT; 1742 kfree(range); 1743 goto out; 1744 } 1745 /* compression requires us to start the IO */ 1746 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 1747 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 1748 range->extent_thresh = (u32)-1; 1749 } 1750 } else { 1751 /* the rest are all set to zero by kzalloc */ 1752 range->len = (u64)-1; 1753 } 1754 ret = btrfs_defrag_file(file, range); 1755 kfree(range); 1756 break; 1757 default: 1758 ret = -EINVAL; 1759 } 1760 out: 1761 mnt_drop_write(file->f_path.mnt); 1762 return ret; 1763 } 1764 1765 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) 1766 { 1767 struct btrfs_ioctl_vol_args *vol_args; 1768 int ret; 1769 1770 if (!capable(CAP_SYS_ADMIN)) 1771 return -EPERM; 1772 1773 vol_args = memdup_user(arg, sizeof(*vol_args)); 1774 if (IS_ERR(vol_args)) 1775 return PTR_ERR(vol_args); 1776 1777 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1778 ret = btrfs_init_new_device(root, vol_args->name); 1779 1780 kfree(vol_args); 1781 return ret; 1782 } 1783 1784 static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) 1785 { 1786 struct btrfs_ioctl_vol_args *vol_args; 1787 int ret; 1788 1789 if (!capable(CAP_SYS_ADMIN)) 1790 return -EPERM; 1791 1792 if (root->fs_info->sb->s_flags & MS_RDONLY) 1793 return -EROFS; 1794 1795 vol_args = memdup_user(arg, sizeof(*vol_args)); 1796 if (IS_ERR(vol_args)) 1797 return PTR_ERR(vol_args); 1798 1799 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1800 ret = btrfs_rm_device(root, vol_args->name); 1801 1802 kfree(vol_args); 1803 return ret; 1804 } 1805 1806 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 1807 u64 off, u64 olen, u64 destoff) 1808 { 1809 struct inode *inode = fdentry(file)->d_inode; 1810 struct btrfs_root *root = BTRFS_I(inode)->root; 1811 struct file *src_file; 1812 struct inode *src; 1813 struct btrfs_trans_handle *trans; 1814 struct btrfs_path *path; 1815 struct extent_buffer *leaf; 1816 char *buf; 1817 struct btrfs_key key; 1818 u32 nritems; 1819 int slot; 1820 int ret; 1821 u64 len = olen; 1822 u64 bs = root->fs_info->sb->s_blocksize; 1823 u64 hint_byte; 1824 1825 /* 1826 * TODO: 1827 * - split compressed inline extents. annoying: we need to 1828 * decompress into destination's address_space (the file offset 1829 * may change, so source mapping won't do), then recompress (or 1830 * otherwise reinsert) a subrange. 1831 * - allow ranges within the same file to be cloned (provided 1832 * they don't overlap)? 1833 */ 1834 1835 /* the destination must be opened for writing */ 1836 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 1837 return -EINVAL; 1838 1839 if (btrfs_root_readonly(root)) 1840 return -EROFS; 1841 1842 ret = mnt_want_write(file->f_path.mnt); 1843 if (ret) 1844 return ret; 1845 1846 src_file = fget(srcfd); 1847 if (!src_file) { 1848 ret = -EBADF; 1849 goto out_drop_write; 1850 } 1851 1852 src = src_file->f_dentry->d_inode; 1853 1854 ret = -EINVAL; 1855 if (src == inode) 1856 goto out_fput; 1857 1858 /* the src must be open for reading */ 1859 if (!(src_file->f_mode & FMODE_READ)) 1860 goto out_fput; 1861 1862 ret = -EISDIR; 1863 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 1864 goto out_fput; 1865 1866 ret = -EXDEV; 1867 if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) 1868 goto out_fput; 1869 1870 ret = -ENOMEM; 1871 buf = vmalloc(btrfs_level_size(root, 0)); 1872 if (!buf) 1873 goto out_fput; 1874 1875 path = btrfs_alloc_path(); 1876 if (!path) { 1877 vfree(buf); 1878 goto out_fput; 1879 } 1880 path->reada = 2; 1881 1882 if (inode < src) { 1883 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 1884 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 1885 } else { 1886 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 1887 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 1888 } 1889 1890 /* determine range to clone */ 1891 ret = -EINVAL; 1892 if (off + len > src->i_size || off + len < off) 1893 goto out_unlock; 1894 if (len == 0) 1895 olen = len = src->i_size - off; 1896 /* if we extend to eof, continue to block boundary */ 1897 if (off + len == src->i_size) 1898 len = ALIGN(src->i_size, bs) - off; 1899 1900 /* verify the end result is block aligned */ 1901 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 1902 !IS_ALIGNED(destoff, bs)) 1903 goto out_unlock; 1904 1905 /* do any pending delalloc/csum calc on src, one way or 1906 another, and lock file content */ 1907 while (1) { 1908 struct btrfs_ordered_extent *ordered; 1909 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1910 ordered = btrfs_lookup_first_ordered_extent(src, off+len); 1911 if (!ordered && 1912 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, 1913 EXTENT_DELALLOC, 0, NULL)) 1914 break; 1915 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1916 if (ordered) 1917 btrfs_put_ordered_extent(ordered); 1918 btrfs_wait_ordered_range(src, off, len); 1919 } 1920 1921 /* clone data */ 1922 key.objectid = src->i_ino; 1923 key.type = BTRFS_EXTENT_DATA_KEY; 1924 key.offset = 0; 1925 1926 while (1) { 1927 /* 1928 * note the key will change type as we walk through the 1929 * tree. 1930 */ 1931 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1932 if (ret < 0) 1933 goto out; 1934 1935 nritems = btrfs_header_nritems(path->nodes[0]); 1936 if (path->slots[0] >= nritems) { 1937 ret = btrfs_next_leaf(root, path); 1938 if (ret < 0) 1939 goto out; 1940 if (ret > 0) 1941 break; 1942 nritems = btrfs_header_nritems(path->nodes[0]); 1943 } 1944 leaf = path->nodes[0]; 1945 slot = path->slots[0]; 1946 1947 btrfs_item_key_to_cpu(leaf, &key, slot); 1948 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 1949 key.objectid != src->i_ino) 1950 break; 1951 1952 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 1953 struct btrfs_file_extent_item *extent; 1954 int type; 1955 u32 size; 1956 struct btrfs_key new_key; 1957 u64 disko = 0, diskl = 0; 1958 u64 datao = 0, datal = 0; 1959 u8 comp; 1960 u64 endoff; 1961 1962 size = btrfs_item_size_nr(leaf, slot); 1963 read_extent_buffer(leaf, buf, 1964 btrfs_item_ptr_offset(leaf, slot), 1965 size); 1966 1967 extent = btrfs_item_ptr(leaf, slot, 1968 struct btrfs_file_extent_item); 1969 comp = btrfs_file_extent_compression(leaf, extent); 1970 type = btrfs_file_extent_type(leaf, extent); 1971 if (type == BTRFS_FILE_EXTENT_REG || 1972 type == BTRFS_FILE_EXTENT_PREALLOC) { 1973 disko = btrfs_file_extent_disk_bytenr(leaf, 1974 extent); 1975 diskl = btrfs_file_extent_disk_num_bytes(leaf, 1976 extent); 1977 datao = btrfs_file_extent_offset(leaf, extent); 1978 datal = btrfs_file_extent_num_bytes(leaf, 1979 extent); 1980 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 1981 /* take upper bound, may be compressed */ 1982 datal = btrfs_file_extent_ram_bytes(leaf, 1983 extent); 1984 } 1985 btrfs_release_path(root, path); 1986 1987 if (key.offset + datal <= off || 1988 key.offset >= off+len) 1989 goto next; 1990 1991 memcpy(&new_key, &key, sizeof(new_key)); 1992 new_key.objectid = inode->i_ino; 1993 if (off <= key.offset) 1994 new_key.offset = key.offset + destoff - off; 1995 else 1996 new_key.offset = destoff; 1997 1998 trans = btrfs_start_transaction(root, 1); 1999 if (IS_ERR(trans)) { 2000 ret = PTR_ERR(trans); 2001 goto out; 2002 } 2003 2004 if (type == BTRFS_FILE_EXTENT_REG || 2005 type == BTRFS_FILE_EXTENT_PREALLOC) { 2006 if (off > key.offset) { 2007 datao += off - key.offset; 2008 datal -= off - key.offset; 2009 } 2010 2011 if (key.offset + datal > off + len) 2012 datal = off + len - key.offset; 2013 2014 ret = btrfs_drop_extents(trans, inode, 2015 new_key.offset, 2016 new_key.offset + datal, 2017 &hint_byte, 1); 2018 BUG_ON(ret); 2019 2020 ret = btrfs_insert_empty_item(trans, root, path, 2021 &new_key, size); 2022 BUG_ON(ret); 2023 2024 leaf = path->nodes[0]; 2025 slot = path->slots[0]; 2026 write_extent_buffer(leaf, buf, 2027 btrfs_item_ptr_offset(leaf, slot), 2028 size); 2029 2030 extent = btrfs_item_ptr(leaf, slot, 2031 struct btrfs_file_extent_item); 2032 2033 /* disko == 0 means it's a hole */ 2034 if (!disko) 2035 datao = 0; 2036 2037 btrfs_set_file_extent_offset(leaf, extent, 2038 datao); 2039 btrfs_set_file_extent_num_bytes(leaf, extent, 2040 datal); 2041 if (disko) { 2042 inode_add_bytes(inode, datal); 2043 ret = btrfs_inc_extent_ref(trans, root, 2044 disko, diskl, 0, 2045 root->root_key.objectid, 2046 inode->i_ino, 2047 new_key.offset - datao); 2048 BUG_ON(ret); 2049 } 2050 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 2051 u64 skip = 0; 2052 u64 trim = 0; 2053 if (off > key.offset) { 2054 skip = off - key.offset; 2055 new_key.offset += skip; 2056 } 2057 2058 if (key.offset + datal > off+len) 2059 trim = key.offset + datal - (off+len); 2060 2061 if (comp && (skip || trim)) { 2062 ret = -EINVAL; 2063 btrfs_end_transaction(trans, root); 2064 goto out; 2065 } 2066 size -= skip + trim; 2067 datal -= skip + trim; 2068 2069 ret = btrfs_drop_extents(trans, inode, 2070 new_key.offset, 2071 new_key.offset + datal, 2072 &hint_byte, 1); 2073 BUG_ON(ret); 2074 2075 ret = btrfs_insert_empty_item(trans, root, path, 2076 &new_key, size); 2077 BUG_ON(ret); 2078 2079 if (skip) { 2080 u32 start = 2081 btrfs_file_extent_calc_inline_size(0); 2082 memmove(buf+start, buf+start+skip, 2083 datal); 2084 } 2085 2086 leaf = path->nodes[0]; 2087 slot = path->slots[0]; 2088 write_extent_buffer(leaf, buf, 2089 btrfs_item_ptr_offset(leaf, slot), 2090 size); 2091 inode_add_bytes(inode, datal); 2092 } 2093 2094 btrfs_mark_buffer_dirty(leaf); 2095 btrfs_release_path(root, path); 2096 2097 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2098 2099 /* 2100 * we round up to the block size at eof when 2101 * determining which extents to clone above, 2102 * but shouldn't round up the file size 2103 */ 2104 endoff = new_key.offset + datal; 2105 if (endoff > destoff+olen) 2106 endoff = destoff+olen; 2107 if (endoff > inode->i_size) 2108 btrfs_i_size_write(inode, endoff); 2109 2110 BTRFS_I(inode)->flags = BTRFS_I(src)->flags; 2111 ret = btrfs_update_inode(trans, root, inode); 2112 BUG_ON(ret); 2113 btrfs_end_transaction(trans, root); 2114 } 2115 next: 2116 btrfs_release_path(root, path); 2117 key.offset++; 2118 } 2119 ret = 0; 2120 out: 2121 btrfs_release_path(root, path); 2122 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2123 out_unlock: 2124 mutex_unlock(&src->i_mutex); 2125 mutex_unlock(&inode->i_mutex); 2126 vfree(buf); 2127 btrfs_free_path(path); 2128 out_fput: 2129 fput(src_file); 2130 out_drop_write: 2131 mnt_drop_write(file->f_path.mnt); 2132 return ret; 2133 } 2134 2135 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 2136 { 2137 struct btrfs_ioctl_clone_range_args args; 2138 2139 if (copy_from_user(&args, argp, sizeof(args))) 2140 return -EFAULT; 2141 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 2142 args.src_length, args.dest_offset); 2143 } 2144 2145 /* 2146 * there are many ways the trans_start and trans_end ioctls can lead 2147 * to deadlocks. They should only be used by applications that 2148 * basically own the machine, and have a very in depth understanding 2149 * of all the possible deadlocks and enospc problems. 2150 */ 2151 static long btrfs_ioctl_trans_start(struct file *file) 2152 { 2153 struct inode *inode = fdentry(file)->d_inode; 2154 struct btrfs_root *root = BTRFS_I(inode)->root; 2155 struct btrfs_trans_handle *trans; 2156 int ret; 2157 2158 ret = -EPERM; 2159 if (!capable(CAP_SYS_ADMIN)) 2160 goto out; 2161 2162 ret = -EINPROGRESS; 2163 if (file->private_data) 2164 goto out; 2165 2166 ret = -EROFS; 2167 if (btrfs_root_readonly(root)) 2168 goto out; 2169 2170 ret = mnt_want_write(file->f_path.mnt); 2171 if (ret) 2172 goto out; 2173 2174 mutex_lock(&root->fs_info->trans_mutex); 2175 root->fs_info->open_ioctl_trans++; 2176 mutex_unlock(&root->fs_info->trans_mutex); 2177 2178 ret = -ENOMEM; 2179 trans = btrfs_start_ioctl_transaction(root, 0); 2180 if (IS_ERR(trans)) 2181 goto out_drop; 2182 2183 file->private_data = trans; 2184 return 0; 2185 2186 out_drop: 2187 mutex_lock(&root->fs_info->trans_mutex); 2188 root->fs_info->open_ioctl_trans--; 2189 mutex_unlock(&root->fs_info->trans_mutex); 2190 mnt_drop_write(file->f_path.mnt); 2191 out: 2192 return ret; 2193 } 2194 2195 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 2196 { 2197 struct inode *inode = fdentry(file)->d_inode; 2198 struct btrfs_root *root = BTRFS_I(inode)->root; 2199 struct btrfs_root *new_root; 2200 struct btrfs_dir_item *di; 2201 struct btrfs_trans_handle *trans; 2202 struct btrfs_path *path; 2203 struct btrfs_key location; 2204 struct btrfs_disk_key disk_key; 2205 struct btrfs_super_block *disk_super; 2206 u64 features; 2207 u64 objectid = 0; 2208 u64 dir_id; 2209 2210 if (!capable(CAP_SYS_ADMIN)) 2211 return -EPERM; 2212 2213 if (copy_from_user(&objectid, argp, sizeof(objectid))) 2214 return -EFAULT; 2215 2216 if (!objectid) 2217 objectid = root->root_key.objectid; 2218 2219 location.objectid = objectid; 2220 location.type = BTRFS_ROOT_ITEM_KEY; 2221 location.offset = (u64)-1; 2222 2223 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 2224 if (IS_ERR(new_root)) 2225 return PTR_ERR(new_root); 2226 2227 if (btrfs_root_refs(&new_root->root_item) == 0) 2228 return -ENOENT; 2229 2230 path = btrfs_alloc_path(); 2231 if (!path) 2232 return -ENOMEM; 2233 path->leave_spinning = 1; 2234 2235 trans = btrfs_start_transaction(root, 1); 2236 if (IS_ERR(trans)) { 2237 btrfs_free_path(path); 2238 return PTR_ERR(trans); 2239 } 2240 2241 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 2242 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, 2243 dir_id, "default", 7, 1); 2244 if (IS_ERR_OR_NULL(di)) { 2245 btrfs_free_path(path); 2246 btrfs_end_transaction(trans, root); 2247 printk(KERN_ERR "Umm, you don't have the default dir item, " 2248 "this isn't going to work\n"); 2249 return -ENOENT; 2250 } 2251 2252 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 2253 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 2254 btrfs_mark_buffer_dirty(path->nodes[0]); 2255 btrfs_free_path(path); 2256 2257 disk_super = &root->fs_info->super_copy; 2258 features = btrfs_super_incompat_flags(disk_super); 2259 if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { 2260 features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; 2261 btrfs_set_super_incompat_flags(disk_super, features); 2262 } 2263 btrfs_end_transaction(trans, root); 2264 2265 return 0; 2266 } 2267 2268 static void get_block_group_info(struct list_head *groups_list, 2269 struct btrfs_ioctl_space_info *space) 2270 { 2271 struct btrfs_block_group_cache *block_group; 2272 2273 space->total_bytes = 0; 2274 space->used_bytes = 0; 2275 space->flags = 0; 2276 list_for_each_entry(block_group, groups_list, list) { 2277 space->flags = block_group->flags; 2278 space->total_bytes += block_group->key.offset; 2279 space->used_bytes += 2280 btrfs_block_group_used(&block_group->item); 2281 } 2282 } 2283 2284 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 2285 { 2286 struct btrfs_ioctl_space_args space_args; 2287 struct btrfs_ioctl_space_info space; 2288 struct btrfs_ioctl_space_info *dest; 2289 struct btrfs_ioctl_space_info *dest_orig; 2290 struct btrfs_ioctl_space_info __user *user_dest; 2291 struct btrfs_space_info *info; 2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2293 BTRFS_BLOCK_GROUP_SYSTEM, 2294 BTRFS_BLOCK_GROUP_METADATA, 2295 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; 2296 int num_types = 4; 2297 int alloc_size; 2298 int ret = 0; 2299 u64 slot_count = 0; 2300 int i, c; 2301 2302 if (copy_from_user(&space_args, 2303 (struct btrfs_ioctl_space_args __user *)arg, 2304 sizeof(space_args))) 2305 return -EFAULT; 2306 2307 for (i = 0; i < num_types; i++) { 2308 struct btrfs_space_info *tmp; 2309 2310 info = NULL; 2311 rcu_read_lock(); 2312 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2313 list) { 2314 if (tmp->flags == types[i]) { 2315 info = tmp; 2316 break; 2317 } 2318 } 2319 rcu_read_unlock(); 2320 2321 if (!info) 2322 continue; 2323 2324 down_read(&info->groups_sem); 2325 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 2326 if (!list_empty(&info->block_groups[c])) 2327 slot_count++; 2328 } 2329 up_read(&info->groups_sem); 2330 } 2331 2332 /* space_slots == 0 means they are asking for a count */ 2333 if (space_args.space_slots == 0) { 2334 space_args.total_spaces = slot_count; 2335 goto out; 2336 } 2337 2338 slot_count = min_t(u64, space_args.space_slots, slot_count); 2339 2340 alloc_size = sizeof(*dest) * slot_count; 2341 2342 /* we generally have at most 6 or so space infos, one for each raid 2343 * level. So, a whole page should be more than enough for everyone 2344 */ 2345 if (alloc_size > PAGE_CACHE_SIZE) 2346 return -ENOMEM; 2347 2348 space_args.total_spaces = 0; 2349 dest = kmalloc(alloc_size, GFP_NOFS); 2350 if (!dest) 2351 return -ENOMEM; 2352 dest_orig = dest; 2353 2354 /* now we have a buffer to copy into */ 2355 for (i = 0; i < num_types; i++) { 2356 struct btrfs_space_info *tmp; 2357 2358 if (!slot_count) 2359 break; 2360 2361 info = NULL; 2362 rcu_read_lock(); 2363 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2364 list) { 2365 if (tmp->flags == types[i]) { 2366 info = tmp; 2367 break; 2368 } 2369 } 2370 rcu_read_unlock(); 2371 2372 if (!info) 2373 continue; 2374 down_read(&info->groups_sem); 2375 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 2376 if (!list_empty(&info->block_groups[c])) { 2377 get_block_group_info(&info->block_groups[c], 2378 &space); 2379 memcpy(dest, &space, sizeof(space)); 2380 dest++; 2381 space_args.total_spaces++; 2382 slot_count--; 2383 } 2384 if (!slot_count) 2385 break; 2386 } 2387 up_read(&info->groups_sem); 2388 } 2389 2390 user_dest = (struct btrfs_ioctl_space_info *) 2391 (arg + sizeof(struct btrfs_ioctl_space_args)); 2392 2393 if (copy_to_user(user_dest, dest_orig, alloc_size)) 2394 ret = -EFAULT; 2395 2396 kfree(dest_orig); 2397 out: 2398 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 2399 ret = -EFAULT; 2400 2401 return ret; 2402 } 2403 2404 /* 2405 * there are many ways the trans_start and trans_end ioctls can lead 2406 * to deadlocks. They should only be used by applications that 2407 * basically own the machine, and have a very in depth understanding 2408 * of all the possible deadlocks and enospc problems. 2409 */ 2410 long btrfs_ioctl_trans_end(struct file *file) 2411 { 2412 struct inode *inode = fdentry(file)->d_inode; 2413 struct btrfs_root *root = BTRFS_I(inode)->root; 2414 struct btrfs_trans_handle *trans; 2415 2416 trans = file->private_data; 2417 if (!trans) 2418 return -EINVAL; 2419 file->private_data = NULL; 2420 2421 btrfs_end_transaction(trans, root); 2422 2423 mutex_lock(&root->fs_info->trans_mutex); 2424 root->fs_info->open_ioctl_trans--; 2425 mutex_unlock(&root->fs_info->trans_mutex); 2426 2427 mnt_drop_write(file->f_path.mnt); 2428 return 0; 2429 } 2430 2431 static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) 2432 { 2433 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 2434 struct btrfs_trans_handle *trans; 2435 u64 transid; 2436 int ret; 2437 2438 trans = btrfs_start_transaction(root, 0); 2439 if (IS_ERR(trans)) 2440 return PTR_ERR(trans); 2441 transid = trans->transid; 2442 ret = btrfs_commit_transaction_async(trans, root, 0); 2443 if (ret) { 2444 btrfs_end_transaction(trans, root); 2445 return ret; 2446 } 2447 2448 if (argp) 2449 if (copy_to_user(argp, &transid, sizeof(transid))) 2450 return -EFAULT; 2451 return 0; 2452 } 2453 2454 static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) 2455 { 2456 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 2457 u64 transid; 2458 2459 if (argp) { 2460 if (copy_from_user(&transid, argp, sizeof(transid))) 2461 return -EFAULT; 2462 } else { 2463 transid = 0; /* current trans */ 2464 } 2465 return btrfs_wait_for_commit(root, transid); 2466 } 2467 2468 long btrfs_ioctl(struct file *file, unsigned int 2469 cmd, unsigned long arg) 2470 { 2471 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 2472 void __user *argp = (void __user *)arg; 2473 2474 switch (cmd) { 2475 case FS_IOC_GETFLAGS: 2476 return btrfs_ioctl_getflags(file, argp); 2477 case FS_IOC_SETFLAGS: 2478 return btrfs_ioctl_setflags(file, argp); 2479 case FS_IOC_GETVERSION: 2480 return btrfs_ioctl_getversion(file, argp); 2481 case FITRIM: 2482 return btrfs_ioctl_fitrim(file, argp); 2483 case BTRFS_IOC_SNAP_CREATE: 2484 return btrfs_ioctl_snap_create(file, argp, 0); 2485 case BTRFS_IOC_SNAP_CREATE_V2: 2486 return btrfs_ioctl_snap_create_v2(file, argp, 0); 2487 case BTRFS_IOC_SUBVOL_CREATE: 2488 return btrfs_ioctl_snap_create(file, argp, 1); 2489 case BTRFS_IOC_SNAP_DESTROY: 2490 return btrfs_ioctl_snap_destroy(file, argp); 2491 case BTRFS_IOC_SUBVOL_GETFLAGS: 2492 return btrfs_ioctl_subvol_getflags(file, argp); 2493 case BTRFS_IOC_SUBVOL_SETFLAGS: 2494 return btrfs_ioctl_subvol_setflags(file, argp); 2495 case BTRFS_IOC_DEFAULT_SUBVOL: 2496 return btrfs_ioctl_default_subvol(file, argp); 2497 case BTRFS_IOC_DEFRAG: 2498 return btrfs_ioctl_defrag(file, NULL); 2499 case BTRFS_IOC_DEFRAG_RANGE: 2500 return btrfs_ioctl_defrag(file, argp); 2501 case BTRFS_IOC_RESIZE: 2502 return btrfs_ioctl_resize(root, argp); 2503 case BTRFS_IOC_ADD_DEV: 2504 return btrfs_ioctl_add_dev(root, argp); 2505 case BTRFS_IOC_RM_DEV: 2506 return btrfs_ioctl_rm_dev(root, argp); 2507 case BTRFS_IOC_BALANCE: 2508 return btrfs_balance(root->fs_info->dev_root); 2509 case BTRFS_IOC_CLONE: 2510 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 2511 case BTRFS_IOC_CLONE_RANGE: 2512 return btrfs_ioctl_clone_range(file, argp); 2513 case BTRFS_IOC_TRANS_START: 2514 return btrfs_ioctl_trans_start(file); 2515 case BTRFS_IOC_TRANS_END: 2516 return btrfs_ioctl_trans_end(file); 2517 case BTRFS_IOC_TREE_SEARCH: 2518 return btrfs_ioctl_tree_search(file, argp); 2519 case BTRFS_IOC_INO_LOOKUP: 2520 return btrfs_ioctl_ino_lookup(file, argp); 2521 case BTRFS_IOC_SPACE_INFO: 2522 return btrfs_ioctl_space_info(root, argp); 2523 case BTRFS_IOC_SYNC: 2524 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2525 return 0; 2526 case BTRFS_IOC_START_SYNC: 2527 return btrfs_ioctl_start_sync(file, argp); 2528 case BTRFS_IOC_WAIT_SYNC: 2529 return btrfs_ioctl_wait_sync(file, argp); 2530 } 2531 2532 return -ENOTTY; 2533 } 2534