/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/bit_spinlock.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"

/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
{
	if (S_ISDIR(mode))
		return flags;
	else if (S_ISREG(mode))
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}

/*
 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
 */
static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

	if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
		iflags |= FS_COMPR_FL;
	else if (flags & BTRFS_INODE_NOCOMPRESS)
		iflags |= FS_NOCOMP_FL;

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
void btrfs_update_iflags(struct inode *inode)
{
	struct btrfs_inode *ip = BTRFS_I(inode);

	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);

	if (ip->flags & BTRFS_INODE_SYNC)
		inode->i_flags |= S_SYNC;
	if (ip->flags & BTRFS_INODE_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	if (ip->flags & BTRFS_INODE_APPEND)
		inode->i_flags |= S_APPEND;
	if (ip->flags & BTRFS_INODE_NOATIME)
		inode->i_flags |= S_NOATIME;
	if (ip->flags & BTRFS_INODE_DIRSYNC)
		inode->i_flags |= S_DIRSYNC;
}

/*
 * Inherit flags from the parent inode.
 *
 * Unlike extN we don't have any flags we don't want to inherit currently.
 */
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
{
	unsigned int flags;

	if (!dir)
		return;

	flags = BTRFS_I(dir)->flags;

	if (S_ISREG(inode->i_mode))
		flags &= ~BTRFS_INODE_DIRSYNC;
	else if (!S_ISDIR(inode->i_mode))
		flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);

	BTRFS_I(inode)->flags = flags;
	btrfs_update_iflags(inode);
}

static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
	unsigned int flags = btrfs_flags_to_ioctl(ip->flags);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		return -EFAULT;
	return 0;
}

static int check_flags(unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
		      FS_NOATIME_FL | FS_NODUMP_FL | \
		      FS_SYNC_FL | FS_DIRSYNC_FL | \
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	return 0;
}

static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct btrfs_inode *ip = BTRFS_I(inode);
	struct btrfs_root *root = ip->root;
	struct btrfs_trans_handle *trans;
	unsigned int flags, oldflags;
	int ret;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&flags, arg, sizeof(flags)))
		return -EFAULT;

	ret = check_flags(flags);
	if (ret)
		return ret;

	if (!inode_owner_or_capable(inode))
		return -EACCES;

	mutex_lock(&inode->i_mutex);

	flags = btrfs_mask_flags(inode->i_mode, flags);
	oldflags = btrfs_flags_to_ioctl(ip->flags);
	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			ret = -EPERM;
			goto out_unlock;
		}
	}

	ret = mnt_want_write(file->f_path.mnt);
	if (ret)
		goto out_unlock;

	if (flags & FS_SYNC_FL)
		ip->flags |= BTRFS_INODE_SYNC;
	else
		ip->flags &= ~BTRFS_INODE_SYNC;
	if (flags & FS_IMMUTABLE_FL)
		ip->flags |= BTRFS_INODE_IMMUTABLE;
	else
		ip->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (flags & FS_APPEND_FL)
		ip->flags |= BTRFS_INODE_APPEND;
	else
		ip->flags &= ~BTRFS_INODE_APPEND;
	if (flags & FS_NODUMP_FL)
		ip->flags |= BTRFS_INODE_NODUMP;
	else
		ip->flags &= ~BTRFS_INODE_NODUMP;
	if (flags & FS_NOATIME_FL)
		ip->flags |= BTRFS_INODE_NOATIME;
	else
		ip->flags &= ~BTRFS_INODE_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		ip->flags |= BTRFS_INODE_DIRSYNC;
	else
		ip->flags &= ~BTRFS_INODE_DIRSYNC;
	if (flags & FS_NOCOW_FL)
		ip->flags |= BTRFS_INODE_NODATACOW;
	else
		ip->flags &= ~BTRFS_INODE_NODATACOW;

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (flags & FS_NOCOMP_FL) {
		ip->flags &= ~BTRFS_INODE_COMPRESS;
		ip->flags |= BTRFS_INODE_NOCOMPRESS;
	} else if (flags & FS_COMPR_FL) {
		ip->flags |= BTRFS_INODE_COMPRESS;
		ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
	} else {
		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	trans = btrfs_join_transaction(root, 1);
	BUG_ON(IS_ERR(trans));

	ret = btrfs_update_inode(trans, root, inode);
	BUG_ON(ret);

	btrfs_update_iflags(inode);
	inode->i_ctime = CURRENT_TIME;
	btrfs_end_transaction(trans, root);

	mnt_drop_write(file->f_path.mnt);

	ret = 0;
out_unlock:
	mutex_unlock(&inode->i_mutex);
	return ret;
}

static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	struct inode *inode = file->f_path.dentry->d_inode;

	return put_user(inode->i_generation, arg);
}

static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
	struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_device *device;
	struct request_queue *q;
	struct fstrim_range range;
	u64 minlen = ULLONG_MAX;
	u64 num_devices = 0;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
		if (!device->bdev)
			continue;
		q = bdev_get_queue(device->bdev);
		if (blk_queue_discard(q)) {
			num_devices++;
			minlen = min((u64)q->limits.discard_granularity,
				     minlen);
		}
	}
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
	if (!num_devices)
		return -EOPNOTSUPP;

	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;

	range.minlen = max(range.minlen, minlen);
	ret = btrfs_trim_fs(root, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(arg, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}

static noinline int create_subvol(struct btrfs_root *root,
				  struct dentry *dentry,
				  char *name, int namelen,
				  u64 *async_transid)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	struct btrfs_root_item root_item;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	struct btrfs_root *new_root;
	struct dentry *parent = dget_parent(dentry);
	struct inode *dir;
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
	u64 index = 0;

	ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
				       0, &objectid);
	if (ret) {
		dput(parent);
		return ret;
	}

	dir = parent->d_inode;

	/*
	 * 1 - inode item
	 * 2 - refs
	 * 1 - root item
	 * 2 - dir items
	 */
	trans = btrfs_start_transaction(root, 6);
	if (IS_ERR(trans)) {
		dput(parent);
		return PTR_ERR(trans);
	}

	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
				      0, objectid, NULL, 0, 0, 0);
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}

	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(leaf, leaf->start);
	btrfs_set_header_generation(leaf, trans->transid);
	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(leaf, objectid);

	write_extent_buffer(leaf, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(leaf),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
			    BTRFS_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	inode_item = &root_item.inode;
	memset(inode_item, 0, sizeof(*inode_item));
	inode_item->generation = cpu_to_le64(1);
	inode_item->size = cpu_to_le64(3);
	inode_item->nlink = cpu_to_le32(1);
	inode_item->nbytes = cpu_to_le64(root->leafsize);
	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);

	root_item.flags = 0;
	root_item.byte_limit = 0;
	inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);

	btrfs_set_root_bytenr(&root_item, leaf->start);
	btrfs_set_root_generation(&root_item, trans->transid);
	btrfs_set_root_level(&root_item, 0);
	btrfs_set_root_refs(&root_item, 1);
	btrfs_set_root_used(&root_item, leaf->len);
	btrfs_set_root_last_snapshot(&root_item, 0);

	memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
	root_item.drop_level = 0;

	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	leaf = NULL;

	btrfs_set_root_dirid(&root_item, new_dirid);

	key.objectid = objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				&root_item);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
	BUG_ON(IS_ERR(new_root));

	btrfs_record_root_in_trans(trans, new_root);

	ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
				       BTRFS_I(dir)->block_group);
	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(dir, &index);
	BUG_ON(ret);

	ret = btrfs_insert_dir_item(trans, root,
				    name, namelen, dir->i_ino, &key,
				    BTRFS_FT_DIR, index);
	if (ret)
		goto fail;

	btrfs_i_size_write(dir, dir->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, root, dir);
	BUG_ON(ret);

	ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
				 objectid, root->root_key.objectid,
				 dir->i_ino, index, name, namelen);

	BUG_ON(ret);

	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
	dput(parent);
	if (async_transid) {
		*async_transid = trans->transid;
		err = btrfs_commit_transaction_async(trans, root, 1);
	} else {
		err = btrfs_commit_transaction(trans, root);
	}
	if (err && !ret)
		ret = err;
	return ret;
}

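/*
 * Queue a snapshot of @root to be created at @dentry.
 *
 * The snapshot is not made here: a btrfs_pending_snapshot is added to the
 * current transaction's list of pending snapshots and the actual root copy
 * happens as part of the transaction commit.  Afterwards the new root is
 * looked up and the dentry is instantiated with its directory inode.
 */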
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
			   char *name, int namelen, u64 *async_transid,
			   bool readonly)
{
	struct inode *inode;
	struct dentry *parent;
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
	int ret;

	if (!root->ref_cows)
		return -EINVAL;

	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
	if (!pending_snapshot)
		return -ENOMEM;

	btrfs_init_block_rsv(&pending_snapshot->block_rsv);
	pending_snapshot->dentry = dentry;
	pending_snapshot->root = root;
	pending_snapshot->readonly = readonly;

	trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

	ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
	BUG_ON(ret);

	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
	if (async_transid) {
		*async_transid = trans->transid;
		ret = btrfs_commit_transaction_async(trans,
				     root->fs_info->extent_root, 1);
	} else {
		ret = btrfs_commit_transaction(trans,
					       root->fs_info->extent_root);
	}
	BUG_ON(ret);

	ret = pending_snapshot->error;
	if (ret)
		goto fail;

	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

	parent = dget_parent(dentry);
	inode = btrfs_lookup_dentry(parent->d_inode, dentry);
	dput(parent);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}
	BUG_ON(!inode);
	d_instantiate(dentry, inode);
	ret = 0;
fail:
	kfree(pending_snapshot);
	return ret;
}

/* copy of check_sticky in fs/namei.c()
 * It's inline, so penalty for filesystems that don't use sticky bit is
 * minimal.
 */
static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
{
	uid_t fsuid = current_fsuid();

	if (!(dir->i_mode & S_ISVTX))
		return 0;
	if (inode->i_uid == fsuid)
		return 0;
	if (dir->i_uid == fsuid)
		return 0;
	return !capable(CAP_FOWNER);
}

/* copy of may_delete in fs/namei.c()
 * Check whether we can remove a link victim from directory dir, check
 * whether the type of victim is right.
 * 1. We can't do it if dir is read-only (done in permission())
 * 2. We should have write and exec permissions on dir
 * 3. We can't remove anything from append-only dir
 * 4. We can't do anything with immutable dir (done in permission())
 * 5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
 * 6. If the victim is append-only or immutable we can't do anything with
 *    links pointing to it.
 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 * 9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */

static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
{
	int error;

	if (!victim->d_inode)
		return -ENOENT;

	BUG_ON(victim->d_parent->d_inode != dir);
	audit_inode_child(victim, dir);

	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;
	if (IS_APPEND(dir))
		return -EPERM;
	if (btrfs_check_sticky(dir, victim->d_inode) ||
	    IS_APPEND(victim->d_inode) ||
	    IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
		return -EPERM;
	if (isdir) {
		if (!S_ISDIR(victim->d_inode->i_mode))
			return -ENOTDIR;
		if (IS_ROOT(victim))
			return -EBUSY;
	} else if (S_ISDIR(victim->d_inode->i_mode))
		return -EISDIR;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
		return -EBUSY;
	return 0;
}

/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
	if (child->d_inode)
		return -EEXIST;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}

/*
 * Create a new subvolume below @parent. This is largely modeled after
 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 * inside this filesystem so it's quite a bit simpler.
 */
static noinline int btrfs_mksubvol(struct path *parent,
				   char *name, int namelen,
				   struct btrfs_root *snap_src,
				   u64 *async_transid, bool readonly)
{
	struct inode *dir = parent->dentry->d_inode;
	struct dentry *dentry;
	int error;

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);

	dentry = lookup_one_len(name, parent->dentry, namelen);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

	error = -EEXIST;
	if (dentry->d_inode)
		goto out_dput;

	error = mnt_want_write(parent->mnt);
	if (error)
		goto out_dput;

	error = btrfs_may_create(dir, dentry);
	if (error)
		goto out_drop_write;

	down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);

	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
		goto out_up_read;

	if (snap_src) {
		error = create_snapshot(snap_src, dentry,
					name, namelen, async_transid, readonly);
	} else {
		error = create_subvol(BTRFS_I(dir)->root, dentry,
				      name, namelen, async_transid);
	}
	if (!error)
		fsnotify_mkdir(dir, dentry);
out_up_read:
	up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
out_drop_write:
	mnt_drop_write(parent->mnt);
out_dput:
	dput(dentry);
out_unlock:
	mutex_unlock(&dir->i_mutex);
	return error;
}

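/*
 * Decide whether the file range [@start, @start + @len) should be
 * defragmented.
 *
 * Returns 1 if the backing extent should be rewritten: either we are already
 * in the middle of defragging it (*defrag_end) or it is smaller than @thresh
 * (or follows a run of small extents tracked in *last_len).  Returns 0 for
 * holes, inline extents and sufficiently large extents; in that case *skip
 * is set to the end of the extent so the caller can advance past it.
 */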
static int should_defrag_range(struct inode *inode, u64 start, u64 len,
			       int thresh, u64 *last_len, u64 *skip,
			       u64 *defrag_end)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	int ret = 1;

	if (thresh == 0)
		thresh = 256 * 1024;

	/*
	 * make sure that once we start defragging an extent, we keep on
	 * defragging it
	 */
	if (start < *defrag_end)
		return 1;

	*skip = 0;

	/*
	 * hopefully we have this extent in the tree already, try without
	 * the full extent lock
	 */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	if (!em) {
		/* get the big lock and read metadata off disk */
		lock_extent(io_tree, start, start + len - 1, GFP_NOFS);
		em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
		unlock_extent(io_tree, start, start + len - 1, GFP_NOFS);

		if (IS_ERR(em))
			return 0;
	}

	/* this will cover holes, and inline extents */
	if (em->block_start >= EXTENT_MAP_LAST_BYTE)
		ret = 0;

	/*
	 * we hit a real extent, if it is big don't bother defragging it again
	 */
	if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
		ret = 0;

	/*
	 * last_len ends up being a counter of how many bytes we've defragged.
	 * every time we choose not to defrag an extent, we reset *last_len
	 * so that the next tiny extent will force a defrag.
	 *
	 * The end result of this is that tiny extents before a single big
	 * extent will force at least part of that big extent to be defragged.
	 */
	if (ret) {
		*last_len += len;
		*defrag_end = extent_map_end(em);
	} else {
		*last_len = 0;
		*skip = extent_map_end(em);
		*defrag_end = 0;
	}

	free_extent_map(em);
	return ret;
}

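/*
 * Defragment (and optionally recompress) the pages described by @range.
 *
 * Each page selected by should_defrag_range() is read in, locked, and simply
 * marked dirty as delalloc again, so that the normal writeback path rewrites
 * it into new, larger extents.
 */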
static int btrfs_defrag_file(struct file *file,
			     struct btrfs_ioctl_defrag_range_args *range)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct page *page;
	struct btrfs_super_block *disk_super;
	unsigned long last_index;
	unsigned long ra_pages = root->fs_info->bdi.ra_pages;
	unsigned long total_read = 0;
	u64 features;
	u64 page_start;
	u64 page_end;
	u64 last_len = 0;
	u64 skip = 0;
	u64 defrag_end = 0;
	unsigned long i;
	int ret;
	int compress_type = BTRFS_COMPRESS_ZLIB;

	if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
		if (range->compress_type > BTRFS_COMPRESS_TYPES)
			return -EINVAL;
		if (range->compress_type)
			compress_type = range->compress_type;
	}

	if (inode->i_size == 0)
		return 0;

	if (range->start + range->len > range->start) {
		last_index = min_t(u64, inode->i_size - 1,
			 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
	} else {
		last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
	}

	i = range->start >> PAGE_CACHE_SHIFT;
	while (i <= last_index) {
		if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
					 PAGE_CACHE_SIZE,
					 range->extent_thresh,
					 &last_len, &skip,
					 &defrag_end)) {
			unsigned long next;
			/*
			 * the should_defrag function tells us how much to
			 * skip; bump our counter by the suggested amount
			 */
			next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
			i = max(i + 1, next);
			continue;
		}

		if (total_read % ra_pages == 0) {
			btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
				       min(last_index, i + ra_pages - 1));
		}
		total_read++;
		mutex_lock(&inode->i_mutex);
		if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
			BTRFS_I(inode)->force_compress = compress_type;

		ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
		if (ret)
			goto err_unlock;
again:
		if (inode->i_size == 0 ||
		    i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
			ret = 0;
			goto err_reservations;
		}

		page = grab_cache_page(inode->i_mapping, i);
		if (!page) {
			ret = -ENOMEM;
			goto err_reservations;
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				ret = -EIO;
				goto err_reservations;
			}
		}

		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			page_cache_release(page);
			goto again;
		}

		wait_on_page_writeback(page);

		if (PageDirty(page)) {
			btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
			goto loop_unlock;
		}

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;
		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		ordered = btrfs_lookup_ordered_extent(inode, page_start);
		if (ordered) {
			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
			unlock_page(page);
			page_cache_release(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			goto again;
		}
		set_page_extent_mapped(page);

		/*
		 * this makes sure page_mkwrite is called on the
		 * page if it is dirtied again later
		 */
		clear_page_dirty_for_io(page);
		clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
				  page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
				  EXTENT_DO_ACCOUNTING, GFP_NOFS);

		btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
		ClearPageChecked(page);
		set_page_dirty(page);
		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);

loop_unlock:
		unlock_page(page);
		page_cache_release(page);
		mutex_unlock(&inode->i_mutex);

		balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
		i++;
	}

	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
		filemap_flush(inode->i_mapping);

	if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
		/* the filemap_flush will queue IO into the worker threads, but
		 * we have to make sure the IO is actually started and that
		 * ordered extents get created before we return
		 */
		atomic_inc(&root->fs_info->async_submit_draining);
		while (atomic_read(&root->fs_info->nr_async_submits) ||
		       atomic_read(&root->fs_info->async_delalloc_pages)) {
			wait_event(root->fs_info->async_submit_wait,
			   (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
			    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
		}
		atomic_dec(&root->fs_info->async_submit_draining);

		mutex_lock(&inode->i_mutex);
		BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
		mutex_unlock(&inode->i_mutex);
	}

	disk_super = &root->fs_info->super_copy;
	features = btrfs_super_incompat_flags(disk_super);
	if (range->compress_type == BTRFS_COMPRESS_LZO) {
		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
		btrfs_set_super_incompat_flags(disk_super, features);
	}

	return 0;

err_reservations:
	btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
err_unlock:
	mutex_unlock(&inode->i_mutex);
	return ret;
}

static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
					void __user *arg)
{
	u64 new_size;
	u64 old_size;
	u64 devid = 1;
	struct btrfs_ioctl_vol_args *vol_args;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device = NULL;
	char *sizestr;
	char *devstr = NULL;
	int ret = 0;
	int mod = 0;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	mutex_lock(&root->fs_info->volume_mutex);
	sizestr = vol_args->name;
	devstr = strchr(sizestr, ':');
	if (devstr) {
		char *end;
		sizestr = devstr + 1;
		*devstr = '\0';
		devstr = vol_args->name;
		devid = simple_strtoull(devstr, &end, 10);
		printk(KERN_INFO "resizing devid %llu\n",
		       (unsigned long long)devid);
	}
	device = btrfs_find_device(root, devid, NULL, NULL);
	if (!device) {
		printk(KERN_INFO "resizer unable to find device %llu\n",
		       (unsigned long long)devid);
		ret = -EINVAL;
		goto out_unlock;
	}
	if (!strcmp(sizestr, "max"))
		new_size = device->bdev->bd_inode->i_size;
	else {
		if (sizestr[0] == '-') {
			mod = -1;
			sizestr++;
		} else if (sizestr[0] == '+') {
			mod = 1;
			sizestr++;
		}
		new_size = memparse(sizestr, NULL);
		if (new_size == 0) {
			ret = -EINVAL;
			goto out_unlock;
		}
	}

	old_size = device->total_bytes;

	if (mod < 0) {
		if (new_size > old_size) {
			ret = -EINVAL;
			goto out_unlock;
		}
		new_size = old_size - new_size;
	} else if (mod > 0) {
		new_size = old_size + new_size;
	}

	if (new_size < 256 * 1024 * 1024) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (new_size > device->bdev->bd_inode->i_size) {
		ret = -EFBIG;
		goto out_unlock;
	}

	do_div(new_size, root->sectorsize);
	new_size *= root->sectorsize;

	printk(KERN_INFO "new size for %s is %llu\n",
	       device->name, (unsigned long long)new_size);

	if (new_size > old_size) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out_unlock;
		}
		ret = btrfs_grow_device(trans, device, new_size);
		btrfs_commit_transaction(trans, root);
	} else {
		ret = btrfs_shrink_device(device, new_size);
	}

out_unlock:
	mutex_unlock(&root->fs_info->volume_mutex);
	kfree(vol_args);
	return ret;
}

static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
						    char *name,
						    unsigned long fd,
						    int subvol,
						    u64 *transid,
						    bool readonly)
{
	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
	struct file *src_file;
	int namelen;
	int ret = 0;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	namelen = strlen(name);
	if (strchr(name, '/')) {
		ret = -EINVAL;
		goto out;
	}

	if (subvol) {
		ret = btrfs_mksubvol(&file->f_path, name, namelen,
				     NULL, transid, readonly);
	} else {
		struct inode *src_inode;
		src_file = fget(fd);
		if (!src_file) {
			ret = -EINVAL;
			goto out;
		}

		src_inode = src_file->f_path.dentry->d_inode;
		if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
			printk(KERN_INFO "btrfs: Snapshot src from "
			       "another FS\n");
			ret = -EINVAL;
			fput(src_file);
			goto out;
		}
		ret = btrfs_mksubvol(&file->f_path, name, namelen,
				     BTRFS_I(src_inode)->root,
				     transid, readonly);
		fput(src_file);
	}
out:
	return ret;
}

static noinline int btrfs_ioctl_snap_create(struct file *file,
					    void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
					      vol_args->fd, subvol,
					      NULL, false);

	kfree(vol_args);
	return ret;
}

static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
					       void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args_v2 *vol_args;
	int ret;
	u64 transid = 0;
	u64 *ptr = NULL;
	bool readonly = false;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';

	if (vol_args->flags &
	    ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
		ptr = &transid;
	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
		readonly = true;

	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
					      vol_args->fd, subvol,
					      ptr, readonly);

	if (ret == 0 && ptr &&
	    copy_to_user(arg +
			 offsetof(struct btrfs_ioctl_vol_args_v2,
				  transid), ptr, sizeof(*ptr)))
		ret = -EFAULT;
out:
	kfree(vol_args);
	return ret;
}

static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
						void __user *arg)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	u64 flags = 0;

	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
		return -EINVAL;

	down_read(&root->fs_info->subvol_sem);
	if (btrfs_root_readonly(root))
		flags |= BTRFS_SUBVOL_RDONLY;
	up_read(&root->fs_info->subvol_sem);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		ret = -EFAULT;

	return ret;
}

static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
						void __user *arg)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	u64 root_flags;
	u64 flags;
	int ret = 0;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
		return -EINVAL;

	if (copy_from_user(&flags, arg, sizeof(flags)))
		return -EFAULT;

	if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
		return -EINVAL;

	if (flags & ~BTRFS_SUBVOL_RDONLY)
		return -EOPNOTSUPP;

	if (!inode_owner_or_capable(inode))
		return -EACCES;

	down_write(&root->fs_info->subvol_sem);

	/* nothing to do */
	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
		goto out;

	root_flags = btrfs_root_flags(&root->root_item);
	if (flags & BTRFS_SUBVOL_RDONLY)
		btrfs_set_root_flags(&root->root_item,
				     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
	else
		btrfs_set_root_flags(&root->root_item,
				     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_reset;
	}

	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&root->root_key, &root->root_item);

	btrfs_commit_transaction(trans, root);
out_reset:
	if (ret)
		btrfs_set_root_flags(&root->root_item, root_flags);
out:
	up_write(&root->fs_info->subvol_sem);
	return ret;
}

/*
 * helper to check if the subvolume references other subvolumes
 */
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = root->root_key.objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0);

	ret = 0;
	if (path->slots[0] > 0) {
		path->slots[0]--;
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == root->root_key.objectid &&
		    key.type == BTRFS_ROOT_REF_KEY)
			ret = -ENOTEMPTY;
	}
out:
	btrfs_free_path(path);
	return ret;
}

static noinline int key_in_sk(struct btrfs_key *key,
			      struct btrfs_ioctl_search_key *sk)
{
	struct btrfs_key test;
	int ret;

	test.objectid = sk->min_objectid;
	test.type = sk->min_type;
	test.offset = sk->min_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret < 0)
		return 0;

	test.objectid = sk->max_objectid;
	test.type = sk->max_type;
	test.offset = sk->max_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret > 0)
		return 0;
	return 1;
}

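/*
 * Copy the items that fall inside the search key range into the result
 * buffer.  Each result is a struct btrfs_ioctl_search_header immediately
 * followed by the raw item data, packed back to back at *sk_offset in @buf;
 * items too large for the buffer get a header with a zero length and no
 * data.  Returns 1 when the buffer is full or the key range is exhausted,
 * 0 when the caller should keep searching from the advanced @key.
 */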
static noinline int copy_to_sk(struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct btrfs_key *key,
			       struct btrfs_ioctl_search_key *sk,
			       char *buf,
			       unsigned long *sk_offset,
			       int *num_found)
{
	u64 found_transid;
	struct extent_buffer *leaf;
	struct btrfs_ioctl_search_header sh;
	unsigned long item_off;
	unsigned long item_len;
	int nritems;
	int i;
	int slot;
	int found = 0;
	int ret = 0;

	leaf = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(leaf);

	if (btrfs_header_generation(leaf) > sk->max_transid) {
		i = nritems;
		goto advance_key;
	}
	found_transid = btrfs_header_generation(leaf);

	for (i = slot; i < nritems; i++) {
		item_off = btrfs_item_ptr_offset(leaf, i);
		item_len = btrfs_item_size_nr(leaf, i);

		if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
			item_len = 0;

		if (sizeof(sh) + item_len + *sk_offset >
		    BTRFS_SEARCH_ARGS_BUFSIZE) {
			ret = 1;
			goto overflow;
		}

		btrfs_item_key_to_cpu(leaf, key, i);
		if (!key_in_sk(key, sk))
			continue;

		sh.objectid = key->objectid;
		sh.offset = key->offset;
		sh.type = key->type;
		sh.len = item_len;
		sh.transid = found_transid;

		/* copy search result header */
		memcpy(buf + *sk_offset, &sh, sizeof(sh));
		*sk_offset += sizeof(sh);

		if (item_len) {
			char *p = buf + *sk_offset;
			/* copy the item */
			read_extent_buffer(leaf, p,
					   item_off, item_len);
			*sk_offset += item_len;
		}
		found++;

		if (*num_found >= sk->nr_items)
			break;
	}
advance_key:
	ret = 0;
	if (key->offset < (u64)-1 && key->offset < sk->max_offset)
		key->offset++;
	else if (key->type < (u8)-1 && key->type < sk->max_type) {
		key->offset = 0;
		key->type++;
	} else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
		key->offset = 0;
		key->type = 0;
		key->objectid++;
	} else
		ret = 1;
overflow:
	*num_found += found;
	return ret;
}

static noinline int search_ioctl(struct inode *inode,
				 struct btrfs_ioctl_search_args *args)
{
	struct btrfs_root *root;
	struct btrfs_key key;
	struct btrfs_key max_key;
	struct btrfs_path *path;
	struct btrfs_ioctl_search_key *sk = &args->key;
	struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
	int ret;
	int num_found = 0;
	unsigned long sk_offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (sk->tree_id == 0) {
		/* search the root of the inode that was passed */
		root = BTRFS_I(inode)->root;
	} else {
		key.objectid = sk->tree_id;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		root = btrfs_read_fs_root_no_name(info, &key);
		if (IS_ERR(root)) {
			printk(KERN_ERR "could not find root %llu\n",
			       sk->tree_id);
			btrfs_free_path(path);
			return -ENOENT;
		}
	}

	key.objectid = sk->min_objectid;
	key.type = sk->min_type;
	key.offset = sk->min_offset;

	max_key.objectid = sk->max_objectid;
	max_key.type = sk->max_type;
	max_key.offset = sk->max_offset;

	path->keep_locks = 1;

	while (1) {
		ret = btrfs_search_forward(root, &key, &max_key, path, 0,
					   sk->min_transid);
		if (ret != 0) {
			if (ret > 0)
				ret = 0;
			goto err;
		}
		ret = copy_to_sk(root, path, &key, sk, args->buf,
				 &sk_offset, &num_found);
		btrfs_release_path(root, path);
		if (ret || num_found >= sk->nr_items)
			break;

	}
	ret = 0;
err:
	sk->nr_items = num_found;
	btrfs_free_path(path);
	return ret;
}

static noinline int btrfs_ioctl_tree_search(struct file *file,
					    void __user *argp)
{
	struct btrfs_ioctl_search_args *args;
	struct inode *inode;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	args = memdup_user(argp, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	inode = fdentry(file)->d_inode;
	ret = search_ioctl(inode, args);
	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
		ret = -EFAULT;
	kfree(args);
	return ret;
}

/*
 * Search INODE_REFs to identify the path name of the 'dirid' directory
 * in a 'tree_id' tree, and set the path name in 'name'.
 */
static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
				u64 tree_id, u64 dirid, char *name)
{
	struct btrfs_root *root;
	struct btrfs_key key;
	char *ptr;
	int ret = -1;
	int slot;
	int len;
	int total_len = 0;
	struct btrfs_inode_ref *iref;
	struct extent_buffer *l;
	struct btrfs_path *path;

	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
		name[0] = '\0';
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];

	key.objectid = tree_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(info, &key);
	if (IS_ERR(root)) {
		printk(KERN_ERR "could not find root %llu\n", tree_id);
		ret = -ENOENT;
		goto out;
	}

	key.objectid = dirid;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		l = path->nodes[0];
		slot = path->slots[0];
		if (ret > 0 && slot > 0)
			slot--;
		btrfs_item_key_to_cpu(l, &key, slot);

		if (ret > 0 && (key.objectid != dirid ||
				key.type != BTRFS_INODE_REF_KEY)) {
			ret = -ENOENT;
			goto out;
		}

		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(l, iref);
		ptr -= len + 1;
		total_len += len + 1;
		if (ptr < name)
			goto out;

		*(ptr + len) = '/';
		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);

		if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
			break;

		btrfs_release_path(root, path);
		key.objectid = key.offset;
		key.offset = (u64)-1;
		dirid = key.objectid;

	}
	if (ptr < name)
		goto out;
	memcpy(name, ptr, total_len);
	name[total_len] = '\0';
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

static noinline int btrfs_ioctl_ino_lookup(struct file *file,
					   void __user *argp)
{
	struct btrfs_ioctl_ino_lookup_args *args;
	struct inode *inode;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	args = memdup_user(argp, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	inode = fdentry(file)->d_inode;

	if (args->treeid == 0)
		args->treeid = BTRFS_I(inode)->root->root_key.objectid;

	ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
					args->treeid, args->objectid,
					args->name);

	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
		ret = -EFAULT;

	kfree(args);
	return ret;
}

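/*
 * Delete the subvolume or snapshot named in the ioctl argument from the
 * directory the ioctl was called on.
 *
 * Only the link in the parent directory is removed here; the subvolume's
 * root has its refcount set to zero and an orphan item inserted, so the
 * actual removal of the tree happens later, not in this ioctl.
 */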
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
					     void __user *arg)
{
	struct dentry *parent = fdentry(file);
	struct dentry *dentry;
	struct inode *dir = parent->d_inode;
	struct inode *inode;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *dest = NULL;
	struct btrfs_ioctl_vol_args *vol_args;
	struct btrfs_trans_handle *trans;
	int namelen;
	int ret;
	int err = 0;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	namelen = strlen(vol_args->name);
	if (strchr(vol_args->name, '/') ||
	    strncmp(vol_args->name, "..", namelen) == 0) {
		err = -EINVAL;
		goto out;
	}

	err = mnt_want_write(file->f_path.mnt);
	if (err)
		goto out;

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	dentry = lookup_one_len(vol_args->name, parent, namelen);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out_unlock_dir;
	}

	if (!dentry->d_inode) {
		err = -ENOENT;
		goto out_dput;
	}

	inode = dentry->d_inode;
	dest = BTRFS_I(inode)->root;
	if (!capable(CAP_SYS_ADMIN)) {
		/*
		 * Regular user.  Only allow this with a special mount
		 * option, when the user has write+exec access to the
		 * subvol root, and when rmdir(2) would have been
		 * allowed.
		 *
		 * Note that this is _not_ a check that the subvol is
		 * empty or doesn't contain data that we wouldn't
		 * otherwise be able to delete.
		 *
		 * Users who want to delete empty subvols should try
		 * rmdir(2).
		 */
		err = -EPERM;
		if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
			goto out_dput;

		/*
		 * Do not allow deletion if the parent dir is the same
		 * as the dir to be deleted.  That means the ioctl
		 * must be called on the dentry referencing the root
		 * of the subvol, not a random directory contained
		 * within it.
		 */
		err = -EINVAL;
		if (root == dest)
			goto out_dput;

		err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
		if (err)
			goto out_dput;

		/* check if subvolume may be deleted by a non-root user */
		err = btrfs_may_delete(dir, dentry, 1);
		if (err)
			goto out_dput;
	}

	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
		err = -EINVAL;
		goto out_dput;
	}

	mutex_lock(&inode->i_mutex);
	err = d_invalidate(dentry);
	if (err)
		goto out_unlock;

	down_write(&root->fs_info->subvol_sem);

	err = may_destroy_subvol(dest);
	if (err)
		goto out_up_write;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_up_write;
	}
	trans->block_rsv = &root->fs_info->global_block_rsv;

	ret = btrfs_unlink_subvol(trans, root, dir,
				  dest->root_key.objectid,
				  dentry->d_name.name,
				  dentry->d_name.len);
	BUG_ON(ret);

	btrfs_record_root_in_trans(trans, dest);

	memset(&dest->root_item.drop_progress, 0,
	       sizeof(dest->root_item.drop_progress));
	dest->root_item.drop_level = 0;
	btrfs_set_root_refs(&dest->root_item, 0);

	if (!xchg(&dest->orphan_item_inserted, 1)) {
		ret = btrfs_insert_orphan_item(trans,
					       root->fs_info->tree_root,
					       dest->root_key.objectid);
		BUG_ON(ret);
	}

	ret = btrfs_end_transaction(trans, root);
	BUG_ON(ret);
	inode->i_flags |= S_DEAD;
out_up_write:
	up_write(&root->fs_info->subvol_sem);
out_unlock:
	mutex_unlock(&inode->i_mutex);
	if (!err) {
		shrink_dcache_sb(root->fs_info->sb);
		btrfs_invalidate_inodes(dest);
		d_delete(dentry);
	}
out_dput:
	dput(dentry);
out_unlock_dir:
	mutex_unlock(&dir->i_mutex);
	mnt_drop_write(file->f_path.mnt);
out:
	kfree(vol_args);
	return err;
}

static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_defrag_range_args *range;
	int ret;

	if (btrfs_root_readonly(root))
		return -EROFS;

	ret = mnt_want_write(file->f_path.mnt);
	if (ret)
		return ret;

	switch (inode->i_mode & S_IFMT) {
	case S_IFDIR:
		if (!capable(CAP_SYS_ADMIN)) {
			ret = -EPERM;
			goto out;
		}
		ret = btrfs_defrag_root(root, 0);
		if (ret)
			goto out;
		ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
		break;
	case S_IFREG:
		if (!(file->f_mode & FMODE_WRITE)) {
			ret = -EINVAL;
			goto out;
		}

		range = kzalloc(sizeof(*range), GFP_KERNEL);
		if (!range) {
			ret = -ENOMEM;
			goto out;
		}

		if (argp) {
			if (copy_from_user(range, argp,
					   sizeof(*range))) {
				ret = -EFAULT;
				kfree(range);
				goto out;
			}
			/* compression requires us to start the IO */
			if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
				range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
				range->extent_thresh = (u32)-1;
			}
		} else {
			/* the rest are all set to zero by kzalloc */
			range->len = (u64)-1;
		}
		ret = btrfs_defrag_file(file, range);
		kfree(range);
		break;
	default:
		ret = -EINVAL;
	}
out:
	mnt_drop_write(file->f_path.mnt);
	return ret;
}

static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	ret = btrfs_init_new_device(root, vol_args->name);

	kfree(vol_args);
	return ret;
}

static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	ret = btrfs_rm_device(root, vol_args->name);

	kfree(vol_args);
	return ret;
}

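/*
 * Clone a range of the file behind @srcfd into the file this ioctl was
 * called on, sharing the underlying extents instead of copying the data.
 *
 * @off and @olen describe the source range (an @olen of zero means "to the
 * end of the source file") and @destoff is the offset in the destination.
 * Offsets and length must be block aligned; the source's file extent items
 * are copied into the destination inode and the extents they point to get
 * an extra reference.
 */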
static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
				       u64 off, u64 olen, u64 destoff)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct file *src_file;
	struct inode *src;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	char *buf;
	struct btrfs_key key;
	u32 nritems;
	int slot;
	int ret;
	u64 len = olen;
	u64 bs = root->fs_info->sb->s_blocksize;
	u64 hint_byte;

	/*
	 * TODO:
	 * - split compressed inline extents.  annoying: we need to
	 *   decompress into destination's address_space (the file offset
	 *   may change, so source mapping won't do), then recompress (or
	 *   otherwise reinsert) a subrange.
	 * - allow ranges within the same file to be cloned (provided
	 *   they don't overlap)?
	 */

	/* the destination must be opened for writing */
	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
		return -EINVAL;

	if (btrfs_root_readonly(root))
		return -EROFS;

	ret = mnt_want_write(file->f_path.mnt);
	if (ret)
		return ret;

	src_file = fget(srcfd);
	if (!src_file) {
		ret = -EBADF;
		goto out_drop_write;
	}

	src = src_file->f_dentry->d_inode;

	ret = -EINVAL;
	if (src == inode)
		goto out_fput;

	/* the src must be open for reading */
	if (!(src_file->f_mode & FMODE_READ))
		goto out_fput;

	ret = -EISDIR;
	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
		goto out_fput;

	ret = -EXDEV;
	if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
		goto out_fput;

	ret = -ENOMEM;
	buf = vmalloc(btrfs_level_size(root, 0));
	if (!buf)
		goto out_fput;

	path = btrfs_alloc_path();
	if (!path) {
		vfree(buf);
		goto out_fput;
	}
	path->reada = 2;

	if (inode < src) {
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
	} else {
		mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	}

	/* determine range to clone */
	ret = -EINVAL;
	if (off + len > src->i_size || off + len < off)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - off;
	/* if we extend to eof, continue to block boundary */
	if (off + len == src->i_size)
		len = ALIGN(src->i_size, bs) - off;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
	    !IS_ALIGNED(destoff, bs))
		goto out_unlock;

	/* do any pending delalloc/csum calc on src, one way or
	   another, and lock file content */
	while (1) {
		struct btrfs_ordered_extent *ordered;
		lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
		ordered = btrfs_lookup_first_ordered_extent(src, off+len);
		if (!ordered &&
		    !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
				    EXTENT_DELALLOC, 0, NULL))
			break;
		unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		btrfs_wait_ordered_range(src, off, len);
	}

	/* clone data */
	key.objectid = src->i_ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	while (1) {
		/*
		 * note the key will change type as we walk through the
		 * tree.
		 */
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		nritems = btrfs_header_nritems(path->nodes[0]);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			nritems = btrfs_header_nritems(path->nodes[0]);
		}
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != src->i_ino)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			struct btrfs_file_extent_item *extent;
			int type;
			u32 size;
			struct btrfs_key new_key;
			u64 disko = 0, diskl = 0;
			u64 datao = 0, datal = 0;
			u8 comp;
			u64 endoff;

			size = btrfs_item_size_nr(leaf, slot);
			read_extent_buffer(leaf, buf,
					   btrfs_item_ptr_offset(leaf, slot),
					   size);

			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			comp = btrfs_file_extent_compression(leaf, extent);
			type = btrfs_file_extent_type(leaf, extent);
			if (type == BTRFS_FILE_EXTENT_REG ||
			    type == BTRFS_FILE_EXTENT_PREALLOC) {
				disko = btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				diskl = btrfs_file_extent_disk_num_bytes(leaf,
									 extent);
				datao = btrfs_file_extent_offset(leaf, extent);
				datal = btrfs_file_extent_num_bytes(leaf,
								    extent);
			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
				/* take upper bound, may be compressed */
				datal = btrfs_file_extent_ram_bytes(leaf,
								    extent);
			}
			btrfs_release_path(root, path);

			if (key.offset + datal <= off ||
			    key.offset >= off+len)
				goto next;

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.objectid = inode->i_ino;
			if (off <= key.offset)
				new_key.offset = key.offset + destoff - off;
			else
				new_key.offset = destoff;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}

			if (type == BTRFS_FILE_EXTENT_REG ||
			    type == BTRFS_FILE_EXTENT_PREALLOC) {
				if (off > key.offset) {
					datao += off - key.offset;
					datal -= off - key.offset;
				}

				if (key.offset + datal > off + len)
					datal = off + len - key.offset;

				ret = btrfs_drop_extents(trans, inode,
							 new_key.offset,
							 new_key.offset + datal,
							 &hint_byte, 1);
				BUG_ON(ret);

				ret = btrfs_insert_empty_item(trans, root, path,
							      &new_key, size);
				BUG_ON(ret);

				leaf = path->nodes[0];
				slot = path->slots[0];
				write_extent_buffer(leaf, buf,
					    btrfs_item_ptr_offset(leaf, slot),
					    size);

				extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);

				/* disko == 0 means it's a hole */
				if (!disko)
					datao = 0;

				btrfs_set_file_extent_offset(leaf, extent,
							     datao);
				btrfs_set_file_extent_num_bytes(leaf, extent,
								datal);
				if (disko) {
					inode_add_bytes(inode, datal);
					ret = btrfs_inc_extent_ref(trans, root,
							disko, diskl, 0,
							root->root_key.objectid,
							inode->i_ino,
							new_key.offset - datao);
					BUG_ON(ret);
				}
			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
				u64 skip = 0;
				u64 trim = 0;
				if (off > key.offset) {
					skip = off - key.offset;
					new_key.offset += skip;
				}

				if (key.offset + datal > off+len)
					trim = key.offset + datal -
						(off+len);

				if (comp && (skip || trim)) {
					ret = -EINVAL;
					btrfs_end_transaction(trans, root);
					goto out;
				}
				size -= skip + trim;
				datal -= skip + trim;

				ret = btrfs_drop_extents(trans, inode,
							 new_key.offset,
							 new_key.offset + datal,
							 &hint_byte, 1);
				BUG_ON(ret);

				ret = btrfs_insert_empty_item(trans, root, path,
							      &new_key, size);
				BUG_ON(ret);

				if (skip) {
					u32 start =
					  btrfs_file_extent_calc_inline_size(0);
					memmove(buf+start, buf+start+skip,
						datal);
				}

				leaf = path->nodes[0];
				slot = path->slots[0];
				write_extent_buffer(leaf, buf,
					    btrfs_item_ptr_offset(leaf, slot),
					    size);
				inode_add_bytes(inode, datal);
			}

			btrfs_mark_buffer_dirty(leaf);
			btrfs_release_path(root, path);

			inode->i_mtime = inode->i_ctime = CURRENT_TIME;

			/*
			 * we round up to the block size at eof when
			 * determining which extents to clone above,
			 * but shouldn't round up the file size
			 */
			endoff = new_key.offset + datal;
			if (endoff > destoff+olen)
				endoff = destoff+olen;
			if (endoff > inode->i_size)
				btrfs_i_size_write(inode, endoff);

			BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
			ret = btrfs_update_inode(trans, root, inode);
			BUG_ON(ret);
			btrfs_end_transaction(trans, root);
		}
next:
		btrfs_release_path(root, path);
		key.offset++;
	}
	ret = 0;
out:
	btrfs_release_path(root, path);
	unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
out_unlock:
	mutex_unlock(&src->i_mutex);
	mutex_unlock(&inode->i_mutex);
	vfree(buf);
	btrfs_free_path(path);
out_fput:
	fput(src_file);
out_drop_write:
	mnt_drop_write(file->f_path.mnt);
	return ret;
}

static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
{
	struct btrfs_ioctl_clone_range_args args;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;
	return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
				 args.src_length, args.dest_offset);
}

/*
 * there are many ways the trans_start and trans_end ioctls can lead
 * to deadlocks.  They should only be used by applications that
 * basically own the machine, and have a very in depth understanding
 * of all the possible deadlocks and enospc problems.

/*
 * there are many ways the trans_start and trans_end ioctls can lead
 * to deadlocks. They should only be used by applications that
 * basically own the machine, and have a very in-depth understanding
 * of all the possible deadlocks and enospc problems.
 */
static long btrfs_ioctl_trans_start(struct file *file)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	int ret;

	ret = -EPERM;
	if (!capable(CAP_SYS_ADMIN))
		goto out;

	ret = -EINPROGRESS;
	if (file->private_data)
		goto out;

	ret = -EROFS;
	if (btrfs_root_readonly(root))
		goto out;

	ret = mnt_want_write(file->f_path.mnt);
	if (ret)
		goto out;

	mutex_lock(&root->fs_info->trans_mutex);
	root->fs_info->open_ioctl_trans++;
	mutex_unlock(&root->fs_info->trans_mutex);

	ret = -ENOMEM;
	trans = btrfs_start_ioctl_transaction(root, 0);
	if (IS_ERR(trans))
		goto out_drop;

	file->private_data = trans;
	return 0;

out_drop:
	mutex_lock(&root->fs_info->trans_mutex);
	root->fs_info->open_ioctl_trans--;
	mutex_unlock(&root->fs_info->trans_mutex);
	mnt_drop_write(file->f_path.mnt);
out:
	return ret;
}

/*
 * Make the given subvolume the one that gets mounted by default (the
 * subvolume the ioctl was called on if objectid is 0): repoint the "default"
 * dir item in the tree root at it and set the DEFAULT_SUBVOL incompat bit in
 * the super block.
 */
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root *new_root;
	struct btrfs_dir_item *di;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_super_block *disk_super;
	u64 features;
	u64 objectid = 0;
	u64 dir_id;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&objectid, argp, sizeof(objectid)))
		return -EFAULT;

	if (!objectid)
		objectid = root->root_key.objectid;

	location.objectid = objectid;
	location.type = BTRFS_ROOT_ITEM_KEY;
	location.offset = (u64)-1;

	new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	if (IS_ERR(new_root))
		return PTR_ERR(new_root);

	if (btrfs_root_refs(&new_root->root_item) == 0)
		return -ENOENT;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
	di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
				   dir_id, "default", 7, 1);
	if (IS_ERR_OR_NULL(di)) {
		btrfs_free_path(path);
		btrfs_end_transaction(trans, root);
		printk(KERN_ERR "Umm, you don't have the default dir item, "
		       "this isn't going to work\n");
		return -ENOENT;
	}

	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
	btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	disk_super = &root->fs_info->super_copy;
	features = btrfs_super_incompat_flags(disk_super);
	if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
		features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
		btrfs_set_super_incompat_flags(disk_super, features);
	}
	btrfs_end_transaction(trans, root);

	return 0;
}

/* Sum the space usage of all block groups on @groups_list into @space. */
static void get_block_group_info(struct list_head *groups_list,
				 struct btrfs_ioctl_space_info *space)
{
	struct btrfs_block_group_cache *block_group;

	space->total_bytes = 0;
	space->used_bytes = 0;
	space->flags = 0;
	list_for_each_entry(block_group, groups_list, list) {
		space->flags = block_group->flags;
		space->total_bytes += block_group->key.offset;
		space->used_bytes +=
			btrfs_block_group_used(&block_group->item);
	}
}
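
/*
 * BTRFS_IOC_SPACE_INFO: report space usage per (block group type, raid level)
 * combination.  The caller passes a struct btrfs_ioctl_space_args, optionally
 * followed by room for space_args.space_slots btrfs_ioctl_space_info entries.
 * A space_slots of 0 means "just report how many slots would be needed";
 * otherwise we fill in up to space_slots entries and set total_spaces to the
 * number actually written.
 */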
long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_space_args space_args;
	struct btrfs_ioctl_space_info space;
	struct btrfs_ioctl_space_info *dest;
	struct btrfs_ioctl_space_info *dest_orig;
	struct btrfs_ioctl_space_info __user *user_dest;
	struct btrfs_space_info *info;
	u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
		       BTRFS_BLOCK_GROUP_SYSTEM,
		       BTRFS_BLOCK_GROUP_METADATA,
		       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
	int num_types = 4;
	int alloc_size;
	int ret = 0;
	u64 slot_count = 0;
	int i, c;

	if (copy_from_user(&space_args,
			   (struct btrfs_ioctl_space_args __user *)arg,
			   sizeof(space_args)))
		return -EFAULT;

	for (i = 0; i < num_types; i++) {
		struct btrfs_space_info *tmp;

		info = NULL;
		rcu_read_lock();
		list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
					list) {
			if (tmp->flags == types[i]) {
				info = tmp;
				break;
			}
		}
		rcu_read_unlock();

		if (!info)
			continue;

		down_read(&info->groups_sem);
		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			if (!list_empty(&info->block_groups[c]))
				slot_count++;
		}
		up_read(&info->groups_sem);
	}

	/* space_slots == 0 means they are asking for a count */
	if (space_args.space_slots == 0) {
		space_args.total_spaces = slot_count;
		goto out;
	}

	slot_count = min_t(u64, space_args.space_slots, slot_count);

	alloc_size = sizeof(*dest) * slot_count;

	/* we generally have at most 6 or so space infos, one for each raid
	 * level. So, a whole page should be more than enough for everyone
	 */
	if (alloc_size > PAGE_CACHE_SIZE)
		return -ENOMEM;

	space_args.total_spaces = 0;
	dest = kmalloc(alloc_size, GFP_NOFS);
	if (!dest)
		return -ENOMEM;
	dest_orig = dest;

	/* now we have a buffer to copy into */
	for (i = 0; i < num_types; i++) {
		struct btrfs_space_info *tmp;

		if (!slot_count)
			break;

		info = NULL;
		rcu_read_lock();
		list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
					list) {
			if (tmp->flags == types[i]) {
				info = tmp;
				break;
			}
		}
		rcu_read_unlock();

		if (!info)
			continue;
		down_read(&info->groups_sem);
		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			if (!list_empty(&info->block_groups[c])) {
				get_block_group_info(&info->block_groups[c],
						     &space);
				memcpy(dest, &space, sizeof(space));
				dest++;
				space_args.total_spaces++;
				slot_count--;
			}
			if (!slot_count)
				break;
		}
		up_read(&info->groups_sem);
	}

	user_dest = (struct btrfs_ioctl_space_info __user *)
			(arg + sizeof(struct btrfs_ioctl_space_args));

	if (copy_to_user(user_dest, dest_orig, alloc_size))
		ret = -EFAULT;

	kfree(dest_orig);
out:
	if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
		ret = -EFAULT;

	return ret;
}
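
/*
 * A minimal sketch of the two-call pattern user space is expected to use with
 * the ioctl above.  Illustrative only, not kernel code; it assumes the
 * btrfs_ioctl_space_args layout from ioctl.h with its trailing spaces[]
 * array, and use() is just a placeholder for whatever the caller does with
 * each entry:
 *
 *	struct btrfs_ioctl_space_args count = { .space_slots = 0 };
 *	ioctl(fd, BTRFS_IOC_SPACE_INFO, &count);
 *
 *	struct btrfs_ioctl_space_args *args =
 *		malloc(sizeof(*args) + count.total_spaces *
 *		       sizeof(struct btrfs_ioctl_space_info));
 *	args->space_slots = count.total_spaces;
 *	ioctl(fd, BTRFS_IOC_SPACE_INFO, args);
 *	for (u64 i = 0; i < args->total_spaces; i++)
 *		use(args->spaces[i].flags, args->spaces[i].total_bytes,
 *		    args->spaces[i].used_bytes);
 */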

/*
 * there are many ways the trans_start and trans_end ioctls can lead
 * to deadlocks. They should only be used by applications that
 * basically own the machine, and have a very in-depth understanding
 * of all the possible deadlocks and enospc problems.
 */
long btrfs_ioctl_trans_end(struct file *file)
{
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;

	trans = file->private_data;
	if (!trans)
		return -EINVAL;
	file->private_data = NULL;

	btrfs_end_transaction(trans, root);

	mutex_lock(&root->fs_info->trans_mutex);
	root->fs_info->open_ioctl_trans--;
	mutex_unlock(&root->fs_info->trans_mutex);

	mnt_drop_write(file->f_path.mnt);
	return 0;
}

/*
 * Kick off an asynchronous commit of the current transaction and, when the
 * caller supplied a buffer, report the transid so it can later be waited on
 * with BTRFS_IOC_WAIT_SYNC.
 */
static noinline long btrfs_ioctl_start_sync(struct file *file,
					    void __user *argp)
{
	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
	struct btrfs_trans_handle *trans;
	u64 transid;
	int ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	transid = trans->transid;
	ret = btrfs_commit_transaction_async(trans, root, 0);
	if (ret) {
		btrfs_end_transaction(trans, root);
		return ret;
	}

	if (argp)
		if (copy_to_user(argp, &transid, sizeof(transid)))
			return -EFAULT;
	return 0;
}

/*
 * Wait until the transaction with the given transid (or whatever is
 * currently committing when no transid was supplied) is fully on disk.
 */
static noinline long btrfs_ioctl_wait_sync(struct file *file,
					   void __user *argp)
{
	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
	u64 transid;

	if (argp) {
		if (copy_from_user(&transid, argp, sizeof(transid)))
			return -EFAULT;
	} else {
		transid = 0;  /* current trans */
	}
	return btrfs_wait_for_commit(root, transid);
}
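
/*
 * Top-level ioctl dispatcher for btrfs files: decode the command number and
 * hand the argument off to the helper that implements it.  Anything we do
 * not recognize falls through to -ENOTTY.
 */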
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case FS_IOC_GETFLAGS:
		return btrfs_ioctl_getflags(file, argp);
	case FS_IOC_SETFLAGS:
		return btrfs_ioctl_setflags(file, argp);
	case FS_IOC_GETVERSION:
		return btrfs_ioctl_getversion(file, argp);
	case FITRIM:
		return btrfs_ioctl_fitrim(file, argp);
	case BTRFS_IOC_SNAP_CREATE:
		return btrfs_ioctl_snap_create(file, argp, 0);
	case BTRFS_IOC_SNAP_CREATE_V2:
		return btrfs_ioctl_snap_create_v2(file, argp, 0);
	case BTRFS_IOC_SUBVOL_CREATE:
		return btrfs_ioctl_snap_create(file, argp, 1);
	case BTRFS_IOC_SNAP_DESTROY:
		return btrfs_ioctl_snap_destroy(file, argp);
	case BTRFS_IOC_SUBVOL_GETFLAGS:
		return btrfs_ioctl_subvol_getflags(file, argp);
	case BTRFS_IOC_SUBVOL_SETFLAGS:
		return btrfs_ioctl_subvol_setflags(file, argp);
	case BTRFS_IOC_DEFAULT_SUBVOL:
		return btrfs_ioctl_default_subvol(file, argp);
	case BTRFS_IOC_DEFRAG:
		return btrfs_ioctl_defrag(file, NULL);
	case BTRFS_IOC_DEFRAG_RANGE:
		return btrfs_ioctl_defrag(file, argp);
	case BTRFS_IOC_RESIZE:
		return btrfs_ioctl_resize(root, argp);
	case BTRFS_IOC_ADD_DEV:
		return btrfs_ioctl_add_dev(root, argp);
	case BTRFS_IOC_RM_DEV:
		return btrfs_ioctl_rm_dev(root, argp);
	case BTRFS_IOC_BALANCE:
		return btrfs_balance(root->fs_info->dev_root);
	case BTRFS_IOC_CLONE:
		return btrfs_ioctl_clone(file, arg, 0, 0, 0);
	case BTRFS_IOC_CLONE_RANGE:
		return btrfs_ioctl_clone_range(file, argp);
	case BTRFS_IOC_TRANS_START:
		return btrfs_ioctl_trans_start(file);
	case BTRFS_IOC_TRANS_END:
		return btrfs_ioctl_trans_end(file);
	case BTRFS_IOC_TREE_SEARCH:
		return btrfs_ioctl_tree_search(file, argp);
	case BTRFS_IOC_INO_LOOKUP:
		return btrfs_ioctl_ino_lookup(file, argp);
	case BTRFS_IOC_SPACE_INFO:
		return btrfs_ioctl_space_info(root, argp);
	case BTRFS_IOC_SYNC:
		btrfs_sync_fs(file->f_dentry->d_sb, 1);
		return 0;
	case BTRFS_IOC_START_SYNC:
		return btrfs_ioctl_start_sync(file, argp);
	case BTRFS_IOC_WAIT_SYNC:
		return btrfs_ioctl_wait_sync(file, argp);
	}

	return -ENOTTY;
}