1 /* 2 * Copyright (C) 2012 Alexander Block. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/bsearch.h> 20 #include <linux/fs.h> 21 #include <linux/file.h> 22 #include <linux/sort.h> 23 #include <linux/mount.h> 24 #include <linux/xattr.h> 25 #include <linux/posix_acl_xattr.h> 26 #include <linux/radix-tree.h> 27 #include <linux/vmalloc.h> 28 #include <linux/string.h> 29 30 #include "send.h" 31 #include "backref.h" 32 #include "hash.h" 33 #include "locking.h" 34 #include "disk-io.h" 35 #include "btrfs_inode.h" 36 #include "transaction.h" 37 38 static int g_verbose = 0; 39 40 #define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__) 41 42 /* 43 * A fs_path is a helper to dynamically build path names with unknown size. 44 * It reallocates the internal buffer on demand. 45 * It allows fast adding of path elements on the right side (normal path) and 46 * fast adding to the left side (reversed path). A reversed path can also be 47 * unreversed if needed. 48 */ 49 struct fs_path { 50 union { 51 struct { 52 char *start; 53 char *end; 54 55 char *buf; 56 unsigned short buf_len:15; 57 unsigned short reversed:1; 58 char inline_buf[]; 59 }; 60 /* 61 * Average path length does not exceed 200 bytes, we'll have 62 * better packing in the slab and higher chance to satisfy 63 * a allocation later during send. 64 */ 65 char pad[256]; 66 }; 67 }; 68 #define FS_PATH_INLINE_SIZE \ 69 (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf)) 70 71 72 /* reused for each extent */ 73 struct clone_root { 74 struct btrfs_root *root; 75 u64 ino; 76 u64 offset; 77 78 u64 found_refs; 79 }; 80 81 #define SEND_CTX_MAX_NAME_CACHE_SIZE 128 82 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) 83 84 struct send_ctx { 85 struct file *send_filp; 86 loff_t send_off; 87 char *send_buf; 88 u32 send_size; 89 u32 send_max_size; 90 u64 total_send_size; 91 u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; 92 u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ 93 94 struct btrfs_root *send_root; 95 struct btrfs_root *parent_root; 96 struct clone_root *clone_roots; 97 int clone_roots_cnt; 98 99 /* current state of the compare_tree call */ 100 struct btrfs_path *left_path; 101 struct btrfs_path *right_path; 102 struct btrfs_key *cmp_key; 103 104 /* 105 * infos of the currently processed inode. In case of deleted inodes, 106 * these are the values from the deleted inode. 107 */ 108 u64 cur_ino; 109 u64 cur_inode_gen; 110 int cur_inode_new; 111 int cur_inode_new_gen; 112 int cur_inode_deleted; 113 u64 cur_inode_size; 114 u64 cur_inode_mode; 115 u64 cur_inode_rdev; 116 u64 cur_inode_last_extent; 117 118 u64 send_progress; 119 120 struct list_head new_refs; 121 struct list_head deleted_refs; 122 123 struct radix_tree_root name_cache; 124 struct list_head name_cache_list; 125 int name_cache_size; 126 127 struct file_ra_state ra; 128 129 char *read_buf; 130 131 /* 132 * We process inodes by their increasing order, so if before an 133 * incremental send we reverse the parent/child relationship of 134 * directories such that a directory with a lower inode number was 135 * the parent of a directory with a higher inode number, and the one 136 * becoming the new parent got renamed too, we can't rename/move the 137 * directory with lower inode number when we finish processing it - we 138 * must process the directory with higher inode number first, then 139 * rename/move it and then rename/move the directory with lower inode 140 * number. Example follows. 141 * 142 * Tree state when the first send was performed: 143 * 144 * . 145 * |-- a (ino 257) 146 * |-- b (ino 258) 147 * | 148 * | 149 * |-- c (ino 259) 150 * | |-- d (ino 260) 151 * | 152 * |-- c2 (ino 261) 153 * 154 * Tree state when the second (incremental) send is performed: 155 * 156 * . 157 * |-- a (ino 257) 158 * |-- b (ino 258) 159 * |-- c2 (ino 261) 160 * |-- d2 (ino 260) 161 * |-- cc (ino 259) 162 * 163 * The sequence of steps that lead to the second state was: 164 * 165 * mv /a/b/c/d /a/b/c2/d2 166 * mv /a/b/c /a/b/c2/d2/cc 167 * 168 * "c" has lower inode number, but we can't move it (2nd mv operation) 169 * before we move "d", which has higher inode number. 170 * 171 * So we just memorize which move/rename operations must be performed 172 * later when their respective parent is processed and moved/renamed. 173 */ 174 175 /* Indexed by parent directory inode number. */ 176 struct rb_root pending_dir_moves; 177 178 /* 179 * Reverse index, indexed by the inode number of a directory that 180 * is waiting for the move/rename of its immediate parent before its 181 * own move/rename can be performed. 182 */ 183 struct rb_root waiting_dir_moves; 184 185 /* 186 * A directory that is going to be rm'ed might have a child directory 187 * which is in the pending directory moves index above. In this case, 188 * the directory can only be removed after the move/rename of its child 189 * is performed. Example: 190 * 191 * Parent snapshot: 192 * 193 * . (ino 256) 194 * |-- a/ (ino 257) 195 * |-- b/ (ino 258) 196 * |-- c/ (ino 259) 197 * | |-- x/ (ino 260) 198 * | 199 * |-- y/ (ino 261) 200 * 201 * Send snapshot: 202 * 203 * . (ino 256) 204 * |-- a/ (ino 257) 205 * |-- b/ (ino 258) 206 * |-- YY/ (ino 261) 207 * |-- x/ (ino 260) 208 * 209 * Sequence of steps that lead to the send snapshot: 210 * rm -f /a/b/c/foo.txt 211 * mv /a/b/y /a/b/YY 212 * mv /a/b/c/x /a/b/YY 213 * rmdir /a/b/c 214 * 215 * When the child is processed, its move/rename is delayed until its 216 * parent is processed (as explained above), but all other operations 217 * like update utimes, chown, chgrp, etc, are performed and the paths 218 * that it uses for those operations must use the orphanized name of 219 * its parent (the directory we're going to rm later), so we need to 220 * memorize that name. 221 * 222 * Indexed by the inode number of the directory to be deleted. 223 */ 224 struct rb_root orphan_dirs; 225 }; 226 227 struct pending_dir_move { 228 struct rb_node node; 229 struct list_head list; 230 u64 parent_ino; 231 u64 ino; 232 u64 gen; 233 bool is_orphan; 234 struct list_head update_refs; 235 }; 236 237 struct waiting_dir_move { 238 struct rb_node node; 239 u64 ino; 240 /* 241 * There might be some directory that could not be removed because it 242 * was waiting for this directory inode to be moved first. Therefore 243 * after this directory is moved, we can try to rmdir the ino rmdir_ino. 244 */ 245 u64 rmdir_ino; 246 bool orphanized; 247 }; 248 249 struct orphan_dir_info { 250 struct rb_node node; 251 u64 ino; 252 u64 gen; 253 }; 254 255 struct name_cache_entry { 256 struct list_head list; 257 /* 258 * radix_tree has only 32bit entries but we need to handle 64bit inums. 259 * We use the lower 32bit of the 64bit inum to store it in the tree. If 260 * more then one inum would fall into the same entry, we use radix_list 261 * to store the additional entries. radix_list is also used to store 262 * entries where two entries have the same inum but different 263 * generations. 264 */ 265 struct list_head radix_list; 266 u64 ino; 267 u64 gen; 268 u64 parent_ino; 269 u64 parent_gen; 270 int ret; 271 int need_later_update; 272 int name_len; 273 char name[]; 274 }; 275 276 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); 277 278 static struct waiting_dir_move * 279 get_waiting_dir_move(struct send_ctx *sctx, u64 ino); 280 281 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); 282 283 static int need_send_hole(struct send_ctx *sctx) 284 { 285 return (sctx->parent_root && !sctx->cur_inode_new && 286 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted && 287 S_ISREG(sctx->cur_inode_mode)); 288 } 289 290 static void fs_path_reset(struct fs_path *p) 291 { 292 if (p->reversed) { 293 p->start = p->buf + p->buf_len - 1; 294 p->end = p->start; 295 *p->start = 0; 296 } else { 297 p->start = p->buf; 298 p->end = p->start; 299 *p->start = 0; 300 } 301 } 302 303 static struct fs_path *fs_path_alloc(void) 304 { 305 struct fs_path *p; 306 307 p = kmalloc(sizeof(*p), GFP_NOFS); 308 if (!p) 309 return NULL; 310 p->reversed = 0; 311 p->buf = p->inline_buf; 312 p->buf_len = FS_PATH_INLINE_SIZE; 313 fs_path_reset(p); 314 return p; 315 } 316 317 static struct fs_path *fs_path_alloc_reversed(void) 318 { 319 struct fs_path *p; 320 321 p = fs_path_alloc(); 322 if (!p) 323 return NULL; 324 p->reversed = 1; 325 fs_path_reset(p); 326 return p; 327 } 328 329 static void fs_path_free(struct fs_path *p) 330 { 331 if (!p) 332 return; 333 if (p->buf != p->inline_buf) 334 kfree(p->buf); 335 kfree(p); 336 } 337 338 static int fs_path_len(struct fs_path *p) 339 { 340 return p->end - p->start; 341 } 342 343 static int fs_path_ensure_buf(struct fs_path *p, int len) 344 { 345 char *tmp_buf; 346 int path_len; 347 int old_buf_len; 348 349 len++; 350 351 if (p->buf_len >= len) 352 return 0; 353 354 if (len > PATH_MAX) { 355 WARN_ON(1); 356 return -ENOMEM; 357 } 358 359 path_len = p->end - p->start; 360 old_buf_len = p->buf_len; 361 362 /* 363 * First time the inline_buf does not suffice 364 */ 365 if (p->buf == p->inline_buf) { 366 tmp_buf = kmalloc(len, GFP_NOFS); 367 if (tmp_buf) 368 memcpy(tmp_buf, p->buf, old_buf_len); 369 } else { 370 tmp_buf = krealloc(p->buf, len, GFP_NOFS); 371 } 372 if (!tmp_buf) 373 return -ENOMEM; 374 p->buf = tmp_buf; 375 /* 376 * The real size of the buffer is bigger, this will let the fast path 377 * happen most of the time 378 */ 379 p->buf_len = ksize(p->buf); 380 381 if (p->reversed) { 382 tmp_buf = p->buf + old_buf_len - path_len - 1; 383 p->end = p->buf + p->buf_len - 1; 384 p->start = p->end - path_len; 385 memmove(p->start, tmp_buf, path_len + 1); 386 } else { 387 p->start = p->buf; 388 p->end = p->start + path_len; 389 } 390 return 0; 391 } 392 393 static int fs_path_prepare_for_add(struct fs_path *p, int name_len, 394 char **prepared) 395 { 396 int ret; 397 int new_len; 398 399 new_len = p->end - p->start + name_len; 400 if (p->start != p->end) 401 new_len++; 402 ret = fs_path_ensure_buf(p, new_len); 403 if (ret < 0) 404 goto out; 405 406 if (p->reversed) { 407 if (p->start != p->end) 408 *--p->start = '/'; 409 p->start -= name_len; 410 *prepared = p->start; 411 } else { 412 if (p->start != p->end) 413 *p->end++ = '/'; 414 *prepared = p->end; 415 p->end += name_len; 416 *p->end = 0; 417 } 418 419 out: 420 return ret; 421 } 422 423 static int fs_path_add(struct fs_path *p, const char *name, int name_len) 424 { 425 int ret; 426 char *prepared; 427 428 ret = fs_path_prepare_for_add(p, name_len, &prepared); 429 if (ret < 0) 430 goto out; 431 memcpy(prepared, name, name_len); 432 433 out: 434 return ret; 435 } 436 437 static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) 438 { 439 int ret; 440 char *prepared; 441 442 ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); 443 if (ret < 0) 444 goto out; 445 memcpy(prepared, p2->start, p2->end - p2->start); 446 447 out: 448 return ret; 449 } 450 451 static int fs_path_add_from_extent_buffer(struct fs_path *p, 452 struct extent_buffer *eb, 453 unsigned long off, int len) 454 { 455 int ret; 456 char *prepared; 457 458 ret = fs_path_prepare_for_add(p, len, &prepared); 459 if (ret < 0) 460 goto out; 461 462 read_extent_buffer(eb, prepared, off, len); 463 464 out: 465 return ret; 466 } 467 468 static int fs_path_copy(struct fs_path *p, struct fs_path *from) 469 { 470 int ret; 471 472 p->reversed = from->reversed; 473 fs_path_reset(p); 474 475 ret = fs_path_add_path(p, from); 476 477 return ret; 478 } 479 480 481 static void fs_path_unreverse(struct fs_path *p) 482 { 483 char *tmp; 484 int len; 485 486 if (!p->reversed) 487 return; 488 489 tmp = p->start; 490 len = p->end - p->start; 491 p->start = p->buf; 492 p->end = p->start + len; 493 memmove(p->start, tmp, len + 1); 494 p->reversed = 0; 495 } 496 497 static struct btrfs_path *alloc_path_for_send(void) 498 { 499 struct btrfs_path *path; 500 501 path = btrfs_alloc_path(); 502 if (!path) 503 return NULL; 504 path->search_commit_root = 1; 505 path->skip_locking = 1; 506 path->need_commit_sem = 1; 507 return path; 508 } 509 510 static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) 511 { 512 int ret; 513 mm_segment_t old_fs; 514 u32 pos = 0; 515 516 old_fs = get_fs(); 517 set_fs(KERNEL_DS); 518 519 while (pos < len) { 520 ret = vfs_write(filp, (__force const char __user *)buf + pos, 521 len - pos, off); 522 /* TODO handle that correctly */ 523 /*if (ret == -ERESTARTSYS) { 524 continue; 525 }*/ 526 if (ret < 0) 527 goto out; 528 if (ret == 0) { 529 ret = -EIO; 530 goto out; 531 } 532 pos += ret; 533 } 534 535 ret = 0; 536 537 out: 538 set_fs(old_fs); 539 return ret; 540 } 541 542 static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) 543 { 544 struct btrfs_tlv_header *hdr; 545 int total_len = sizeof(*hdr) + len; 546 int left = sctx->send_max_size - sctx->send_size; 547 548 if (unlikely(left < total_len)) 549 return -EOVERFLOW; 550 551 hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); 552 hdr->tlv_type = cpu_to_le16(attr); 553 hdr->tlv_len = cpu_to_le16(len); 554 memcpy(hdr + 1, data, len); 555 sctx->send_size += total_len; 556 557 return 0; 558 } 559 560 #define TLV_PUT_DEFINE_INT(bits) \ 561 static int tlv_put_u##bits(struct send_ctx *sctx, \ 562 u##bits attr, u##bits value) \ 563 { \ 564 __le##bits __tmp = cpu_to_le##bits(value); \ 565 return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \ 566 } 567 568 TLV_PUT_DEFINE_INT(64) 569 570 static int tlv_put_string(struct send_ctx *sctx, u16 attr, 571 const char *str, int len) 572 { 573 if (len == -1) 574 len = strlen(str); 575 return tlv_put(sctx, attr, str, len); 576 } 577 578 static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, 579 const u8 *uuid) 580 { 581 return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); 582 } 583 584 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, 585 struct extent_buffer *eb, 586 struct btrfs_timespec *ts) 587 { 588 struct btrfs_timespec bts; 589 read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts)); 590 return tlv_put(sctx, attr, &bts, sizeof(bts)); 591 } 592 593 594 #define TLV_PUT(sctx, attrtype, attrlen, data) \ 595 do { \ 596 ret = tlv_put(sctx, attrtype, attrlen, data); \ 597 if (ret < 0) \ 598 goto tlv_put_failure; \ 599 } while (0) 600 601 #define TLV_PUT_INT(sctx, attrtype, bits, value) \ 602 do { \ 603 ret = tlv_put_u##bits(sctx, attrtype, value); \ 604 if (ret < 0) \ 605 goto tlv_put_failure; \ 606 } while (0) 607 608 #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data) 609 #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data) 610 #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data) 611 #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data) 612 #define TLV_PUT_STRING(sctx, attrtype, str, len) \ 613 do { \ 614 ret = tlv_put_string(sctx, attrtype, str, len); \ 615 if (ret < 0) \ 616 goto tlv_put_failure; \ 617 } while (0) 618 #define TLV_PUT_PATH(sctx, attrtype, p) \ 619 do { \ 620 ret = tlv_put_string(sctx, attrtype, p->start, \ 621 p->end - p->start); \ 622 if (ret < 0) \ 623 goto tlv_put_failure; \ 624 } while(0) 625 #define TLV_PUT_UUID(sctx, attrtype, uuid) \ 626 do { \ 627 ret = tlv_put_uuid(sctx, attrtype, uuid); \ 628 if (ret < 0) \ 629 goto tlv_put_failure; \ 630 } while (0) 631 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ 632 do { \ 633 ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ 634 if (ret < 0) \ 635 goto tlv_put_failure; \ 636 } while (0) 637 638 static int send_header(struct send_ctx *sctx) 639 { 640 struct btrfs_stream_header hdr; 641 642 strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); 643 hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); 644 645 return write_buf(sctx->send_filp, &hdr, sizeof(hdr), 646 &sctx->send_off); 647 } 648 649 /* 650 * For each command/item we want to send to userspace, we call this function. 651 */ 652 static int begin_cmd(struct send_ctx *sctx, int cmd) 653 { 654 struct btrfs_cmd_header *hdr; 655 656 if (WARN_ON(!sctx->send_buf)) 657 return -EINVAL; 658 659 BUG_ON(sctx->send_size); 660 661 sctx->send_size += sizeof(*hdr); 662 hdr = (struct btrfs_cmd_header *)sctx->send_buf; 663 hdr->cmd = cpu_to_le16(cmd); 664 665 return 0; 666 } 667 668 static int send_cmd(struct send_ctx *sctx) 669 { 670 int ret; 671 struct btrfs_cmd_header *hdr; 672 u32 crc; 673 674 hdr = (struct btrfs_cmd_header *)sctx->send_buf; 675 hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); 676 hdr->crc = 0; 677 678 crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); 679 hdr->crc = cpu_to_le32(crc); 680 681 ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, 682 &sctx->send_off); 683 684 sctx->total_send_size += sctx->send_size; 685 sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; 686 sctx->send_size = 0; 687 688 return ret; 689 } 690 691 /* 692 * Sends a move instruction to user space 693 */ 694 static int send_rename(struct send_ctx *sctx, 695 struct fs_path *from, struct fs_path *to) 696 { 697 int ret; 698 699 verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start); 700 701 ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); 702 if (ret < 0) 703 goto out; 704 705 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from); 706 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to); 707 708 ret = send_cmd(sctx); 709 710 tlv_put_failure: 711 out: 712 return ret; 713 } 714 715 /* 716 * Sends a link instruction to user space 717 */ 718 static int send_link(struct send_ctx *sctx, 719 struct fs_path *path, struct fs_path *lnk) 720 { 721 int ret; 722 723 verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start); 724 725 ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); 726 if (ret < 0) 727 goto out; 728 729 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 730 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk); 731 732 ret = send_cmd(sctx); 733 734 tlv_put_failure: 735 out: 736 return ret; 737 } 738 739 /* 740 * Sends an unlink instruction to user space 741 */ 742 static int send_unlink(struct send_ctx *sctx, struct fs_path *path) 743 { 744 int ret; 745 746 verbose_printk("btrfs: send_unlink %s\n", path->start); 747 748 ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); 749 if (ret < 0) 750 goto out; 751 752 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 753 754 ret = send_cmd(sctx); 755 756 tlv_put_failure: 757 out: 758 return ret; 759 } 760 761 /* 762 * Sends a rmdir instruction to user space 763 */ 764 static int send_rmdir(struct send_ctx *sctx, struct fs_path *path) 765 { 766 int ret; 767 768 verbose_printk("btrfs: send_rmdir %s\n", path->start); 769 770 ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); 771 if (ret < 0) 772 goto out; 773 774 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 775 776 ret = send_cmd(sctx); 777 778 tlv_put_failure: 779 out: 780 return ret; 781 } 782 783 /* 784 * Helper function to retrieve some fields from an inode item. 785 */ 786 static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path, 787 u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid, 788 u64 *gid, u64 *rdev) 789 { 790 int ret; 791 struct btrfs_inode_item *ii; 792 struct btrfs_key key; 793 794 key.objectid = ino; 795 key.type = BTRFS_INODE_ITEM_KEY; 796 key.offset = 0; 797 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 798 if (ret) { 799 if (ret > 0) 800 ret = -ENOENT; 801 return ret; 802 } 803 804 ii = btrfs_item_ptr(path->nodes[0], path->slots[0], 805 struct btrfs_inode_item); 806 if (size) 807 *size = btrfs_inode_size(path->nodes[0], ii); 808 if (gen) 809 *gen = btrfs_inode_generation(path->nodes[0], ii); 810 if (mode) 811 *mode = btrfs_inode_mode(path->nodes[0], ii); 812 if (uid) 813 *uid = btrfs_inode_uid(path->nodes[0], ii); 814 if (gid) 815 *gid = btrfs_inode_gid(path->nodes[0], ii); 816 if (rdev) 817 *rdev = btrfs_inode_rdev(path->nodes[0], ii); 818 819 return ret; 820 } 821 822 static int get_inode_info(struct btrfs_root *root, 823 u64 ino, u64 *size, u64 *gen, 824 u64 *mode, u64 *uid, u64 *gid, 825 u64 *rdev) 826 { 827 struct btrfs_path *path; 828 int ret; 829 830 path = alloc_path_for_send(); 831 if (!path) 832 return -ENOMEM; 833 ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid, 834 rdev); 835 btrfs_free_path(path); 836 return ret; 837 } 838 839 typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, 840 struct fs_path *p, 841 void *ctx); 842 843 /* 844 * Helper function to iterate the entries in ONE btrfs_inode_ref or 845 * btrfs_inode_extref. 846 * The iterate callback may return a non zero value to stop iteration. This can 847 * be a negative value for error codes or 1 to simply stop it. 848 * 849 * path must point to the INODE_REF or INODE_EXTREF when called. 850 */ 851 static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, 852 struct btrfs_key *found_key, int resolve, 853 iterate_inode_ref_t iterate, void *ctx) 854 { 855 struct extent_buffer *eb = path->nodes[0]; 856 struct btrfs_item *item; 857 struct btrfs_inode_ref *iref; 858 struct btrfs_inode_extref *extref; 859 struct btrfs_path *tmp_path; 860 struct fs_path *p; 861 u32 cur = 0; 862 u32 total; 863 int slot = path->slots[0]; 864 u32 name_len; 865 char *start; 866 int ret = 0; 867 int num = 0; 868 int index; 869 u64 dir; 870 unsigned long name_off; 871 unsigned long elem_size; 872 unsigned long ptr; 873 874 p = fs_path_alloc_reversed(); 875 if (!p) 876 return -ENOMEM; 877 878 tmp_path = alloc_path_for_send(); 879 if (!tmp_path) { 880 fs_path_free(p); 881 return -ENOMEM; 882 } 883 884 885 if (found_key->type == BTRFS_INODE_REF_KEY) { 886 ptr = (unsigned long)btrfs_item_ptr(eb, slot, 887 struct btrfs_inode_ref); 888 item = btrfs_item_nr(slot); 889 total = btrfs_item_size(eb, item); 890 elem_size = sizeof(*iref); 891 } else { 892 ptr = btrfs_item_ptr_offset(eb, slot); 893 total = btrfs_item_size_nr(eb, slot); 894 elem_size = sizeof(*extref); 895 } 896 897 while (cur < total) { 898 fs_path_reset(p); 899 900 if (found_key->type == BTRFS_INODE_REF_KEY) { 901 iref = (struct btrfs_inode_ref *)(ptr + cur); 902 name_len = btrfs_inode_ref_name_len(eb, iref); 903 name_off = (unsigned long)(iref + 1); 904 index = btrfs_inode_ref_index(eb, iref); 905 dir = found_key->offset; 906 } else { 907 extref = (struct btrfs_inode_extref *)(ptr + cur); 908 name_len = btrfs_inode_extref_name_len(eb, extref); 909 name_off = (unsigned long)&extref->name; 910 index = btrfs_inode_extref_index(eb, extref); 911 dir = btrfs_inode_extref_parent(eb, extref); 912 } 913 914 if (resolve) { 915 start = btrfs_ref_to_path(root, tmp_path, name_len, 916 name_off, eb, dir, 917 p->buf, p->buf_len); 918 if (IS_ERR(start)) { 919 ret = PTR_ERR(start); 920 goto out; 921 } 922 if (start < p->buf) { 923 /* overflow , try again with larger buffer */ 924 ret = fs_path_ensure_buf(p, 925 p->buf_len + p->buf - start); 926 if (ret < 0) 927 goto out; 928 start = btrfs_ref_to_path(root, tmp_path, 929 name_len, name_off, 930 eb, dir, 931 p->buf, p->buf_len); 932 if (IS_ERR(start)) { 933 ret = PTR_ERR(start); 934 goto out; 935 } 936 BUG_ON(start < p->buf); 937 } 938 p->start = start; 939 } else { 940 ret = fs_path_add_from_extent_buffer(p, eb, name_off, 941 name_len); 942 if (ret < 0) 943 goto out; 944 } 945 946 cur += elem_size + name_len; 947 ret = iterate(num, dir, index, p, ctx); 948 if (ret) 949 goto out; 950 num++; 951 } 952 953 out: 954 btrfs_free_path(tmp_path); 955 fs_path_free(p); 956 return ret; 957 } 958 959 typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key, 960 const char *name, int name_len, 961 const char *data, int data_len, 962 u8 type, void *ctx); 963 964 /* 965 * Helper function to iterate the entries in ONE btrfs_dir_item. 966 * The iterate callback may return a non zero value to stop iteration. This can 967 * be a negative value for error codes or 1 to simply stop it. 968 * 969 * path must point to the dir item when called. 970 */ 971 static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, 972 struct btrfs_key *found_key, 973 iterate_dir_item_t iterate, void *ctx) 974 { 975 int ret = 0; 976 struct extent_buffer *eb; 977 struct btrfs_item *item; 978 struct btrfs_dir_item *di; 979 struct btrfs_key di_key; 980 char *buf = NULL; 981 int buf_len; 982 u32 name_len; 983 u32 data_len; 984 u32 cur; 985 u32 len; 986 u32 total; 987 int slot; 988 int num; 989 u8 type; 990 991 /* 992 * Start with a small buffer (1 page). If later we end up needing more 993 * space, which can happen for xattrs on a fs with a leaf size greater 994 * then the page size, attempt to increase the buffer. Typically xattr 995 * values are small. 996 */ 997 buf_len = PATH_MAX; 998 buf = kmalloc(buf_len, GFP_NOFS); 999 if (!buf) { 1000 ret = -ENOMEM; 1001 goto out; 1002 } 1003 1004 eb = path->nodes[0]; 1005 slot = path->slots[0]; 1006 item = btrfs_item_nr(slot); 1007 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 1008 cur = 0; 1009 len = 0; 1010 total = btrfs_item_size(eb, item); 1011 1012 num = 0; 1013 while (cur < total) { 1014 name_len = btrfs_dir_name_len(eb, di); 1015 data_len = btrfs_dir_data_len(eb, di); 1016 type = btrfs_dir_type(eb, di); 1017 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 1018 1019 if (type == BTRFS_FT_XATTR) { 1020 if (name_len > XATTR_NAME_MAX) { 1021 ret = -ENAMETOOLONG; 1022 goto out; 1023 } 1024 if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) { 1025 ret = -E2BIG; 1026 goto out; 1027 } 1028 } else { 1029 /* 1030 * Path too long 1031 */ 1032 if (name_len + data_len > PATH_MAX) { 1033 ret = -ENAMETOOLONG; 1034 goto out; 1035 } 1036 } 1037 1038 if (name_len + data_len > buf_len) { 1039 buf_len = name_len + data_len; 1040 if (is_vmalloc_addr(buf)) { 1041 vfree(buf); 1042 buf = NULL; 1043 } else { 1044 char *tmp = krealloc(buf, buf_len, 1045 GFP_NOFS | __GFP_NOWARN); 1046 1047 if (!tmp) 1048 kfree(buf); 1049 buf = tmp; 1050 } 1051 if (!buf) { 1052 buf = vmalloc(buf_len); 1053 if (!buf) { 1054 ret = -ENOMEM; 1055 goto out; 1056 } 1057 } 1058 } 1059 1060 read_extent_buffer(eb, buf, (unsigned long)(di + 1), 1061 name_len + data_len); 1062 1063 len = sizeof(*di) + name_len + data_len; 1064 di = (struct btrfs_dir_item *)((char *)di + len); 1065 cur += len; 1066 1067 ret = iterate(num, &di_key, buf, name_len, buf + name_len, 1068 data_len, type, ctx); 1069 if (ret < 0) 1070 goto out; 1071 if (ret) { 1072 ret = 0; 1073 goto out; 1074 } 1075 1076 num++; 1077 } 1078 1079 out: 1080 kvfree(buf); 1081 return ret; 1082 } 1083 1084 static int __copy_first_ref(int num, u64 dir, int index, 1085 struct fs_path *p, void *ctx) 1086 { 1087 int ret; 1088 struct fs_path *pt = ctx; 1089 1090 ret = fs_path_copy(pt, p); 1091 if (ret < 0) 1092 return ret; 1093 1094 /* we want the first only */ 1095 return 1; 1096 } 1097 1098 /* 1099 * Retrieve the first path of an inode. If an inode has more then one 1100 * ref/hardlink, this is ignored. 1101 */ 1102 static int get_inode_path(struct btrfs_root *root, 1103 u64 ino, struct fs_path *path) 1104 { 1105 int ret; 1106 struct btrfs_key key, found_key; 1107 struct btrfs_path *p; 1108 1109 p = alloc_path_for_send(); 1110 if (!p) 1111 return -ENOMEM; 1112 1113 fs_path_reset(path); 1114 1115 key.objectid = ino; 1116 key.type = BTRFS_INODE_REF_KEY; 1117 key.offset = 0; 1118 1119 ret = btrfs_search_slot_for_read(root, &key, p, 1, 0); 1120 if (ret < 0) 1121 goto out; 1122 if (ret) { 1123 ret = 1; 1124 goto out; 1125 } 1126 btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); 1127 if (found_key.objectid != ino || 1128 (found_key.type != BTRFS_INODE_REF_KEY && 1129 found_key.type != BTRFS_INODE_EXTREF_KEY)) { 1130 ret = -ENOENT; 1131 goto out; 1132 } 1133 1134 ret = iterate_inode_ref(root, p, &found_key, 1, 1135 __copy_first_ref, path); 1136 if (ret < 0) 1137 goto out; 1138 ret = 0; 1139 1140 out: 1141 btrfs_free_path(p); 1142 return ret; 1143 } 1144 1145 struct backref_ctx { 1146 struct send_ctx *sctx; 1147 1148 struct btrfs_path *path; 1149 /* number of total found references */ 1150 u64 found; 1151 1152 /* 1153 * used for clones found in send_root. clones found behind cur_objectid 1154 * and cur_offset are not considered as allowed clones. 1155 */ 1156 u64 cur_objectid; 1157 u64 cur_offset; 1158 1159 /* may be truncated in case it's the last extent in a file */ 1160 u64 extent_len; 1161 1162 /* data offset in the file extent item */ 1163 u64 data_offset; 1164 1165 /* Just to check for bugs in backref resolving */ 1166 int found_itself; 1167 }; 1168 1169 static int __clone_root_cmp_bsearch(const void *key, const void *elt) 1170 { 1171 u64 root = (u64)(uintptr_t)key; 1172 struct clone_root *cr = (struct clone_root *)elt; 1173 1174 if (root < cr->root->objectid) 1175 return -1; 1176 if (root > cr->root->objectid) 1177 return 1; 1178 return 0; 1179 } 1180 1181 static int __clone_root_cmp_sort(const void *e1, const void *e2) 1182 { 1183 struct clone_root *cr1 = (struct clone_root *)e1; 1184 struct clone_root *cr2 = (struct clone_root *)e2; 1185 1186 if (cr1->root->objectid < cr2->root->objectid) 1187 return -1; 1188 if (cr1->root->objectid > cr2->root->objectid) 1189 return 1; 1190 return 0; 1191 } 1192 1193 /* 1194 * Called for every backref that is found for the current extent. 1195 * Results are collected in sctx->clone_roots->ino/offset/found_refs 1196 */ 1197 static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) 1198 { 1199 struct backref_ctx *bctx = ctx_; 1200 struct clone_root *found; 1201 int ret; 1202 u64 i_size; 1203 1204 /* First check if the root is in the list of accepted clone sources */ 1205 found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, 1206 bctx->sctx->clone_roots_cnt, 1207 sizeof(struct clone_root), 1208 __clone_root_cmp_bsearch); 1209 if (!found) 1210 return 0; 1211 1212 if (found->root == bctx->sctx->send_root && 1213 ino == bctx->cur_objectid && 1214 offset == bctx->cur_offset) { 1215 bctx->found_itself = 1; 1216 } 1217 1218 /* 1219 * There are inodes that have extents that lie behind its i_size. Don't 1220 * accept clones from these extents. 1221 */ 1222 ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, 1223 NULL, NULL, NULL); 1224 btrfs_release_path(bctx->path); 1225 if (ret < 0) 1226 return ret; 1227 1228 if (offset + bctx->data_offset + bctx->extent_len > i_size) 1229 return 0; 1230 1231 /* 1232 * Make sure we don't consider clones from send_root that are 1233 * behind the current inode/offset. 1234 */ 1235 if (found->root == bctx->sctx->send_root) { 1236 /* 1237 * TODO for the moment we don't accept clones from the inode 1238 * that is currently send. We may change this when 1239 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same 1240 * file. 1241 */ 1242 if (ino >= bctx->cur_objectid) 1243 return 0; 1244 #if 0 1245 if (ino > bctx->cur_objectid) 1246 return 0; 1247 if (offset + bctx->extent_len > bctx->cur_offset) 1248 return 0; 1249 #endif 1250 } 1251 1252 bctx->found++; 1253 found->found_refs++; 1254 if (ino < found->ino) { 1255 found->ino = ino; 1256 found->offset = offset; 1257 } else if (found->ino == ino) { 1258 /* 1259 * same extent found more then once in the same file. 1260 */ 1261 if (found->offset > offset + bctx->extent_len) 1262 found->offset = offset; 1263 } 1264 1265 return 0; 1266 } 1267 1268 /* 1269 * Given an inode, offset and extent item, it finds a good clone for a clone 1270 * instruction. Returns -ENOENT when none could be found. The function makes 1271 * sure that the returned clone is usable at the point where sending is at the 1272 * moment. This means, that no clones are accepted which lie behind the current 1273 * inode+offset. 1274 * 1275 * path must point to the extent item when called. 1276 */ 1277 static int find_extent_clone(struct send_ctx *sctx, 1278 struct btrfs_path *path, 1279 u64 ino, u64 data_offset, 1280 u64 ino_size, 1281 struct clone_root **found) 1282 { 1283 int ret; 1284 int extent_type; 1285 u64 logical; 1286 u64 disk_byte; 1287 u64 num_bytes; 1288 u64 extent_item_pos; 1289 u64 flags = 0; 1290 struct btrfs_file_extent_item *fi; 1291 struct extent_buffer *eb = path->nodes[0]; 1292 struct backref_ctx *backref_ctx = NULL; 1293 struct clone_root *cur_clone_root; 1294 struct btrfs_key found_key; 1295 struct btrfs_path *tmp_path; 1296 int compressed; 1297 u32 i; 1298 1299 tmp_path = alloc_path_for_send(); 1300 if (!tmp_path) 1301 return -ENOMEM; 1302 1303 /* We only use this path under the commit sem */ 1304 tmp_path->need_commit_sem = 0; 1305 1306 backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); 1307 if (!backref_ctx) { 1308 ret = -ENOMEM; 1309 goto out; 1310 } 1311 1312 backref_ctx->path = tmp_path; 1313 1314 if (data_offset >= ino_size) { 1315 /* 1316 * There may be extents that lie behind the file's size. 1317 * I at least had this in combination with snapshotting while 1318 * writing large files. 1319 */ 1320 ret = 0; 1321 goto out; 1322 } 1323 1324 fi = btrfs_item_ptr(eb, path->slots[0], 1325 struct btrfs_file_extent_item); 1326 extent_type = btrfs_file_extent_type(eb, fi); 1327 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1328 ret = -ENOENT; 1329 goto out; 1330 } 1331 compressed = btrfs_file_extent_compression(eb, fi); 1332 1333 num_bytes = btrfs_file_extent_num_bytes(eb, fi); 1334 disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); 1335 if (disk_byte == 0) { 1336 ret = -ENOENT; 1337 goto out; 1338 } 1339 logical = disk_byte + btrfs_file_extent_offset(eb, fi); 1340 1341 down_read(&sctx->send_root->fs_info->commit_root_sem); 1342 ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, 1343 &found_key, &flags); 1344 up_read(&sctx->send_root->fs_info->commit_root_sem); 1345 btrfs_release_path(tmp_path); 1346 1347 if (ret < 0) 1348 goto out; 1349 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1350 ret = -EIO; 1351 goto out; 1352 } 1353 1354 /* 1355 * Setup the clone roots. 1356 */ 1357 for (i = 0; i < sctx->clone_roots_cnt; i++) { 1358 cur_clone_root = sctx->clone_roots + i; 1359 cur_clone_root->ino = (u64)-1; 1360 cur_clone_root->offset = 0; 1361 cur_clone_root->found_refs = 0; 1362 } 1363 1364 backref_ctx->sctx = sctx; 1365 backref_ctx->found = 0; 1366 backref_ctx->cur_objectid = ino; 1367 backref_ctx->cur_offset = data_offset; 1368 backref_ctx->found_itself = 0; 1369 backref_ctx->extent_len = num_bytes; 1370 /* 1371 * For non-compressed extents iterate_extent_inodes() gives us extent 1372 * offsets that already take into account the data offset, but not for 1373 * compressed extents, since the offset is logical and not relative to 1374 * the physical extent locations. We must take this into account to 1375 * avoid sending clone offsets that go beyond the source file's size, 1376 * which would result in the clone ioctl failing with -EINVAL on the 1377 * receiving end. 1378 */ 1379 if (compressed == BTRFS_COMPRESS_NONE) 1380 backref_ctx->data_offset = 0; 1381 else 1382 backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi); 1383 1384 /* 1385 * The last extent of a file may be too large due to page alignment. 1386 * We need to adjust extent_len in this case so that the checks in 1387 * __iterate_backrefs work. 1388 */ 1389 if (data_offset + num_bytes >= ino_size) 1390 backref_ctx->extent_len = ino_size - data_offset; 1391 1392 /* 1393 * Now collect all backrefs. 1394 */ 1395 if (compressed == BTRFS_COMPRESS_NONE) 1396 extent_item_pos = logical - found_key.objectid; 1397 else 1398 extent_item_pos = 0; 1399 ret = iterate_extent_inodes(sctx->send_root->fs_info, 1400 found_key.objectid, extent_item_pos, 1, 1401 __iterate_backrefs, backref_ctx); 1402 1403 if (ret < 0) 1404 goto out; 1405 1406 if (!backref_ctx->found_itself) { 1407 /* found a bug in backref code? */ 1408 ret = -EIO; 1409 btrfs_err(sctx->send_root->fs_info, "did not find backref in " 1410 "send_root. inode=%llu, offset=%llu, " 1411 "disk_byte=%llu found extent=%llu", 1412 ino, data_offset, disk_byte, found_key.objectid); 1413 goto out; 1414 } 1415 1416 verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " 1417 "ino=%llu, " 1418 "num_bytes=%llu, logical=%llu\n", 1419 data_offset, ino, num_bytes, logical); 1420 1421 if (!backref_ctx->found) 1422 verbose_printk("btrfs: no clones found\n"); 1423 1424 cur_clone_root = NULL; 1425 for (i = 0; i < sctx->clone_roots_cnt; i++) { 1426 if (sctx->clone_roots[i].found_refs) { 1427 if (!cur_clone_root) 1428 cur_clone_root = sctx->clone_roots + i; 1429 else if (sctx->clone_roots[i].root == sctx->send_root) 1430 /* prefer clones from send_root over others */ 1431 cur_clone_root = sctx->clone_roots + i; 1432 } 1433 1434 } 1435 1436 if (cur_clone_root) { 1437 if (compressed != BTRFS_COMPRESS_NONE) { 1438 /* 1439 * Offsets given by iterate_extent_inodes() are relative 1440 * to the start of the extent, we need to add logical 1441 * offset from the file extent item. 1442 * (See why at backref.c:check_extent_in_eb()) 1443 */ 1444 cur_clone_root->offset += btrfs_file_extent_offset(eb, 1445 fi); 1446 } 1447 *found = cur_clone_root; 1448 ret = 0; 1449 } else { 1450 ret = -ENOENT; 1451 } 1452 1453 out: 1454 btrfs_free_path(tmp_path); 1455 kfree(backref_ctx); 1456 return ret; 1457 } 1458 1459 static int read_symlink(struct btrfs_root *root, 1460 u64 ino, 1461 struct fs_path *dest) 1462 { 1463 int ret; 1464 struct btrfs_path *path; 1465 struct btrfs_key key; 1466 struct btrfs_file_extent_item *ei; 1467 u8 type; 1468 u8 compression; 1469 unsigned long off; 1470 int len; 1471 1472 path = alloc_path_for_send(); 1473 if (!path) 1474 return -ENOMEM; 1475 1476 key.objectid = ino; 1477 key.type = BTRFS_EXTENT_DATA_KEY; 1478 key.offset = 0; 1479 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1480 if (ret < 0) 1481 goto out; 1482 BUG_ON(ret); 1483 1484 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 1485 struct btrfs_file_extent_item); 1486 type = btrfs_file_extent_type(path->nodes[0], ei); 1487 compression = btrfs_file_extent_compression(path->nodes[0], ei); 1488 BUG_ON(type != BTRFS_FILE_EXTENT_INLINE); 1489 BUG_ON(compression); 1490 1491 off = btrfs_file_extent_inline_start(ei); 1492 len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); 1493 1494 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1495 1496 out: 1497 btrfs_free_path(path); 1498 return ret; 1499 } 1500 1501 /* 1502 * Helper function to generate a file name that is unique in the root of 1503 * send_root and parent_root. This is used to generate names for orphan inodes. 1504 */ 1505 static int gen_unique_name(struct send_ctx *sctx, 1506 u64 ino, u64 gen, 1507 struct fs_path *dest) 1508 { 1509 int ret = 0; 1510 struct btrfs_path *path; 1511 struct btrfs_dir_item *di; 1512 char tmp[64]; 1513 int len; 1514 u64 idx = 0; 1515 1516 path = alloc_path_for_send(); 1517 if (!path) 1518 return -ENOMEM; 1519 1520 while (1) { 1521 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", 1522 ino, gen, idx); 1523 ASSERT(len < sizeof(tmp)); 1524 1525 di = btrfs_lookup_dir_item(NULL, sctx->send_root, 1526 path, BTRFS_FIRST_FREE_OBJECTID, 1527 tmp, strlen(tmp), 0); 1528 btrfs_release_path(path); 1529 if (IS_ERR(di)) { 1530 ret = PTR_ERR(di); 1531 goto out; 1532 } 1533 if (di) { 1534 /* not unique, try again */ 1535 idx++; 1536 continue; 1537 } 1538 1539 if (!sctx->parent_root) { 1540 /* unique */ 1541 ret = 0; 1542 break; 1543 } 1544 1545 di = btrfs_lookup_dir_item(NULL, sctx->parent_root, 1546 path, BTRFS_FIRST_FREE_OBJECTID, 1547 tmp, strlen(tmp), 0); 1548 btrfs_release_path(path); 1549 if (IS_ERR(di)) { 1550 ret = PTR_ERR(di); 1551 goto out; 1552 } 1553 if (di) { 1554 /* not unique, try again */ 1555 idx++; 1556 continue; 1557 } 1558 /* unique */ 1559 break; 1560 } 1561 1562 ret = fs_path_add(dest, tmp, strlen(tmp)); 1563 1564 out: 1565 btrfs_free_path(path); 1566 return ret; 1567 } 1568 1569 enum inode_state { 1570 inode_state_no_change, 1571 inode_state_will_create, 1572 inode_state_did_create, 1573 inode_state_will_delete, 1574 inode_state_did_delete, 1575 }; 1576 1577 static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) 1578 { 1579 int ret; 1580 int left_ret; 1581 int right_ret; 1582 u64 left_gen; 1583 u64 right_gen; 1584 1585 ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, 1586 NULL, NULL); 1587 if (ret < 0 && ret != -ENOENT) 1588 goto out; 1589 left_ret = ret; 1590 1591 if (!sctx->parent_root) { 1592 right_ret = -ENOENT; 1593 } else { 1594 ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, 1595 NULL, NULL, NULL, NULL); 1596 if (ret < 0 && ret != -ENOENT) 1597 goto out; 1598 right_ret = ret; 1599 } 1600 1601 if (!left_ret && !right_ret) { 1602 if (left_gen == gen && right_gen == gen) { 1603 ret = inode_state_no_change; 1604 } else if (left_gen == gen) { 1605 if (ino < sctx->send_progress) 1606 ret = inode_state_did_create; 1607 else 1608 ret = inode_state_will_create; 1609 } else if (right_gen == gen) { 1610 if (ino < sctx->send_progress) 1611 ret = inode_state_did_delete; 1612 else 1613 ret = inode_state_will_delete; 1614 } else { 1615 ret = -ENOENT; 1616 } 1617 } else if (!left_ret) { 1618 if (left_gen == gen) { 1619 if (ino < sctx->send_progress) 1620 ret = inode_state_did_create; 1621 else 1622 ret = inode_state_will_create; 1623 } else { 1624 ret = -ENOENT; 1625 } 1626 } else if (!right_ret) { 1627 if (right_gen == gen) { 1628 if (ino < sctx->send_progress) 1629 ret = inode_state_did_delete; 1630 else 1631 ret = inode_state_will_delete; 1632 } else { 1633 ret = -ENOENT; 1634 } 1635 } else { 1636 ret = -ENOENT; 1637 } 1638 1639 out: 1640 return ret; 1641 } 1642 1643 static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) 1644 { 1645 int ret; 1646 1647 ret = get_cur_inode_state(sctx, ino, gen); 1648 if (ret < 0) 1649 goto out; 1650 1651 if (ret == inode_state_no_change || 1652 ret == inode_state_did_create || 1653 ret == inode_state_will_delete) 1654 ret = 1; 1655 else 1656 ret = 0; 1657 1658 out: 1659 return ret; 1660 } 1661 1662 /* 1663 * Helper function to lookup a dir item in a dir. 1664 */ 1665 static int lookup_dir_item_inode(struct btrfs_root *root, 1666 u64 dir, const char *name, int name_len, 1667 u64 *found_inode, 1668 u8 *found_type) 1669 { 1670 int ret = 0; 1671 struct btrfs_dir_item *di; 1672 struct btrfs_key key; 1673 struct btrfs_path *path; 1674 1675 path = alloc_path_for_send(); 1676 if (!path) 1677 return -ENOMEM; 1678 1679 di = btrfs_lookup_dir_item(NULL, root, path, 1680 dir, name, name_len, 0); 1681 if (!di) { 1682 ret = -ENOENT; 1683 goto out; 1684 } 1685 if (IS_ERR(di)) { 1686 ret = PTR_ERR(di); 1687 goto out; 1688 } 1689 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); 1690 if (key.type == BTRFS_ROOT_ITEM_KEY) { 1691 ret = -ENOENT; 1692 goto out; 1693 } 1694 *found_inode = key.objectid; 1695 *found_type = btrfs_dir_type(path->nodes[0], di); 1696 1697 out: 1698 btrfs_free_path(path); 1699 return ret; 1700 } 1701 1702 /* 1703 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, 1704 * generation of the parent dir and the name of the dir entry. 1705 */ 1706 static int get_first_ref(struct btrfs_root *root, u64 ino, 1707 u64 *dir, u64 *dir_gen, struct fs_path *name) 1708 { 1709 int ret; 1710 struct btrfs_key key; 1711 struct btrfs_key found_key; 1712 struct btrfs_path *path; 1713 int len; 1714 u64 parent_dir; 1715 1716 path = alloc_path_for_send(); 1717 if (!path) 1718 return -ENOMEM; 1719 1720 key.objectid = ino; 1721 key.type = BTRFS_INODE_REF_KEY; 1722 key.offset = 0; 1723 1724 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 1725 if (ret < 0) 1726 goto out; 1727 if (!ret) 1728 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1729 path->slots[0]); 1730 if (ret || found_key.objectid != ino || 1731 (found_key.type != BTRFS_INODE_REF_KEY && 1732 found_key.type != BTRFS_INODE_EXTREF_KEY)) { 1733 ret = -ENOENT; 1734 goto out; 1735 } 1736 1737 if (found_key.type == BTRFS_INODE_REF_KEY) { 1738 struct btrfs_inode_ref *iref; 1739 iref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1740 struct btrfs_inode_ref); 1741 len = btrfs_inode_ref_name_len(path->nodes[0], iref); 1742 ret = fs_path_add_from_extent_buffer(name, path->nodes[0], 1743 (unsigned long)(iref + 1), 1744 len); 1745 parent_dir = found_key.offset; 1746 } else { 1747 struct btrfs_inode_extref *extref; 1748 extref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1749 struct btrfs_inode_extref); 1750 len = btrfs_inode_extref_name_len(path->nodes[0], extref); 1751 ret = fs_path_add_from_extent_buffer(name, path->nodes[0], 1752 (unsigned long)&extref->name, len); 1753 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref); 1754 } 1755 if (ret < 0) 1756 goto out; 1757 btrfs_release_path(path); 1758 1759 if (dir_gen) { 1760 ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, 1761 NULL, NULL, NULL); 1762 if (ret < 0) 1763 goto out; 1764 } 1765 1766 *dir = parent_dir; 1767 1768 out: 1769 btrfs_free_path(path); 1770 return ret; 1771 } 1772 1773 static int is_first_ref(struct btrfs_root *root, 1774 u64 ino, u64 dir, 1775 const char *name, int name_len) 1776 { 1777 int ret; 1778 struct fs_path *tmp_name; 1779 u64 tmp_dir; 1780 1781 tmp_name = fs_path_alloc(); 1782 if (!tmp_name) 1783 return -ENOMEM; 1784 1785 ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name); 1786 if (ret < 0) 1787 goto out; 1788 1789 if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) { 1790 ret = 0; 1791 goto out; 1792 } 1793 1794 ret = !memcmp(tmp_name->start, name, name_len); 1795 1796 out: 1797 fs_path_free(tmp_name); 1798 return ret; 1799 } 1800 1801 /* 1802 * Used by process_recorded_refs to determine if a new ref would overwrite an 1803 * already existing ref. In case it detects an overwrite, it returns the 1804 * inode/gen in who_ino/who_gen. 1805 * When an overwrite is detected, process_recorded_refs does proper orphanizing 1806 * to make sure later references to the overwritten inode are possible. 1807 * Orphanizing is however only required for the first ref of an inode. 1808 * process_recorded_refs does an additional is_first_ref check to see if 1809 * orphanizing is really required. 1810 */ 1811 static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, 1812 const char *name, int name_len, 1813 u64 *who_ino, u64 *who_gen) 1814 { 1815 int ret = 0; 1816 u64 gen; 1817 u64 other_inode = 0; 1818 u8 other_type = 0; 1819 1820 if (!sctx->parent_root) 1821 goto out; 1822 1823 ret = is_inode_existent(sctx, dir, dir_gen); 1824 if (ret <= 0) 1825 goto out; 1826 1827 /* 1828 * If we have a parent root we need to verify that the parent dir was 1829 * not delted and then re-created, if it was then we have no overwrite 1830 * and we can just unlink this entry. 1831 */ 1832 if (sctx->parent_root) { 1833 ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, 1834 NULL, NULL, NULL); 1835 if (ret < 0 && ret != -ENOENT) 1836 goto out; 1837 if (ret) { 1838 ret = 0; 1839 goto out; 1840 } 1841 if (gen != dir_gen) 1842 goto out; 1843 } 1844 1845 ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, 1846 &other_inode, &other_type); 1847 if (ret < 0 && ret != -ENOENT) 1848 goto out; 1849 if (ret) { 1850 ret = 0; 1851 goto out; 1852 } 1853 1854 /* 1855 * Check if the overwritten ref was already processed. If yes, the ref 1856 * was already unlinked/moved, so we can safely assume that we will not 1857 * overwrite anything at this point in time. 1858 */ 1859 if (other_inode > sctx->send_progress) { 1860 ret = get_inode_info(sctx->parent_root, other_inode, NULL, 1861 who_gen, NULL, NULL, NULL, NULL); 1862 if (ret < 0) 1863 goto out; 1864 1865 ret = 1; 1866 *who_ino = other_inode; 1867 } else { 1868 ret = 0; 1869 } 1870 1871 out: 1872 return ret; 1873 } 1874 1875 /* 1876 * Checks if the ref was overwritten by an already processed inode. This is 1877 * used by __get_cur_name_and_parent to find out if the ref was orphanized and 1878 * thus the orphan name needs be used. 1879 * process_recorded_refs also uses it to avoid unlinking of refs that were 1880 * overwritten. 1881 */ 1882 static int did_overwrite_ref(struct send_ctx *sctx, 1883 u64 dir, u64 dir_gen, 1884 u64 ino, u64 ino_gen, 1885 const char *name, int name_len) 1886 { 1887 int ret = 0; 1888 u64 gen; 1889 u64 ow_inode; 1890 u8 other_type; 1891 1892 if (!sctx->parent_root) 1893 goto out; 1894 1895 ret = is_inode_existent(sctx, dir, dir_gen); 1896 if (ret <= 0) 1897 goto out; 1898 1899 /* check if the ref was overwritten by another ref */ 1900 ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, 1901 &ow_inode, &other_type); 1902 if (ret < 0 && ret != -ENOENT) 1903 goto out; 1904 if (ret) { 1905 /* was never and will never be overwritten */ 1906 ret = 0; 1907 goto out; 1908 } 1909 1910 ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, 1911 NULL, NULL); 1912 if (ret < 0) 1913 goto out; 1914 1915 if (ow_inode == ino && gen == ino_gen) { 1916 ret = 0; 1917 goto out; 1918 } 1919 1920 /* 1921 * We know that it is or will be overwritten. Check this now. 1922 * The current inode being processed might have been the one that caused 1923 * inode 'ino' to be orphanized, therefore ow_inode can actually be the 1924 * same as sctx->send_progress. 1925 */ 1926 if (ow_inode <= sctx->send_progress) 1927 ret = 1; 1928 else 1929 ret = 0; 1930 1931 out: 1932 return ret; 1933 } 1934 1935 /* 1936 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode 1937 * that got overwritten. This is used by process_recorded_refs to determine 1938 * if it has to use the path as returned by get_cur_path or the orphan name. 1939 */ 1940 static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) 1941 { 1942 int ret = 0; 1943 struct fs_path *name = NULL; 1944 u64 dir; 1945 u64 dir_gen; 1946 1947 if (!sctx->parent_root) 1948 goto out; 1949 1950 name = fs_path_alloc(); 1951 if (!name) 1952 return -ENOMEM; 1953 1954 ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name); 1955 if (ret < 0) 1956 goto out; 1957 1958 ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, 1959 name->start, fs_path_len(name)); 1960 1961 out: 1962 fs_path_free(name); 1963 return ret; 1964 } 1965 1966 /* 1967 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit, 1968 * so we need to do some special handling in case we have clashes. This function 1969 * takes care of this with the help of name_cache_entry::radix_list. 1970 * In case of error, nce is kfreed. 1971 */ 1972 static int name_cache_insert(struct send_ctx *sctx, 1973 struct name_cache_entry *nce) 1974 { 1975 int ret = 0; 1976 struct list_head *nce_head; 1977 1978 nce_head = radix_tree_lookup(&sctx->name_cache, 1979 (unsigned long)nce->ino); 1980 if (!nce_head) { 1981 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1982 if (!nce_head) { 1983 kfree(nce); 1984 return -ENOMEM; 1985 } 1986 INIT_LIST_HEAD(nce_head); 1987 1988 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1989 if (ret < 0) { 1990 kfree(nce_head); 1991 kfree(nce); 1992 return ret; 1993 } 1994 } 1995 list_add_tail(&nce->radix_list, nce_head); 1996 list_add_tail(&nce->list, &sctx->name_cache_list); 1997 sctx->name_cache_size++; 1998 1999 return ret; 2000 } 2001 2002 static void name_cache_delete(struct send_ctx *sctx, 2003 struct name_cache_entry *nce) 2004 { 2005 struct list_head *nce_head; 2006 2007 nce_head = radix_tree_lookup(&sctx->name_cache, 2008 (unsigned long)nce->ino); 2009 if (!nce_head) { 2010 btrfs_err(sctx->send_root->fs_info, 2011 "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", 2012 nce->ino, sctx->name_cache_size); 2013 } 2014 2015 list_del(&nce->radix_list); 2016 list_del(&nce->list); 2017 sctx->name_cache_size--; 2018 2019 /* 2020 * We may not get to the final release of nce_head if the lookup fails 2021 */ 2022 if (nce_head && list_empty(nce_head)) { 2023 radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); 2024 kfree(nce_head); 2025 } 2026 } 2027 2028 static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, 2029 u64 ino, u64 gen) 2030 { 2031 struct list_head *nce_head; 2032 struct name_cache_entry *cur; 2033 2034 nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino); 2035 if (!nce_head) 2036 return NULL; 2037 2038 list_for_each_entry(cur, nce_head, radix_list) { 2039 if (cur->ino == ino && cur->gen == gen) 2040 return cur; 2041 } 2042 return NULL; 2043 } 2044 2045 /* 2046 * Removes the entry from the list and adds it back to the end. This marks the 2047 * entry as recently used so that name_cache_clean_unused does not remove it. 2048 */ 2049 static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) 2050 { 2051 list_del(&nce->list); 2052 list_add_tail(&nce->list, &sctx->name_cache_list); 2053 } 2054 2055 /* 2056 * Remove some entries from the beginning of name_cache_list. 2057 */ 2058 static void name_cache_clean_unused(struct send_ctx *sctx) 2059 { 2060 struct name_cache_entry *nce; 2061 2062 if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE) 2063 return; 2064 2065 while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) { 2066 nce = list_entry(sctx->name_cache_list.next, 2067 struct name_cache_entry, list); 2068 name_cache_delete(sctx, nce); 2069 kfree(nce); 2070 } 2071 } 2072 2073 static void name_cache_free(struct send_ctx *sctx) 2074 { 2075 struct name_cache_entry *nce; 2076 2077 while (!list_empty(&sctx->name_cache_list)) { 2078 nce = list_entry(sctx->name_cache_list.next, 2079 struct name_cache_entry, list); 2080 name_cache_delete(sctx, nce); 2081 kfree(nce); 2082 } 2083 } 2084 2085 /* 2086 * Used by get_cur_path for each ref up to the root. 2087 * Returns 0 if it succeeded. 2088 * Returns 1 if the inode is not existent or got overwritten. In that case, the 2089 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1 2090 * is returned, parent_ino/parent_gen are not guaranteed to be valid. 2091 * Returns <0 in case of error. 2092 */ 2093 static int __get_cur_name_and_parent(struct send_ctx *sctx, 2094 u64 ino, u64 gen, 2095 u64 *parent_ino, 2096 u64 *parent_gen, 2097 struct fs_path *dest) 2098 { 2099 int ret; 2100 int nce_ret; 2101 struct name_cache_entry *nce = NULL; 2102 2103 /* 2104 * First check if we already did a call to this function with the same 2105 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes 2106 * return the cached result. 2107 */ 2108 nce = name_cache_search(sctx, ino, gen); 2109 if (nce) { 2110 if (ino < sctx->send_progress && nce->need_later_update) { 2111 name_cache_delete(sctx, nce); 2112 kfree(nce); 2113 nce = NULL; 2114 } else { 2115 name_cache_used(sctx, nce); 2116 *parent_ino = nce->parent_ino; 2117 *parent_gen = nce->parent_gen; 2118 ret = fs_path_add(dest, nce->name, nce->name_len); 2119 if (ret < 0) 2120 goto out; 2121 ret = nce->ret; 2122 goto out; 2123 } 2124 } 2125 2126 /* 2127 * If the inode is not existent yet, add the orphan name and return 1. 2128 * This should only happen for the parent dir that we determine in 2129 * __record_new_ref 2130 */ 2131 ret = is_inode_existent(sctx, ino, gen); 2132 if (ret < 0) 2133 goto out; 2134 2135 if (!ret) { 2136 ret = gen_unique_name(sctx, ino, gen, dest); 2137 if (ret < 0) 2138 goto out; 2139 ret = 1; 2140 goto out_cache; 2141 } 2142 2143 /* 2144 * Depending on whether the inode was already processed or not, use 2145 * send_root or parent_root for ref lookup. 2146 */ 2147 if (ino < sctx->send_progress) 2148 ret = get_first_ref(sctx->send_root, ino, 2149 parent_ino, parent_gen, dest); 2150 else 2151 ret = get_first_ref(sctx->parent_root, ino, 2152 parent_ino, parent_gen, dest); 2153 if (ret < 0) 2154 goto out; 2155 2156 /* 2157 * Check if the ref was overwritten by an inode's ref that was processed 2158 * earlier. If yes, treat as orphan and return 1. 2159 */ 2160 ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, 2161 dest->start, dest->end - dest->start); 2162 if (ret < 0) 2163 goto out; 2164 if (ret) { 2165 fs_path_reset(dest); 2166 ret = gen_unique_name(sctx, ino, gen, dest); 2167 if (ret < 0) 2168 goto out; 2169 ret = 1; 2170 } 2171 2172 out_cache: 2173 /* 2174 * Store the result of the lookup in the name cache. 2175 */ 2176 nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); 2177 if (!nce) { 2178 ret = -ENOMEM; 2179 goto out; 2180 } 2181 2182 nce->ino = ino; 2183 nce->gen = gen; 2184 nce->parent_ino = *parent_ino; 2185 nce->parent_gen = *parent_gen; 2186 nce->name_len = fs_path_len(dest); 2187 nce->ret = ret; 2188 strcpy(nce->name, dest->start); 2189 2190 if (ino < sctx->send_progress) 2191 nce->need_later_update = 0; 2192 else 2193 nce->need_later_update = 1; 2194 2195 nce_ret = name_cache_insert(sctx, nce); 2196 if (nce_ret < 0) 2197 ret = nce_ret; 2198 name_cache_clean_unused(sctx); 2199 2200 out: 2201 return ret; 2202 } 2203 2204 /* 2205 * Magic happens here. This function returns the first ref to an inode as it 2206 * would look like while receiving the stream at this point in time. 2207 * We walk the path up to the root. For every inode in between, we check if it 2208 * was already processed/sent. If yes, we continue with the parent as found 2209 * in send_root. If not, we continue with the parent as found in parent_root. 2210 * If we encounter an inode that was deleted at this point in time, we use the 2211 * inodes "orphan" name instead of the real name and stop. Same with new inodes 2212 * that were not created yet and overwritten inodes/refs. 2213 * 2214 * When do we have have orphan inodes: 2215 * 1. When an inode is freshly created and thus no valid refs are available yet 2216 * 2. When a directory lost all it's refs (deleted) but still has dir items 2217 * inside which were not processed yet (pending for move/delete). If anyone 2218 * tried to get the path to the dir items, it would get a path inside that 2219 * orphan directory. 2220 * 3. When an inode is moved around or gets new links, it may overwrite the ref 2221 * of an unprocessed inode. If in that case the first ref would be 2222 * overwritten, the overwritten inode gets "orphanized". Later when we 2223 * process this overwritten inode, it is restored at a new place by moving 2224 * the orphan inode. 2225 * 2226 * sctx->send_progress tells this function at which point in time receiving 2227 * would be. 2228 */ 2229 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, 2230 struct fs_path *dest) 2231 { 2232 int ret = 0; 2233 struct fs_path *name = NULL; 2234 u64 parent_inode = 0; 2235 u64 parent_gen = 0; 2236 int stop = 0; 2237 2238 name = fs_path_alloc(); 2239 if (!name) { 2240 ret = -ENOMEM; 2241 goto out; 2242 } 2243 2244 dest->reversed = 1; 2245 fs_path_reset(dest); 2246 2247 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2248 struct waiting_dir_move *wdm; 2249 2250 fs_path_reset(name); 2251 2252 if (is_waiting_for_rm(sctx, ino)) { 2253 ret = gen_unique_name(sctx, ino, gen, name); 2254 if (ret < 0) 2255 goto out; 2256 ret = fs_path_add_path(dest, name); 2257 break; 2258 } 2259 2260 wdm = get_waiting_dir_move(sctx, ino); 2261 if (wdm && wdm->orphanized) { 2262 ret = gen_unique_name(sctx, ino, gen, name); 2263 stop = 1; 2264 } else if (wdm) { 2265 ret = get_first_ref(sctx->parent_root, ino, 2266 &parent_inode, &parent_gen, name); 2267 } else { 2268 ret = __get_cur_name_and_parent(sctx, ino, gen, 2269 &parent_inode, 2270 &parent_gen, name); 2271 if (ret) 2272 stop = 1; 2273 } 2274 2275 if (ret < 0) 2276 goto out; 2277 2278 ret = fs_path_add_path(dest, name); 2279 if (ret < 0) 2280 goto out; 2281 2282 ino = parent_inode; 2283 gen = parent_gen; 2284 } 2285 2286 out: 2287 fs_path_free(name); 2288 if (!ret) 2289 fs_path_unreverse(dest); 2290 return ret; 2291 } 2292 2293 /* 2294 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2295 */ 2296 static int send_subvol_begin(struct send_ctx *sctx) 2297 { 2298 int ret; 2299 struct btrfs_root *send_root = sctx->send_root; 2300 struct btrfs_root *parent_root = sctx->parent_root; 2301 struct btrfs_path *path; 2302 struct btrfs_key key; 2303 struct btrfs_root_ref *ref; 2304 struct extent_buffer *leaf; 2305 char *name = NULL; 2306 int namelen; 2307 2308 path = btrfs_alloc_path(); 2309 if (!path) 2310 return -ENOMEM; 2311 2312 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); 2313 if (!name) { 2314 btrfs_free_path(path); 2315 return -ENOMEM; 2316 } 2317 2318 key.objectid = send_root->objectid; 2319 key.type = BTRFS_ROOT_BACKREF_KEY; 2320 key.offset = 0; 2321 2322 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2323 &key, path, 1, 0); 2324 if (ret < 0) 2325 goto out; 2326 if (ret) { 2327 ret = -ENOENT; 2328 goto out; 2329 } 2330 2331 leaf = path->nodes[0]; 2332 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2333 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2334 key.objectid != send_root->objectid) { 2335 ret = -ENOENT; 2336 goto out; 2337 } 2338 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2339 namelen = btrfs_root_ref_name_len(leaf, ref); 2340 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); 2341 btrfs_release_path(path); 2342 2343 if (parent_root) { 2344 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2345 if (ret < 0) 2346 goto out; 2347 } else { 2348 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2349 if (ret < 0) 2350 goto out; 2351 } 2352 2353 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); 2354 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2355 sctx->send_root->root_item.uuid); 2356 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2357 le64_to_cpu(sctx->send_root->root_item.ctransid)); 2358 if (parent_root) { 2359 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid)) 2360 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2361 parent_root->root_item.received_uuid); 2362 else 2363 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2364 parent_root->root_item.uuid); 2365 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2366 le64_to_cpu(sctx->parent_root->root_item.ctransid)); 2367 } 2368 2369 ret = send_cmd(sctx); 2370 2371 tlv_put_failure: 2372 out: 2373 btrfs_free_path(path); 2374 kfree(name); 2375 return ret; 2376 } 2377 2378 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2379 { 2380 int ret = 0; 2381 struct fs_path *p; 2382 2383 verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); 2384 2385 p = fs_path_alloc(); 2386 if (!p) 2387 return -ENOMEM; 2388 2389 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2390 if (ret < 0) 2391 goto out; 2392 2393 ret = get_cur_path(sctx, ino, gen, p); 2394 if (ret < 0) 2395 goto out; 2396 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2397 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2398 2399 ret = send_cmd(sctx); 2400 2401 tlv_put_failure: 2402 out: 2403 fs_path_free(p); 2404 return ret; 2405 } 2406 2407 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2408 { 2409 int ret = 0; 2410 struct fs_path *p; 2411 2412 verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); 2413 2414 p = fs_path_alloc(); 2415 if (!p) 2416 return -ENOMEM; 2417 2418 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2419 if (ret < 0) 2420 goto out; 2421 2422 ret = get_cur_path(sctx, ino, gen, p); 2423 if (ret < 0) 2424 goto out; 2425 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2426 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2427 2428 ret = send_cmd(sctx); 2429 2430 tlv_put_failure: 2431 out: 2432 fs_path_free(p); 2433 return ret; 2434 } 2435 2436 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2437 { 2438 int ret = 0; 2439 struct fs_path *p; 2440 2441 verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); 2442 2443 p = fs_path_alloc(); 2444 if (!p) 2445 return -ENOMEM; 2446 2447 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2448 if (ret < 0) 2449 goto out; 2450 2451 ret = get_cur_path(sctx, ino, gen, p); 2452 if (ret < 0) 2453 goto out; 2454 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2455 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2456 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2457 2458 ret = send_cmd(sctx); 2459 2460 tlv_put_failure: 2461 out: 2462 fs_path_free(p); 2463 return ret; 2464 } 2465 2466 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2467 { 2468 int ret = 0; 2469 struct fs_path *p = NULL; 2470 struct btrfs_inode_item *ii; 2471 struct btrfs_path *path = NULL; 2472 struct extent_buffer *eb; 2473 struct btrfs_key key; 2474 int slot; 2475 2476 verbose_printk("btrfs: send_utimes %llu\n", ino); 2477 2478 p = fs_path_alloc(); 2479 if (!p) 2480 return -ENOMEM; 2481 2482 path = alloc_path_for_send(); 2483 if (!path) { 2484 ret = -ENOMEM; 2485 goto out; 2486 } 2487 2488 key.objectid = ino; 2489 key.type = BTRFS_INODE_ITEM_KEY; 2490 key.offset = 0; 2491 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2492 if (ret < 0) 2493 goto out; 2494 2495 eb = path->nodes[0]; 2496 slot = path->slots[0]; 2497 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2498 2499 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2500 if (ret < 0) 2501 goto out; 2502 2503 ret = get_cur_path(sctx, ino, gen, p); 2504 if (ret < 0) 2505 goto out; 2506 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2507 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); 2508 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); 2509 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime); 2510 /* TODO Add otime support when the otime patches get into upstream */ 2511 2512 ret = send_cmd(sctx); 2513 2514 tlv_put_failure: 2515 out: 2516 fs_path_free(p); 2517 btrfs_free_path(path); 2518 return ret; 2519 } 2520 2521 /* 2522 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have 2523 * a valid path yet because we did not process the refs yet. So, the inode 2524 * is created as orphan. 2525 */ 2526 static int send_create_inode(struct send_ctx *sctx, u64 ino) 2527 { 2528 int ret = 0; 2529 struct fs_path *p; 2530 int cmd; 2531 u64 gen; 2532 u64 mode; 2533 u64 rdev; 2534 2535 verbose_printk("btrfs: send_create_inode %llu\n", ino); 2536 2537 p = fs_path_alloc(); 2538 if (!p) 2539 return -ENOMEM; 2540 2541 if (ino != sctx->cur_ino) { 2542 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, 2543 NULL, NULL, &rdev); 2544 if (ret < 0) 2545 goto out; 2546 } else { 2547 gen = sctx->cur_inode_gen; 2548 mode = sctx->cur_inode_mode; 2549 rdev = sctx->cur_inode_rdev; 2550 } 2551 2552 if (S_ISREG(mode)) { 2553 cmd = BTRFS_SEND_C_MKFILE; 2554 } else if (S_ISDIR(mode)) { 2555 cmd = BTRFS_SEND_C_MKDIR; 2556 } else if (S_ISLNK(mode)) { 2557 cmd = BTRFS_SEND_C_SYMLINK; 2558 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2559 cmd = BTRFS_SEND_C_MKNOD; 2560 } else if (S_ISFIFO(mode)) { 2561 cmd = BTRFS_SEND_C_MKFIFO; 2562 } else if (S_ISSOCK(mode)) { 2563 cmd = BTRFS_SEND_C_MKSOCK; 2564 } else { 2565 printk(KERN_WARNING "btrfs: unexpected inode type %o", 2566 (int)(mode & S_IFMT)); 2567 ret = -ENOTSUPP; 2568 goto out; 2569 } 2570 2571 ret = begin_cmd(sctx, cmd); 2572 if (ret < 0) 2573 goto out; 2574 2575 ret = gen_unique_name(sctx, ino, gen, p); 2576 if (ret < 0) 2577 goto out; 2578 2579 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2580 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2581 2582 if (S_ISLNK(mode)) { 2583 fs_path_reset(p); 2584 ret = read_symlink(sctx->send_root, ino, p); 2585 if (ret < 0) 2586 goto out; 2587 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2588 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2589 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2590 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2591 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2592 } 2593 2594 ret = send_cmd(sctx); 2595 if (ret < 0) 2596 goto out; 2597 2598 2599 tlv_put_failure: 2600 out: 2601 fs_path_free(p); 2602 return ret; 2603 } 2604 2605 /* 2606 * We need some special handling for inodes that get processed before the parent 2607 * directory got created. See process_recorded_refs for details. 2608 * This function does the check if we already created the dir out of order. 2609 */ 2610 static int did_create_dir(struct send_ctx *sctx, u64 dir) 2611 { 2612 int ret = 0; 2613 struct btrfs_path *path = NULL; 2614 struct btrfs_key key; 2615 struct btrfs_key found_key; 2616 struct btrfs_key di_key; 2617 struct extent_buffer *eb; 2618 struct btrfs_dir_item *di; 2619 int slot; 2620 2621 path = alloc_path_for_send(); 2622 if (!path) { 2623 ret = -ENOMEM; 2624 goto out; 2625 } 2626 2627 key.objectid = dir; 2628 key.type = BTRFS_DIR_INDEX_KEY; 2629 key.offset = 0; 2630 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2631 if (ret < 0) 2632 goto out; 2633 2634 while (1) { 2635 eb = path->nodes[0]; 2636 slot = path->slots[0]; 2637 if (slot >= btrfs_header_nritems(eb)) { 2638 ret = btrfs_next_leaf(sctx->send_root, path); 2639 if (ret < 0) { 2640 goto out; 2641 } else if (ret > 0) { 2642 ret = 0; 2643 break; 2644 } 2645 continue; 2646 } 2647 2648 btrfs_item_key_to_cpu(eb, &found_key, slot); 2649 if (found_key.objectid != key.objectid || 2650 found_key.type != key.type) { 2651 ret = 0; 2652 goto out; 2653 } 2654 2655 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2656 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2657 2658 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2659 di_key.objectid < sctx->send_progress) { 2660 ret = 1; 2661 goto out; 2662 } 2663 2664 path->slots[0]++; 2665 } 2666 2667 out: 2668 btrfs_free_path(path); 2669 return ret; 2670 } 2671 2672 /* 2673 * Only creates the inode if it is: 2674 * 1. Not a directory 2675 * 2. Or a directory which was not created already due to out of order 2676 * directories. See did_create_dir and process_recorded_refs for details. 2677 */ 2678 static int send_create_inode_if_needed(struct send_ctx *sctx) 2679 { 2680 int ret; 2681 2682 if (S_ISDIR(sctx->cur_inode_mode)) { 2683 ret = did_create_dir(sctx, sctx->cur_ino); 2684 if (ret < 0) 2685 goto out; 2686 if (ret) { 2687 ret = 0; 2688 goto out; 2689 } 2690 } 2691 2692 ret = send_create_inode(sctx, sctx->cur_ino); 2693 if (ret < 0) 2694 goto out; 2695 2696 out: 2697 return ret; 2698 } 2699 2700 struct recorded_ref { 2701 struct list_head list; 2702 char *dir_path; 2703 char *name; 2704 struct fs_path *full_path; 2705 u64 dir; 2706 u64 dir_gen; 2707 int dir_path_len; 2708 int name_len; 2709 }; 2710 2711 /* 2712 * We need to process new refs before deleted refs, but compare_tree gives us 2713 * everything mixed. So we first record all refs and later process them. 2714 * This function is a helper to record one ref. 2715 */ 2716 static int __record_ref(struct list_head *head, u64 dir, 2717 u64 dir_gen, struct fs_path *path) 2718 { 2719 struct recorded_ref *ref; 2720 2721 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2722 if (!ref) 2723 return -ENOMEM; 2724 2725 ref->dir = dir; 2726 ref->dir_gen = dir_gen; 2727 ref->full_path = path; 2728 2729 ref->name = (char *)kbasename(ref->full_path->start); 2730 ref->name_len = ref->full_path->end - ref->name; 2731 ref->dir_path = ref->full_path->start; 2732 if (ref->name == ref->full_path->start) 2733 ref->dir_path_len = 0; 2734 else 2735 ref->dir_path_len = ref->full_path->end - 2736 ref->full_path->start - 1 - ref->name_len; 2737 2738 list_add_tail(&ref->list, head); 2739 return 0; 2740 } 2741 2742 static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2743 { 2744 struct recorded_ref *new; 2745 2746 new = kmalloc(sizeof(*ref), GFP_NOFS); 2747 if (!new) 2748 return -ENOMEM; 2749 2750 new->dir = ref->dir; 2751 new->dir_gen = ref->dir_gen; 2752 new->full_path = NULL; 2753 INIT_LIST_HEAD(&new->list); 2754 list_add_tail(&new->list, list); 2755 return 0; 2756 } 2757 2758 static void __free_recorded_refs(struct list_head *head) 2759 { 2760 struct recorded_ref *cur; 2761 2762 while (!list_empty(head)) { 2763 cur = list_entry(head->next, struct recorded_ref, list); 2764 fs_path_free(cur->full_path); 2765 list_del(&cur->list); 2766 kfree(cur); 2767 } 2768 } 2769 2770 static void free_recorded_refs(struct send_ctx *sctx) 2771 { 2772 __free_recorded_refs(&sctx->new_refs); 2773 __free_recorded_refs(&sctx->deleted_refs); 2774 } 2775 2776 /* 2777 * Renames/moves a file/dir to its orphan name. Used when the first 2778 * ref of an unprocessed inode gets overwritten and for all non empty 2779 * directories. 2780 */ 2781 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2782 struct fs_path *path) 2783 { 2784 int ret; 2785 struct fs_path *orphan; 2786 2787 orphan = fs_path_alloc(); 2788 if (!orphan) 2789 return -ENOMEM; 2790 2791 ret = gen_unique_name(sctx, ino, gen, orphan); 2792 if (ret < 0) 2793 goto out; 2794 2795 ret = send_rename(sctx, path, orphan); 2796 2797 out: 2798 fs_path_free(orphan); 2799 return ret; 2800 } 2801 2802 static struct orphan_dir_info * 2803 add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2804 { 2805 struct rb_node **p = &sctx->orphan_dirs.rb_node; 2806 struct rb_node *parent = NULL; 2807 struct orphan_dir_info *entry, *odi; 2808 2809 odi = kmalloc(sizeof(*odi), GFP_NOFS); 2810 if (!odi) 2811 return ERR_PTR(-ENOMEM); 2812 odi->ino = dir_ino; 2813 odi->gen = 0; 2814 2815 while (*p) { 2816 parent = *p; 2817 entry = rb_entry(parent, struct orphan_dir_info, node); 2818 if (dir_ino < entry->ino) { 2819 p = &(*p)->rb_left; 2820 } else if (dir_ino > entry->ino) { 2821 p = &(*p)->rb_right; 2822 } else { 2823 kfree(odi); 2824 return entry; 2825 } 2826 } 2827 2828 rb_link_node(&odi->node, parent, p); 2829 rb_insert_color(&odi->node, &sctx->orphan_dirs); 2830 return odi; 2831 } 2832 2833 static struct orphan_dir_info * 2834 get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2835 { 2836 struct rb_node *n = sctx->orphan_dirs.rb_node; 2837 struct orphan_dir_info *entry; 2838 2839 while (n) { 2840 entry = rb_entry(n, struct orphan_dir_info, node); 2841 if (dir_ino < entry->ino) 2842 n = n->rb_left; 2843 else if (dir_ino > entry->ino) 2844 n = n->rb_right; 2845 else 2846 return entry; 2847 } 2848 return NULL; 2849 } 2850 2851 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) 2852 { 2853 struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); 2854 2855 return odi != NULL; 2856 } 2857 2858 static void free_orphan_dir_info(struct send_ctx *sctx, 2859 struct orphan_dir_info *odi) 2860 { 2861 if (!odi) 2862 return; 2863 rb_erase(&odi->node, &sctx->orphan_dirs); 2864 kfree(odi); 2865 } 2866 2867 /* 2868 * Returns 1 if a directory can be removed at this point in time. 2869 * We check this by iterating all dir items and checking if the inode behind 2870 * the dir item was already processed. 2871 */ 2872 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, 2873 u64 send_progress) 2874 { 2875 int ret = 0; 2876 struct btrfs_root *root = sctx->parent_root; 2877 struct btrfs_path *path; 2878 struct btrfs_key key; 2879 struct btrfs_key found_key; 2880 struct btrfs_key loc; 2881 struct btrfs_dir_item *di; 2882 2883 /* 2884 * Don't try to rmdir the top/root subvolume dir. 2885 */ 2886 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2887 return 0; 2888 2889 path = alloc_path_for_send(); 2890 if (!path) 2891 return -ENOMEM; 2892 2893 key.objectid = dir; 2894 key.type = BTRFS_DIR_INDEX_KEY; 2895 key.offset = 0; 2896 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2897 if (ret < 0) 2898 goto out; 2899 2900 while (1) { 2901 struct waiting_dir_move *dm; 2902 2903 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2904 ret = btrfs_next_leaf(root, path); 2905 if (ret < 0) 2906 goto out; 2907 else if (ret > 0) 2908 break; 2909 continue; 2910 } 2911 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2912 path->slots[0]); 2913 if (found_key.objectid != key.objectid || 2914 found_key.type != key.type) 2915 break; 2916 2917 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2918 struct btrfs_dir_item); 2919 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2920 2921 dm = get_waiting_dir_move(sctx, loc.objectid); 2922 if (dm) { 2923 struct orphan_dir_info *odi; 2924 2925 odi = add_orphan_dir_info(sctx, dir); 2926 if (IS_ERR(odi)) { 2927 ret = PTR_ERR(odi); 2928 goto out; 2929 } 2930 odi->gen = dir_gen; 2931 dm->rmdir_ino = dir; 2932 ret = 0; 2933 goto out; 2934 } 2935 2936 if (loc.objectid > send_progress) { 2937 ret = 0; 2938 goto out; 2939 } 2940 2941 path->slots[0]++; 2942 } 2943 2944 ret = 1; 2945 2946 out: 2947 btrfs_free_path(path); 2948 return ret; 2949 } 2950 2951 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 2952 { 2953 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); 2954 2955 return entry != NULL; 2956 } 2957 2958 static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) 2959 { 2960 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 2961 struct rb_node *parent = NULL; 2962 struct waiting_dir_move *entry, *dm; 2963 2964 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2965 if (!dm) 2966 return -ENOMEM; 2967 dm->ino = ino; 2968 dm->rmdir_ino = 0; 2969 dm->orphanized = orphanized; 2970 2971 while (*p) { 2972 parent = *p; 2973 entry = rb_entry(parent, struct waiting_dir_move, node); 2974 if (ino < entry->ino) { 2975 p = &(*p)->rb_left; 2976 } else if (ino > entry->ino) { 2977 p = &(*p)->rb_right; 2978 } else { 2979 kfree(dm); 2980 return -EEXIST; 2981 } 2982 } 2983 2984 rb_link_node(&dm->node, parent, p); 2985 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 2986 return 0; 2987 } 2988 2989 static struct waiting_dir_move * 2990 get_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2991 { 2992 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 2993 struct waiting_dir_move *entry; 2994 2995 while (n) { 2996 entry = rb_entry(n, struct waiting_dir_move, node); 2997 if (ino < entry->ino) 2998 n = n->rb_left; 2999 else if (ino > entry->ino) 3000 n = n->rb_right; 3001 else 3002 return entry; 3003 } 3004 return NULL; 3005 } 3006 3007 static void free_waiting_dir_move(struct send_ctx *sctx, 3008 struct waiting_dir_move *dm) 3009 { 3010 if (!dm) 3011 return; 3012 rb_erase(&dm->node, &sctx->waiting_dir_moves); 3013 kfree(dm); 3014 } 3015 3016 static int add_pending_dir_move(struct send_ctx *sctx, 3017 u64 ino, 3018 u64 ino_gen, 3019 u64 parent_ino, 3020 struct list_head *new_refs, 3021 struct list_head *deleted_refs, 3022 const bool is_orphan) 3023 { 3024 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 3025 struct rb_node *parent = NULL; 3026 struct pending_dir_move *entry = NULL, *pm; 3027 struct recorded_ref *cur; 3028 int exists = 0; 3029 int ret; 3030 3031 pm = kmalloc(sizeof(*pm), GFP_NOFS); 3032 if (!pm) 3033 return -ENOMEM; 3034 pm->parent_ino = parent_ino; 3035 pm->ino = ino; 3036 pm->gen = ino_gen; 3037 pm->is_orphan = is_orphan; 3038 INIT_LIST_HEAD(&pm->list); 3039 INIT_LIST_HEAD(&pm->update_refs); 3040 RB_CLEAR_NODE(&pm->node); 3041 3042 while (*p) { 3043 parent = *p; 3044 entry = rb_entry(parent, struct pending_dir_move, node); 3045 if (parent_ino < entry->parent_ino) { 3046 p = &(*p)->rb_left; 3047 } else if (parent_ino > entry->parent_ino) { 3048 p = &(*p)->rb_right; 3049 } else { 3050 exists = 1; 3051 break; 3052 } 3053 } 3054 3055 list_for_each_entry(cur, deleted_refs, list) { 3056 ret = dup_ref(cur, &pm->update_refs); 3057 if (ret < 0) 3058 goto out; 3059 } 3060 list_for_each_entry(cur, new_refs, list) { 3061 ret = dup_ref(cur, &pm->update_refs); 3062 if (ret < 0) 3063 goto out; 3064 } 3065 3066 ret = add_waiting_dir_move(sctx, pm->ino, is_orphan); 3067 if (ret) 3068 goto out; 3069 3070 if (exists) { 3071 list_add_tail(&pm->list, &entry->list); 3072 } else { 3073 rb_link_node(&pm->node, parent, p); 3074 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 3075 } 3076 ret = 0; 3077 out: 3078 if (ret) { 3079 __free_recorded_refs(&pm->update_refs); 3080 kfree(pm); 3081 } 3082 return ret; 3083 } 3084 3085 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 3086 u64 parent_ino) 3087 { 3088 struct rb_node *n = sctx->pending_dir_moves.rb_node; 3089 struct pending_dir_move *entry; 3090 3091 while (n) { 3092 entry = rb_entry(n, struct pending_dir_move, node); 3093 if (parent_ino < entry->parent_ino) 3094 n = n->rb_left; 3095 else if (parent_ino > entry->parent_ino) 3096 n = n->rb_right; 3097 else 3098 return entry; 3099 } 3100 return NULL; 3101 } 3102 3103 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3104 { 3105 struct fs_path *from_path = NULL; 3106 struct fs_path *to_path = NULL; 3107 struct fs_path *name = NULL; 3108 u64 orig_progress = sctx->send_progress; 3109 struct recorded_ref *cur; 3110 u64 parent_ino, parent_gen; 3111 struct waiting_dir_move *dm = NULL; 3112 u64 rmdir_ino = 0; 3113 int ret; 3114 3115 name = fs_path_alloc(); 3116 from_path = fs_path_alloc(); 3117 if (!name || !from_path) { 3118 ret = -ENOMEM; 3119 goto out; 3120 } 3121 3122 dm = get_waiting_dir_move(sctx, pm->ino); 3123 ASSERT(dm); 3124 rmdir_ino = dm->rmdir_ino; 3125 free_waiting_dir_move(sctx, dm); 3126 3127 if (pm->is_orphan) { 3128 ret = gen_unique_name(sctx, pm->ino, 3129 pm->gen, from_path); 3130 } else { 3131 ret = get_first_ref(sctx->parent_root, pm->ino, 3132 &parent_ino, &parent_gen, name); 3133 if (ret < 0) 3134 goto out; 3135 ret = get_cur_path(sctx, parent_ino, parent_gen, 3136 from_path); 3137 if (ret < 0) 3138 goto out; 3139 ret = fs_path_add_path(from_path, name); 3140 } 3141 if (ret < 0) 3142 goto out; 3143 3144 sctx->send_progress = sctx->cur_ino + 1; 3145 fs_path_reset(name); 3146 to_path = name; 3147 name = NULL; 3148 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3149 if (ret < 0) 3150 goto out; 3151 3152 ret = send_rename(sctx, from_path, to_path); 3153 if (ret < 0) 3154 goto out; 3155 3156 if (rmdir_ino) { 3157 struct orphan_dir_info *odi; 3158 3159 odi = get_orphan_dir_info(sctx, rmdir_ino); 3160 if (!odi) { 3161 /* already deleted */ 3162 goto finish; 3163 } 3164 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3165 if (ret < 0) 3166 goto out; 3167 if (!ret) 3168 goto finish; 3169 3170 name = fs_path_alloc(); 3171 if (!name) { 3172 ret = -ENOMEM; 3173 goto out; 3174 } 3175 ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); 3176 if (ret < 0) 3177 goto out; 3178 ret = send_rmdir(sctx, name); 3179 if (ret < 0) 3180 goto out; 3181 free_orphan_dir_info(sctx, odi); 3182 } 3183 3184 finish: 3185 ret = send_utimes(sctx, pm->ino, pm->gen); 3186 if (ret < 0) 3187 goto out; 3188 3189 /* 3190 * After rename/move, need to update the utimes of both new parent(s) 3191 * and old parent(s). 3192 */ 3193 list_for_each_entry(cur, &pm->update_refs, list) { 3194 if (cur->dir == rmdir_ino) 3195 continue; 3196 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3197 if (ret < 0) 3198 goto out; 3199 } 3200 3201 out: 3202 fs_path_free(name); 3203 fs_path_free(from_path); 3204 fs_path_free(to_path); 3205 sctx->send_progress = orig_progress; 3206 3207 return ret; 3208 } 3209 3210 static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 3211 { 3212 if (!list_empty(&m->list)) 3213 list_del(&m->list); 3214 if (!RB_EMPTY_NODE(&m->node)) 3215 rb_erase(&m->node, &sctx->pending_dir_moves); 3216 __free_recorded_refs(&m->update_refs); 3217 kfree(m); 3218 } 3219 3220 static void tail_append_pending_moves(struct pending_dir_move *moves, 3221 struct list_head *stack) 3222 { 3223 if (list_empty(&moves->list)) { 3224 list_add_tail(&moves->list, stack); 3225 } else { 3226 LIST_HEAD(list); 3227 list_splice_init(&moves->list, &list); 3228 list_add_tail(&moves->list, stack); 3229 list_splice_tail(&list, stack); 3230 } 3231 } 3232 3233 static int apply_children_dir_moves(struct send_ctx *sctx) 3234 { 3235 struct pending_dir_move *pm; 3236 struct list_head stack; 3237 u64 parent_ino = sctx->cur_ino; 3238 int ret = 0; 3239 3240 pm = get_pending_dir_moves(sctx, parent_ino); 3241 if (!pm) 3242 return 0; 3243 3244 INIT_LIST_HEAD(&stack); 3245 tail_append_pending_moves(pm, &stack); 3246 3247 while (!list_empty(&stack)) { 3248 pm = list_first_entry(&stack, struct pending_dir_move, list); 3249 parent_ino = pm->ino; 3250 ret = apply_dir_move(sctx, pm); 3251 free_pending_move(sctx, pm); 3252 if (ret) 3253 goto out; 3254 pm = get_pending_dir_moves(sctx, parent_ino); 3255 if (pm) 3256 tail_append_pending_moves(pm, &stack); 3257 } 3258 return 0; 3259 3260 out: 3261 while (!list_empty(&stack)) { 3262 pm = list_first_entry(&stack, struct pending_dir_move, list); 3263 free_pending_move(sctx, pm); 3264 } 3265 return ret; 3266 } 3267 3268 /* 3269 * We might need to delay a directory rename even when no ancestor directory 3270 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was 3271 * renamed. This happens when we rename a directory to the old name (the name 3272 * in the parent root) of some other unrelated directory that got its rename 3273 * delayed due to some ancestor with higher number that got renamed. 3274 * 3275 * Example: 3276 * 3277 * Parent snapshot: 3278 * . (ino 256) 3279 * |---- a/ (ino 257) 3280 * | |---- file (ino 260) 3281 * | 3282 * |---- b/ (ino 258) 3283 * |---- c/ (ino 259) 3284 * 3285 * Send snapshot: 3286 * . (ino 256) 3287 * |---- a/ (ino 258) 3288 * |---- x/ (ino 259) 3289 * |---- y/ (ino 257) 3290 * |----- file (ino 260) 3291 * 3292 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 3293 * from 'a' to 'x/y' happening first, which in turn depends on the rename of 3294 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream 3295 * must issue is: 3296 * 3297 * 1 - rename 259 from 'c' to 'x' 3298 * 2 - rename 257 from 'a' to 'x/y' 3299 * 3 - rename 258 from 'b' to 'a' 3300 * 3301 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can 3302 * be done right away and < 0 on error. 3303 */ 3304 static int wait_for_dest_dir_move(struct send_ctx *sctx, 3305 struct recorded_ref *parent_ref, 3306 const bool is_orphan) 3307 { 3308 struct btrfs_path *path; 3309 struct btrfs_key key; 3310 struct btrfs_key di_key; 3311 struct btrfs_dir_item *di; 3312 u64 left_gen; 3313 u64 right_gen; 3314 int ret = 0; 3315 3316 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3317 return 0; 3318 3319 path = alloc_path_for_send(); 3320 if (!path) 3321 return -ENOMEM; 3322 3323 key.objectid = parent_ref->dir; 3324 key.type = BTRFS_DIR_ITEM_KEY; 3325 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); 3326 3327 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); 3328 if (ret < 0) { 3329 goto out; 3330 } else if (ret > 0) { 3331 ret = 0; 3332 goto out; 3333 } 3334 3335 di = btrfs_match_dir_item_name(sctx->parent_root, path, 3336 parent_ref->name, parent_ref->name_len); 3337 if (!di) { 3338 ret = 0; 3339 goto out; 3340 } 3341 /* 3342 * di_key.objectid has the number of the inode that has a dentry in the 3343 * parent directory with the same name that sctx->cur_ino is being 3344 * renamed to. We need to check if that inode is in the send root as 3345 * well and if it is currently marked as an inode with a pending rename, 3346 * if it is, we need to delay the rename of sctx->cur_ino as well, so 3347 * that it happens after that other inode is renamed. 3348 */ 3349 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); 3350 if (di_key.type != BTRFS_INODE_ITEM_KEY) { 3351 ret = 0; 3352 goto out; 3353 } 3354 3355 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, 3356 &left_gen, NULL, NULL, NULL, NULL); 3357 if (ret < 0) 3358 goto out; 3359 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, 3360 &right_gen, NULL, NULL, NULL, NULL); 3361 if (ret < 0) { 3362 if (ret == -ENOENT) 3363 ret = 0; 3364 goto out; 3365 } 3366 3367 /* Different inode, no need to delay the rename of sctx->cur_ino */ 3368 if (right_gen != left_gen) { 3369 ret = 0; 3370 goto out; 3371 } 3372 3373 if (is_waiting_for_move(sctx, di_key.objectid)) { 3374 ret = add_pending_dir_move(sctx, 3375 sctx->cur_ino, 3376 sctx->cur_inode_gen, 3377 di_key.objectid, 3378 &sctx->new_refs, 3379 &sctx->deleted_refs, 3380 is_orphan); 3381 if (!ret) 3382 ret = 1; 3383 } 3384 out: 3385 btrfs_free_path(path); 3386 return ret; 3387 } 3388 3389 /* 3390 * Check if ino ino1 is an ancestor of inode ino2 in the given root. 3391 * Return 1 if true, 0 if false and < 0 on error. 3392 */ 3393 static int is_ancestor(struct btrfs_root *root, 3394 const u64 ino1, 3395 const u64 ino1_gen, 3396 const u64 ino2, 3397 struct fs_path *fs_path) 3398 { 3399 u64 ino = ino2; 3400 3401 while (ino > BTRFS_FIRST_FREE_OBJECTID) { 3402 int ret; 3403 u64 parent; 3404 u64 parent_gen; 3405 3406 fs_path_reset(fs_path); 3407 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); 3408 if (ret < 0) { 3409 if (ret == -ENOENT && ino == ino2) 3410 ret = 0; 3411 return ret; 3412 } 3413 if (parent == ino1) 3414 return parent_gen == ino1_gen ? 1 : 0; 3415 ino = parent; 3416 } 3417 return 0; 3418 } 3419 3420 static int wait_for_parent_move(struct send_ctx *sctx, 3421 struct recorded_ref *parent_ref, 3422 const bool is_orphan) 3423 { 3424 int ret = 0; 3425 u64 ino = parent_ref->dir; 3426 u64 parent_ino_before, parent_ino_after; 3427 struct fs_path *path_before = NULL; 3428 struct fs_path *path_after = NULL; 3429 int len1, len2; 3430 3431 path_after = fs_path_alloc(); 3432 path_before = fs_path_alloc(); 3433 if (!path_after || !path_before) { 3434 ret = -ENOMEM; 3435 goto out; 3436 } 3437 3438 /* 3439 * Our current directory inode may not yet be renamed/moved because some 3440 * ancestor (immediate or not) has to be renamed/moved first. So find if 3441 * such ancestor exists and make sure our own rename/move happens after 3442 * that ancestor is processed to avoid path build infinite loops (done 3443 * at get_cur_path()). 3444 */ 3445 while (ino > BTRFS_FIRST_FREE_OBJECTID) { 3446 if (is_waiting_for_move(sctx, ino)) { 3447 /* 3448 * If the current inode is an ancestor of ino in the 3449 * parent root, we need to delay the rename of the 3450 * current inode, otherwise don't delayed the rename 3451 * because we can end up with a circular dependency 3452 * of renames, resulting in some directories never 3453 * getting the respective rename operations issued in 3454 * the send stream or getting into infinite path build 3455 * loops. 3456 */ 3457 ret = is_ancestor(sctx->parent_root, 3458 sctx->cur_ino, sctx->cur_inode_gen, 3459 ino, path_before); 3460 break; 3461 } 3462 3463 fs_path_reset(path_before); 3464 fs_path_reset(path_after); 3465 3466 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3467 NULL, path_after); 3468 if (ret < 0) 3469 goto out; 3470 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3471 NULL, path_before); 3472 if (ret < 0 && ret != -ENOENT) { 3473 goto out; 3474 } else if (ret == -ENOENT) { 3475 ret = 0; 3476 break; 3477 } 3478 3479 len1 = fs_path_len(path_before); 3480 len2 = fs_path_len(path_after); 3481 if (ino > sctx->cur_ino && 3482 (parent_ino_before != parent_ino_after || len1 != len2 || 3483 memcmp(path_before->start, path_after->start, len1))) { 3484 ret = 1; 3485 break; 3486 } 3487 ino = parent_ino_after; 3488 } 3489 3490 out: 3491 fs_path_free(path_before); 3492 fs_path_free(path_after); 3493 3494 if (ret == 1) { 3495 ret = add_pending_dir_move(sctx, 3496 sctx->cur_ino, 3497 sctx->cur_inode_gen, 3498 ino, 3499 &sctx->new_refs, 3500 &sctx->deleted_refs, 3501 is_orphan); 3502 if (!ret) 3503 ret = 1; 3504 } 3505 3506 return ret; 3507 } 3508 3509 /* 3510 * This does all the move/link/unlink/rmdir magic. 3511 */ 3512 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) 3513 { 3514 int ret = 0; 3515 struct recorded_ref *cur; 3516 struct recorded_ref *cur2; 3517 struct list_head check_dirs; 3518 struct fs_path *valid_path = NULL; 3519 u64 ow_inode = 0; 3520 u64 ow_gen; 3521 int did_overwrite = 0; 3522 int is_orphan = 0; 3523 u64 last_dir_ino_rm = 0; 3524 bool can_rename = true; 3525 3526 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3527 3528 /* 3529 * This should never happen as the root dir always has the same ref 3530 * which is always '..' 3531 */ 3532 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 3533 INIT_LIST_HEAD(&check_dirs); 3534 3535 valid_path = fs_path_alloc(); 3536 if (!valid_path) { 3537 ret = -ENOMEM; 3538 goto out; 3539 } 3540 3541 /* 3542 * First, check if the first ref of the current inode was overwritten 3543 * before. If yes, we know that the current inode was already orphanized 3544 * and thus use the orphan name. If not, we can use get_cur_path to 3545 * get the path of the first ref as it would like while receiving at 3546 * this point in time. 3547 * New inodes are always orphan at the beginning, so force to use the 3548 * orphan name in this case. 3549 * The first ref is stored in valid_path and will be updated if it 3550 * gets moved around. 3551 */ 3552 if (!sctx->cur_inode_new) { 3553 ret = did_overwrite_first_ref(sctx, sctx->cur_ino, 3554 sctx->cur_inode_gen); 3555 if (ret < 0) 3556 goto out; 3557 if (ret) 3558 did_overwrite = 1; 3559 } 3560 if (sctx->cur_inode_new || did_overwrite) { 3561 ret = gen_unique_name(sctx, sctx->cur_ino, 3562 sctx->cur_inode_gen, valid_path); 3563 if (ret < 0) 3564 goto out; 3565 is_orphan = 1; 3566 } else { 3567 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3568 valid_path); 3569 if (ret < 0) 3570 goto out; 3571 } 3572 3573 list_for_each_entry(cur, &sctx->new_refs, list) { 3574 /* 3575 * We may have refs where the parent directory does not exist 3576 * yet. This happens if the parent directories inum is higher 3577 * the the current inum. To handle this case, we create the 3578 * parent directory out of order. But we need to check if this 3579 * did already happen before due to other refs in the same dir. 3580 */ 3581 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3582 if (ret < 0) 3583 goto out; 3584 if (ret == inode_state_will_create) { 3585 ret = 0; 3586 /* 3587 * First check if any of the current inodes refs did 3588 * already create the dir. 3589 */ 3590 list_for_each_entry(cur2, &sctx->new_refs, list) { 3591 if (cur == cur2) 3592 break; 3593 if (cur2->dir == cur->dir) { 3594 ret = 1; 3595 break; 3596 } 3597 } 3598 3599 /* 3600 * If that did not happen, check if a previous inode 3601 * did already create the dir. 3602 */ 3603 if (!ret) 3604 ret = did_create_dir(sctx, cur->dir); 3605 if (ret < 0) 3606 goto out; 3607 if (!ret) { 3608 ret = send_create_inode(sctx, cur->dir); 3609 if (ret < 0) 3610 goto out; 3611 } 3612 } 3613 3614 /* 3615 * Check if this new ref would overwrite the first ref of 3616 * another unprocessed inode. If yes, orphanize the 3617 * overwritten inode. If we find an overwritten ref that is 3618 * not the first ref, simply unlink it. 3619 */ 3620 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3621 cur->name, cur->name_len, 3622 &ow_inode, &ow_gen); 3623 if (ret < 0) 3624 goto out; 3625 if (ret) { 3626 ret = is_first_ref(sctx->parent_root, 3627 ow_inode, cur->dir, cur->name, 3628 cur->name_len); 3629 if (ret < 0) 3630 goto out; 3631 if (ret) { 3632 struct name_cache_entry *nce; 3633 3634 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3635 cur->full_path); 3636 if (ret < 0) 3637 goto out; 3638 /* 3639 * Make sure we clear our orphanized inode's 3640 * name from the name cache. This is because the 3641 * inode ow_inode might be an ancestor of some 3642 * other inode that will be orphanized as well 3643 * later and has an inode number greater than 3644 * sctx->send_progress. We need to prevent 3645 * future name lookups from using the old name 3646 * and get instead the orphan name. 3647 */ 3648 nce = name_cache_search(sctx, ow_inode, ow_gen); 3649 if (nce) { 3650 name_cache_delete(sctx, nce); 3651 kfree(nce); 3652 } 3653 } else { 3654 ret = send_unlink(sctx, cur->full_path); 3655 if (ret < 0) 3656 goto out; 3657 } 3658 } 3659 3660 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { 3661 ret = wait_for_dest_dir_move(sctx, cur, is_orphan); 3662 if (ret < 0) 3663 goto out; 3664 if (ret == 1) { 3665 can_rename = false; 3666 *pending_move = 1; 3667 } 3668 } 3669 3670 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root && 3671 can_rename) { 3672 ret = wait_for_parent_move(sctx, cur, is_orphan); 3673 if (ret < 0) 3674 goto out; 3675 if (ret == 1) { 3676 can_rename = false; 3677 *pending_move = 1; 3678 } 3679 } 3680 3681 /* 3682 * link/move the ref to the new place. If we have an orphan 3683 * inode, move it and update valid_path. If not, link or move 3684 * it depending on the inode mode. 3685 */ 3686 if (is_orphan && can_rename) { 3687 ret = send_rename(sctx, valid_path, cur->full_path); 3688 if (ret < 0) 3689 goto out; 3690 is_orphan = 0; 3691 ret = fs_path_copy(valid_path, cur->full_path); 3692 if (ret < 0) 3693 goto out; 3694 } else if (can_rename) { 3695 if (S_ISDIR(sctx->cur_inode_mode)) { 3696 /* 3697 * Dirs can't be linked, so move it. For moved 3698 * dirs, we always have one new and one deleted 3699 * ref. The deleted ref is ignored later. 3700 */ 3701 ret = send_rename(sctx, valid_path, 3702 cur->full_path); 3703 if (!ret) 3704 ret = fs_path_copy(valid_path, 3705 cur->full_path); 3706 if (ret < 0) 3707 goto out; 3708 } else { 3709 ret = send_link(sctx, cur->full_path, 3710 valid_path); 3711 if (ret < 0) 3712 goto out; 3713 } 3714 } 3715 ret = dup_ref(cur, &check_dirs); 3716 if (ret < 0) 3717 goto out; 3718 } 3719 3720 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3721 /* 3722 * Check if we can already rmdir the directory. If not, 3723 * orphanize it. For every dir item inside that gets deleted 3724 * later, we do this check again and rmdir it then if possible. 3725 * See the use of check_dirs for more details. 3726 */ 3727 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3728 sctx->cur_ino); 3729 if (ret < 0) 3730 goto out; 3731 if (ret) { 3732 ret = send_rmdir(sctx, valid_path); 3733 if (ret < 0) 3734 goto out; 3735 } else if (!is_orphan) { 3736 ret = orphanize_inode(sctx, sctx->cur_ino, 3737 sctx->cur_inode_gen, valid_path); 3738 if (ret < 0) 3739 goto out; 3740 is_orphan = 1; 3741 } 3742 3743 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3744 ret = dup_ref(cur, &check_dirs); 3745 if (ret < 0) 3746 goto out; 3747 } 3748 } else if (S_ISDIR(sctx->cur_inode_mode) && 3749 !list_empty(&sctx->deleted_refs)) { 3750 /* 3751 * We have a moved dir. Add the old parent to check_dirs 3752 */ 3753 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3754 list); 3755 ret = dup_ref(cur, &check_dirs); 3756 if (ret < 0) 3757 goto out; 3758 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3759 /* 3760 * We have a non dir inode. Go through all deleted refs and 3761 * unlink them if they were not already overwritten by other 3762 * inodes. 3763 */ 3764 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3765 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3766 sctx->cur_ino, sctx->cur_inode_gen, 3767 cur->name, cur->name_len); 3768 if (ret < 0) 3769 goto out; 3770 if (!ret) { 3771 ret = send_unlink(sctx, cur->full_path); 3772 if (ret < 0) 3773 goto out; 3774 } 3775 ret = dup_ref(cur, &check_dirs); 3776 if (ret < 0) 3777 goto out; 3778 } 3779 /* 3780 * If the inode is still orphan, unlink the orphan. This may 3781 * happen when a previous inode did overwrite the first ref 3782 * of this inode and no new refs were added for the current 3783 * inode. Unlinking does not mean that the inode is deleted in 3784 * all cases. There may still be links to this inode in other 3785 * places. 3786 */ 3787 if (is_orphan) { 3788 ret = send_unlink(sctx, valid_path); 3789 if (ret < 0) 3790 goto out; 3791 } 3792 } 3793 3794 /* 3795 * We did collect all parent dirs where cur_inode was once located. We 3796 * now go through all these dirs and check if they are pending for 3797 * deletion and if it's finally possible to perform the rmdir now. 3798 * We also update the inode stats of the parent dirs here. 3799 */ 3800 list_for_each_entry(cur, &check_dirs, list) { 3801 /* 3802 * In case we had refs into dirs that were not processed yet, 3803 * we don't need to do the utime and rmdir logic for these dirs. 3804 * The dir will be processed later. 3805 */ 3806 if (cur->dir > sctx->cur_ino) 3807 continue; 3808 3809 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3810 if (ret < 0) 3811 goto out; 3812 3813 if (ret == inode_state_did_create || 3814 ret == inode_state_no_change) { 3815 /* TODO delayed utimes */ 3816 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3817 if (ret < 0) 3818 goto out; 3819 } else if (ret == inode_state_did_delete && 3820 cur->dir != last_dir_ino_rm) { 3821 ret = can_rmdir(sctx, cur->dir, cur->dir_gen, 3822 sctx->cur_ino); 3823 if (ret < 0) 3824 goto out; 3825 if (ret) { 3826 ret = get_cur_path(sctx, cur->dir, 3827 cur->dir_gen, valid_path); 3828 if (ret < 0) 3829 goto out; 3830 ret = send_rmdir(sctx, valid_path); 3831 if (ret < 0) 3832 goto out; 3833 last_dir_ino_rm = cur->dir; 3834 } 3835 } 3836 } 3837 3838 ret = 0; 3839 3840 out: 3841 __free_recorded_refs(&check_dirs); 3842 free_recorded_refs(sctx); 3843 fs_path_free(valid_path); 3844 return ret; 3845 } 3846 3847 static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, 3848 struct fs_path *name, void *ctx, struct list_head *refs) 3849 { 3850 int ret = 0; 3851 struct send_ctx *sctx = ctx; 3852 struct fs_path *p; 3853 u64 gen; 3854 3855 p = fs_path_alloc(); 3856 if (!p) 3857 return -ENOMEM; 3858 3859 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, 3860 NULL, NULL); 3861 if (ret < 0) 3862 goto out; 3863 3864 ret = get_cur_path(sctx, dir, gen, p); 3865 if (ret < 0) 3866 goto out; 3867 ret = fs_path_add_path(p, name); 3868 if (ret < 0) 3869 goto out; 3870 3871 ret = __record_ref(refs, dir, gen, p); 3872 3873 out: 3874 if (ret) 3875 fs_path_free(p); 3876 return ret; 3877 } 3878 3879 static int __record_new_ref(int num, u64 dir, int index, 3880 struct fs_path *name, 3881 void *ctx) 3882 { 3883 struct send_ctx *sctx = ctx; 3884 return record_ref(sctx->send_root, num, dir, index, name, 3885 ctx, &sctx->new_refs); 3886 } 3887 3888 3889 static int __record_deleted_ref(int num, u64 dir, int index, 3890 struct fs_path *name, 3891 void *ctx) 3892 { 3893 struct send_ctx *sctx = ctx; 3894 return record_ref(sctx->parent_root, num, dir, index, name, 3895 ctx, &sctx->deleted_refs); 3896 } 3897 3898 static int record_new_ref(struct send_ctx *sctx) 3899 { 3900 int ret; 3901 3902 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3903 sctx->cmp_key, 0, __record_new_ref, sctx); 3904 if (ret < 0) 3905 goto out; 3906 ret = 0; 3907 3908 out: 3909 return ret; 3910 } 3911 3912 static int record_deleted_ref(struct send_ctx *sctx) 3913 { 3914 int ret; 3915 3916 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3917 sctx->cmp_key, 0, __record_deleted_ref, sctx); 3918 if (ret < 0) 3919 goto out; 3920 ret = 0; 3921 3922 out: 3923 return ret; 3924 } 3925 3926 struct find_ref_ctx { 3927 u64 dir; 3928 u64 dir_gen; 3929 struct btrfs_root *root; 3930 struct fs_path *name; 3931 int found_idx; 3932 }; 3933 3934 static int __find_iref(int num, u64 dir, int index, 3935 struct fs_path *name, 3936 void *ctx_) 3937 { 3938 struct find_ref_ctx *ctx = ctx_; 3939 u64 dir_gen; 3940 int ret; 3941 3942 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3943 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3944 /* 3945 * To avoid doing extra lookups we'll only do this if everything 3946 * else matches. 3947 */ 3948 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 3949 NULL, NULL, NULL); 3950 if (ret) 3951 return ret; 3952 if (dir_gen != ctx->dir_gen) 3953 return 0; 3954 ctx->found_idx = num; 3955 return 1; 3956 } 3957 return 0; 3958 } 3959 3960 static int find_iref(struct btrfs_root *root, 3961 struct btrfs_path *path, 3962 struct btrfs_key *key, 3963 u64 dir, u64 dir_gen, struct fs_path *name) 3964 { 3965 int ret; 3966 struct find_ref_ctx ctx; 3967 3968 ctx.dir = dir; 3969 ctx.name = name; 3970 ctx.dir_gen = dir_gen; 3971 ctx.found_idx = -1; 3972 ctx.root = root; 3973 3974 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3975 if (ret < 0) 3976 return ret; 3977 3978 if (ctx.found_idx == -1) 3979 return -ENOENT; 3980 3981 return ctx.found_idx; 3982 } 3983 3984 static int __record_changed_new_ref(int num, u64 dir, int index, 3985 struct fs_path *name, 3986 void *ctx) 3987 { 3988 u64 dir_gen; 3989 int ret; 3990 struct send_ctx *sctx = ctx; 3991 3992 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 3993 NULL, NULL, NULL); 3994 if (ret) 3995 return ret; 3996 3997 ret = find_iref(sctx->parent_root, sctx->right_path, 3998 sctx->cmp_key, dir, dir_gen, name); 3999 if (ret == -ENOENT) 4000 ret = __record_new_ref(num, dir, index, name, sctx); 4001 else if (ret > 0) 4002 ret = 0; 4003 4004 return ret; 4005 } 4006 4007 static int __record_changed_deleted_ref(int num, u64 dir, int index, 4008 struct fs_path *name, 4009 void *ctx) 4010 { 4011 u64 dir_gen; 4012 int ret; 4013 struct send_ctx *sctx = ctx; 4014 4015 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 4016 NULL, NULL, NULL); 4017 if (ret) 4018 return ret; 4019 4020 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 4021 dir, dir_gen, name); 4022 if (ret == -ENOENT) 4023 ret = __record_deleted_ref(num, dir, index, name, sctx); 4024 else if (ret > 0) 4025 ret = 0; 4026 4027 return ret; 4028 } 4029 4030 static int record_changed_ref(struct send_ctx *sctx) 4031 { 4032 int ret = 0; 4033 4034 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4035 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 4036 if (ret < 0) 4037 goto out; 4038 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4039 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 4040 if (ret < 0) 4041 goto out; 4042 ret = 0; 4043 4044 out: 4045 return ret; 4046 } 4047 4048 /* 4049 * Record and process all refs at once. Needed when an inode changes the 4050 * generation number, which means that it was deleted and recreated. 4051 */ 4052 static int process_all_refs(struct send_ctx *sctx, 4053 enum btrfs_compare_tree_result cmd) 4054 { 4055 int ret; 4056 struct btrfs_root *root; 4057 struct btrfs_path *path; 4058 struct btrfs_key key; 4059 struct btrfs_key found_key; 4060 struct extent_buffer *eb; 4061 int slot; 4062 iterate_inode_ref_t cb; 4063 int pending_move = 0; 4064 4065 path = alloc_path_for_send(); 4066 if (!path) 4067 return -ENOMEM; 4068 4069 if (cmd == BTRFS_COMPARE_TREE_NEW) { 4070 root = sctx->send_root; 4071 cb = __record_new_ref; 4072 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { 4073 root = sctx->parent_root; 4074 cb = __record_deleted_ref; 4075 } else { 4076 btrfs_err(sctx->send_root->fs_info, 4077 "Wrong command %d in process_all_refs", cmd); 4078 ret = -EINVAL; 4079 goto out; 4080 } 4081 4082 key.objectid = sctx->cmp_key->objectid; 4083 key.type = BTRFS_INODE_REF_KEY; 4084 key.offset = 0; 4085 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4086 if (ret < 0) 4087 goto out; 4088 4089 while (1) { 4090 eb = path->nodes[0]; 4091 slot = path->slots[0]; 4092 if (slot >= btrfs_header_nritems(eb)) { 4093 ret = btrfs_next_leaf(root, path); 4094 if (ret < 0) 4095 goto out; 4096 else if (ret > 0) 4097 break; 4098 continue; 4099 } 4100 4101 btrfs_item_key_to_cpu(eb, &found_key, slot); 4102 4103 if (found_key.objectid != key.objectid || 4104 (found_key.type != BTRFS_INODE_REF_KEY && 4105 found_key.type != BTRFS_INODE_EXTREF_KEY)) 4106 break; 4107 4108 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 4109 if (ret < 0) 4110 goto out; 4111 4112 path->slots[0]++; 4113 } 4114 btrfs_release_path(path); 4115 4116 ret = process_recorded_refs(sctx, &pending_move); 4117 /* Only applicable to an incremental send. */ 4118 ASSERT(pending_move == 0); 4119 4120 out: 4121 btrfs_free_path(path); 4122 return ret; 4123 } 4124 4125 static int send_set_xattr(struct send_ctx *sctx, 4126 struct fs_path *path, 4127 const char *name, int name_len, 4128 const char *data, int data_len) 4129 { 4130 int ret = 0; 4131 4132 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 4133 if (ret < 0) 4134 goto out; 4135 4136 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4137 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 4138 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); 4139 4140 ret = send_cmd(sctx); 4141 4142 tlv_put_failure: 4143 out: 4144 return ret; 4145 } 4146 4147 static int send_remove_xattr(struct send_ctx *sctx, 4148 struct fs_path *path, 4149 const char *name, int name_len) 4150 { 4151 int ret = 0; 4152 4153 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 4154 if (ret < 0) 4155 goto out; 4156 4157 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4158 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 4159 4160 ret = send_cmd(sctx); 4161 4162 tlv_put_failure: 4163 out: 4164 return ret; 4165 } 4166 4167 static int __process_new_xattr(int num, struct btrfs_key *di_key, 4168 const char *name, int name_len, 4169 const char *data, int data_len, 4170 u8 type, void *ctx) 4171 { 4172 int ret; 4173 struct send_ctx *sctx = ctx; 4174 struct fs_path *p; 4175 posix_acl_xattr_header dummy_acl; 4176 4177 p = fs_path_alloc(); 4178 if (!p) 4179 return -ENOMEM; 4180 4181 /* 4182 * This hack is needed because empty acl's are stored as zero byte 4183 * data in xattrs. Problem with that is, that receiving these zero byte 4184 * acl's will fail later. To fix this, we send a dummy acl list that 4185 * only contains the version number and no entries. 4186 */ 4187 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 4188 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 4189 if (data_len == 0) { 4190 dummy_acl.a_version = 4191 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 4192 data = (char *)&dummy_acl; 4193 data_len = sizeof(dummy_acl); 4194 } 4195 } 4196 4197 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4198 if (ret < 0) 4199 goto out; 4200 4201 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 4202 4203 out: 4204 fs_path_free(p); 4205 return ret; 4206 } 4207 4208 static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4209 const char *name, int name_len, 4210 const char *data, int data_len, 4211 u8 type, void *ctx) 4212 { 4213 int ret; 4214 struct send_ctx *sctx = ctx; 4215 struct fs_path *p; 4216 4217 p = fs_path_alloc(); 4218 if (!p) 4219 return -ENOMEM; 4220 4221 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4222 if (ret < 0) 4223 goto out; 4224 4225 ret = send_remove_xattr(sctx, p, name, name_len); 4226 4227 out: 4228 fs_path_free(p); 4229 return ret; 4230 } 4231 4232 static int process_new_xattr(struct send_ctx *sctx) 4233 { 4234 int ret = 0; 4235 4236 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4237 sctx->cmp_key, __process_new_xattr, sctx); 4238 4239 return ret; 4240 } 4241 4242 static int process_deleted_xattr(struct send_ctx *sctx) 4243 { 4244 int ret; 4245 4246 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4247 sctx->cmp_key, __process_deleted_xattr, sctx); 4248 4249 return ret; 4250 } 4251 4252 struct find_xattr_ctx { 4253 const char *name; 4254 int name_len; 4255 int found_idx; 4256 char *found_data; 4257 int found_data_len; 4258 }; 4259 4260 static int __find_xattr(int num, struct btrfs_key *di_key, 4261 const char *name, int name_len, 4262 const char *data, int data_len, 4263 u8 type, void *vctx) 4264 { 4265 struct find_xattr_ctx *ctx = vctx; 4266 4267 if (name_len == ctx->name_len && 4268 strncmp(name, ctx->name, name_len) == 0) { 4269 ctx->found_idx = num; 4270 ctx->found_data_len = data_len; 4271 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 4272 if (!ctx->found_data) 4273 return -ENOMEM; 4274 return 1; 4275 } 4276 return 0; 4277 } 4278 4279 static int find_xattr(struct btrfs_root *root, 4280 struct btrfs_path *path, 4281 struct btrfs_key *key, 4282 const char *name, int name_len, 4283 char **data, int *data_len) 4284 { 4285 int ret; 4286 struct find_xattr_ctx ctx; 4287 4288 ctx.name = name; 4289 ctx.name_len = name_len; 4290 ctx.found_idx = -1; 4291 ctx.found_data = NULL; 4292 ctx.found_data_len = 0; 4293 4294 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 4295 if (ret < 0) 4296 return ret; 4297 4298 if (ctx.found_idx == -1) 4299 return -ENOENT; 4300 if (data) { 4301 *data = ctx.found_data; 4302 *data_len = ctx.found_data_len; 4303 } else { 4304 kfree(ctx.found_data); 4305 } 4306 return ctx.found_idx; 4307 } 4308 4309 4310 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 4311 const char *name, int name_len, 4312 const char *data, int data_len, 4313 u8 type, void *ctx) 4314 { 4315 int ret; 4316 struct send_ctx *sctx = ctx; 4317 char *found_data = NULL; 4318 int found_data_len = 0; 4319 4320 ret = find_xattr(sctx->parent_root, sctx->right_path, 4321 sctx->cmp_key, name, name_len, &found_data, 4322 &found_data_len); 4323 if (ret == -ENOENT) { 4324 ret = __process_new_xattr(num, di_key, name, name_len, data, 4325 data_len, type, ctx); 4326 } else if (ret >= 0) { 4327 if (data_len != found_data_len || 4328 memcmp(data, found_data, data_len)) { 4329 ret = __process_new_xattr(num, di_key, name, name_len, 4330 data, data_len, type, ctx); 4331 } else { 4332 ret = 0; 4333 } 4334 } 4335 4336 kfree(found_data); 4337 return ret; 4338 } 4339 4340 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, 4341 const char *name, int name_len, 4342 const char *data, int data_len, 4343 u8 type, void *ctx) 4344 { 4345 int ret; 4346 struct send_ctx *sctx = ctx; 4347 4348 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key, 4349 name, name_len, NULL, NULL); 4350 if (ret == -ENOENT) 4351 ret = __process_deleted_xattr(num, di_key, name, name_len, data, 4352 data_len, type, ctx); 4353 else if (ret >= 0) 4354 ret = 0; 4355 4356 return ret; 4357 } 4358 4359 static int process_changed_xattr(struct send_ctx *sctx) 4360 { 4361 int ret = 0; 4362 4363 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4364 sctx->cmp_key, __process_changed_new_xattr, sctx); 4365 if (ret < 0) 4366 goto out; 4367 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4368 sctx->cmp_key, __process_changed_deleted_xattr, sctx); 4369 4370 out: 4371 return ret; 4372 } 4373 4374 static int process_all_new_xattrs(struct send_ctx *sctx) 4375 { 4376 int ret; 4377 struct btrfs_root *root; 4378 struct btrfs_path *path; 4379 struct btrfs_key key; 4380 struct btrfs_key found_key; 4381 struct extent_buffer *eb; 4382 int slot; 4383 4384 path = alloc_path_for_send(); 4385 if (!path) 4386 return -ENOMEM; 4387 4388 root = sctx->send_root; 4389 4390 key.objectid = sctx->cmp_key->objectid; 4391 key.type = BTRFS_XATTR_ITEM_KEY; 4392 key.offset = 0; 4393 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4394 if (ret < 0) 4395 goto out; 4396 4397 while (1) { 4398 eb = path->nodes[0]; 4399 slot = path->slots[0]; 4400 if (slot >= btrfs_header_nritems(eb)) { 4401 ret = btrfs_next_leaf(root, path); 4402 if (ret < 0) { 4403 goto out; 4404 } else if (ret > 0) { 4405 ret = 0; 4406 break; 4407 } 4408 continue; 4409 } 4410 4411 btrfs_item_key_to_cpu(eb, &found_key, slot); 4412 if (found_key.objectid != key.objectid || 4413 found_key.type != key.type) { 4414 ret = 0; 4415 goto out; 4416 } 4417 4418 ret = iterate_dir_item(root, path, &found_key, 4419 __process_new_xattr, sctx); 4420 if (ret < 0) 4421 goto out; 4422 4423 path->slots[0]++; 4424 } 4425 4426 out: 4427 btrfs_free_path(path); 4428 return ret; 4429 } 4430 4431 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) 4432 { 4433 struct btrfs_root *root = sctx->send_root; 4434 struct btrfs_fs_info *fs_info = root->fs_info; 4435 struct inode *inode; 4436 struct page *page; 4437 char *addr; 4438 struct btrfs_key key; 4439 pgoff_t index = offset >> PAGE_CACHE_SHIFT; 4440 pgoff_t last_index; 4441 unsigned pg_offset = offset & ~PAGE_CACHE_MASK; 4442 ssize_t ret = 0; 4443 4444 key.objectid = sctx->cur_ino; 4445 key.type = BTRFS_INODE_ITEM_KEY; 4446 key.offset = 0; 4447 4448 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 4449 if (IS_ERR(inode)) 4450 return PTR_ERR(inode); 4451 4452 if (offset + len > i_size_read(inode)) { 4453 if (offset > i_size_read(inode)) 4454 len = 0; 4455 else 4456 len = offset - i_size_read(inode); 4457 } 4458 if (len == 0) 4459 goto out; 4460 4461 last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; 4462 4463 /* initial readahead */ 4464 memset(&sctx->ra, 0, sizeof(struct file_ra_state)); 4465 file_ra_state_init(&sctx->ra, inode->i_mapping); 4466 btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, 4467 last_index - index + 1); 4468 4469 while (index <= last_index) { 4470 unsigned cur_len = min_t(unsigned, len, 4471 PAGE_CACHE_SIZE - pg_offset); 4472 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4473 if (!page) { 4474 ret = -ENOMEM; 4475 break; 4476 } 4477 4478 if (!PageUptodate(page)) { 4479 btrfs_readpage(NULL, page); 4480 lock_page(page); 4481 if (!PageUptodate(page)) { 4482 unlock_page(page); 4483 page_cache_release(page); 4484 ret = -EIO; 4485 break; 4486 } 4487 } 4488 4489 addr = kmap(page); 4490 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4491 kunmap(page); 4492 unlock_page(page); 4493 page_cache_release(page); 4494 index++; 4495 pg_offset = 0; 4496 len -= cur_len; 4497 ret += cur_len; 4498 } 4499 out: 4500 iput(inode); 4501 return ret; 4502 } 4503 4504 /* 4505 * Read some bytes from the current inode/file and send a write command to 4506 * user space. 4507 */ 4508 static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4509 { 4510 int ret = 0; 4511 struct fs_path *p; 4512 ssize_t num_read = 0; 4513 4514 p = fs_path_alloc(); 4515 if (!p) 4516 return -ENOMEM; 4517 4518 verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 4519 4520 num_read = fill_read_buf(sctx, offset, len); 4521 if (num_read <= 0) { 4522 if (num_read < 0) 4523 ret = num_read; 4524 goto out; 4525 } 4526 4527 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4528 if (ret < 0) 4529 goto out; 4530 4531 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4532 if (ret < 0) 4533 goto out; 4534 4535 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4536 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4537 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4538 4539 ret = send_cmd(sctx); 4540 4541 tlv_put_failure: 4542 out: 4543 fs_path_free(p); 4544 if (ret < 0) 4545 return ret; 4546 return num_read; 4547 } 4548 4549 /* 4550 * Send a clone command to user space. 4551 */ 4552 static int send_clone(struct send_ctx *sctx, 4553 u64 offset, u32 len, 4554 struct clone_root *clone_root) 4555 { 4556 int ret = 0; 4557 struct fs_path *p; 4558 u64 gen; 4559 4560 verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " 4561 "clone_inode=%llu, clone_offset=%llu\n", offset, len, 4562 clone_root->root->objectid, clone_root->ino, 4563 clone_root->offset); 4564 4565 p = fs_path_alloc(); 4566 if (!p) 4567 return -ENOMEM; 4568 4569 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4570 if (ret < 0) 4571 goto out; 4572 4573 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4574 if (ret < 0) 4575 goto out; 4576 4577 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4578 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4579 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4580 4581 if (clone_root->root == sctx->send_root) { 4582 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4583 &gen, NULL, NULL, NULL, NULL); 4584 if (ret < 0) 4585 goto out; 4586 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4587 } else { 4588 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4589 } 4590 if (ret < 0) 4591 goto out; 4592 4593 /* 4594 * If the parent we're using has a received_uuid set then use that as 4595 * our clone source as that is what we will look for when doing a 4596 * receive. 4597 * 4598 * This covers the case that we create a snapshot off of a received 4599 * subvolume and then use that as the parent and try to receive on a 4600 * different host. 4601 */ 4602 if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid)) 4603 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4604 clone_root->root->root_item.received_uuid); 4605 else 4606 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4607 clone_root->root->root_item.uuid); 4608 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4609 le64_to_cpu(clone_root->root->root_item.ctransid)); 4610 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4611 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4612 clone_root->offset); 4613 4614 ret = send_cmd(sctx); 4615 4616 tlv_put_failure: 4617 out: 4618 fs_path_free(p); 4619 return ret; 4620 } 4621 4622 /* 4623 * Send an update extent command to user space. 4624 */ 4625 static int send_update_extent(struct send_ctx *sctx, 4626 u64 offset, u32 len) 4627 { 4628 int ret = 0; 4629 struct fs_path *p; 4630 4631 p = fs_path_alloc(); 4632 if (!p) 4633 return -ENOMEM; 4634 4635 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4636 if (ret < 0) 4637 goto out; 4638 4639 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4640 if (ret < 0) 4641 goto out; 4642 4643 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4644 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4645 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4646 4647 ret = send_cmd(sctx); 4648 4649 tlv_put_failure: 4650 out: 4651 fs_path_free(p); 4652 return ret; 4653 } 4654 4655 static int send_hole(struct send_ctx *sctx, u64 end) 4656 { 4657 struct fs_path *p = NULL; 4658 u64 offset = sctx->cur_inode_last_extent; 4659 u64 len; 4660 int ret = 0; 4661 4662 p = fs_path_alloc(); 4663 if (!p) 4664 return -ENOMEM; 4665 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4666 if (ret < 0) 4667 goto tlv_put_failure; 4668 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4669 while (offset < end) { 4670 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4671 4672 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4673 if (ret < 0) 4674 break; 4675 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4676 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4677 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4678 ret = send_cmd(sctx); 4679 if (ret < 0) 4680 break; 4681 offset += len; 4682 } 4683 tlv_put_failure: 4684 fs_path_free(p); 4685 return ret; 4686 } 4687 4688 static int send_write_or_clone(struct send_ctx *sctx, 4689 struct btrfs_path *path, 4690 struct btrfs_key *key, 4691 struct clone_root *clone_root) 4692 { 4693 int ret = 0; 4694 struct btrfs_file_extent_item *ei; 4695 u64 offset = key->offset; 4696 u64 pos = 0; 4697 u64 len; 4698 u32 l; 4699 u8 type; 4700 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 4701 4702 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4703 struct btrfs_file_extent_item); 4704 type = btrfs_file_extent_type(path->nodes[0], ei); 4705 if (type == BTRFS_FILE_EXTENT_INLINE) { 4706 len = btrfs_file_extent_inline_len(path->nodes[0], 4707 path->slots[0], ei); 4708 /* 4709 * it is possible the inline item won't cover the whole page, 4710 * but there may be items after this page. Make 4711 * sure to send the whole thing 4712 */ 4713 len = PAGE_CACHE_ALIGN(len); 4714 } else { 4715 len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 4716 } 4717 4718 if (offset + len > sctx->cur_inode_size) 4719 len = sctx->cur_inode_size - offset; 4720 if (len == 0) { 4721 ret = 0; 4722 goto out; 4723 } 4724 4725 if (clone_root && IS_ALIGNED(offset + len, bs)) { 4726 ret = send_clone(sctx, offset, len, clone_root); 4727 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { 4728 ret = send_update_extent(sctx, offset, len); 4729 } else { 4730 while (pos < len) { 4731 l = len - pos; 4732 if (l > BTRFS_SEND_READ_SIZE) 4733 l = BTRFS_SEND_READ_SIZE; 4734 ret = send_write(sctx, pos + offset, l); 4735 if (ret < 0) 4736 goto out; 4737 if (!ret) 4738 break; 4739 pos += ret; 4740 } 4741 ret = 0; 4742 } 4743 out: 4744 return ret; 4745 } 4746 4747 static int is_extent_unchanged(struct send_ctx *sctx, 4748 struct btrfs_path *left_path, 4749 struct btrfs_key *ekey) 4750 { 4751 int ret = 0; 4752 struct btrfs_key key; 4753 struct btrfs_path *path = NULL; 4754 struct extent_buffer *eb; 4755 int slot; 4756 struct btrfs_key found_key; 4757 struct btrfs_file_extent_item *ei; 4758 u64 left_disknr; 4759 u64 right_disknr; 4760 u64 left_offset; 4761 u64 right_offset; 4762 u64 left_offset_fixed; 4763 u64 left_len; 4764 u64 right_len; 4765 u64 left_gen; 4766 u64 right_gen; 4767 u8 left_type; 4768 u8 right_type; 4769 4770 path = alloc_path_for_send(); 4771 if (!path) 4772 return -ENOMEM; 4773 4774 eb = left_path->nodes[0]; 4775 slot = left_path->slots[0]; 4776 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4777 left_type = btrfs_file_extent_type(eb, ei); 4778 4779 if (left_type != BTRFS_FILE_EXTENT_REG) { 4780 ret = 0; 4781 goto out; 4782 } 4783 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4784 left_len = btrfs_file_extent_num_bytes(eb, ei); 4785 left_offset = btrfs_file_extent_offset(eb, ei); 4786 left_gen = btrfs_file_extent_generation(eb, ei); 4787 4788 /* 4789 * Following comments will refer to these graphics. L is the left 4790 * extents which we are checking at the moment. 1-8 are the right 4791 * extents that we iterate. 4792 * 4793 * |-----L-----| 4794 * |-1-|-2a-|-3-|-4-|-5-|-6-| 4795 * 4796 * |-----L-----| 4797 * |--1--|-2b-|...(same as above) 4798 * 4799 * Alternative situation. Happens on files where extents got split. 4800 * |-----L-----| 4801 * |-----------7-----------|-6-| 4802 * 4803 * Alternative situation. Happens on files which got larger. 4804 * |-----L-----| 4805 * |-8-| 4806 * Nothing follows after 8. 4807 */ 4808 4809 key.objectid = ekey->objectid; 4810 key.type = BTRFS_EXTENT_DATA_KEY; 4811 key.offset = ekey->offset; 4812 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 4813 if (ret < 0) 4814 goto out; 4815 if (ret) { 4816 ret = 0; 4817 goto out; 4818 } 4819 4820 /* 4821 * Handle special case where the right side has no extents at all. 4822 */ 4823 eb = path->nodes[0]; 4824 slot = path->slots[0]; 4825 btrfs_item_key_to_cpu(eb, &found_key, slot); 4826 if (found_key.objectid != key.objectid || 4827 found_key.type != key.type) { 4828 /* If we're a hole then just pretend nothing changed */ 4829 ret = (left_disknr) ? 0 : 1; 4830 goto out; 4831 } 4832 4833 /* 4834 * We're now on 2a, 2b or 7. 4835 */ 4836 key = found_key; 4837 while (key.offset < ekey->offset + left_len) { 4838 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4839 right_type = btrfs_file_extent_type(eb, ei); 4840 if (right_type != BTRFS_FILE_EXTENT_REG) { 4841 ret = 0; 4842 goto out; 4843 } 4844 4845 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4846 right_len = btrfs_file_extent_num_bytes(eb, ei); 4847 right_offset = btrfs_file_extent_offset(eb, ei); 4848 right_gen = btrfs_file_extent_generation(eb, ei); 4849 4850 /* 4851 * Are we at extent 8? If yes, we know the extent is changed. 4852 * This may only happen on the first iteration. 4853 */ 4854 if (found_key.offset + right_len <= ekey->offset) { 4855 /* If we're a hole just pretend nothing changed */ 4856 ret = (left_disknr) ? 0 : 1; 4857 goto out; 4858 } 4859 4860 left_offset_fixed = left_offset; 4861 if (key.offset < ekey->offset) { 4862 /* Fix the right offset for 2a and 7. */ 4863 right_offset += ekey->offset - key.offset; 4864 } else { 4865 /* Fix the left offset for all behind 2a and 2b */ 4866 left_offset_fixed += key.offset - ekey->offset; 4867 } 4868 4869 /* 4870 * Check if we have the same extent. 4871 */ 4872 if (left_disknr != right_disknr || 4873 left_offset_fixed != right_offset || 4874 left_gen != right_gen) { 4875 ret = 0; 4876 goto out; 4877 } 4878 4879 /* 4880 * Go to the next extent. 4881 */ 4882 ret = btrfs_next_item(sctx->parent_root, path); 4883 if (ret < 0) 4884 goto out; 4885 if (!ret) { 4886 eb = path->nodes[0]; 4887 slot = path->slots[0]; 4888 btrfs_item_key_to_cpu(eb, &found_key, slot); 4889 } 4890 if (ret || found_key.objectid != key.objectid || 4891 found_key.type != key.type) { 4892 key.offset += right_len; 4893 break; 4894 } 4895 if (found_key.offset != key.offset + right_len) { 4896 ret = 0; 4897 goto out; 4898 } 4899 key = found_key; 4900 } 4901 4902 /* 4903 * We're now behind the left extent (treat as unchanged) or at the end 4904 * of the right side (treat as changed). 4905 */ 4906 if (key.offset >= ekey->offset + left_len) 4907 ret = 1; 4908 else 4909 ret = 0; 4910 4911 4912 out: 4913 btrfs_free_path(path); 4914 return ret; 4915 } 4916 4917 static int get_last_extent(struct send_ctx *sctx, u64 offset) 4918 { 4919 struct btrfs_path *path; 4920 struct btrfs_root *root = sctx->send_root; 4921 struct btrfs_file_extent_item *fi; 4922 struct btrfs_key key; 4923 u64 extent_end; 4924 u8 type; 4925 int ret; 4926 4927 path = alloc_path_for_send(); 4928 if (!path) 4929 return -ENOMEM; 4930 4931 sctx->cur_inode_last_extent = 0; 4932 4933 key.objectid = sctx->cur_ino; 4934 key.type = BTRFS_EXTENT_DATA_KEY; 4935 key.offset = offset; 4936 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 4937 if (ret < 0) 4938 goto out; 4939 ret = 0; 4940 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 4941 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 4942 goto out; 4943 4944 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4945 struct btrfs_file_extent_item); 4946 type = btrfs_file_extent_type(path->nodes[0], fi); 4947 if (type == BTRFS_FILE_EXTENT_INLINE) { 4948 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4949 path->slots[0], fi); 4950 extent_end = ALIGN(key.offset + size, 4951 sctx->send_root->sectorsize); 4952 } else { 4953 extent_end = key.offset + 4954 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4955 } 4956 sctx->cur_inode_last_extent = extent_end; 4957 out: 4958 btrfs_free_path(path); 4959 return ret; 4960 } 4961 4962 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 4963 struct btrfs_key *key) 4964 { 4965 struct btrfs_file_extent_item *fi; 4966 u64 extent_end; 4967 u8 type; 4968 int ret = 0; 4969 4970 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 4971 return 0; 4972 4973 if (sctx->cur_inode_last_extent == (u64)-1) { 4974 ret = get_last_extent(sctx, key->offset - 1); 4975 if (ret) 4976 return ret; 4977 } 4978 4979 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4980 struct btrfs_file_extent_item); 4981 type = btrfs_file_extent_type(path->nodes[0], fi); 4982 if (type == BTRFS_FILE_EXTENT_INLINE) { 4983 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4984 path->slots[0], fi); 4985 extent_end = ALIGN(key->offset + size, 4986 sctx->send_root->sectorsize); 4987 } else { 4988 extent_end = key->offset + 4989 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4990 } 4991 4992 if (path->slots[0] == 0 && 4993 sctx->cur_inode_last_extent < key->offset) { 4994 /* 4995 * We might have skipped entire leafs that contained only 4996 * file extent items for our current inode. These leafs have 4997 * a generation number smaller (older) than the one in the 4998 * current leaf and the leaf our last extent came from, and 4999 * are located between these 2 leafs. 5000 */ 5001 ret = get_last_extent(sctx, key->offset - 1); 5002 if (ret) 5003 return ret; 5004 } 5005 5006 if (sctx->cur_inode_last_extent < key->offset) 5007 ret = send_hole(sctx, key->offset); 5008 sctx->cur_inode_last_extent = extent_end; 5009 return ret; 5010 } 5011 5012 static int process_extent(struct send_ctx *sctx, 5013 struct btrfs_path *path, 5014 struct btrfs_key *key) 5015 { 5016 struct clone_root *found_clone = NULL; 5017 int ret = 0; 5018 5019 if (S_ISLNK(sctx->cur_inode_mode)) 5020 return 0; 5021 5022 if (sctx->parent_root && !sctx->cur_inode_new) { 5023 ret = is_extent_unchanged(sctx, path, key); 5024 if (ret < 0) 5025 goto out; 5026 if (ret) { 5027 ret = 0; 5028 goto out_hole; 5029 } 5030 } else { 5031 struct btrfs_file_extent_item *ei; 5032 u8 type; 5033 5034 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 5035 struct btrfs_file_extent_item); 5036 type = btrfs_file_extent_type(path->nodes[0], ei); 5037 if (type == BTRFS_FILE_EXTENT_PREALLOC || 5038 type == BTRFS_FILE_EXTENT_REG) { 5039 /* 5040 * The send spec does not have a prealloc command yet, 5041 * so just leave a hole for prealloc'ed extents until 5042 * we have enough commands queued up to justify rev'ing 5043 * the send spec. 5044 */ 5045 if (type == BTRFS_FILE_EXTENT_PREALLOC) { 5046 ret = 0; 5047 goto out; 5048 } 5049 5050 /* Have a hole, just skip it. */ 5051 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { 5052 ret = 0; 5053 goto out; 5054 } 5055 } 5056 } 5057 5058 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 5059 sctx->cur_inode_size, &found_clone); 5060 if (ret != -ENOENT && ret < 0) 5061 goto out; 5062 5063 ret = send_write_or_clone(sctx, path, key, found_clone); 5064 if (ret) 5065 goto out; 5066 out_hole: 5067 ret = maybe_send_hole(sctx, path, key); 5068 out: 5069 return ret; 5070 } 5071 5072 static int process_all_extents(struct send_ctx *sctx) 5073 { 5074 int ret; 5075 struct btrfs_root *root; 5076 struct btrfs_path *path; 5077 struct btrfs_key key; 5078 struct btrfs_key found_key; 5079 struct extent_buffer *eb; 5080 int slot; 5081 5082 root = sctx->send_root; 5083 path = alloc_path_for_send(); 5084 if (!path) 5085 return -ENOMEM; 5086 5087 key.objectid = sctx->cmp_key->objectid; 5088 key.type = BTRFS_EXTENT_DATA_KEY; 5089 key.offset = 0; 5090 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5091 if (ret < 0) 5092 goto out; 5093 5094 while (1) { 5095 eb = path->nodes[0]; 5096 slot = path->slots[0]; 5097 5098 if (slot >= btrfs_header_nritems(eb)) { 5099 ret = btrfs_next_leaf(root, path); 5100 if (ret < 0) { 5101 goto out; 5102 } else if (ret > 0) { 5103 ret = 0; 5104 break; 5105 } 5106 continue; 5107 } 5108 5109 btrfs_item_key_to_cpu(eb, &found_key, slot); 5110 5111 if (found_key.objectid != key.objectid || 5112 found_key.type != key.type) { 5113 ret = 0; 5114 goto out; 5115 } 5116 5117 ret = process_extent(sctx, path, &found_key); 5118 if (ret < 0) 5119 goto out; 5120 5121 path->slots[0]++; 5122 } 5123 5124 out: 5125 btrfs_free_path(path); 5126 return ret; 5127 } 5128 5129 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 5130 int *pending_move, 5131 int *refs_processed) 5132 { 5133 int ret = 0; 5134 5135 if (sctx->cur_ino == 0) 5136 goto out; 5137 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 5138 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) 5139 goto out; 5140 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 5141 goto out; 5142 5143 ret = process_recorded_refs(sctx, pending_move); 5144 if (ret < 0) 5145 goto out; 5146 5147 *refs_processed = 1; 5148 out: 5149 return ret; 5150 } 5151 5152 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 5153 { 5154 int ret = 0; 5155 u64 left_mode; 5156 u64 left_uid; 5157 u64 left_gid; 5158 u64 right_mode; 5159 u64 right_uid; 5160 u64 right_gid; 5161 int need_chmod = 0; 5162 int need_chown = 0; 5163 int pending_move = 0; 5164 int refs_processed = 0; 5165 5166 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5167 &refs_processed); 5168 if (ret < 0) 5169 goto out; 5170 5171 /* 5172 * We have processed the refs and thus need to advance send_progress. 5173 * Now, calls to get_cur_xxx will take the updated refs of the current 5174 * inode into account. 5175 * 5176 * On the other hand, if our current inode is a directory and couldn't 5177 * be moved/renamed because its parent was renamed/moved too and it has 5178 * a higher inode number, we can only move/rename our current inode 5179 * after we moved/renamed its parent. Therefore in this case operate on 5180 * the old path (pre move/rename) of our current inode, and the 5181 * move/rename will be performed later. 5182 */ 5183 if (refs_processed && !pending_move) 5184 sctx->send_progress = sctx->cur_ino + 1; 5185 5186 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 5187 goto out; 5188 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 5189 goto out; 5190 5191 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, 5192 &left_mode, &left_uid, &left_gid, NULL); 5193 if (ret < 0) 5194 goto out; 5195 5196 if (!sctx->parent_root || sctx->cur_inode_new) { 5197 need_chown = 1; 5198 if (!S_ISLNK(sctx->cur_inode_mode)) 5199 need_chmod = 1; 5200 } else { 5201 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 5202 NULL, NULL, &right_mode, &right_uid, 5203 &right_gid, NULL); 5204 if (ret < 0) 5205 goto out; 5206 5207 if (left_uid != right_uid || left_gid != right_gid) 5208 need_chown = 1; 5209 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) 5210 need_chmod = 1; 5211 } 5212 5213 if (S_ISREG(sctx->cur_inode_mode)) { 5214 if (need_send_hole(sctx)) { 5215 if (sctx->cur_inode_last_extent == (u64)-1 || 5216 sctx->cur_inode_last_extent < 5217 sctx->cur_inode_size) { 5218 ret = get_last_extent(sctx, (u64)-1); 5219 if (ret) 5220 goto out; 5221 } 5222 if (sctx->cur_inode_last_extent < 5223 sctx->cur_inode_size) { 5224 ret = send_hole(sctx, sctx->cur_inode_size); 5225 if (ret) 5226 goto out; 5227 } 5228 } 5229 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5230 sctx->cur_inode_size); 5231 if (ret < 0) 5232 goto out; 5233 } 5234 5235 if (need_chown) { 5236 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5237 left_uid, left_gid); 5238 if (ret < 0) 5239 goto out; 5240 } 5241 if (need_chmod) { 5242 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5243 left_mode); 5244 if (ret < 0) 5245 goto out; 5246 } 5247 5248 /* 5249 * If other directory inodes depended on our current directory 5250 * inode's move/rename, now do their move/rename operations. 5251 */ 5252 if (!is_waiting_for_move(sctx, sctx->cur_ino)) { 5253 ret = apply_children_dir_moves(sctx); 5254 if (ret) 5255 goto out; 5256 /* 5257 * Need to send that every time, no matter if it actually 5258 * changed between the two trees as we have done changes to 5259 * the inode before. If our inode is a directory and it's 5260 * waiting to be moved/renamed, we will send its utimes when 5261 * it's moved/renamed, therefore we don't need to do it here. 5262 */ 5263 sctx->send_progress = sctx->cur_ino + 1; 5264 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 5265 if (ret < 0) 5266 goto out; 5267 } 5268 5269 out: 5270 return ret; 5271 } 5272 5273 static int changed_inode(struct send_ctx *sctx, 5274 enum btrfs_compare_tree_result result) 5275 { 5276 int ret = 0; 5277 struct btrfs_key *key = sctx->cmp_key; 5278 struct btrfs_inode_item *left_ii = NULL; 5279 struct btrfs_inode_item *right_ii = NULL; 5280 u64 left_gen = 0; 5281 u64 right_gen = 0; 5282 5283 sctx->cur_ino = key->objectid; 5284 sctx->cur_inode_new_gen = 0; 5285 sctx->cur_inode_last_extent = (u64)-1; 5286 5287 /* 5288 * Set send_progress to current inode. This will tell all get_cur_xxx 5289 * functions that the current inode's refs are not updated yet. Later, 5290 * when process_recorded_refs is finished, it is set to cur_ino + 1. 5291 */ 5292 sctx->send_progress = sctx->cur_ino; 5293 5294 if (result == BTRFS_COMPARE_TREE_NEW || 5295 result == BTRFS_COMPARE_TREE_CHANGED) { 5296 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], 5297 sctx->left_path->slots[0], 5298 struct btrfs_inode_item); 5299 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], 5300 left_ii); 5301 } else { 5302 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5303 sctx->right_path->slots[0], 5304 struct btrfs_inode_item); 5305 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5306 right_ii); 5307 } 5308 if (result == BTRFS_COMPARE_TREE_CHANGED) { 5309 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5310 sctx->right_path->slots[0], 5311 struct btrfs_inode_item); 5312 5313 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5314 right_ii); 5315 5316 /* 5317 * The cur_ino = root dir case is special here. We can't treat 5318 * the inode as deleted+reused because it would generate a 5319 * stream that tries to delete/mkdir the root dir. 5320 */ 5321 if (left_gen != right_gen && 5322 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5323 sctx->cur_inode_new_gen = 1; 5324 } 5325 5326 if (result == BTRFS_COMPARE_TREE_NEW) { 5327 sctx->cur_inode_gen = left_gen; 5328 sctx->cur_inode_new = 1; 5329 sctx->cur_inode_deleted = 0; 5330 sctx->cur_inode_size = btrfs_inode_size( 5331 sctx->left_path->nodes[0], left_ii); 5332 sctx->cur_inode_mode = btrfs_inode_mode( 5333 sctx->left_path->nodes[0], left_ii); 5334 sctx->cur_inode_rdev = btrfs_inode_rdev( 5335 sctx->left_path->nodes[0], left_ii); 5336 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5337 ret = send_create_inode_if_needed(sctx); 5338 } else if (result == BTRFS_COMPARE_TREE_DELETED) { 5339 sctx->cur_inode_gen = right_gen; 5340 sctx->cur_inode_new = 0; 5341 sctx->cur_inode_deleted = 1; 5342 sctx->cur_inode_size = btrfs_inode_size( 5343 sctx->right_path->nodes[0], right_ii); 5344 sctx->cur_inode_mode = btrfs_inode_mode( 5345 sctx->right_path->nodes[0], right_ii); 5346 } else if (result == BTRFS_COMPARE_TREE_CHANGED) { 5347 /* 5348 * We need to do some special handling in case the inode was 5349 * reported as changed with a changed generation number. This 5350 * means that the original inode was deleted and new inode 5351 * reused the same inum. So we have to treat the old inode as 5352 * deleted and the new one as new. 5353 */ 5354 if (sctx->cur_inode_new_gen) { 5355 /* 5356 * First, process the inode as if it was deleted. 5357 */ 5358 sctx->cur_inode_gen = right_gen; 5359 sctx->cur_inode_new = 0; 5360 sctx->cur_inode_deleted = 1; 5361 sctx->cur_inode_size = btrfs_inode_size( 5362 sctx->right_path->nodes[0], right_ii); 5363 sctx->cur_inode_mode = btrfs_inode_mode( 5364 sctx->right_path->nodes[0], right_ii); 5365 ret = process_all_refs(sctx, 5366 BTRFS_COMPARE_TREE_DELETED); 5367 if (ret < 0) 5368 goto out; 5369 5370 /* 5371 * Now process the inode as if it was new. 5372 */ 5373 sctx->cur_inode_gen = left_gen; 5374 sctx->cur_inode_new = 1; 5375 sctx->cur_inode_deleted = 0; 5376 sctx->cur_inode_size = btrfs_inode_size( 5377 sctx->left_path->nodes[0], left_ii); 5378 sctx->cur_inode_mode = btrfs_inode_mode( 5379 sctx->left_path->nodes[0], left_ii); 5380 sctx->cur_inode_rdev = btrfs_inode_rdev( 5381 sctx->left_path->nodes[0], left_ii); 5382 ret = send_create_inode_if_needed(sctx); 5383 if (ret < 0) 5384 goto out; 5385 5386 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); 5387 if (ret < 0) 5388 goto out; 5389 /* 5390 * Advance send_progress now as we did not get into 5391 * process_recorded_refs_if_needed in the new_gen case. 5392 */ 5393 sctx->send_progress = sctx->cur_ino + 1; 5394 5395 /* 5396 * Now process all extents and xattrs of the inode as if 5397 * they were all new. 5398 */ 5399 ret = process_all_extents(sctx); 5400 if (ret < 0) 5401 goto out; 5402 ret = process_all_new_xattrs(sctx); 5403 if (ret < 0) 5404 goto out; 5405 } else { 5406 sctx->cur_inode_gen = left_gen; 5407 sctx->cur_inode_new = 0; 5408 sctx->cur_inode_new_gen = 0; 5409 sctx->cur_inode_deleted = 0; 5410 sctx->cur_inode_size = btrfs_inode_size( 5411 sctx->left_path->nodes[0], left_ii); 5412 sctx->cur_inode_mode = btrfs_inode_mode( 5413 sctx->left_path->nodes[0], left_ii); 5414 } 5415 } 5416 5417 out: 5418 return ret; 5419 } 5420 5421 /* 5422 * We have to process new refs before deleted refs, but compare_trees gives us 5423 * the new and deleted refs mixed. To fix this, we record the new/deleted refs 5424 * first and later process them in process_recorded_refs. 5425 * For the cur_inode_new_gen case, we skip recording completely because 5426 * changed_inode did already initiate processing of refs. The reason for this is 5427 * that in this case, compare_tree actually compares the refs of 2 different 5428 * inodes. To fix this, process_all_refs is used in changed_inode to handle all 5429 * refs of the right tree as deleted and all refs of the left tree as new. 5430 */ 5431 static int changed_ref(struct send_ctx *sctx, 5432 enum btrfs_compare_tree_result result) 5433 { 5434 int ret = 0; 5435 5436 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5437 5438 if (!sctx->cur_inode_new_gen && 5439 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5440 if (result == BTRFS_COMPARE_TREE_NEW) 5441 ret = record_new_ref(sctx); 5442 else if (result == BTRFS_COMPARE_TREE_DELETED) 5443 ret = record_deleted_ref(sctx); 5444 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5445 ret = record_changed_ref(sctx); 5446 } 5447 5448 return ret; 5449 } 5450 5451 /* 5452 * Process new/deleted/changed xattrs. We skip processing in the 5453 * cur_inode_new_gen case because changed_inode did already initiate processing 5454 * of xattrs. The reason is the same as in changed_ref 5455 */ 5456 static int changed_xattr(struct send_ctx *sctx, 5457 enum btrfs_compare_tree_result result) 5458 { 5459 int ret = 0; 5460 5461 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5462 5463 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5464 if (result == BTRFS_COMPARE_TREE_NEW) 5465 ret = process_new_xattr(sctx); 5466 else if (result == BTRFS_COMPARE_TREE_DELETED) 5467 ret = process_deleted_xattr(sctx); 5468 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5469 ret = process_changed_xattr(sctx); 5470 } 5471 5472 return ret; 5473 } 5474 5475 /* 5476 * Process new/deleted/changed extents. We skip processing in the 5477 * cur_inode_new_gen case because changed_inode did already initiate processing 5478 * of extents. The reason is the same as in changed_ref 5479 */ 5480 static int changed_extent(struct send_ctx *sctx, 5481 enum btrfs_compare_tree_result result) 5482 { 5483 int ret = 0; 5484 5485 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5486 5487 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5488 if (result != BTRFS_COMPARE_TREE_DELETED) 5489 ret = process_extent(sctx, sctx->left_path, 5490 sctx->cmp_key); 5491 } 5492 5493 return ret; 5494 } 5495 5496 static int dir_changed(struct send_ctx *sctx, u64 dir) 5497 { 5498 u64 orig_gen, new_gen; 5499 int ret; 5500 5501 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, 5502 NULL, NULL); 5503 if (ret) 5504 return ret; 5505 5506 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, 5507 NULL, NULL, NULL); 5508 if (ret) 5509 return ret; 5510 5511 return (orig_gen != new_gen) ? 1 : 0; 5512 } 5513 5514 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, 5515 struct btrfs_key *key) 5516 { 5517 struct btrfs_inode_extref *extref; 5518 struct extent_buffer *leaf; 5519 u64 dirid = 0, last_dirid = 0; 5520 unsigned long ptr; 5521 u32 item_size; 5522 u32 cur_offset = 0; 5523 int ref_name_len; 5524 int ret = 0; 5525 5526 /* Easy case, just check this one dirid */ 5527 if (key->type == BTRFS_INODE_REF_KEY) { 5528 dirid = key->offset; 5529 5530 ret = dir_changed(sctx, dirid); 5531 goto out; 5532 } 5533 5534 leaf = path->nodes[0]; 5535 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 5536 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 5537 while (cur_offset < item_size) { 5538 extref = (struct btrfs_inode_extref *)(ptr + 5539 cur_offset); 5540 dirid = btrfs_inode_extref_parent(leaf, extref); 5541 ref_name_len = btrfs_inode_extref_name_len(leaf, extref); 5542 cur_offset += ref_name_len + sizeof(*extref); 5543 if (dirid == last_dirid) 5544 continue; 5545 ret = dir_changed(sctx, dirid); 5546 if (ret) 5547 break; 5548 last_dirid = dirid; 5549 } 5550 out: 5551 return ret; 5552 } 5553 5554 /* 5555 * Updates compare related fields in sctx and simply forwards to the actual 5556 * changed_xxx functions. 5557 */ 5558 static int changed_cb(struct btrfs_root *left_root, 5559 struct btrfs_root *right_root, 5560 struct btrfs_path *left_path, 5561 struct btrfs_path *right_path, 5562 struct btrfs_key *key, 5563 enum btrfs_compare_tree_result result, 5564 void *ctx) 5565 { 5566 int ret = 0; 5567 struct send_ctx *sctx = ctx; 5568 5569 if (result == BTRFS_COMPARE_TREE_SAME) { 5570 if (key->type == BTRFS_INODE_REF_KEY || 5571 key->type == BTRFS_INODE_EXTREF_KEY) { 5572 ret = compare_refs(sctx, left_path, key); 5573 if (!ret) 5574 return 0; 5575 if (ret < 0) 5576 return ret; 5577 } else if (key->type == BTRFS_EXTENT_DATA_KEY) { 5578 return maybe_send_hole(sctx, left_path, key); 5579 } else { 5580 return 0; 5581 } 5582 result = BTRFS_COMPARE_TREE_CHANGED; 5583 ret = 0; 5584 } 5585 5586 sctx->left_path = left_path; 5587 sctx->right_path = right_path; 5588 sctx->cmp_key = key; 5589 5590 ret = finish_inode_if_needed(sctx, 0); 5591 if (ret < 0) 5592 goto out; 5593 5594 /* Ignore non-FS objects */ 5595 if (key->objectid == BTRFS_FREE_INO_OBJECTID || 5596 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 5597 goto out; 5598 5599 if (key->type == BTRFS_INODE_ITEM_KEY) 5600 ret = changed_inode(sctx, result); 5601 else if (key->type == BTRFS_INODE_REF_KEY || 5602 key->type == BTRFS_INODE_EXTREF_KEY) 5603 ret = changed_ref(sctx, result); 5604 else if (key->type == BTRFS_XATTR_ITEM_KEY) 5605 ret = changed_xattr(sctx, result); 5606 else if (key->type == BTRFS_EXTENT_DATA_KEY) 5607 ret = changed_extent(sctx, result); 5608 5609 out: 5610 return ret; 5611 } 5612 5613 static int full_send_tree(struct send_ctx *sctx) 5614 { 5615 int ret; 5616 struct btrfs_root *send_root = sctx->send_root; 5617 struct btrfs_key key; 5618 struct btrfs_key found_key; 5619 struct btrfs_path *path; 5620 struct extent_buffer *eb; 5621 int slot; 5622 5623 path = alloc_path_for_send(); 5624 if (!path) 5625 return -ENOMEM; 5626 5627 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5628 key.type = BTRFS_INODE_ITEM_KEY; 5629 key.offset = 0; 5630 5631 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5632 if (ret < 0) 5633 goto out; 5634 if (ret) 5635 goto out_finish; 5636 5637 while (1) { 5638 eb = path->nodes[0]; 5639 slot = path->slots[0]; 5640 btrfs_item_key_to_cpu(eb, &found_key, slot); 5641 5642 ret = changed_cb(send_root, NULL, path, NULL, 5643 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5644 if (ret < 0) 5645 goto out; 5646 5647 key.objectid = found_key.objectid; 5648 key.type = found_key.type; 5649 key.offset = found_key.offset + 1; 5650 5651 ret = btrfs_next_item(send_root, path); 5652 if (ret < 0) 5653 goto out; 5654 if (ret) { 5655 ret = 0; 5656 break; 5657 } 5658 } 5659 5660 out_finish: 5661 ret = finish_inode_if_needed(sctx, 1); 5662 5663 out: 5664 btrfs_free_path(path); 5665 return ret; 5666 } 5667 5668 static int send_subvol(struct send_ctx *sctx) 5669 { 5670 int ret; 5671 5672 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5673 ret = send_header(sctx); 5674 if (ret < 0) 5675 goto out; 5676 } 5677 5678 ret = send_subvol_begin(sctx); 5679 if (ret < 0) 5680 goto out; 5681 5682 if (sctx->parent_root) { 5683 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 5684 changed_cb, sctx); 5685 if (ret < 0) 5686 goto out; 5687 ret = finish_inode_if_needed(sctx, 1); 5688 if (ret < 0) 5689 goto out; 5690 } else { 5691 ret = full_send_tree(sctx); 5692 if (ret < 0) 5693 goto out; 5694 } 5695 5696 out: 5697 free_recorded_refs(sctx); 5698 return ret; 5699 } 5700 5701 /* 5702 * If orphan cleanup did remove any orphans from a root, it means the tree 5703 * was modified and therefore the commit root is not the same as the current 5704 * root anymore. This is a problem, because send uses the commit root and 5705 * therefore can see inode items that don't exist in the current root anymore, 5706 * and for example make calls to btrfs_iget, which will do tree lookups based 5707 * on the current root and not on the commit root. Those lookups will fail, 5708 * returning a -ESTALE error, and making send fail with that error. So make 5709 * sure a send does not see any orphans we have just removed, and that it will 5710 * see the same inodes regardless of whether a transaction commit happened 5711 * before it started (meaning that the commit root will be the same as the 5712 * current root) or not. 5713 */ 5714 static int ensure_commit_roots_uptodate(struct send_ctx *sctx) 5715 { 5716 int i; 5717 struct btrfs_trans_handle *trans = NULL; 5718 5719 again: 5720 if (sctx->parent_root && 5721 sctx->parent_root->node != sctx->parent_root->commit_root) 5722 goto commit_trans; 5723 5724 for (i = 0; i < sctx->clone_roots_cnt; i++) 5725 if (sctx->clone_roots[i].root->node != 5726 sctx->clone_roots[i].root->commit_root) 5727 goto commit_trans; 5728 5729 if (trans) 5730 return btrfs_end_transaction(trans, sctx->send_root); 5731 5732 return 0; 5733 5734 commit_trans: 5735 /* Use any root, all fs roots will get their commit roots updated. */ 5736 if (!trans) { 5737 trans = btrfs_join_transaction(sctx->send_root); 5738 if (IS_ERR(trans)) 5739 return PTR_ERR(trans); 5740 goto again; 5741 } 5742 5743 return btrfs_commit_transaction(trans, sctx->send_root); 5744 } 5745 5746 static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) 5747 { 5748 spin_lock(&root->root_item_lock); 5749 root->send_in_progress--; 5750 /* 5751 * Not much left to do, we don't know why it's unbalanced and 5752 * can't blindly reset it to 0. 5753 */ 5754 if (root->send_in_progress < 0) 5755 btrfs_err(root->fs_info, 5756 "send_in_progres unbalanced %d root %llu", 5757 root->send_in_progress, root->root_key.objectid); 5758 spin_unlock(&root->root_item_lock); 5759 } 5760 5761 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) 5762 { 5763 int ret = 0; 5764 struct btrfs_root *send_root; 5765 struct btrfs_root *clone_root; 5766 struct btrfs_fs_info *fs_info; 5767 struct btrfs_ioctl_send_args *arg = NULL; 5768 struct btrfs_key key; 5769 struct send_ctx *sctx = NULL; 5770 u32 i; 5771 u64 *clone_sources_tmp = NULL; 5772 int clone_sources_to_rollback = 0; 5773 int sort_clone_roots = 0; 5774 int index; 5775 5776 if (!capable(CAP_SYS_ADMIN)) 5777 return -EPERM; 5778 5779 send_root = BTRFS_I(file_inode(mnt_file))->root; 5780 fs_info = send_root->fs_info; 5781 5782 /* 5783 * The subvolume must remain read-only during send, protect against 5784 * making it RW. This also protects against deletion. 5785 */ 5786 spin_lock(&send_root->root_item_lock); 5787 send_root->send_in_progress++; 5788 spin_unlock(&send_root->root_item_lock); 5789 5790 /* 5791 * This is done when we lookup the root, it should already be complete 5792 * by the time we get here. 5793 */ 5794 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); 5795 5796 /* 5797 * Userspace tools do the checks and warn the user if it's 5798 * not RO. 5799 */ 5800 if (!btrfs_root_readonly(send_root)) { 5801 ret = -EPERM; 5802 goto out; 5803 } 5804 5805 arg = memdup_user(arg_, sizeof(*arg)); 5806 if (IS_ERR(arg)) { 5807 ret = PTR_ERR(arg); 5808 arg = NULL; 5809 goto out; 5810 } 5811 5812 if (!access_ok(VERIFY_READ, arg->clone_sources, 5813 sizeof(*arg->clone_sources) * 5814 arg->clone_sources_count)) { 5815 ret = -EFAULT; 5816 goto out; 5817 } 5818 5819 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { 5820 ret = -EINVAL; 5821 goto out; 5822 } 5823 5824 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); 5825 if (!sctx) { 5826 ret = -ENOMEM; 5827 goto out; 5828 } 5829 5830 INIT_LIST_HEAD(&sctx->new_refs); 5831 INIT_LIST_HEAD(&sctx->deleted_refs); 5832 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); 5833 INIT_LIST_HEAD(&sctx->name_cache_list); 5834 5835 sctx->flags = arg->flags; 5836 5837 sctx->send_filp = fget(arg->send_fd); 5838 if (!sctx->send_filp) { 5839 ret = -EBADF; 5840 goto out; 5841 } 5842 5843 sctx->send_root = send_root; 5844 /* 5845 * Unlikely but possible, if the subvolume is marked for deletion but 5846 * is slow to remove the directory entry, send can still be started 5847 */ 5848 if (btrfs_root_dead(sctx->send_root)) { 5849 ret = -EPERM; 5850 goto out; 5851 } 5852 5853 sctx->clone_roots_cnt = arg->clone_sources_count; 5854 5855 sctx->send_max_size = BTRFS_SEND_BUF_SIZE; 5856 sctx->send_buf = vmalloc(sctx->send_max_size); 5857 if (!sctx->send_buf) { 5858 ret = -ENOMEM; 5859 goto out; 5860 } 5861 5862 sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); 5863 if (!sctx->read_buf) { 5864 ret = -ENOMEM; 5865 goto out; 5866 } 5867 5868 sctx->pending_dir_moves = RB_ROOT; 5869 sctx->waiting_dir_moves = RB_ROOT; 5870 sctx->orphan_dirs = RB_ROOT; 5871 5872 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 5873 (arg->clone_sources_count + 1)); 5874 if (!sctx->clone_roots) { 5875 ret = -ENOMEM; 5876 goto out; 5877 } 5878 5879 if (arg->clone_sources_count) { 5880 clone_sources_tmp = vmalloc(arg->clone_sources_count * 5881 sizeof(*arg->clone_sources)); 5882 if (!clone_sources_tmp) { 5883 ret = -ENOMEM; 5884 goto out; 5885 } 5886 5887 ret = copy_from_user(clone_sources_tmp, arg->clone_sources, 5888 arg->clone_sources_count * 5889 sizeof(*arg->clone_sources)); 5890 if (ret) { 5891 ret = -EFAULT; 5892 goto out; 5893 } 5894 5895 for (i = 0; i < arg->clone_sources_count; i++) { 5896 key.objectid = clone_sources_tmp[i]; 5897 key.type = BTRFS_ROOT_ITEM_KEY; 5898 key.offset = (u64)-1; 5899 5900 index = srcu_read_lock(&fs_info->subvol_srcu); 5901 5902 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 5903 if (IS_ERR(clone_root)) { 5904 srcu_read_unlock(&fs_info->subvol_srcu, index); 5905 ret = PTR_ERR(clone_root); 5906 goto out; 5907 } 5908 spin_lock(&clone_root->root_item_lock); 5909 if (!btrfs_root_readonly(clone_root) || 5910 btrfs_root_dead(clone_root)) { 5911 spin_unlock(&clone_root->root_item_lock); 5912 srcu_read_unlock(&fs_info->subvol_srcu, index); 5913 ret = -EPERM; 5914 goto out; 5915 } 5916 clone_root->send_in_progress++; 5917 spin_unlock(&clone_root->root_item_lock); 5918 srcu_read_unlock(&fs_info->subvol_srcu, index); 5919 5920 sctx->clone_roots[i].root = clone_root; 5921 clone_sources_to_rollback = i + 1; 5922 } 5923 vfree(clone_sources_tmp); 5924 clone_sources_tmp = NULL; 5925 } 5926 5927 if (arg->parent_root) { 5928 key.objectid = arg->parent_root; 5929 key.type = BTRFS_ROOT_ITEM_KEY; 5930 key.offset = (u64)-1; 5931 5932 index = srcu_read_lock(&fs_info->subvol_srcu); 5933 5934 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); 5935 if (IS_ERR(sctx->parent_root)) { 5936 srcu_read_unlock(&fs_info->subvol_srcu, index); 5937 ret = PTR_ERR(sctx->parent_root); 5938 goto out; 5939 } 5940 5941 spin_lock(&sctx->parent_root->root_item_lock); 5942 sctx->parent_root->send_in_progress++; 5943 if (!btrfs_root_readonly(sctx->parent_root) || 5944 btrfs_root_dead(sctx->parent_root)) { 5945 spin_unlock(&sctx->parent_root->root_item_lock); 5946 srcu_read_unlock(&fs_info->subvol_srcu, index); 5947 ret = -EPERM; 5948 goto out; 5949 } 5950 spin_unlock(&sctx->parent_root->root_item_lock); 5951 5952 srcu_read_unlock(&fs_info->subvol_srcu, index); 5953 } 5954 5955 /* 5956 * Clones from send_root are allowed, but only if the clone source 5957 * is behind the current send position. This is checked while searching 5958 * for possible clone sources. 5959 */ 5960 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; 5961 5962 /* We do a bsearch later */ 5963 sort(sctx->clone_roots, sctx->clone_roots_cnt, 5964 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 5965 NULL); 5966 sort_clone_roots = 1; 5967 5968 ret = ensure_commit_roots_uptodate(sctx); 5969 if (ret) 5970 goto out; 5971 5972 current->journal_info = BTRFS_SEND_TRANS_STUB; 5973 ret = send_subvol(sctx); 5974 current->journal_info = NULL; 5975 if (ret < 0) 5976 goto out; 5977 5978 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) { 5979 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 5980 if (ret < 0) 5981 goto out; 5982 ret = send_cmd(sctx); 5983 if (ret < 0) 5984 goto out; 5985 } 5986 5987 out: 5988 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); 5989 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { 5990 struct rb_node *n; 5991 struct pending_dir_move *pm; 5992 5993 n = rb_first(&sctx->pending_dir_moves); 5994 pm = rb_entry(n, struct pending_dir_move, node); 5995 while (!list_empty(&pm->list)) { 5996 struct pending_dir_move *pm2; 5997 5998 pm2 = list_first_entry(&pm->list, 5999 struct pending_dir_move, list); 6000 free_pending_move(sctx, pm2); 6001 } 6002 free_pending_move(sctx, pm); 6003 } 6004 6005 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); 6006 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { 6007 struct rb_node *n; 6008 struct waiting_dir_move *dm; 6009 6010 n = rb_first(&sctx->waiting_dir_moves); 6011 dm = rb_entry(n, struct waiting_dir_move, node); 6012 rb_erase(&dm->node, &sctx->waiting_dir_moves); 6013 kfree(dm); 6014 } 6015 6016 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); 6017 while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { 6018 struct rb_node *n; 6019 struct orphan_dir_info *odi; 6020 6021 n = rb_first(&sctx->orphan_dirs); 6022 odi = rb_entry(n, struct orphan_dir_info, node); 6023 free_orphan_dir_info(sctx, odi); 6024 } 6025 6026 if (sort_clone_roots) { 6027 for (i = 0; i < sctx->clone_roots_cnt; i++) 6028 btrfs_root_dec_send_in_progress( 6029 sctx->clone_roots[i].root); 6030 } else { 6031 for (i = 0; sctx && i < clone_sources_to_rollback; i++) 6032 btrfs_root_dec_send_in_progress( 6033 sctx->clone_roots[i].root); 6034 6035 btrfs_root_dec_send_in_progress(send_root); 6036 } 6037 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) 6038 btrfs_root_dec_send_in_progress(sctx->parent_root); 6039 6040 kfree(arg); 6041 vfree(clone_sources_tmp); 6042 6043 if (sctx) { 6044 if (sctx->send_filp) 6045 fput(sctx->send_filp); 6046 6047 vfree(sctx->clone_roots); 6048 vfree(sctx->send_buf); 6049 vfree(sctx->read_buf); 6050 6051 name_cache_free(sctx); 6052 6053 kfree(sctx); 6054 } 6055 6056 return ret; 6057 } 6058