Lines matching 'realm' in fs/ceph/snap.c

// SPDX-License-Identifier: GPL-2.0
 * This provides a perfect instantaneous client-wide snapshot. Between
 * Snapshots are _not_ file system-wide. Instead, each snapshot
 * of the files contained by each realm share the same set of
 * snapshots. An individual realm's snap set contains snapshots
 * explicitly created on that realm, as well as any snaps in its
 * realm relationship, and for each realm has an explicit list of snaps
 * with an open cap in the system. (The needed snap realm information is
 * version number is used to ensure that as realm parameters change (new
 * snapshot, new parent, etc.) the client's realm hierarchy is updated.
 *
 * The realm hierarchy drives the generation of a 'snap context' for each
 * realm, which simply lists the resulting set of snaps for the realm. This
 * update, but don't have enough memory to update our realm hierarchy,
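A hypothetical example of the realm inheritance described above (paths and
snap IDs are invented for illustration): say realm /a carries one snapshot,
snap 3, and a snapshot later taken on /a/b creates a child realm /a/b with
its own snap 5. Files under /a/b then carry the snap set {5, 3}, their own
realm's snap plus everything inherited from the parent, while files directly
under /a still see only {3}. The 'snap context' for a realm is just this
combined list plus a sequence number, roughly the shape of
struct ceph_snap_context (paraphrased from include/linux/ceph/libceph.h):

    struct ceph_snap_context {
        refcount_t nref;
        u64 seq;          /* highest snap sequence number covered */
        u32 num_snaps;
        u64 snaps[];      /* snap ids, newest first */
    };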
In ceph_get_snap_realm():

 * increase ref count for the realm

                            struct ceph_snap_realm *realm)
    lockdep_assert_held(&mdsc->snap_rwsem);

     * The 0->1 and 1->0 transitions must take the snap_empty_lock

    if (atomic_inc_not_zero(&realm->nref))
        return;

    spin_lock(&mdsc->snap_empty_lock);
    if (atomic_inc_return(&realm->nref) == 1)
        list_del_init(&realm->empty_item);
    spin_unlock(&mdsc->snap_empty_lock);
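The fast-path/slow-path split above (atomic_inc_not_zero() first, then the
0->1 transition taken under snap_empty_lock) is a common refcount idiom. A
minimal user-space sketch of the same idea, with illustrative names only,
not the kernel API:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    struct obj {
        atomic_int nref;
        bool on_empty_list;             /* stands in for the empty_item linkage */
    };

    static pthread_mutex_t empty_lock = PTHREAD_MUTEX_INITIALIZER;

    static void obj_get(struct obj *o)
    {
        int v = atomic_load(&o->nref);

        /* fast path: bump the count only if it is already non-zero */
        while (v > 0)
            if (atomic_compare_exchange_weak(&o->nref, &v, v + 1))
                return;

        /* slow path: the 0 -> 1 transition happens under the lock, so the
         * object can be pulled back off the "empty" list safely */
        pthread_mutex_lock(&empty_lock);
        if (atomic_fetch_add(&o->nref, 1) == 0)
            o->on_empty_list = false;
        pthread_mutex_unlock(&empty_lock);
    }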
In __insert_snap_realm():

    struct rb_node **p = &root->rb_node;

        if (new->ino < r->ino)
            p = &(*p)->rb_left;
        else if (new->ino > r->ino)
            p = &(*p)->rb_right;

    rb_link_node(&new->node, parent, p);
    rb_insert_color(&new->node, root);

In ceph_create_snap_realm():

 * create and get the realm rooted at @ino and bump its ref count.

    struct ceph_snap_realm *realm;

    lockdep_assert_held_write(&mdsc->snap_rwsem);

    realm = kzalloc(sizeof(*realm), GFP_NOFS);
    if (!realm)
        return ERR_PTR(-ENOMEM);

        atomic_set(&realm->nref, 2);
        atomic_set(&realm->nref, 1);
    realm->ino = ino;
    INIT_LIST_HEAD(&realm->children);
    INIT_LIST_HEAD(&realm->child_item);
    INIT_LIST_HEAD(&realm->empty_item);
    INIT_LIST_HEAD(&realm->dirty_item);
    INIT_LIST_HEAD(&realm->rebuild_item);
    INIT_LIST_HEAD(&realm->inodes_with_caps);
    spin_lock_init(&realm->inodes_with_caps_lock);
    __insert_snap_realm(&mdsc->snap_realms, realm);
    mdsc->num_snap_realms++;

    doutc(mdsc->fsc->client, "%llx %p\n", realm->ino, realm);
    return realm;
In __lookup_snap_realm():

 * lookup the realm rooted at @ino.

    struct ceph_client *cl = mdsc->fsc->client;
    struct rb_node *n = mdsc->snap_realms.rb_node;

    lockdep_assert_held(&mdsc->snap_rwsem);

        if (ino < r->ino)
            n = n->rb_left;
        else if (ino > r->ino)
            n = n->rb_right;

            doutc(cl, "%llx %p\n", r->ino, r);

Forward declaration (needed because __put_snap_realm() and
__destroy_snap_realm() call each other):

static void __put_snap_realm(struct ceph_mds_client *mdsc,
                             struct ceph_snap_realm *realm);
In __destroy_snap_realm():

                            struct ceph_snap_realm *realm)
    struct ceph_client *cl = mdsc->fsc->client;

    lockdep_assert_held_write(&mdsc->snap_rwsem);

    doutc(cl, "%p %llx\n", realm, realm->ino);

    rb_erase(&realm->node, &mdsc->snap_realms);
    mdsc->num_snap_realms--;

    if (realm->parent) {
        list_del_init(&realm->child_item);
        __put_snap_realm(mdsc, realm->parent);
    }

    kfree(realm->prior_parent_snaps);
    kfree(realm->snaps);
    ceph_put_snap_context(realm->cached_context);
    kfree(realm);
In __put_snap_realm():

                            struct ceph_snap_realm *realm)
    lockdep_assert_held_write(&mdsc->snap_rwsem);

    if (atomic_dec_and_test(&realm->nref))
        __destroy_snap_realm(mdsc, realm);

In ceph_put_snap_realm():

                         struct ceph_snap_realm *realm)
    if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock))
        return;

    if (down_write_trylock(&mdsc->snap_rwsem)) {
        spin_unlock(&mdsc->snap_empty_lock);
        __destroy_snap_realm(mdsc, realm);
        up_write(&mdsc->snap_rwsem);
    } else {
        list_add(&realm->empty_item, &mdsc->snap_empty);
        spin_unlock(&mdsc->snap_empty_lock);
    }
In __cleanup_empty_realms():

    struct ceph_snap_realm *realm;

    lockdep_assert_held_write(&mdsc->snap_rwsem);

    spin_lock(&mdsc->snap_empty_lock);
    while (!list_empty(&mdsc->snap_empty)) {
        realm = list_first_entry(&mdsc->snap_empty,
                                 struct ceph_snap_realm, empty_item);
        list_del(&realm->empty_item);
        spin_unlock(&mdsc->snap_empty_lock);
        __destroy_snap_realm(mdsc, realm);
        spin_lock(&mdsc->snap_empty_lock);
    }
    spin_unlock(&mdsc->snap_empty_lock);

In ceph_cleanup_global_and_empty_realms():

    down_write(&mdsc->snap_rwsem);
    up_write(&mdsc->snap_rwsem);
In adjust_snap_realm_parent():

 * adjust the parent realm of a given @realm. adjust child list, and parent

                               struct ceph_snap_realm *realm,
    struct ceph_client *cl = mdsc->fsc->client;

    lockdep_assert_held_write(&mdsc->snap_rwsem);

    if (realm->parent_ino == parentino)
        return 0;

    doutc(cl, "%llx %p: %llx %p -> %llx %p\n", realm->ino, realm,
          realm->parent_ino, realm->parent, parentino, parent);
    if (realm->parent) {
        list_del_init(&realm->child_item);
        ceph_put_snap_realm(mdsc, realm->parent);
    }
    realm->parent_ino = parentino;
    realm->parent = parent;
    list_add(&realm->child_item, &parent->children);

In cmpu64_rev():

        return -1;
In build_snap_context():

 * build the snap context for a given realm.

                              struct ceph_snap_realm *realm,
    struct ceph_client *cl = mdsc->fsc->client;
    struct ceph_snap_realm *parent = realm->parent;
    u32 num = realm->num_prior_parent_snaps + realm->num_snaps;

    if (!parent->cached_context) {
            list_add(&parent->rebuild_item, realm_queue);
        num += parent->cached_context->num_snaps;

       matches realm seq, and my parents' does too. (this works
    if (realm->cached_context &&
        realm->cached_context->seq == realm->seq &&
         realm->cached_context->seq >= parent->cached_context->seq)) {
              realm->ino, realm, realm->cached_context,
              realm->cached_context->seq,
              (unsigned int)realm->cached_context->num_snaps);

    err = -ENOMEM;
    if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))

    snapc->seq = realm->seq;

    for (i = 0; i < parent->cached_context->num_snaps; i++)
        if (parent->cached_context->snaps[i] >=
            realm->parent_since)
            snapc->snaps[num++] =
                parent->cached_context->snaps[i];
    if (parent->cached_context->seq > snapc->seq)
        snapc->seq = parent->cached_context->seq;

    memcpy(snapc->snaps + num, realm->snaps,
           sizeof(u64)*realm->num_snaps);
    num += realm->num_snaps;
    memcpy(snapc->snaps + num, realm->prior_parent_snaps,
           sizeof(u64)*realm->num_prior_parent_snaps);
    num += realm->num_prior_parent_snaps;

    sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL);
    snapc->num_snaps = num;
    doutc(cl, "%llx %p: %p seq %lld (%u snaps)\n", realm->ino, realm,
          snapc, snapc->seq, (unsigned int) snapc->num_snaps);

    ceph_put_snap_context(realm->cached_context);
    realm->cached_context = snapc;
    /* queue realm for cap_snap creation */
    list_add_tail(&realm->dirty_item, dirty_realms);

    if (realm->cached_context) {
        ceph_put_snap_context(realm->cached_context);
        realm->cached_context = NULL;
    pr_err_client(cl, "%llx %p fail %d\n", realm->ino, realm, err);
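To make the merge concrete, here is a small stand-alone user-space sketch
(not kernel code; the snap IDs are invented) of what build_snap_context()
computes: parent snaps at or after parent_since, plus the realm's own snaps
and its prior_parent_snaps, sorted newest-first:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* sort u64 snap ids in descending order, like cmpu64_rev() above */
    static int cmpu64_rev(const void *a, const void *b)
    {
        uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;
        return (x < y) - (x > y);
    }

    int main(void)
    {
        uint64_t parent_snaps[] = { 8, 5, 2 };  /* parent realm's cached context */
        uint64_t parent_since = 4;              /* joined this parent at snap 4 */
        uint64_t own_snaps[] = { 7, 3 };        /* snaps taken on this realm */
        uint64_t prior_parent_snaps[] = { 1 };  /* inherited from a previous parent */
        uint64_t snaps[8];
        size_t num = 0, i;

        for (i = 0; i < sizeof(parent_snaps) / sizeof(parent_snaps[0]); i++)
            if (parent_snaps[i] >= parent_since) /* only snaps since we joined */
                snaps[num++] = parent_snaps[i];
        for (i = 0; i < sizeof(own_snaps) / sizeof(own_snaps[0]); i++)
            snaps[num++] = own_snaps[i];
        for (i = 0; i < sizeof(prior_parent_snaps) / sizeof(prior_parent_snaps[0]); i++)
            snaps[num++] = prior_parent_snaps[i];

        qsort(snaps, num, sizeof(snaps[0]), cmpu64_rev);

        for (i = 0; i < num; i++)
            printf("%llu ", (unsigned long long)snaps[i]);
        printf("\n");   /* prints: 8 7 5 3 1 */
        return 0;
    }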
In rebuild_snap_realms():

 * rebuild snap context for the given realm and all of its children.

                               struct ceph_snap_realm *realm,
    struct ceph_client *cl = mdsc->fsc->client;

    list_add_tail(&realm->rebuild_item, &realm_queue);

            list_del_init(&_realm->rebuild_item);

        doutc(cl, "%llx %p, %s\n", realm->ino, realm,

        list_for_each_entry(child, &_realm->children, child_item) {
            if (!list_empty(&child->rebuild_item)) {

        list_for_each_entry(child, &_realm->children, child_item)
            list_add_tail(&child->rebuild_item, &realm_queue);

        list_del_init(&_realm->rebuild_item);

In dup_array():

        return -ENOMEM;

In has_new_snaps():

    if (n->num_snaps == 0)
        return false;
    /* snaps[] is sorted newest-first, so there is a new snap iff the newest
       snap in the new context is newer than the old context's seq */
    return n->snaps[0] > o->seq;
In ceph_queue_cap_snap():

 * However, if a (sync) write is currently in-progress when we apply
 * cap_snap->writing = 1, and is said to be "pending." When the write
 * Caller must hold snap_rwsem for read (i.e., the realm topology won't

    struct inode *inode = &ci->netfs.inode;

    spin_lock(&ci->i_ceph_lock);

    old_snapc = ci->i_head_snapc;
    new_snapc = ci->i_snap_realm->cached_context;

    if (ci->i_wrbuffer_ref_head == 0 &&

     * no new snapshot. But when there are dirty pages or on-going

        capsnap->need_flush = true;

        ci->i_wrbuffer_ref_head == 0) {

          ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush");

    capsnap->follows = old_snapc->seq;
    capsnap->issued = __ceph_caps_issued(ci, NULL);
    capsnap->dirty = dirty;

    capsnap->mode = inode->i_mode;
    capsnap->uid = inode->i_uid;
    capsnap->gid = inode->i_gid;

        capsnap->xattr_blob =
            ceph_buffer_get(ci->i_xattrs.blob);
        capsnap->xattr_version = ci->i_xattrs.version;
        capsnap->xattr_blob = NULL;
        capsnap->xattr_version = 0;

    capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;

    capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
    ci->i_wrbuffer_ref_head = 0;
    capsnap->context = old_snapc;
    list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);

          old_snapc, old_snapc->seq);
        capsnap->writing = 1;

    if (ci->i_wrbuffer_ref_head == 0 &&
        ci->i_wr_ref == 0 &&
        ci->i_dirty_caps == 0 &&
        ci->i_flushing_caps == 0) {
        ci->i_head_snapc = NULL;
    } else {
        ci->i_head_snapc = ceph_get_snap_context(new_snapc);

    spin_unlock(&ci->i_ceph_lock);
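Reading the fragments above and below together, the "pending" case plays out
roughly like this (an illustrative sequence, not a verbatim trace): a snapshot
notification arrives while a sync write is still in flight, so
ceph_queue_cap_snap() records the old snap context in a new capsnap and marks
it writing = 1 without capturing the file size or timestamps yet; once the
write drains, __ceph_finish_cap_snap() fills in size, mtime/atime/ctime and
change_attr, and if no dirty pages or buffered-write references remain it
sets CEPH_I_FLUSH_SNAPS and queues the inode on mdsc->snap_flush_list so the
snapped state is flushed to the MDS.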
In __ceph_finish_cap_snap():

    struct inode *inode = &ci->netfs.inode;
    struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
    struct ceph_client *cl = mdsc->fsc->client;

    BUG_ON(capsnap->writing);
    capsnap->size = i_size_read(inode);
    capsnap->mtime = inode_get_mtime(inode);
    capsnap->atime = inode_get_atime(inode);
    capsnap->ctime = inode_get_ctime(inode);
    capsnap->btime = ci->i_btime;
    capsnap->change_attr = inode_peek_iversion_raw(inode);
    capsnap->time_warp_seq = ci->i_time_warp_seq;
    capsnap->truncate_size = ci->i_truncate_size;
    capsnap->truncate_seq = ci->i_truncate_seq;
    if (capsnap->dirty_pages) {
              ceph_vinop(inode), capsnap, capsnap->context,
              capsnap->context->seq,
              ceph_cap_string(capsnap->dirty),
              capsnap->size, capsnap->dirty_pages);

    if (ci->i_wrbuffer_ref) {
              ceph_vinop(inode), capsnap, capsnap->context,
              capsnap->context->seq, ceph_cap_string(capsnap->dirty),
              capsnap->size);

    ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
          inode, ceph_vinop(inode), capsnap, capsnap->context,
          capsnap->context->seq, ceph_cap_string(capsnap->dirty),
          capsnap->size);

    spin_lock(&mdsc->snap_flush_lock);
    if (list_empty(&ci->i_snap_flush_item)) {
        list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
    spin_unlock(&mdsc->snap_flush_lock);

In queue_realm_cap_snaps():

 * Queue cap_snaps for snap writeback for this realm and its children.
 * Called under snap_rwsem, so realm topology won't change.

                                  struct ceph_snap_realm *realm)
    struct ceph_client *cl = mdsc->fsc->client;

    doutc(cl, "%p %llx inode\n", realm, realm->ino);

    spin_lock(&realm->inodes_with_caps_lock);
    list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) {
        struct inode *inode = igrab(&ci->netfs.inode);

        spin_unlock(&realm->inodes_with_caps_lock);

        capsnap->cap_flush.is_capsnap = true;
        refcount_set(&capsnap->nref, 1);
        INIT_LIST_HEAD(&capsnap->cap_flush.i_list);
        INIT_LIST_HEAD(&capsnap->cap_flush.g_list);
        INIT_LIST_HEAD(&capsnap->ci_item);

        spin_lock(&realm->inodes_with_caps_lock);
        spin_unlock(&realm->inodes_with_caps_lock);

    doutc(cl, "%p %llx done\n", realm, realm->ino);
In ceph_update_snap_trace():

 * the snap realm parameters from a given realm and all of its ancestors,

    struct ceph_client *cl = mdsc->fsc->client;
    struct ceph_snap_realm *realm;
    struct ceph_client *client = mdsc->fsc->client;
    int err = -ENOMEM;

    lockdep_assert_held_write(&mdsc->snap_rwsem);

    realm = NULL;

    ceph_decode_need(&p, e, sizeof(u64)*(le32_to_cpu(ri->num_snaps) +
                     le32_to_cpu(ri->num_prior_parent_snaps)), bad);
    p += sizeof(u64) * le32_to_cpu(ri->num_snaps);
    p += sizeof(u64) * le32_to_cpu(ri->num_prior_parent_snaps);

    realm = ceph_lookup_snap_realm(mdsc, le64_to_cpu(ri->ino));
    if (!realm) {
        realm = ceph_create_snap_realm(mdsc, le64_to_cpu(ri->ino));
        if (IS_ERR(realm)) {
            err = PTR_ERR(realm);

    err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));

    if (le64_to_cpu(ri->seq) > realm->seq) {
        doutc(cl, "updating %llx %p %lld -> %lld\n", realm->ino,
              realm, realm->seq, le64_to_cpu(ri->seq));
        /* update realm parameters, snap lists */
        realm->seq = le64_to_cpu(ri->seq);
        realm->created = le64_to_cpu(ri->created);
        realm->parent_since = le64_to_cpu(ri->parent_since);

        realm->num_snaps = le32_to_cpu(ri->num_snaps);
        err = dup_array(&realm->snaps, snaps, realm->num_snaps);

        realm->num_prior_parent_snaps =
            le32_to_cpu(ri->num_prior_parent_snaps);
        err = dup_array(&realm->prior_parent_snaps, prior_parent_snaps,
                        realm->num_prior_parent_snaps);

        if (realm->seq > mdsc->last_snap_seq)
            mdsc->last_snap_seq = realm->seq;

    } else if (!realm->cached_context) {
        doutc(cl, "%llx %p seq %lld new\n", realm->ino, realm,
              realm->seq);
        doutc(cl, "%llx %p seq %lld unchanged\n", realm->ino, realm,
              realm->seq);

    doutc(cl, "done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino,
          realm, rebuild_snapcs, p, e);

     * this will always track the uppermost parent realm from which

        realm_to_rebuild = realm;

        first_realm = realm;
        ceph_put_snap_realm(mdsc, realm);

        realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
                                 dirty_item);
        list_del_init(&realm->dirty_item);
        queue_realm_cap_snaps(mdsc, realm);

    err = -EIO;

    if (realm && !IS_ERR(realm))
        ceph_put_snap_realm(mdsc, realm);

    WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO);
    ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr);
            ceph_pr_addr(&client->msgr.inst.addr), ret);
            client->monc.auth->global_id, __func__,
            ret ? "" : ceph_pr_addr(&client->msgr.inst.addr),
            err == -EIO ? " after corrupted snaptrace is fixed" : "");
In flush_snaps():

    struct ceph_client *cl = mdsc->fsc->client;

    spin_lock(&mdsc->snap_flush_lock);
    while (!list_empty(&mdsc->snap_flush_list)) {
        ci = list_first_entry(&mdsc->snap_flush_list,
                              struct ceph_inode_info, i_snap_flush_item);
        inode = &ci->netfs.inode;
        spin_unlock(&mdsc->snap_flush_lock);
        spin_lock(&mdsc->snap_flush_lock);
    spin_unlock(&mdsc->snap_flush_lock);

 * ceph_change_snap_realm - change the snap_realm for an inode
 * @inode: inode to move to new snap realm
 * @realm: new realm to move inode into (may be NULL)
 * the new snaprealm (if any). The old snap realm reference held by
 * the inode is put. If realm is non-NULL, then the caller's reference
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm)
    struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc;
    struct ceph_snap_realm *oldrealm = ci->i_snap_realm;

    lockdep_assert_held(&ci->i_ceph_lock);

    spin_lock(&oldrealm->inodes_with_caps_lock);
    list_del_init(&ci->i_snap_realm_item);
    if (oldrealm->ino == ci->i_vino.ino)
        oldrealm->inode = NULL;
    spin_unlock(&oldrealm->inodes_with_caps_lock);

    ci->i_snap_realm = realm;

    if (realm) {
        spin_lock(&realm->inodes_with_caps_lock);
        list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps);
        if (realm->ino == ci->i_vino.ino)
            realm->inode = inode;
        spin_unlock(&realm->inodes_with_caps_lock);
    }
In ceph_handle_snap():

 * or deletion notification on an existing realm. This should update the
 * realm and its children.
 * The more difficult case is realm creation, due to snap creation at a
 * directory into another realm.

    struct ceph_client *cl = mdsc->fsc->client;
    struct super_block *sb = mdsc->fsc->sb;
    int mds = session->s_mds;
    struct ceph_snap_realm *realm = NULL;
    void *p = msg->front.iov_base;
    void *e = p + msg->front.iov_len;

    if (msg->front.iov_len < sizeof(*h))

    op = le32_to_cpu(h->op);
    split = le64_to_cpu(h->split); /* non-zero if we are splitting an
                                    * existing realm */
    num_split_inos = le32_to_cpu(h->num_split_inos);
    num_split_realms = le32_to_cpu(h->num_split_realms);
    trace_len = le32_to_cpu(h->trace_len);

    down_write(&mdsc->snap_rwsem);

         * A "split" breaks part of an existing realm off into
         * a new realm. The MDS provides a list of inodes

        /* we will peek at realm info here, but will _not_
         * advance p, as the realm update will occur below in

        realm = ceph_lookup_snap_realm(mdsc, split);
        if (!realm) {
            realm = ceph_create_snap_realm(mdsc, split);
            if (IS_ERR(realm))

        doutc(cl, "splitting snap_realm %llx %p\n", realm->ino, realm);

            spin_lock(&ci->i_ceph_lock);
            if (!ci->i_snap_realm)

             * If this inode belongs to a realm that was
             * created after our new realm, we experienced

            if (ci->i_snap_realm->created >
                le64_to_cpu(ri->created)) {
                doutc(cl, " leaving %p %llx.%llx in newer realm %llx %p\n",
                      inode, ceph_vinop(inode), ci->i_snap_realm->ino,
                      ci->i_snap_realm);

            doutc(cl, " will move %p %llx.%llx to split realm %llx %p\n",
                  inode, ceph_vinop(inode), realm->ino, realm);

            ceph_get_snap_realm(mdsc, realm);
            ceph_change_snap_realm(inode, realm);
            spin_unlock(&ci->i_ceph_lock);

            spin_unlock(&ci->i_ceph_lock);

        /* we may have taken some of the old realm's children. */
            adjust_snap_realm_parent(mdsc, child, realm->ino);

     * In the non-split case both 'num_split_inos' and
     * 'num_split_realms' should be 0, making this a no-op.
     * positioned at the start of realm info, as expected by

    /* we took a reference when we created the realm, above */
    ceph_put_snap_realm(mdsc, realm);

    up_write(&mdsc->snap_rwsem);

    up_write(&mdsc->snap_rwsem);
In ceph_get_snapid_map():

    struct ceph_client *cl = mdsc->fsc->client;

    spin_lock(&mdsc->snapid_map_lock);
    p = &mdsc->snapid_map_tree.rb_node;
        if (snap > exist->snap) {
            p = &(*p)->rb_left;
        } else if (snap < exist->snap) {
            p = &(*p)->rb_right;
            if (atomic_inc_return(&exist->ref) == 1)
                list_del_init(&exist->lru);
    spin_unlock(&mdsc->snapid_map_lock);
        doutc(cl, "found snapid map %llx -> %x\n", exist->snap,
              exist->dev);

    ret = get_anon_bdev(&sm->dev);

    INIT_LIST_HEAD(&sm->lru);
    atomic_set(&sm->ref, 1);
    sm->snap = snap;

    p = &mdsc->snapid_map_tree.rb_node;
    spin_lock(&mdsc->snapid_map_lock);
        if (snap > exist->snap)
            p = &(*p)->rb_left;
        else if (snap < exist->snap)
            p = &(*p)->rb_right;
            if (atomic_inc_return(&exist->ref) == 1)
                list_del_init(&exist->lru);
        rb_link_node(&sm->node, parent, p);
        rb_insert_color(&sm->node, &mdsc->snapid_map_tree);
    spin_unlock(&mdsc->snapid_map_lock);
        free_anon_bdev(sm->dev);
        doutc(cl, "found snapid map %llx -> %x\n", exist->snap,
              exist->dev);

    doutc(cl, "create snapid map %llx -> %x\n", sm->snap, sm->dev);
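The structure of ceph_get_snapid_map() above follows a common pattern: search
under the spinlock, drop the lock to do the allocation (the new entry and its
anonymous dev from get_anon_bdev()), then retake the lock, re-check for a
racing insert, and either link the new node or throw the allocation away
(free_anon_bdev() on the loser's dev). A condensed user-space sketch of the
same pattern, with a plain list instead of an rbtree and invented names:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct snap_entry {
        uint64_t snap;
        int dev;                        /* stands in for the anonymous bdev */
        struct snap_entry *next;
    };

    static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct snap_entry *map_head;
    static _Atomic int next_dev = 1;    /* fake get_anon_bdev() */

    static struct snap_entry *map_lookup(uint64_t snap)
    {
        struct snap_entry *e;

        for (e = map_head; e; e = e->next)
            if (e->snap == snap)
                return e;
        return NULL;
    }

    static struct snap_entry *get_snap_entry(uint64_t snap)
    {
        struct snap_entry *e, *fresh;

        pthread_mutex_lock(&map_lock);
        e = map_lookup(snap);
        pthread_mutex_unlock(&map_lock);
        if (e)
            return e;                   /* fast path: already mapped */

        /* slow path: allocate outside the lock */
        fresh = calloc(1, sizeof(*fresh));
        if (!fresh)
            return NULL;
        fresh->snap = snap;
        fresh->dev = next_dev++;        /* kernel: get_anon_bdev() */

        pthread_mutex_lock(&map_lock);
        e = map_lookup(snap);           /* re-check: did someone beat us? */
        if (!e) {
            fresh->next = map_head;     /* we won: publish our entry */
            map_head = fresh;
            e = fresh;
            fresh = NULL;
        }
        pthread_mutex_unlock(&map_lock);

        if (fresh) {                    /* we lost: discard the duplicate */
            /* kernel: free_anon_bdev(fresh->dev) */
            free(fresh);
        }
        return e;
    }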
In ceph_put_snapid_map():

    if (atomic_dec_and_lock(&sm->ref, &mdsc->snapid_map_lock)) {
        if (!RB_EMPTY_NODE(&sm->node)) {
            sm->last_used = jiffies;
            list_add_tail(&sm->lru, &mdsc->snapid_map_lru);
            spin_unlock(&mdsc->snapid_map_lock);
            spin_unlock(&mdsc->snapid_map_lock);

In ceph_trim_snapid_map():

    struct ceph_client *cl = mdsc->fsc->client;

    spin_lock(&mdsc->snapid_map_lock);
    while (!list_empty(&mdsc->snapid_map_lru)) {
        sm = list_first_entry(&mdsc->snapid_map_lru,
        if (time_after(sm->last_used + CEPH_SNAPID_MAP_TIMEOUT, now))

        rb_erase(&sm->node, &mdsc->snapid_map_tree);
        list_move(&sm->lru, &to_free);
    spin_unlock(&mdsc->snapid_map_lock);

        list_del(&sm->lru);
        doutc(cl, "trim snapid map %llx -> %x\n", sm->snap, sm->dev);
        free_anon_bdev(sm->dev);

In ceph_cleanup_snapid_map():

    struct ceph_client *cl = mdsc->fsc->client;

    spin_lock(&mdsc->snapid_map_lock);
    while ((p = rb_first(&mdsc->snapid_map_tree))) {
        rb_erase(p, &mdsc->snapid_map_tree);
        list_move(&sm->lru, &to_free);
    spin_unlock(&mdsc->snapid_map_lock);

        list_del(&sm->lru);
        free_anon_bdev(sm->dev);
        if (WARN_ON_ONCE(atomic_read(&sm->ref))) {
            pr_err_client(cl, "snapid map %llx -> %x still in use\n",
                          sm->snap, sm->dev);