xref: /linux/fs/bcachefs/move.c (revision 7a9b709e7cc5ce1ffb84ce07bf6d157e1de758df)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "alloc_background.h"
5 #include "alloc_foreground.h"
6 #include "backpointers.h"
7 #include "bkey_buf.h"
8 #include "btree_gc.h"
9 #include "btree_io.h"
10 #include "btree_update.h"
11 #include "btree_update_interior.h"
12 #include "btree_write_buffer.h"
13 #include "compress.h"
14 #include "disk_groups.h"
15 #include "ec.h"
16 #include "errcode.h"
17 #include "error.h"
18 #include "inode.h"
19 #include "io_read.h"
20 #include "io_write.h"
21 #include "journal_reclaim.h"
22 #include "keylist.h"
23 #include "move.h"
24 #include "rebalance.h"
25 #include "reflink.h"
26 #include "replicas.h"
27 #include "snapshot.h"
28 #include "super-io.h"
29 #include "trace.h"
30 
31 #include <linux/ioprio.h>
32 #include <linux/kthread.h>
33 
34 const char * const bch2_data_ops_strs[] = {
35 #define x(t, n, ...) [n] = #t,
36 	BCH_DATA_OPS()
37 #undef x
38 	NULL
39 };
40 
41 static void trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
42 			       struct bch_io_opts *io_opts,
43 			       struct data_update_opts *data_opts)
44 {
45 	if (trace_io_move_enabled()) {
46 		struct printbuf buf = PRINTBUF;
47 
48 		bch2_bkey_val_to_text(&buf, c, k);
49 		prt_newline(&buf);
50 		bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
51 		trace_io_move(c, buf.buf);
52 		printbuf_exit(&buf);
53 	}
54 }
55 
56 static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
57 {
58 	if (trace_io_move_read_enabled()) {
59 		struct printbuf buf = PRINTBUF;
60 
61 		bch2_bkey_val_to_text(&buf, c, k);
62 		trace_io_move_read(c, buf.buf);
63 		printbuf_exit(&buf);
64 	}
65 }
66 
67 struct moving_io {
68 	struct list_head		read_list;
69 	struct list_head		io_list;
70 	struct move_bucket_in_flight	*b;
71 	struct closure			cl;
72 	bool				read_completed;
73 
74 	unsigned			read_sectors;
75 	unsigned			write_sectors;
76 
77 	struct data_update		write;
78 };
79 
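/*
 * Tear down a moving_io once its write has completed (or was never issued):
 * drop the in-flight bucket count, unlink it from the context's io list, and
 * release the data update (or, for scrub, just the bio pages and bvecs).
 */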
80 static void move_free(struct moving_io *io)
81 {
82 	struct moving_context *ctxt = io->write.ctxt;
83 
84 	if (io->b)
85 		atomic_dec(&io->b->count);
86 
87 	mutex_lock(&ctxt->lock);
88 	list_del(&io->io_list);
89 	wake_up(&ctxt->wait);
90 	mutex_unlock(&ctxt->lock);
91 
92 	if (!io->write.data_opts.scrub) {
93 		bch2_data_update_exit(&io->write);
94 	} else {
95 		bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio);
96 		kfree(io->write.bvecs);
97 	}
98 	kfree(io);
99 }
100 
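/*
 * Write completion: note any write error, update the in-flight write
 * accounting and free the moving_io; the closure_put() pairs with the
 * closure_get() taken in move_write().
 */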
101 static void move_write_done(struct bch_write_op *op)
102 {
103 	struct moving_io *io = container_of(op, struct moving_io, write.op);
104 	struct bch_fs *c = op->c;
105 	struct moving_context *ctxt = io->write.ctxt;
106 
107 	if (op->error) {
108 		if (trace_io_move_write_fail_enabled()) {
109 			struct printbuf buf = PRINTBUF;
110 
111 			bch2_write_op_to_text(&buf, op);
112 			prt_printf(&buf, "ret\t%s\n", bch2_err_str(op->error));
113 			trace_io_move_write_fail(c, buf.buf);
114 			printbuf_exit(&buf);
115 		}
116 		this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]);
117 
118 		ctxt->write_error = true;
119 	}
120 
121 	atomic_sub(io->write_sectors, &ctxt->write_sectors);
122 	atomic_dec(&ctxt->write_ios);
123 	move_free(io);
124 	closure_put(&ctxt->cl);
125 }
126 
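/*
 * Called once the read half of a move has completed: account read errors in
 * the stats, free the io if the read failed or this was a scrub (which has no
 * write half), otherwise kick off the write via bch2_data_update_read_done().
 */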
127 static void move_write(struct moving_io *io)
128 {
129 	struct moving_context *ctxt = io->write.ctxt;
130 
131 	if (ctxt->stats) {
132 		if (io->write.rbio.bio.bi_status)
133 			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
134 				     &ctxt->stats->sectors_error_uncorrected);
135 		else if (io->write.rbio.saw_error)
136 			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
137 				     &ctxt->stats->sectors_error_corrected);
138 	}
139 
140 	if (unlikely(io->write.rbio.ret ||
141 		     io->write.rbio.bio.bi_status ||
142 		     io->write.data_opts.scrub)) {
143 		move_free(io);
144 		return;
145 	}
146 
147 	if (trace_io_move_write_enabled()) {
148 		struct bch_fs *c = io->write.op.c;
149 		struct printbuf buf = PRINTBUF;
150 
151 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
152 		trace_io_move_write(c, buf.buf);
153 		printbuf_exit(&buf);
154 	}
155 
156 	closure_get(&io->write.ctxt->cl);
157 	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
158 	atomic_inc(&io->write.ctxt->write_ios);
159 
160 	bch2_data_update_read_done(&io->write);
161 }
162 
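/* Return the oldest queued read whose data has arrived, if any: */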
163 struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
164 {
165 	struct moving_io *io =
166 		list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
167 
168 	return io && io->read_completed ? io : NULL;
169 }
170 
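/*
 * Read completion: update the in-flight read accounting, mark the io as ready
 * for its write and wake the move thread; the closure_put() pairs with the
 * closure_get() taken in bch2_move_extent() before issuing the read.
 */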
171 static void move_read_endio(struct bio *bio)
172 {
173 	struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio);
174 	struct moving_context *ctxt = io->write.ctxt;
175 
176 	atomic_sub(io->read_sectors, &ctxt->read_sectors);
177 	atomic_dec(&ctxt->read_ios);
178 	io->read_completed = true;
179 
180 	wake_up(&ctxt->wait);
181 	closure_put(&ctxt->cl);
182 }
183 
184 void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
185 {
186 	struct moving_io *io;
187 
188 	while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
189 		bch2_trans_unlock_long(ctxt->trans);
190 		list_del(&io->read_list);
191 		move_write(io);
192 	}
193 }
194 
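/*
 * Wait for in-flight writes to make progress: returns once write_sectors
 * either drops to zero or changes from its value on entry.
 */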
195 void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
196 {
197 	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
198 
199 	move_ctxt_wait_event(ctxt,
200 		!atomic_read(&ctxt->write_sectors) ||
201 		atomic_read(&ctxt->write_sectors) != sectors_pending);
202 }
203 
204 void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
205 {
206 	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
207 	bch2_trans_unlock_long(ctxt->trans);
208 	closure_sync(&ctxt->cl);
209 }
210 
211 void bch2_moving_ctxt_exit(struct moving_context *ctxt)
212 {
213 	struct bch_fs *c = ctxt->trans->c;
214 
215 	bch2_moving_ctxt_flush_all(ctxt);
216 
217 	EBUG_ON(atomic_read(&ctxt->write_sectors));
218 	EBUG_ON(atomic_read(&ctxt->write_ios));
219 	EBUG_ON(atomic_read(&ctxt->read_sectors));
220 	EBUG_ON(atomic_read(&ctxt->read_ios));
221 
222 	mutex_lock(&c->moving_context_lock);
223 	list_del(&ctxt->list);
224 	mutex_unlock(&c->moving_context_lock);
225 
226 	/*
227 	 * Generally, releasing a transaction while a restart is pending indicates
228 	 * an unhandled transaction restart; but this can happen legitimately
229 	 * within the move code, e.g. when bch2_move_ratelimit() tells us to
230 	 * exit before we've retried.
231 	 */
232 	bch2_trans_begin(ctxt->trans);
233 	bch2_trans_put(ctxt->trans);
234 	memset(ctxt, 0, sizeof(*ctxt));
235 }
236 
237 void bch2_moving_ctxt_init(struct moving_context *ctxt,
238 			   struct bch_fs *c,
239 			   struct bch_ratelimit *rate,
240 			   struct bch_move_stats *stats,
241 			   struct write_point_specifier wp,
242 			   bool wait_on_copygc)
243 {
244 	memset(ctxt, 0, sizeof(*ctxt));
245 
246 	ctxt->trans	= bch2_trans_get(c);
247 	ctxt->fn	= (void *) _RET_IP_;
248 	ctxt->rate	= rate;
249 	ctxt->stats	= stats;
250 	ctxt->wp	= wp;
251 	ctxt->wait_on_copygc = wait_on_copygc;
252 
253 	closure_init_stack(&ctxt->cl);
254 
255 	mutex_init(&ctxt->lock);
256 	INIT_LIST_HEAD(&ctxt->reads);
257 	INIT_LIST_HEAD(&ctxt->ios);
258 	init_waitqueue_head(&ctxt->wait);
259 
260 	mutex_lock(&c->moving_context_lock);
261 	list_add(&ctxt->list, &c->moving_context_list);
262 	mutex_unlock(&c->moving_context_lock);
263 }
264 
265 void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
266 {
267 	trace_move_data(c, stats);
268 }
269 
270 void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
271 {
272 	memset(stats, 0, sizeof(*stats));
273 	stats->data_type = BCH_DATA_user;
274 	scnprintf(stats->name, sizeof(stats->name), "%s", name);
275 }
276 
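/*
 * Queue a single extent to be moved or rewritten: allocate a moving_io, set
 * up the data update (or, for scrub, just the read bios), then issue the
 * read; the write half runs from the move thread once the read completes.
 * Returns 0 on success or when there was nothing to do.
 */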
277 int bch2_move_extent(struct moving_context *ctxt,
278 		     struct move_bucket_in_flight *bucket_in_flight,
279 		     struct btree_iter *iter,
280 		     struct bkey_s_c k,
281 		     struct bch_io_opts io_opts,
282 		     struct data_update_opts data_opts)
283 {
284 	struct btree_trans *trans = ctxt->trans;
285 	struct bch_fs *c = trans->c;
286 	int ret = -ENOMEM;
287 
288 	trace_io_move2(c, k, &io_opts, &data_opts);
289 	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
290 
291 	if (ctxt->stats)
292 		ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
293 
294 	bch2_data_update_opts_normalize(k, &data_opts);
295 
296 	if (!data_opts.rewrite_ptrs &&
297 	    !data_opts.extra_replicas &&
298 	    !data_opts.scrub) {
299 		if (data_opts.kill_ptrs)
300 			return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
301 		return 0;
302 	}
303 
304 	/*
305 	 * Unlock the transaction before memory allocations & taking nocow locks
306 	 * in bch2_data_update_init():
307 	 */
308 	bch2_trans_unlock(trans);
309 
310 	struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL);
311 	if (!io)
312 		goto err;
313 
314 	INIT_LIST_HEAD(&io->io_list);
315 	io->write.ctxt		= ctxt;
316 	io->read_sectors	= k.k->size;
317 	io->write_sectors	= k.k->size;
318 
319 	if (!data_opts.scrub) {
320 		ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
321 					    &io_opts, data_opts, iter->btree_id, k);
322 		if (ret)
323 			goto err_free;
324 
325 		io->write.op.end_io	= move_write_done;
326 	} else {
327 		bch2_bkey_buf_init(&io->write.k);
328 		bch2_bkey_buf_reassemble(&io->write.k, c, k);
329 
330 		io->write.op.c		= c;
331 		io->write.data_opts	= data_opts;
332 
333 		ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
334 		if (ret)
335 			goto err_free;
336 	}
337 
338 	io->write.rbio.bio.bi_end_io = move_read_endio;
339 	io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
340 
341 	if (ctxt->rate)
342 		bch2_ratelimit_increment(ctxt->rate, k.k->size);
343 
344 	if (ctxt->stats) {
345 		atomic64_inc(&ctxt->stats->keys_moved);
346 		atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
347 	}
348 
349 	if (bucket_in_flight) {
350 		io->b = bucket_in_flight;
351 		atomic_inc(&io->b->count);
352 	}
353 
354 	trace_io_move_read2(c, k);
355 
356 	mutex_lock(&ctxt->lock);
357 	atomic_add(io->read_sectors, &ctxt->read_sectors);
358 	atomic_inc(&ctxt->read_ios);
359 
360 	list_add_tail(&io->read_list, &ctxt->reads);
361 	list_add_tail(&io->io_list, &ctxt->ios);
362 	mutex_unlock(&ctxt->lock);
363 
364 	/*
365 	 * dropped by move_read_endio() - guards against use after free of
366 	 * ctxt when doing wakeup
367 	 */
368 	closure_get(&ctxt->cl);
369 	__bch2_read_extent(trans, &io->write.rbio,
370 			   io->write.rbio.bio.bi_iter,
371 			   bkey_start_pos(k.k),
372 			   iter->btree_id, k, 0,
373 			   NULL,
374 			   BCH_READ_last_fragment,
375 			   data_opts.scrub ? data_opts.read_dev : -1);
376 	return 0;
377 err_free:
378 	kfree(io);
379 err:
380 	if (bch2_err_matches(ret, BCH_ERR_data_update_done))
381 		return 0;
382 
383 	if (bch2_err_matches(ret, EROFS) ||
384 	    bch2_err_matches(ret, BCH_ERR_transaction_restart))
385 		return ret;
386 
387 	count_event(c, io_move_start_fail);
388 
389 	if (trace_io_move_start_fail_enabled()) {
390 		struct printbuf buf = PRINTBUF;
391 
392 		bch2_bkey_val_to_text(&buf, c, k);
393 		prt_str(&buf, ": ");
394 		prt_str(&buf, bch2_err_str(ret));
395 		trace_io_move_start_fail(c, buf.buf);
396 		printbuf_exit(&buf);
397 	}
398 	return ret;
399 }
400 
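/*
 * Look up the io options that apply to an extent, keeping a per-snapshot
 * cache of the owning inode's options so repeated lookups within one inode
 * are cheap; keys in the reflink btree fall back to the filesystem-wide
 * options.
 */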
401 static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
402 			  struct per_snapshot_io_opts *io_opts,
403 			  struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */
404 			  struct btree_iter *extent_iter,
405 			  struct bkey_s_c extent_k)
406 {
407 	struct bch_fs *c = trans->c;
408 	u32 restart_count = trans->restart_count;
409 	struct bch_io_opts *opts_ret = &io_opts->fs_io_opts;
410 	int ret = 0;
411 
412 	if (extent_k.k->type == KEY_TYPE_reflink_v)
413 		goto out;
414 
415 	if (io_opts->cur_inum != extent_pos.inode) {
416 		io_opts->d.nr = 0;
417 
418 		ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode),
419 					 BTREE_ITER_all_snapshots, k, ({
420 			if (k.k->p.offset != extent_pos.inode)
421 				break;
422 
423 			if (!bkey_is_inode(k.k))
424 				continue;
425 
426 			struct bch_inode_unpacked inode;
427 			_ret3 = bch2_inode_unpack(k, &inode);
428 			if (_ret3)
429 				break;
430 
431 			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
432 			bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
433 
434 			darray_push(&io_opts->d, e);
435 		}));
436 		io_opts->cur_inum = extent_pos.inode;
437 	}
438 
439 	ret = ret ?: trans_was_restarted(trans, restart_count);
440 	if (ret)
441 		return ERR_PTR(ret);
442 
443 	if (extent_k.k->p.snapshot)
444 		darray_for_each(io_opts->d, i)
445 			if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) {
446 				opts_ret = &i->io_opts;
447 				break;
448 			}
449 out:
450 	ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k);
451 	if (ret)
452 		return ERR_PTR(ret);
453 	return opts_ret;
454 }
455 
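/*
 * Single-extent variant of the above: look up the io options from the owning
 * inode, falling back to the filesystem-wide defaults (e.g. for keys in the
 * reflink btree).
 */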
456 int bch2_move_get_io_opts_one(struct btree_trans *trans,
457 			      struct bch_io_opts *io_opts,
458 			      struct btree_iter *extent_iter,
459 			      struct bkey_s_c extent_k)
460 {
461 	struct bch_fs *c = trans->c;
462 
463 	*io_opts = bch2_opts_to_inode_opts(c->opts);
464 
465 	/* reflink btree? */
466 	if (!extent_k.k->p.inode)
467 		goto out;
468 
469 	struct btree_iter inode_iter;
470 	struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
471 			       SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
472 			       BTREE_ITER_cached);
473 	int ret = bkey_err(inode_k);
474 	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
475 		return ret;
476 
477 	if (!ret && bkey_is_inode(inode_k.k)) {
478 		struct bch_inode_unpacked inode;
479 		bch2_inode_unpack(inode_k, &inode);
480 		bch2_inode_opts_get(io_opts, c, &inode);
481 	}
482 	bch2_trans_iter_exit(trans, &inode_iter);
483 out:
484 	return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k);
485 }
486 
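/*
 * Throttle the move path: optionally wait for copygc to finish, honour the
 * configured rate limit, then wait until in-flight io is below the
 * move_bytes_in_flight/move_ios_in_flight limits. Returns nonzero if the
 * calling kthread should exit.
 */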
487 int bch2_move_ratelimit(struct moving_context *ctxt)
488 {
489 	struct bch_fs *c = ctxt->trans->c;
490 	bool is_kthread = current->flags & PF_KTHREAD;
491 	u64 delay;
492 
493 	if (ctxt->wait_on_copygc && c->copygc_running) {
494 		bch2_moving_ctxt_flush_all(ctxt);
495 		wait_event_killable(c->copygc_running_wq,
496 				    !c->copygc_running ||
497 				    (is_kthread && kthread_should_stop()));
498 	}
499 
500 	do {
501 		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
502 
503 		if (is_kthread && kthread_should_stop())
504 			return 1;
505 
506 		if (delay)
507 			move_ctxt_wait_event_timeout(ctxt,
508 					freezing(current) ||
509 					(is_kthread && kthread_should_stop()),
510 					delay);
511 
512 		if (unlikely(freezing(current))) {
513 			bch2_moving_ctxt_flush_all(ctxt);
514 			try_to_freeze();
515 		}
516 	} while (delay);
517 
518 	/*
519 	 * XXX: these limits really ought to be per device; SSDs and hard drives
520 	 * will want different limits
521 	 */
522 	move_ctxt_wait_event(ctxt,
523 		atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
524 		atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
525 		atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
526 		atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);
527 
528 	return 0;
529 }
530 
531 /*
532  * Move requires non-extents iterators, and there's also no need for it to
533  * signal indirect_extent_missing_error:
534  */
535 static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans,
536 					    struct btree_iter *iter,
537 					    struct bkey_s_c_reflink_p p)
538 {
539 	if (unlikely(REFLINK_P_ERROR(p.v)))
540 		return bkey_s_c_null;
541 
542 	struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v));
543 
544 	bch2_trans_iter_init(trans, iter,
545 			     BTREE_ID_reflink, reflink_pos,
546 			     BTREE_ITER_not_extents);
547 
548 	struct bkey_s_c k = bch2_btree_iter_peek(trans, iter);
549 	if (!k.k || bkey_err(k)) {
550 		bch2_trans_iter_exit(trans, iter);
551 		return k;
552 	}
553 
554 	if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) {
555 		bch2_trans_iter_exit(trans, iter);
556 		return bkey_s_c_null;
557 	}
558 
559 	return k;
560 }
561 
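/*
 * Walk a single btree from @start to @end, moving the extents selected by
 * @pred:
 */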
562 static int bch2_move_data_btree(struct moving_context *ctxt,
563 				struct bpos start,
564 				struct bpos end,
565 				move_pred_fn pred, void *arg,
566 				enum btree_id btree_id)
567 {
568 	struct btree_trans *trans = ctxt->trans;
569 	struct bch_fs *c = trans->c;
570 	struct per_snapshot_io_opts snapshot_io_opts;
571 	struct bch_io_opts *io_opts;
572 	struct bkey_buf sk;
573 	struct btree_iter iter, reflink_iter = {};
574 	struct bkey_s_c k;
575 	struct data_update_opts data_opts;
576 	/*
577 	 * If we're moving a single file, also process reflinked data it points
578 	 * to (this includes propagating changed io_opts from the inode to the
579 	 * extent):
580 	 */
581 	bool walk_indirect = start.inode == end.inode;
582 	int ret = 0, ret2;
583 
584 	per_snapshot_io_opts_init(&snapshot_io_opts, c);
585 	bch2_bkey_buf_init(&sk);
586 
587 	if (ctxt->stats) {
588 		ctxt->stats->data_type	= BCH_DATA_user;
589 		ctxt->stats->pos	= BBPOS(btree_id, start);
590 	}
591 
592 	bch2_trans_begin(trans);
593 	bch2_trans_iter_init(trans, &iter, btree_id, start,
594 			     BTREE_ITER_prefetch|
595 			     BTREE_ITER_not_extents|
596 			     BTREE_ITER_all_snapshots);
597 
598 	if (ctxt->rate)
599 		bch2_ratelimit_reset(ctxt->rate);
600 
601 	while (!bch2_move_ratelimit(ctxt)) {
602 		struct btree_iter *extent_iter = &iter;
603 
604 		bch2_trans_begin(trans);
605 
606 		k = bch2_btree_iter_peek(trans, &iter);
607 		if (!k.k)
608 			break;
609 
610 		ret = bkey_err(k);
611 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
612 			continue;
613 		if (ret)
614 			break;
615 
616 		if (bkey_ge(bkey_start_pos(k.k), end))
617 			break;
618 
619 		if (ctxt->stats)
620 			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
621 
622 		if (walk_indirect &&
623 		    k.k->type == KEY_TYPE_reflink_p &&
624 		    REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
625 			struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
626 
627 			bch2_trans_iter_exit(trans, &reflink_iter);
628 			k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p);
629 			ret = bkey_err(k);
630 			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
631 				continue;
632 			if (ret)
633 				break;
634 
635 			if (!k.k)
636 				goto next_nondata;
637 
638 			/*
639 			 * XXX: reflink pointers may point to multiple indirect
640 			 * extents, so don't advance past the entire reflink
641 			 * pointer - need to fixup iter->k
642 			 */
643 			extent_iter = &reflink_iter;
644 		}
645 
646 		if (!bkey_extent_is_direct_data(k.k))
647 			goto next_nondata;
648 
649 		io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts,
650 						iter.pos, extent_iter, k);
651 		ret = PTR_ERR_OR_ZERO(io_opts);
652 		if (ret)
653 			continue;
654 
655 		memset(&data_opts, 0, sizeof(data_opts));
656 		if (!pred(c, arg, k, io_opts, &data_opts))
657 			goto next;
658 
659 		/*
660 		 * The iterator gets unlocked by __bch2_read_extent - need to
661 		 * save a copy of @k elsewhere:
662 		 */
663 		bch2_bkey_buf_reassemble(&sk, c, k);
664 		k = bkey_i_to_s_c(sk.k);
665 
666 		ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts);
667 		if (ret2) {
668 			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
669 				continue;
670 
671 			if (bch2_err_matches(ret2, ENOMEM)) {
672 				/* memory allocation failure, wait for some IO to finish */
673 				bch2_move_ctxt_wait_for_io(ctxt);
674 				continue;
675 			}
676 
677 			/* XXX signal failure */
678 			goto next;
679 		}
680 next:
681 		if (ctxt->stats)
682 			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
683 next_nondata:
684 		bch2_btree_iter_advance(trans, &iter);
685 	}
686 
687 	bch2_trans_iter_exit(trans, &reflink_iter);
688 	bch2_trans_iter_exit(trans, &iter);
689 	bch2_bkey_buf_exit(&sk, c);
690 	per_snapshot_io_opts_exit(&snapshot_io_opts);
691 
692 	return ret;
693 }
694 
695 int __bch2_move_data(struct moving_context *ctxt,
696 		     struct bbpos start,
697 		     struct bbpos end,
698 		     move_pred_fn pred, void *arg)
699 {
700 	struct bch_fs *c = ctxt->trans->c;
701 	enum btree_id id;
702 	int ret = 0;
703 
704 	for (id = start.btree;
705 	     id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
706 	     id++) {
707 		ctxt->stats->pos = BBPOS(id, POS_MIN);
708 
709 		if (!btree_type_has_ptrs(id) ||
710 		    !bch2_btree_id_root(c, id)->b)
711 			continue;
712 
713 		ret = bch2_move_data_btree(ctxt,
714 				       id == start.btree ? start.pos : POS_MIN,
715 				       id == end.btree   ? end.pos   : POS_MAX,
716 				       pred, arg, id);
717 		if (ret)
718 			break;
719 	}
720 
721 	return ret;
722 }
723 
724 int bch2_move_data(struct bch_fs *c,
725 		   struct bbpos start,
726 		   struct bbpos end,
727 		   struct bch_ratelimit *rate,
728 		   struct bch_move_stats *stats,
729 		   struct write_point_specifier wp,
730 		   bool wait_on_copygc,
731 		   move_pred_fn pred, void *arg)
732 {
733 	struct moving_context ctxt;
734 
735 	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
736 	int ret = __bch2_move_data(&ctxt, start, end, pred, arg);
737 	bch2_moving_ctxt_exit(&ctxt);
738 
739 	return ret;
740 }
741 
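/*
 * Move data by physical location: walk the backpointers for a range of
 * buckets on one device, moving (or scrubbing) the extents and btree nodes
 * they point to.
 */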
742 static int __bch2_move_data_phys(struct moving_context *ctxt,
743 			struct move_bucket_in_flight *bucket_in_flight,
744 			unsigned dev,
745 			u64 bucket_start,
746 			u64 bucket_end,
747 			unsigned data_types,
748 			move_pred_fn pred, void *arg)
749 {
750 	struct btree_trans *trans = ctxt->trans;
751 	struct bch_fs *c = trans->c;
752 	bool is_kthread = current->flags & PF_KTHREAD;
753 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
754 	struct btree_iter iter = {}, bp_iter = {};
755 	struct bkey_buf sk;
756 	struct bkey_s_c k;
757 	struct bkey_buf last_flushed;
758 	int ret = 0;
759 
760 	struct bch_dev *ca = bch2_dev_tryget(c, dev);
761 	if (!ca)
762 		return 0;
763 
764 	bucket_end = min(bucket_end, ca->mi.nbuckets);
765 
766 	struct bpos bp_start	= bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
767 	struct bpos bp_end	= bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
768 	bch2_dev_put(ca);
769 	ca = NULL;
770 
771 	bch2_bkey_buf_init(&last_flushed);
772 	bkey_init(&last_flushed.k->k);
773 	bch2_bkey_buf_init(&sk);
774 
775 	/*
776 	 * We're not run in a context that handles transaction restarts:
777 	 */
778 	bch2_trans_begin(trans);
779 
780 	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0);
781 
782 	bch_err_msg(c, ret, "looking up alloc key");
787 	bch_err_msg(c, ret, "flushing btree write buffer");
788 	if (ret)
789 		goto err;
790 
791 	while (!(ret = bch2_move_ratelimit(ctxt))) {
792 		if (is_kthread && kthread_should_stop())
793 			break;
794 
795 		bch2_trans_begin(trans);
796 
797 		k = bch2_btree_iter_peek(trans, &bp_iter);
798 		ret = bkey_err(k);
799 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
800 			continue;
801 		if (ret)
802 			goto err;
803 
804 		if (!k.k || bkey_gt(k.k->p, bp_end))
805 			break;
806 
807 		if (k.k->type != KEY_TYPE_backpointer)
808 			goto next;
809 
810 		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
811 
812 		if (ctxt->stats)
813 			ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
814 
815 		if (!(data_types & BIT(bp.v->data_type)))
816 			goto next;
817 
818 		if (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes)
819 			goto next;
820 
821 		k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
822 		ret = bkey_err(k);
823 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
824 			continue;
825 		if (ret)
826 			goto err;
827 		if (!k.k)
828 			goto next;
829 
830 		if (!bp.v->level) {
831 			ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
832 			if (ret) {
833 				bch2_trans_iter_exit(trans, &iter);
834 				continue;
835 			}
836 		}
837 
838 		struct data_update_opts data_opts = {};
839 		if (!pred(c, arg, k, &io_opts, &data_opts)) {
840 			bch2_trans_iter_exit(trans, &iter);
841 			goto next;
842 		}
843 
844 		if (data_opts.scrub &&
845 		    !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
846 			bch2_trans_iter_exit(trans, &iter);
847 			ret = -BCH_ERR_device_offline;
848 			break;
849 		}
850 
851 		bch2_bkey_buf_reassemble(&sk, c, k);
852 		k = bkey_i_to_s_c(sk.k);
853 
854 		/* move_extent will drop locks */
855 		unsigned sectors = bp.v->bucket_len;
856 
857 		if (!bp.v->level)
858 			ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
859 		else if (!data_opts.scrub)
860 			ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
861 		else
862 			ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);
863 
864 		bch2_trans_iter_exit(trans, &iter);
865 
866 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
867 			continue;
868 		if (ret == -ENOMEM) {
869 			/* memory allocation failure, wait for some IO to finish */
870 			bch2_move_ctxt_wait_for_io(ctxt);
871 			continue;
872 		}
873 		if (ret)
874 			goto err;
875 
876 		if (ctxt->stats)
877 			atomic64_add(sectors, &ctxt->stats->sectors_seen);
878 next:
879 		bch2_btree_iter_advance(trans, &bp_iter);
880 	}
881 err:
882 	bch2_trans_iter_exit(trans, &bp_iter);
883 	bch2_bkey_buf_exit(&sk, c);
884 	bch2_bkey_buf_exit(&last_flushed, c);
885 	return ret;
886 }
887 
888 static int bch2_move_data_phys(struct bch_fs *c,
889 			       unsigned dev,
890 			       u64 start,
891 			       u64 end,
892 			       unsigned data_types,
893 			       struct bch_ratelimit *rate,
894 			       struct bch_move_stats *stats,
895 			       struct write_point_specifier wp,
896 			       bool wait_on_copygc,
897 			       move_pred_fn pred, void *arg)
898 {
899 	struct moving_context ctxt;
900 
901 	bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
902 
903 	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
904 	ctxt.stats->phys = true;
905 	ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
906 
907 	int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
908 	bch2_moving_ctxt_exit(&ctxt);
909 
910 	return ret;
911 }
912 
913 struct evacuate_bucket_arg {
914 	struct bpos		bucket;
915 	int			gen;
916 	struct data_update_opts	data_opts;
917 };
918 
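/*
 * Select the dirty (non-cached) pointers that live in the bucket being
 * evacuated, optionally restricted to a single generation:
 */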
919 static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k,
920 				 struct bch_io_opts *io_opts,
921 				 struct data_update_opts *data_opts)
922 {
923 	struct evacuate_bucket_arg *arg = _arg;
924 
925 	*data_opts = arg->data_opts;
926 
927 	unsigned i = 0;
928 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
929 		if (ptr->dev == arg->bucket.inode &&
930 		    (arg->gen < 0 || arg->gen == ptr->gen) &&
931 		    !ptr->cached)
932 			data_opts->rewrite_ptrs |= BIT(i);
933 		i++;
934 	}
935 
936 	return data_opts->rewrite_ptrs != 0;
937 }
938 
939 int bch2_evacuate_bucket(struct moving_context *ctxt,
940 			   struct move_bucket_in_flight *bucket_in_flight,
941 			   struct bpos bucket, int gen,
942 			   struct data_update_opts data_opts)
943 {
944 	struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };
945 
946 	return __bch2_move_data_phys(ctxt, bucket_in_flight,
947 				   bucket.inode,
948 				   bucket.offset,
949 				   bucket.offset + 1,
950 				   ~0,
951 				   evacuate_bucket_pred, &arg);
952 }
953 
954 typedef bool (*move_btree_pred)(struct bch_fs *, void *,
955 				struct btree *, struct bch_io_opts *,
956 				struct data_update_opts *);
957 
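/*
 * Walk btree nodes (not their contents) from @start to @end, rewriting the
 * nodes selected by @pred:
 */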
958 static int bch2_move_btree(struct bch_fs *c,
959 			   struct bbpos start,
960 			   struct bbpos end,
961 			   move_btree_pred pred, void *arg,
962 			   struct bch_move_stats *stats)
963 {
964 	bool kthread = (current->flags & PF_KTHREAD) != 0;
965 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
966 	struct moving_context ctxt;
967 	struct btree_trans *trans;
968 	struct btree_iter iter;
969 	struct btree *b;
970 	enum btree_id btree;
971 	struct data_update_opts data_opts;
972 	int ret = 0;
973 
974 	bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
975 			      writepoint_ptr(&c->btree_write_point),
976 			      true);
977 	trans = ctxt.trans;
978 
979 	stats->data_type = BCH_DATA_btree;
980 
981 	for (btree = start.btree;
982 	     btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
983 	     btree ++) {
984 		stats->pos = BBPOS(btree, POS_MIN);
985 
986 		if (!bch2_btree_id_root(c, btree)->b)
987 			continue;
988 
989 		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
990 					  BTREE_ITER_prefetch);
991 retry:
992 		ret = 0;
993 		while (bch2_trans_begin(trans),
994 		       (b = bch2_btree_iter_peek_node(trans, &iter)) &&
995 		       !(ret = PTR_ERR_OR_ZERO(b))) {
996 			if (kthread && kthread_should_stop())
997 				break;
998 
999 			if ((cmp_int(btree, end.btree) ?:
1000 			     bpos_cmp(b->key.k.p, end.pos)) > 0)
1001 				break;
1002 
1003 			stats->pos = BBPOS(iter.btree_id, iter.pos);
1004 
1005 			if (!pred(c, arg, b, &io_opts, &data_opts))
1006 				goto next;
1007 
1008 			ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
1009 			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1010 				continue;
1011 			if (ret)
1012 				break;
1013 next:
1014 			bch2_btree_iter_next_node(trans, &iter);
1015 		}
1016 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1017 			goto retry;
1018 
1019 		bch2_trans_iter_exit(trans, &iter);
1020 
1021 		if (kthread && kthread_should_stop())
1022 			break;
1023 	}
1024 
1025 	bch_err_fn(c, ret);
1026 	bch2_moving_ctxt_exit(&ctxt);
1027 	bch2_btree_interior_updates_flush(c);
1028 
1029 	return ret;
1030 }
1031 
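/*
 * Rereplicate: drop pointers to devices that are missing or have no
 * durability, and add replicas when durability is below the configured
 * replication level:
 */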
1032 static bool rereplicate_pred(struct bch_fs *c, void *arg,
1033 			     struct bkey_s_c k,
1034 			     struct bch_io_opts *io_opts,
1035 			     struct data_update_opts *data_opts)
1036 {
1037 	unsigned nr_good = bch2_bkey_durability(c, k);
1038 	unsigned replicas = bkey_is_btree_ptr(k.k)
1039 		? c->opts.metadata_replicas
1040 		: io_opts->data_replicas;
1041 
1042 	rcu_read_lock();
1043 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1044 	unsigned i = 0;
1045 	bkey_for_each_ptr(ptrs, ptr) {
1046 		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
1047 		if (!ptr->cached &&
1048 		    (!ca || !ca->mi.durability))
1049 			data_opts->kill_ptrs |= BIT(i);
1050 		i++;
1051 	}
1052 	rcu_read_unlock();
1053 
1054 	if (!data_opts->kill_ptrs &&
1055 	    (!nr_good || nr_good >= replicas))
1056 		return false;
1057 
1058 	data_opts->target		= 0;
1059 	data_opts->extra_replicas	= replicas - nr_good;
1060 	data_opts->btree_insert_flags	= 0;
1061 	return true;
1062 }
1063 
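/* Select every pointer on the device being migrated away from: */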
1064 static bool migrate_pred(struct bch_fs *c, void *arg,
1065 			 struct bkey_s_c k,
1066 			 struct bch_io_opts *io_opts,
1067 			 struct data_update_opts *data_opts)
1068 {
1069 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1070 	struct bch_ioctl_data *op = arg;
1071 	unsigned i = 0;
1072 
1073 	data_opts->rewrite_ptrs		= 0;
1074 	data_opts->target		= 0;
1075 	data_opts->extra_replicas	= 0;
1076 	data_opts->btree_insert_flags	= 0;
1077 
1078 	bkey_for_each_ptr(ptrs, ptr) {
1079 		if (ptr->dev == op->migrate.dev)
1080 			data_opts->rewrite_ptrs |= 1U << i;
1081 		i++;
1082 	}
1083 
1084 	return data_opts->rewrite_ptrs != 0;
1085 }
1086 
1087 static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
1088 				   struct btree *b,
1089 				   struct bch_io_opts *io_opts,
1090 				   struct data_update_opts *data_opts)
1091 {
1092 	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
1093 }
1094 
1095 /*
1096  * Ancient versions of bcachefs produced packed formats which could represent
1097  * keys that the in-memory format cannot; this checks for those
1098  * formats so we can get rid of them.
1099  */
1100 static bool bformat_needs_redo(struct bkey_format *f)
1101 {
1102 	for (unsigned i = 0; i < f->nr_fields; i++)
1103 		if (bch2_bkey_format_field_overflows(f, i))
1104 			return true;
1105 
1106 	return false;
1107 }
1108 
1109 static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
1110 				   struct btree *b,
1111 				   struct bch_io_opts *io_opts,
1112 				   struct data_update_opts *data_opts)
1113 {
1114 	if (b->version_ondisk != c->sb.version ||
1115 	    btree_node_need_rewrite(b) ||
1116 	    bformat_needs_redo(&b->format)) {
1117 		data_opts->target		= 0;
1118 		data_opts->extra_replicas	= 0;
1119 		data_opts->btree_insert_flags	= 0;
1120 		return true;
1121 	}
1122 
1123 	return false;
1124 }
1125 
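/*
 * Rewrite any btree nodes still in an old on-disk version or format, then
 * record in the superblock that none remain:
 */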
1126 int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
1127 {
1128 	int ret;
1129 
1130 	ret = bch2_move_btree(c,
1131 			      BBPOS_MIN,
1132 			      BBPOS_MAX,
1133 			      rewrite_old_nodes_pred, c, stats);
1134 	if (!ret) {
1135 		mutex_lock(&c->sb_lock);
1136 		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
1137 		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
1138 		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
1139 		bch2_write_super(c);
1140 		mutex_unlock(&c->sb_lock);
1141 	}
1142 
1143 	bch_err_fn(c, ret);
1144 	return ret;
1145 }
1146 
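/*
 * Drop pointers, in order, for as long as the remaining durability still
 * meets the replication level:
 */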
1147 static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
1148 			     struct bkey_s_c k,
1149 			     struct bch_io_opts *io_opts,
1150 			     struct data_update_opts *data_opts)
1151 {
1152 	unsigned durability = bch2_bkey_durability(c, k);
1153 	unsigned replicas = bkey_is_btree_ptr(k.k)
1154 		? c->opts.metadata_replicas
1155 		: io_opts->data_replicas;
1156 	const union bch_extent_entry *entry;
1157 	struct extent_ptr_decoded p;
1158 	unsigned i = 0;
1159 
1160 	rcu_read_lock();
1161 	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
1162 		unsigned d = bch2_extent_ptr_durability(c, &p);
1163 
1164 		if (d && durability - d >= replicas) {
1165 			data_opts->kill_ptrs |= BIT(i);
1166 			durability -= d;
1167 		}
1168 
1169 		i++;
1170 	}
1171 	rcu_read_unlock();
1172 
1173 	return data_opts->kill_ptrs != 0;
1174 }
1175 
1176 static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
1177 				   struct btree *b,
1178 				   struct bch_io_opts *io_opts,
1179 				   struct data_update_opts *data_opts)
1180 {
1181 	return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
1182 }
1183 
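/*
 * Scrub: read back (and verify checksums for) data on the given device;
 * extents without a checksum can't be verified and are skipped, while btree
 * nodes are always read:
 */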
1184 static bool scrub_pred(struct bch_fs *c, void *_arg,
1185 		       struct bkey_s_c k,
1186 		       struct bch_io_opts *io_opts,
1187 		       struct data_update_opts *data_opts)
1188 {
1189 	struct bch_ioctl_data *arg = _arg;
1190 
1191 	if (k.k->type != KEY_TYPE_btree_ptr_v2) {
1192 		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1193 		const union bch_extent_entry *entry;
1194 		struct extent_ptr_decoded p;
1195 		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
1196 			if (p.ptr.dev == arg->migrate.dev) {
1197 				if (!p.crc.csum_type)
1198 					return false;
1199 				break;
1200 			}
1201 	}
1202 
1203 	data_opts->scrub	= true;
1204 	data_opts->read_dev	= arg->migrate.dev;
1205 	return true;
1206 }
1207 
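/*
 * Entry point for the data job ioctl: run the requested operation over the
 * given range:
 */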
1208 int bch2_data_job(struct bch_fs *c,
1209 		  struct bch_move_stats *stats,
1210 		  struct bch_ioctl_data op)
1211 {
1212 	struct bbpos start	= BBPOS(op.start_btree, op.start_pos);
1213 	struct bbpos end	= BBPOS(op.end_btree, op.end_pos);
1214 	int ret = 0;
1215 
1216 	if (op.op >= BCH_DATA_OP_NR)
1217 		return -EINVAL;
1218 
1219 	bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);
1220 
1221 	switch (op.op) {
1222 	case BCH_DATA_OP_scrub:
1223 		/*
1224 		 * prevent tests from spuriously failing, make sure we see all
1225 		 * btree nodes that need to be repaired
1226 		 */
1227 		bch2_btree_interior_updates_flush(c);
1228 
1229 		ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
1230 					  op.scrub.data_types,
1231 					  NULL,
1232 					  stats,
1233 					  writepoint_hashed((unsigned long) current),
1234 					  false,
1235 					  scrub_pred, &op) ?: ret;
1236 		break;
1237 
1238 	case BCH_DATA_OP_rereplicate:
1239 		stats->data_type = BCH_DATA_journal;
1240 		ret = bch2_journal_flush_device_pins(&c->journal, -1);
1241 		ret = bch2_move_btree(c, start, end,
1242 				      rereplicate_btree_pred, c, stats) ?: ret;
1243 		ret = bch2_move_data(c, start, end,
1244 				     NULL,
1245 				     stats,
1246 				     writepoint_hashed((unsigned long) current),
1247 				     true,
1248 				     rereplicate_pred, c) ?: ret;
1249 		ret = bch2_replicas_gc2(c) ?: ret;
1250 		break;
1251 	case BCH_DATA_OP_migrate:
1252 		if (op.migrate.dev >= c->sb.nr_devices)
1253 			return -EINVAL;
1254 
1255 		stats->data_type = BCH_DATA_journal;
1256 		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
1257 		ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX,
1258 					  ~0,
1259 					  NULL,
1260 					  stats,
1261 					  writepoint_hashed((unsigned long) current),
1262 					  true,
1263 					  migrate_pred, &op) ?: ret;
1264 		bch2_btree_interior_updates_flush(c);
1265 		ret = bch2_replicas_gc2(c) ?: ret;
1266 		break;
1267 	case BCH_DATA_OP_rewrite_old_nodes:
1268 		ret = bch2_scan_old_btree_nodes(c, stats);
1269 		break;
1270 	case BCH_DATA_OP_drop_extra_replicas:
1271 		ret = bch2_move_btree(c, start, end,
1272 				drop_extra_replicas_btree_pred, c, stats) ?: ret;
1273 		ret = bch2_move_data(c, start, end, NULL, stats,
1274 				writepoint_hashed((unsigned long) current),
1275 				true,
1276 				drop_extra_replicas_pred, c) ?: ret;
1277 		ret = bch2_replicas_gc2(c) ?: ret;
1278 		break;
1279 	default:
1280 		ret = -EINVAL;
1281 	}
1282 
1283 	bch2_move_stats_exit(stats, c);
1284 	return ret;
1285 }
1286 
1287 void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
1288 {
1289 	prt_printf(out, "%s: data type=", stats->name);
1290 	bch2_prt_data_type(out, stats->data_type);
1291 	prt_str(out, " pos=");
1292 	bch2_bbpos_to_text(out, stats->pos);
1293 	prt_newline(out);
1294 	printbuf_indent_add(out, 2);
1295 
1296 	prt_printf(out, "keys moved:\t%llu\n",	atomic64_read(&stats->keys_moved));
1297 	prt_printf(out, "keys raced:\t%llu\n",	atomic64_read(&stats->keys_raced));
1298 	prt_printf(out, "bytes seen:\t");
1299 	prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
1300 	prt_newline(out);
1301 
1302 	prt_printf(out, "bytes moved:\t");
1303 	prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
1304 	prt_newline(out);
1305 
1306 	prt_printf(out, "bytes raced:\t");
1307 	prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
1308 	prt_newline(out);
1309 
1310 	printbuf_indent_sub(out, 2);
1311 }
1312 
1313 static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
1314 {
1315 	if (!out->nr_tabstops)
1316 		printbuf_tabstop_push(out, 32);
1317 
1318 	bch2_move_stats_to_text(out, ctxt->stats);
1319 	printbuf_indent_add(out, 2);
1320 
1321 	prt_printf(out, "reads: ios %u/%u sectors %u/%u\n",
1322 		   atomic_read(&ctxt->read_ios),
1323 		   c->opts.move_ios_in_flight,
1324 		   atomic_read(&ctxt->read_sectors),
1325 		   c->opts.move_bytes_in_flight >> 9);
1326 
1327 	prt_printf(out, "writes: ios %u/%u sectors %u/%u\n",
1328 		   atomic_read(&ctxt->write_ios),
1329 		   c->opts.move_ios_in_flight,
1330 		   atomic_read(&ctxt->write_sectors),
1331 		   c->opts.move_bytes_in_flight >> 9);
1332 
1333 	printbuf_indent_add(out, 2);
1334 
1335 	mutex_lock(&ctxt->lock);
1336 	struct moving_io *io;
1337 	list_for_each_entry(io, &ctxt->ios, io_list)
1338 		bch2_data_update_inflight_to_text(out, &io->write);
1339 	mutex_unlock(&ctxt->lock);
1340 
1341 	printbuf_indent_sub(out, 4);
1342 }
1343 
1344 void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
1345 {
1346 	struct moving_context *ctxt;
1347 
1348 	mutex_lock(&c->moving_context_lock);
1349 	list_for_each_entry(ctxt, &c->moving_context_list, list)
1350 		bch2_moving_ctxt_to_text(out, c, ctxt);
1351 	mutex_unlock(&c->moving_context_lock);
1352 }
1353 
1354 void bch2_fs_move_init(struct bch_fs *c)
1355 {
1356 	INIT_LIST_HEAD(&c->moving_context_list);
1357 	mutex_init(&c->moving_context_lock);
1358 }
1359