xref: /linux/fs/btrfs/scrub.c (revision e9e8bcb8178e197d889ec31e79fa1ddc1732c8f9)
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/blkdev.h>
20 #include "ctree.h"
21 #include "volumes.h"
22 #include "disk-io.h"
23 #include "ordered-data.h"
24 
25 /*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies their checksums. In case a bad
 * checksum is found or an extent cannot be read, good data will be written
 * back if any can be found.
30  *
31  * Future enhancements:
32  *  - To enhance the performance, better read-ahead strategies for the
33  *    extent-tree can be employed.
34  *  - In case an unrepairable extent is encountered, track which files are
35  *    affected and report them
36  *  - In case of a read error on files with nodatasum, map the file and read
37  *    the extent to trigger a writeback of the good copy
38  *  - track and record media errors, throw out bad devices
39  *  - add a mode to also read unallocated space
40  *  - make the prefetch cancellable
41  */
42 
43 struct scrub_bio;
44 struct scrub_page;
45 struct scrub_dev;
46 static void scrub_bio_end_io(struct bio *bio, int err);
47 static void scrub_checksum(struct btrfs_work *work);
48 static int scrub_checksum_data(struct scrub_dev *sdev,
49 			       struct scrub_page *spag, void *buffer);
50 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
51 				     struct scrub_page *spag, u64 logical,
52 				     void *buffer);
53 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
54 static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
55 static void scrub_fixup_end_io(struct bio *bio, int err);
56 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
57 			  struct page *page);
58 static void scrub_fixup(struct scrub_bio *sbio, int ix);
59 
60 #define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
61 #define SCRUB_BIOS_PER_DEV	16	/* 1 MB per device in flight */
62 
63 struct scrub_page {
64 	u64			flags;  /* extent flags */
65 	u64			generation;
66 	u64			mirror_num;
67 	int			have_csum;
68 	u8			csum[BTRFS_CSUM_SIZE];
69 };
70 
71 struct scrub_bio {
72 	int			index;
73 	struct scrub_dev	*sdev;
74 	struct bio		*bio;
75 	int			err;
76 	u64			logical;
77 	u64			physical;
78 	struct scrub_page	spag[SCRUB_PAGES_PER_BIO];
79 	u64			count;
80 	int			next_free;
81 	struct btrfs_work	work;
82 };
83 
84 struct scrub_dev {
85 	struct scrub_bio	*bios[SCRUB_BIOS_PER_DEV];
86 	struct btrfs_device	*dev;
87 	int			first_free;
88 	int			curr;
89 	atomic_t		in_flight;
90 	spinlock_t		list_lock;
91 	wait_queue_head_t	list_wait;
92 	u16			csum_size;
93 	struct list_head	csum_list;
94 	atomic_t		cancel_req;
95 	int			readonly;
96 	/*
97 	 * statistics
98 	 */
99 	struct btrfs_scrub_progress stat;
100 	spinlock_t		stat_lock;
101 };
102 
103 static void scrub_free_csums(struct scrub_dev *sdev)
104 {
105 	while (!list_empty(&sdev->csum_list)) {
106 		struct btrfs_ordered_sum *sum;
107 		sum = list_first_entry(&sdev->csum_list,
108 				       struct btrfs_ordered_sum, list);
109 		list_del(&sum->list);
110 		kfree(sum);
111 	}
112 }
113 
114 static void scrub_free_bio(struct bio *bio)
115 {
116 	int i;
117 	struct page *last_page = NULL;
118 
119 	if (!bio)
120 		return;
121 
122 	for (i = 0; i < bio->bi_vcnt; ++i) {
123 		if (bio->bi_io_vec[i].bv_page == last_page)
124 			continue;
125 		last_page = bio->bi_io_vec[i].bv_page;
126 		__free_page(last_page);
127 	}
128 	bio_put(bio);
129 }
130 
131 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
132 {
133 	int i;
134 
135 	if (!sdev)
136 		return;
137 
138 	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
139 		struct scrub_bio *sbio = sdev->bios[i];
140 
141 		if (!sbio)
142 			break;
143 
144 		scrub_free_bio(sbio->bio);
145 		kfree(sbio);
146 	}
147 
148 	scrub_free_csums(sdev);
149 	kfree(sdev);
150 }
151 
152 static noinline_for_stack
153 struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
154 {
155 	struct scrub_dev *sdev;
156 	int		i;
157 	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
158 
159 	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
160 	if (!sdev)
161 		goto nomem;
162 	sdev->dev = dev;
163 	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
164 		struct scrub_bio *sbio;
165 
166 		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
167 		if (!sbio)
168 			goto nomem;
169 		sdev->bios[i] = sbio;
170 
171 		sbio->index = i;
172 		sbio->sdev = sdev;
173 		sbio->count = 0;
174 		sbio->work.func = scrub_checksum;
175 
176 		if (i != SCRUB_BIOS_PER_DEV-1)
177 			sdev->bios[i]->next_free = i + 1;
178 		 else
179 			sdev->bios[i]->next_free = -1;
180 	}
181 	sdev->first_free = 0;
182 	sdev->curr = -1;
183 	atomic_set(&sdev->in_flight, 0);
184 	atomic_set(&sdev->cancel_req, 0);
185 	sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
186 	INIT_LIST_HEAD(&sdev->csum_list);
187 
188 	spin_lock_init(&sdev->list_lock);
189 	spin_lock_init(&sdev->stat_lock);
190 	init_waitqueue_head(&sdev->list_wait);
191 	return sdev;
192 
193 nomem:
194 	scrub_free_dev(sdev);
195 	return ERR_PTR(-ENOMEM);
196 }
197 
198 /*
199  * scrub_recheck_error gets called when either verification of the page
200  * failed or the bio failed to read, e.g. with EIO. In the latter case,
201  * recheck_error gets called for every page in the bio, even though only
202  * one may be bad
203  */
204 static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
205 {
206 	if (sbio->err) {
207 		if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
208 				   (sbio->physical + ix * PAGE_SIZE) >> 9,
209 				   sbio->bio->bi_io_vec[ix].bv_page) == 0) {
210 			if (scrub_fixup_check(sbio, ix) == 0)
211 				return;
212 		}
213 	}
214 
215 	scrub_fixup(sbio, ix);
216 }
217 
218 static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
219 {
220 	int ret = 1;
221 	struct page *page;
222 	void *buffer;
223 	u64 flags = sbio->spag[ix].flags;
224 
225 	page = sbio->bio->bi_io_vec[ix].bv_page;
226 	buffer = kmap_atomic(page, KM_USER0);
227 	if (flags & BTRFS_EXTENT_FLAG_DATA) {
228 		ret = scrub_checksum_data(sbio->sdev,
229 					  sbio->spag + ix, buffer);
230 	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
231 		ret = scrub_checksum_tree_block(sbio->sdev,
232 						sbio->spag + ix,
233 						sbio->logical + ix * PAGE_SIZE,
234 						buffer);
235 	} else {
236 		WARN_ON(1);
237 	}
238 	kunmap_atomic(buffer, KM_USER0);
239 
240 	return ret;
241 }
242 
243 static void scrub_fixup_end_io(struct bio *bio, int err)
244 {
245 	complete((struct completion *)bio->bi_private);
246 }
247 
248 static void scrub_fixup(struct scrub_bio *sbio, int ix)
249 {
250 	struct scrub_dev *sdev = sbio->sdev;
251 	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
252 	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
253 	struct btrfs_multi_bio *multi = NULL;
254 	u64 logical = sbio->logical + ix * PAGE_SIZE;
255 	u64 length;
256 	int i;
257 	int ret;
258 	DECLARE_COMPLETION_ONSTACK(complete);
259 
260 	if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
261 	    (sbio->spag[ix].have_csum == 0)) {
262 		/*
263 		 * nodatasum, don't try to fix anything
264 		 * FIXME: we can do better, open the inode and trigger a
265 		 * writeback
266 		 */
267 		goto uncorrectable;
268 	}
269 
270 	length = PAGE_SIZE;
271 	ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
272 			      &multi, 0);
273 	if (ret || !multi || length < PAGE_SIZE) {
274 		printk(KERN_ERR
275 		       "scrub_fixup: btrfs_map_block failed us for %llu\n",
276 		       (unsigned long long)logical);
277 		WARN_ON(1);
278 		return;
279 	}
280 
281 	if (multi->num_stripes == 1)
282 		/* there aren't any replicas */
283 		goto uncorrectable;
284 
285 	/*
286 	 * first find a good copy
287 	 */
288 	for (i = 0; i < multi->num_stripes; ++i) {
289 		if (i == sbio->spag[ix].mirror_num)
290 			continue;
291 
292 		if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
293 				   multi->stripes[i].physical >> 9,
294 				   sbio->bio->bi_io_vec[ix].bv_page)) {
295 			/* I/O-error, this is not a good copy */
296 			continue;
297 		}
298 
299 		if (scrub_fixup_check(sbio, ix) == 0)
300 			break;
301 	}
302 	if (i == multi->num_stripes)
303 		goto uncorrectable;
304 
305 	if (!sdev->readonly) {
306 		/*
307 		 * bi_io_vec[ix].bv_page now contains good data, write it back
308 		 */
309 		if (scrub_fixup_io(WRITE, sdev->dev->bdev,
310 				   (sbio->physical + ix * PAGE_SIZE) >> 9,
311 				   sbio->bio->bi_io_vec[ix].bv_page)) {
312 			/* I/O-error, writeback failed, give up */
313 			goto uncorrectable;
314 		}
315 	}
316 
317 	kfree(multi);
318 	spin_lock(&sdev->stat_lock);
319 	++sdev->stat.corrected_errors;
320 	spin_unlock(&sdev->stat_lock);
321 
322 	if (printk_ratelimit())
323 		printk(KERN_ERR "btrfs: fixed up at %llu\n",
324 		       (unsigned long long)logical);
325 	return;
326 
327 uncorrectable:
328 	kfree(multi);
329 	spin_lock(&sdev->stat_lock);
330 	++sdev->stat.uncorrectable_errors;
331 	spin_unlock(&sdev->stat_lock);
332 
333 	if (printk_ratelimit())
334 		printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
335 			 (unsigned long long)logical);
336 }
337 
338 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
339 			 struct page *page)
340 {
341 	struct bio *bio = NULL;
342 	int ret;
343 	DECLARE_COMPLETION_ONSTACK(complete);
344 
345 	bio = bio_alloc(GFP_NOFS, 1);
346 	bio->bi_bdev = bdev;
347 	bio->bi_sector = sector;
348 	bio_add_page(bio, page, PAGE_SIZE, 0);
349 	bio->bi_end_io = scrub_fixup_end_io;
350 	bio->bi_private = &complete;
351 	submit_bio(rw, bio);
352 
353 	/* this will also unplug the queue */
354 	wait_for_completion(&complete);
355 
356 	ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
357 	bio_put(bio);
358 	return ret;
359 }
360 
361 static void scrub_bio_end_io(struct bio *bio, int err)
362 {
363 	struct scrub_bio *sbio = bio->bi_private;
364 	struct scrub_dev *sdev = sbio->sdev;
365 	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
366 
367 	sbio->err = err;
368 	sbio->bio = bio;
369 
370 	btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
371 }
372 
373 static void scrub_checksum(struct btrfs_work *work)
374 {
375 	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
376 	struct scrub_dev *sdev = sbio->sdev;
377 	struct page *page;
378 	void *buffer;
379 	int i;
380 	u64 flags;
381 	u64 logical;
382 	int ret;
383 
384 	if (sbio->err) {
385 		for (i = 0; i < sbio->count; ++i)
386 			scrub_recheck_error(sbio, i);
387 
388 		sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
389 		sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
390 		sbio->bio->bi_phys_segments = 0;
391 		sbio->bio->bi_idx = 0;
392 
393 		for (i = 0; i < sbio->count; i++) {
394 			struct bio_vec *bi;
395 			bi = &sbio->bio->bi_io_vec[i];
396 			bi->bv_offset = 0;
397 			bi->bv_len = PAGE_SIZE;
398 		}
399 
400 		spin_lock(&sdev->stat_lock);
401 		++sdev->stat.read_errors;
402 		spin_unlock(&sdev->stat_lock);
403 		goto out;
404 	}
405 	for (i = 0; i < sbio->count; ++i) {
406 		page = sbio->bio->bi_io_vec[i].bv_page;
407 		buffer = kmap_atomic(page, KM_USER0);
408 		flags = sbio->spag[i].flags;
409 		logical = sbio->logical + i * PAGE_SIZE;
410 		ret = 0;
411 		if (flags & BTRFS_EXTENT_FLAG_DATA) {
412 			ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
413 		} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
414 			ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
415 							logical, buffer);
416 		} else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
417 			BUG_ON(i);
418 			(void)scrub_checksum_super(sbio, buffer);
419 		} else {
420 			WARN_ON(1);
421 		}
422 		kunmap_atomic(buffer, KM_USER0);
423 		if (ret)
424 			scrub_recheck_error(sbio, i);
425 	}
426 
427 out:
428 	scrub_free_bio(sbio->bio);
429 	sbio->bio = NULL;
430 	spin_lock(&sdev->list_lock);
431 	sbio->next_free = sdev->first_free;
432 	sdev->first_free = sbio->index;
433 	spin_unlock(&sdev->list_lock);
434 	atomic_dec(&sdev->in_flight);
435 	wake_up(&sdev->list_wait);
436 }
437 
438 static int scrub_checksum_data(struct scrub_dev *sdev,
439 			       struct scrub_page *spag, void *buffer)
440 {
441 	u8 csum[BTRFS_CSUM_SIZE];
442 	u32 crc = ~(u32)0;
443 	int fail = 0;
444 	struct btrfs_root *root = sdev->dev->dev_root;
445 
446 	if (!spag->have_csum)
447 		return 0;
448 
449 	crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
450 	btrfs_csum_final(crc, csum);
451 	if (memcmp(csum, spag->csum, sdev->csum_size))
452 		fail = 1;
453 
454 	spin_lock(&sdev->stat_lock);
455 	++sdev->stat.data_extents_scrubbed;
456 	sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
457 	if (fail)
458 		++sdev->stat.csum_errors;
459 	spin_unlock(&sdev->stat_lock);
460 
461 	return fail;
462 }
463 
464 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
465 				     struct scrub_page *spag, u64 logical,
466 				     void *buffer)
467 {
468 	struct btrfs_header *h;
469 	struct btrfs_root *root = sdev->dev->dev_root;
470 	struct btrfs_fs_info *fs_info = root->fs_info;
471 	u8 csum[BTRFS_CSUM_SIZE];
472 	u32 crc = ~(u32)0;
473 	int fail = 0;
474 	int crc_fail = 0;
475 
476 	/*
477 	 * we don't use the getter functions here, as we
478 	 * a) don't have an extent buffer and
479 	 * b) the page is already kmapped
480 	 */
481 	h = (struct btrfs_header *)buffer;
482 
483 	if (logical != le64_to_cpu(h->bytenr))
484 		++fail;
485 
486 	if (spag->generation != le64_to_cpu(h->generation))
487 		++fail;
488 
489 	if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
490 		++fail;
491 
492 	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
493 		   BTRFS_UUID_SIZE))
494 		++fail;
495 
496 	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
497 			      PAGE_SIZE - BTRFS_CSUM_SIZE);
498 	btrfs_csum_final(crc, csum);
499 	if (memcmp(csum, h->csum, sdev->csum_size))
500 		++crc_fail;
501 
502 	spin_lock(&sdev->stat_lock);
503 	++sdev->stat.tree_extents_scrubbed;
504 	sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
505 	if (crc_fail)
506 		++sdev->stat.csum_errors;
507 	if (fail)
508 		++sdev->stat.verify_errors;
509 	spin_unlock(&sdev->stat_lock);
510 
511 	return fail || crc_fail;
512 }
513 
514 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
515 {
516 	struct btrfs_super_block *s;
517 	u64 logical;
518 	struct scrub_dev *sdev = sbio->sdev;
519 	struct btrfs_root *root = sdev->dev->dev_root;
520 	struct btrfs_fs_info *fs_info = root->fs_info;
521 	u8 csum[BTRFS_CSUM_SIZE];
522 	u32 crc = ~(u32)0;
523 	int fail = 0;
524 
525 	s = (struct btrfs_super_block *)buffer;
526 	logical = sbio->logical;
527 
528 	if (logical != le64_to_cpu(s->bytenr))
529 		++fail;
530 
531 	if (sbio->spag[0].generation != le64_to_cpu(s->generation))
532 		++fail;
533 
534 	if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
535 		++fail;
536 
537 	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
538 			      PAGE_SIZE - BTRFS_CSUM_SIZE);
539 	btrfs_csum_final(crc, csum);
540 	if (memcmp(csum, s->csum, sbio->sdev->csum_size))
541 		++fail;
542 
543 	if (fail) {
544 		/*
545 		 * if we find an error in a super block, we just report it.
546 		 * They will get written with the next transaction commit
547 		 * anyway
548 		 */
549 		spin_lock(&sdev->stat_lock);
550 		++sdev->stat.super_errors;
551 		spin_unlock(&sdev->stat_lock);
552 	}
553 
554 	return fail;
555 }
556 
557 static int scrub_submit(struct scrub_dev *sdev)
558 {
559 	struct scrub_bio *sbio;
560 	struct bio *bio;
561 	int i;
562 
563 	if (sdev->curr == -1)
564 		return 0;
565 
566 	sbio = sdev->bios[sdev->curr];
567 
568 	bio = bio_alloc(GFP_NOFS, sbio->count);
569 	if (!bio)
570 		goto nomem;
571 
572 	bio->bi_private = sbio;
573 	bio->bi_end_io = scrub_bio_end_io;
574 	bio->bi_bdev = sdev->dev->bdev;
575 	bio->bi_sector = sbio->physical >> 9;
576 
577 	for (i = 0; i < sbio->count; ++i) {
578 		struct page *page;
579 		int ret;
580 
581 		page = alloc_page(GFP_NOFS);
582 		if (!page)
583 			goto nomem;
584 
585 		ret = bio_add_page(bio, page, PAGE_SIZE, 0);
586 		if (!ret) {
587 			__free_page(page);
588 			goto nomem;
589 		}
590 	}
591 
592 	sbio->err = 0;
593 	sdev->curr = -1;
594 	atomic_inc(&sdev->in_flight);
595 
596 	submit_bio(READ, bio);
597 
598 	return 0;
599 
600 nomem:
601 	scrub_free_bio(bio);
602 
603 	return -ENOMEM;
604 }
605 
606 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
607 		      u64 physical, u64 flags, u64 gen, u64 mirror_num,
608 		      u8 *csum, int force)
609 {
610 	struct scrub_bio *sbio;
611 
612 again:
613 	/*
614 	 * grab a fresh bio or wait for one to become available
615 	 */
616 	while (sdev->curr == -1) {
617 		spin_lock(&sdev->list_lock);
618 		sdev->curr = sdev->first_free;
619 		if (sdev->curr != -1) {
620 			sdev->first_free = sdev->bios[sdev->curr]->next_free;
621 			sdev->bios[sdev->curr]->next_free = -1;
622 			sdev->bios[sdev->curr]->count = 0;
623 			spin_unlock(&sdev->list_lock);
624 		} else {
625 			spin_unlock(&sdev->list_lock);
626 			wait_event(sdev->list_wait, sdev->first_free != -1);
627 		}
628 	}
629 	sbio = sdev->bios[sdev->curr];
630 	if (sbio->count == 0) {
631 		sbio->physical = physical;
632 		sbio->logical = logical;
633 	} else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
634 		   sbio->logical + sbio->count * PAGE_SIZE != logical) {
635 		int ret;
636 
637 		ret = scrub_submit(sdev);
638 		if (ret)
639 			return ret;
640 		goto again;
641 	}
642 	sbio->spag[sbio->count].flags = flags;
643 	sbio->spag[sbio->count].generation = gen;
644 	sbio->spag[sbio->count].have_csum = 0;
645 	sbio->spag[sbio->count].mirror_num = mirror_num;
646 	if (csum) {
647 		sbio->spag[sbio->count].have_csum = 1;
648 		memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
649 	}
650 	++sbio->count;
651 	if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
652 		int ret;
653 
654 		ret = scrub_submit(sdev);
655 		if (ret)
656 			return ret;
657 	}
658 
659 	return 0;
660 }
661 
662 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
663 			   u8 *csum)
664 {
665 	struct btrfs_ordered_sum *sum = NULL;
666 	int ret = 0;
667 	unsigned long i;
668 	unsigned long num_sectors;
669 	u32 sectorsize = sdev->dev->dev_root->sectorsize;
670 
671 	while (!list_empty(&sdev->csum_list)) {
672 		sum = list_first_entry(&sdev->csum_list,
673 				       struct btrfs_ordered_sum, list);
674 		if (sum->bytenr > logical)
675 			return 0;
676 		if (sum->bytenr + sum->len > logical)
677 			break;
678 
679 		++sdev->stat.csum_discards;
680 		list_del(&sum->list);
681 		kfree(sum);
682 		sum = NULL;
683 	}
684 	if (!sum)
685 		return 0;
686 
687 	num_sectors = sum->len / sectorsize;
688 	for (i = 0; i < num_sectors; ++i) {
689 		if (sum->sums[i].bytenr == logical) {
690 			memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
691 			ret = 1;
692 			break;
693 		}
694 	}
695 	if (ret && i == num_sectors - 1) {
696 		list_del(&sum->list);
697 		kfree(sum);
698 	}
699 	return ret;
700 }
701 
702 /* scrub extent tries to collect up to 64 kB for each bio */
703 static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
704 			u64 physical, u64 flags, u64 gen, u64 mirror_num)
705 {
706 	int ret;
707 	u8 csum[BTRFS_CSUM_SIZE];
708 
709 	while (len) {
710 		u64 l = min_t(u64, len, PAGE_SIZE);
711 		int have_csum = 0;
712 
713 		if (flags & BTRFS_EXTENT_FLAG_DATA) {
714 			/* push csums to sbio */
715 			have_csum = scrub_find_csum(sdev, logical, l, csum);
716 			if (have_csum == 0)
717 				++sdev->stat.no_csum;
718 		}
719 		ret = scrub_page(sdev, logical, l, physical, flags, gen,
720 				 mirror_num, have_csum ? csum : NULL, 0);
721 		if (ret)
722 			return ret;
723 		len -= l;
724 		logical += l;
725 		physical += l;
726 	}
727 	return 0;
728 }
729 
730 static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
731 	struct map_lookup *map, int num, u64 base, u64 length)
732 {
733 	struct btrfs_path *path;
734 	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
735 	struct btrfs_root *root = fs_info->extent_root;
736 	struct btrfs_root *csum_root = fs_info->csum_root;
737 	struct btrfs_extent_item *extent;
738 	struct blk_plug plug;
739 	u64 flags;
740 	int ret;
741 	int slot;
742 	int i;
743 	u64 nstripes;
744 	int start_stripe;
745 	struct extent_buffer *l;
746 	struct btrfs_key key;
747 	u64 physical;
748 	u64 logical;
749 	u64 generation;
750 	u64 mirror_num;
751 
752 	u64 increment = map->stripe_len;
753 	u64 offset;
754 
755 	nstripes = length;
756 	offset = 0;
757 	do_div(nstripes, map->stripe_len);
758 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
759 		offset = map->stripe_len * num;
760 		increment = map->stripe_len * map->num_stripes;
761 		mirror_num = 0;
762 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
763 		int factor = map->num_stripes / map->sub_stripes;
764 		offset = map->stripe_len * (num / map->sub_stripes);
765 		increment = map->stripe_len * factor;
766 		mirror_num = num % map->sub_stripes;
767 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
768 		increment = map->stripe_len;
769 		mirror_num = num % map->num_stripes;
770 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
771 		increment = map->stripe_len;
772 		mirror_num = num % map->num_stripes;
773 	} else {
774 		increment = map->stripe_len;
775 		mirror_num = 0;
776 	}
777 
778 	path = btrfs_alloc_path();
779 	if (!path)
780 		return -ENOMEM;
781 
782 	path->reada = 2;
783 	path->search_commit_root = 1;
784 	path->skip_locking = 1;
785 
786 	/*
787 	 * find all extents for each stripe and just read them to get
788 	 * them into the page cache
789 	 * FIXME: we can do better. build a more intelligent prefetching
790 	 */
791 	logical = base + offset;
792 	physical = map->stripes[num].physical;
793 	ret = 0;
794 	for (i = 0; i < nstripes; ++i) {
795 		key.objectid = logical;
796 		key.type = BTRFS_EXTENT_ITEM_KEY;
797 		key.offset = (u64)0;
798 
799 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
800 		if (ret < 0)
801 			goto out_noplug;
802 
803 		/*
804 		 * we might miss half an extent here, but that doesn't matter,
805 		 * as it's only the prefetch
806 		 */
807 		while (1) {
808 			l = path->nodes[0];
809 			slot = path->slots[0];
810 			if (slot >= btrfs_header_nritems(l)) {
811 				ret = btrfs_next_leaf(root, path);
812 				if (ret == 0)
813 					continue;
814 				if (ret < 0)
815 					goto out_noplug;
816 
817 				break;
818 			}
819 			btrfs_item_key_to_cpu(l, &key, slot);
820 
821 			if (key.objectid >= logical + map->stripe_len)
822 				break;
823 
824 			path->slots[0]++;
825 		}
826 		btrfs_release_path(path);
827 		logical += increment;
828 		physical += map->stripe_len;
829 		cond_resched();
830 	}
831 
832 	/*
833 	 * collect all data csums for the stripe to avoid seeking during
834 	 * the scrub. This might currently (crc32) end up to be about 1MB
835 	 */
836 	start_stripe = 0;
837 	blk_start_plug(&plug);
838 again:
839 	logical = base + offset + start_stripe * increment;
840 	for (i = start_stripe; i < nstripes; ++i) {
841 		ret = btrfs_lookup_csums_range(csum_root, logical,
842 					       logical + map->stripe_len - 1,
843 					       &sdev->csum_list, 1);
844 		if (ret)
845 			goto out;
846 
847 		logical += increment;
848 		cond_resched();
849 	}
850 	/*
851 	 * now find all extents for each stripe and scrub them
852 	 */
853 	logical = base + offset + start_stripe * increment;
854 	physical = map->stripes[num].physical + start_stripe * map->stripe_len;
855 	ret = 0;
856 	for (i = start_stripe; i < nstripes; ++i) {
857 		/*
858 		 * canceled?
859 		 */
860 		if (atomic_read(&fs_info->scrub_cancel_req) ||
861 		    atomic_read(&sdev->cancel_req)) {
862 			ret = -ECANCELED;
863 			goto out;
864 		}
865 		/*
866 		 * check to see if we have to pause
867 		 */
868 		if (atomic_read(&fs_info->scrub_pause_req)) {
869 			/* push queued extents */
870 			scrub_submit(sdev);
871 			wait_event(sdev->list_wait,
872 				   atomic_read(&sdev->in_flight) == 0);
873 			atomic_inc(&fs_info->scrubs_paused);
874 			wake_up(&fs_info->scrub_pause_wait);
875 			mutex_lock(&fs_info->scrub_lock);
876 			while (atomic_read(&fs_info->scrub_pause_req)) {
877 				mutex_unlock(&fs_info->scrub_lock);
878 				wait_event(fs_info->scrub_pause_wait,
879 				   atomic_read(&fs_info->scrub_pause_req) == 0);
880 				mutex_lock(&fs_info->scrub_lock);
881 			}
882 			atomic_dec(&fs_info->scrubs_paused);
883 			mutex_unlock(&fs_info->scrub_lock);
884 			wake_up(&fs_info->scrub_pause_wait);
885 			scrub_free_csums(sdev);
886 			start_stripe = i;
887 			goto again;
888 		}
889 
890 		key.objectid = logical;
891 		key.type = BTRFS_EXTENT_ITEM_KEY;
892 		key.offset = (u64)0;
893 
894 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
895 		if (ret < 0)
896 			goto out;
897 		if (ret > 0) {
898 			ret = btrfs_previous_item(root, path, 0,
899 						  BTRFS_EXTENT_ITEM_KEY);
900 			if (ret < 0)
901 				goto out;
902 			if (ret > 0) {
903 				/* there's no smaller item, so stick with the
904 				 * larger one */
905 				btrfs_release_path(path);
906 				ret = btrfs_search_slot(NULL, root, &key,
907 							path, 0, 0);
908 				if (ret < 0)
909 					goto out;
910 			}
911 		}
912 
913 		while (1) {
914 			l = path->nodes[0];
915 			slot = path->slots[0];
916 			if (slot >= btrfs_header_nritems(l)) {
917 				ret = btrfs_next_leaf(root, path);
918 				if (ret == 0)
919 					continue;
920 				if (ret < 0)
921 					goto out;
922 
923 				break;
924 			}
925 			btrfs_item_key_to_cpu(l, &key, slot);
926 
927 			if (key.objectid + key.offset <= logical)
928 				goto next;
929 
930 			if (key.objectid >= logical + map->stripe_len)
931 				break;
932 
933 			if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
934 				goto next;
935 
936 			extent = btrfs_item_ptr(l, slot,
937 						struct btrfs_extent_item);
938 			flags = btrfs_extent_flags(l, extent);
939 			generation = btrfs_extent_generation(l, extent);
940 
941 			if (key.objectid < logical &&
942 			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
943 				printk(KERN_ERR
944 				       "btrfs scrub: tree block %llu spanning "
945 				       "stripes, ignored. logical=%llu\n",
946 				       (unsigned long long)key.objectid,
947 				       (unsigned long long)logical);
948 				goto next;
949 			}
950 
951 			/*
952 			 * trim extent to this stripe
953 			 */
954 			if (key.objectid < logical) {
955 				key.offset -= logical - key.objectid;
956 				key.objectid = logical;
957 			}
958 			if (key.objectid + key.offset >
959 			    logical + map->stripe_len) {
960 				key.offset = logical + map->stripe_len -
961 					     key.objectid;
962 			}
963 
964 			ret = scrub_extent(sdev, key.objectid, key.offset,
965 					   key.objectid - logical + physical,
966 					   flags, generation, mirror_num);
967 			if (ret)
968 				goto out;
969 
970 next:
971 			path->slots[0]++;
972 		}
973 		btrfs_release_path(path);
974 		logical += increment;
975 		physical += map->stripe_len;
976 		spin_lock(&sdev->stat_lock);
977 		sdev->stat.last_physical = physical;
978 		spin_unlock(&sdev->stat_lock);
979 	}
980 	/* push queued extents */
981 	scrub_submit(sdev);
982 
983 out:
984 	blk_finish_plug(&plug);
985 out_noplug:
986 	btrfs_free_path(path);
987 	return ret < 0 ? ret : 0;
988 }
989 
990 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
991 	u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
992 {
993 	struct btrfs_mapping_tree *map_tree =
994 		&sdev->dev->dev_root->fs_info->mapping_tree;
995 	struct map_lookup *map;
996 	struct extent_map *em;
997 	int i;
998 	int ret = -EINVAL;
999 
1000 	read_lock(&map_tree->map_tree.lock);
1001 	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
1002 	read_unlock(&map_tree->map_tree.lock);
1003 
1004 	if (!em)
1005 		return -EINVAL;
1006 
1007 	map = (struct map_lookup *)em->bdev;
1008 	if (em->start != chunk_offset)
1009 		goto out;
1010 
1011 	if (em->len < length)
1012 		goto out;
1013 
1014 	for (i = 0; i < map->num_stripes; ++i) {
1015 		if (map->stripes[i].dev == sdev->dev) {
1016 			ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1017 			if (ret)
1018 				goto out;
1019 		}
1020 	}
1021 out:
1022 	free_extent_map(em);
1023 
1024 	return ret;
1025 }
1026 
1027 static noinline_for_stack
1028 int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1029 {
1030 	struct btrfs_dev_extent *dev_extent = NULL;
1031 	struct btrfs_path *path;
1032 	struct btrfs_root *root = sdev->dev->dev_root;
1033 	struct btrfs_fs_info *fs_info = root->fs_info;
1034 	u64 length;
1035 	u64 chunk_tree;
1036 	u64 chunk_objectid;
1037 	u64 chunk_offset;
1038 	int ret;
1039 	int slot;
1040 	struct extent_buffer *l;
1041 	struct btrfs_key key;
1042 	struct btrfs_key found_key;
1043 	struct btrfs_block_group_cache *cache;
1044 
1045 	path = btrfs_alloc_path();
1046 	if (!path)
1047 		return -ENOMEM;
1048 
1049 	path->reada = 2;
1050 	path->search_commit_root = 1;
1051 	path->skip_locking = 1;
1052 
1053 	key.objectid = sdev->dev->devid;
1054 	key.offset = 0ull;
1055 	key.type = BTRFS_DEV_EXTENT_KEY;
1056 
1057 
1058 	while (1) {
1059 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1060 		if (ret < 0)
1061 			break;
1062 		if (ret > 0) {
1063 			if (path->slots[0] >=
1064 			    btrfs_header_nritems(path->nodes[0])) {
1065 				ret = btrfs_next_leaf(root, path);
1066 				if (ret)
1067 					break;
1068 			}
1069 		}
1070 
1071 		l = path->nodes[0];
1072 		slot = path->slots[0];
1073 
1074 		btrfs_item_key_to_cpu(l, &found_key, slot);
1075 
1076 		if (found_key.objectid != sdev->dev->devid)
1077 			break;
1078 
1079 		if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1080 			break;
1081 
1082 		if (found_key.offset >= end)
1083 			break;
1084 
1085 		if (found_key.offset < key.offset)
1086 			break;
1087 
1088 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1089 		length = btrfs_dev_extent_length(l, dev_extent);
1090 
1091 		if (found_key.offset + length <= start) {
1092 			key.offset = found_key.offset + length;
1093 			btrfs_release_path(path);
1094 			continue;
1095 		}
1096 
1097 		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1098 		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1099 		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1100 
1101 		/*
1102 		 * get a reference on the corresponding block group to prevent
1103 		 * the chunk from going away while we scrub it
1104 		 */
1105 		cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1106 		if (!cache) {
1107 			ret = -ENOENT;
1108 			break;
1109 		}
1110 		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1111 				  chunk_offset, length);
1112 		btrfs_put_block_group(cache);
1113 		if (ret)
1114 			break;
1115 
1116 		key.offset = found_key.offset + length;
1117 		btrfs_release_path(path);
1118 	}
1119 
1120 	btrfs_free_path(path);
1121 
1122 	/*
1123 	 * ret can still be 1 from search_slot or next_leaf,
1124 	 * that's not an error
1125 	 */
1126 	return ret < 0 ? ret : 0;
1127 }
1128 
1129 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1130 {
1131 	int	i;
1132 	u64	bytenr;
1133 	u64	gen;
1134 	int	ret;
1135 	struct btrfs_device *device = sdev->dev;
1136 	struct btrfs_root *root = device->dev_root;
1137 
1138 	gen = root->fs_info->last_trans_committed;
1139 
1140 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1141 		bytenr = btrfs_sb_offset(i);
1142 		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1143 			break;
1144 
1145 		ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1146 				 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1147 		if (ret)
1148 			return ret;
1149 	}
1150 	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1151 
1152 	return 0;
1153 }
1154 
1155 /*
1156  * get a reference count on fs_info->scrub_workers. start worker if necessary
1157  */
1158 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1159 {
1160 	struct btrfs_fs_info *fs_info = root->fs_info;
1161 
1162 	mutex_lock(&fs_info->scrub_lock);
1163 	if (fs_info->scrub_workers_refcnt == 0) {
1164 		btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165 			   fs_info->thread_pool_size, &fs_info->generic_worker);
1166 		fs_info->scrub_workers.idle_thresh = 4;
1167 		btrfs_start_workers(&fs_info->scrub_workers, 1);
1168 	}
1169 	++fs_info->scrub_workers_refcnt;
1170 	mutex_unlock(&fs_info->scrub_lock);
1171 
1172 	return 0;
1173 }
1174 
1175 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1176 {
1177 	struct btrfs_fs_info *fs_info = root->fs_info;
1178 
1179 	mutex_lock(&fs_info->scrub_lock);
1180 	if (--fs_info->scrub_workers_refcnt == 0)
1181 		btrfs_stop_workers(&fs_info->scrub_workers);
1182 	WARN_ON(fs_info->scrub_workers_refcnt < 0);
1183 	mutex_unlock(&fs_info->scrub_lock);
1184 }
1185 
1186 
1187 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1188 		    struct btrfs_scrub_progress *progress, int readonly)
1189 {
1190 	struct scrub_dev *sdev;
1191 	struct btrfs_fs_info *fs_info = root->fs_info;
1192 	int ret;
1193 	struct btrfs_device *dev;
1194 
1195 	if (btrfs_fs_closing(root->fs_info))
1196 		return -EINVAL;
1197 
1198 	/*
1199 	 * check some assumptions
1200 	 */
1201 	if (root->sectorsize != PAGE_SIZE ||
1202 	    root->sectorsize != root->leafsize ||
1203 	    root->sectorsize != root->nodesize) {
1204 		printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1205 		return -EINVAL;
1206 	}
1207 
1208 	ret = scrub_workers_get(root);
1209 	if (ret)
1210 		return ret;
1211 
1212 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1213 	dev = btrfs_find_device(root, devid, NULL, NULL);
1214 	if (!dev || dev->missing) {
1215 		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1216 		scrub_workers_put(root);
1217 		return -ENODEV;
1218 	}
1219 	mutex_lock(&fs_info->scrub_lock);
1220 
1221 	if (!dev->in_fs_metadata) {
1222 		mutex_unlock(&fs_info->scrub_lock);
1223 		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1224 		scrub_workers_put(root);
1225 		return -ENODEV;
1226 	}
1227 
1228 	if (dev->scrub_device) {
1229 		mutex_unlock(&fs_info->scrub_lock);
1230 		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1231 		scrub_workers_put(root);
1232 		return -EINPROGRESS;
1233 	}
1234 	sdev = scrub_setup_dev(dev);
1235 	if (IS_ERR(sdev)) {
1236 		mutex_unlock(&fs_info->scrub_lock);
1237 		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1238 		scrub_workers_put(root);
1239 		return PTR_ERR(sdev);
1240 	}
1241 	sdev->readonly = readonly;
1242 	dev->scrub_device = sdev;
1243 
1244 	atomic_inc(&fs_info->scrubs_running);
1245 	mutex_unlock(&fs_info->scrub_lock);
1246 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1247 
1248 	down_read(&fs_info->scrub_super_lock);
1249 	ret = scrub_supers(sdev);
1250 	up_read(&fs_info->scrub_super_lock);
1251 
1252 	if (!ret)
1253 		ret = scrub_enumerate_chunks(sdev, start, end);
1254 
1255 	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1256 
1257 	atomic_dec(&fs_info->scrubs_running);
1258 	wake_up(&fs_info->scrub_pause_wait);
1259 
1260 	if (progress)
1261 		memcpy(progress, &sdev->stat, sizeof(*progress));
1262 
1263 	mutex_lock(&fs_info->scrub_lock);
1264 	dev->scrub_device = NULL;
1265 	mutex_unlock(&fs_info->scrub_lock);
1266 
1267 	scrub_free_dev(sdev);
1268 	scrub_workers_put(root);
1269 
1270 	return ret;
1271 }
1272 
1273 int btrfs_scrub_pause(struct btrfs_root *root)
1274 {
1275 	struct btrfs_fs_info *fs_info = root->fs_info;
1276 
1277 	mutex_lock(&fs_info->scrub_lock);
1278 	atomic_inc(&fs_info->scrub_pause_req);
1279 	while (atomic_read(&fs_info->scrubs_paused) !=
1280 	       atomic_read(&fs_info->scrubs_running)) {
1281 		mutex_unlock(&fs_info->scrub_lock);
1282 		wait_event(fs_info->scrub_pause_wait,
1283 			   atomic_read(&fs_info->scrubs_paused) ==
1284 			   atomic_read(&fs_info->scrubs_running));
1285 		mutex_lock(&fs_info->scrub_lock);
1286 	}
1287 	mutex_unlock(&fs_info->scrub_lock);
1288 
1289 	return 0;
1290 }
1291 
1292 int btrfs_scrub_continue(struct btrfs_root *root)
1293 {
1294 	struct btrfs_fs_info *fs_info = root->fs_info;
1295 
1296 	atomic_dec(&fs_info->scrub_pause_req);
1297 	wake_up(&fs_info->scrub_pause_wait);
1298 	return 0;
1299 }
1300 
1301 int btrfs_scrub_pause_super(struct btrfs_root *root)
1302 {
1303 	down_write(&root->fs_info->scrub_super_lock);
1304 	return 0;
1305 }
1306 
1307 int btrfs_scrub_continue_super(struct btrfs_root *root)
1308 {
1309 	up_write(&root->fs_info->scrub_super_lock);
1310 	return 0;
1311 }
1312 
1313 int btrfs_scrub_cancel(struct btrfs_root *root)
1314 {
1315 	struct btrfs_fs_info *fs_info = root->fs_info;
1316 
1317 	mutex_lock(&fs_info->scrub_lock);
1318 	if (!atomic_read(&fs_info->scrubs_running)) {
1319 		mutex_unlock(&fs_info->scrub_lock);
1320 		return -ENOTCONN;
1321 	}
1322 
1323 	atomic_inc(&fs_info->scrub_cancel_req);
1324 	while (atomic_read(&fs_info->scrubs_running)) {
1325 		mutex_unlock(&fs_info->scrub_lock);
1326 		wait_event(fs_info->scrub_pause_wait,
1327 			   atomic_read(&fs_info->scrubs_running) == 0);
1328 		mutex_lock(&fs_info->scrub_lock);
1329 	}
1330 	atomic_dec(&fs_info->scrub_cancel_req);
1331 	mutex_unlock(&fs_info->scrub_lock);
1332 
1333 	return 0;
1334 }
1335 
1336 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1337 {
1338 	struct btrfs_fs_info *fs_info = root->fs_info;
1339 	struct scrub_dev *sdev;
1340 
1341 	mutex_lock(&fs_info->scrub_lock);
1342 	sdev = dev->scrub_device;
1343 	if (!sdev) {
1344 		mutex_unlock(&fs_info->scrub_lock);
1345 		return -ENOTCONN;
1346 	}
1347 	atomic_inc(&sdev->cancel_req);
1348 	while (dev->scrub_device) {
1349 		mutex_unlock(&fs_info->scrub_lock);
1350 		wait_event(fs_info->scrub_pause_wait,
1351 			   dev->scrub_device == NULL);
1352 		mutex_lock(&fs_info->scrub_lock);
1353 	}
1354 	mutex_unlock(&fs_info->scrub_lock);
1355 
1356 	return 0;
1357 }
1358 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1359 {
1360 	struct btrfs_fs_info *fs_info = root->fs_info;
1361 	struct btrfs_device *dev;
1362 	int ret;
1363 
1364 	/*
1365 	 * we have to hold the device_list_mutex here so the device
1366 	 * does not go away in cancel_dev. FIXME: find a better solution
1367 	 */
1368 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
1369 	dev = btrfs_find_device(root, devid, NULL, NULL);
1370 	if (!dev) {
1371 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1372 		return -ENODEV;
1373 	}
1374 	ret = btrfs_scrub_cancel_dev(root, dev);
1375 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1376 
1377 	return ret;
1378 }
1379 
1380 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1381 			 struct btrfs_scrub_progress *progress)
1382 {
1383 	struct btrfs_device *dev;
1384 	struct scrub_dev *sdev = NULL;
1385 
1386 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1387 	dev = btrfs_find_device(root, devid, NULL, NULL);
1388 	if (dev)
1389 		sdev = dev->scrub_device;
1390 	if (sdev)
1391 		memcpy(progress, &sdev->stat, sizeof(*progress));
1392 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1393 
1394 	return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1395 }
1396