xref: /linux/fs/btrfs/raid56.c (revision ef340fccbe982a14588ed15eb3a20b2e5b78a3e1)
1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
253b381b3SDavid Woodhouse /*
353b381b3SDavid Woodhouse  * Copyright (C) 2012 Fusion-io  All rights reserved.
453b381b3SDavid Woodhouse  * Copyright (C) 2012 Intel Corp. All rights reserved.
553b381b3SDavid Woodhouse  */
6c1d7c514SDavid Sterba 
753b381b3SDavid Woodhouse #include <linux/sched.h>
853b381b3SDavid Woodhouse #include <linux/bio.h>
953b381b3SDavid Woodhouse #include <linux/slab.h>
1053b381b3SDavid Woodhouse #include <linux/blkdev.h>
1153b381b3SDavid Woodhouse #include <linux/raid/pq.h>
1253b381b3SDavid Woodhouse #include <linux/hash.h>
1353b381b3SDavid Woodhouse #include <linux/list_sort.h>
1453b381b3SDavid Woodhouse #include <linux/raid/xor.h>
15818e010bSDavid Sterba #include <linux/mm.h>
16cea62800SJohannes Thumshirn #include "misc.h"
1753b381b3SDavid Woodhouse #include "ctree.h"
1853b381b3SDavid Woodhouse #include "disk-io.h"
1953b381b3SDavid Woodhouse #include "volumes.h"
2053b381b3SDavid Woodhouse #include "raid56.h"
2153b381b3SDavid Woodhouse #include "async-thread.h"
2253b381b3SDavid Woodhouse 
2353b381b3SDavid Woodhouse /* set when additional merges to this rbio are not allowed */
2453b381b3SDavid Woodhouse #define RBIO_RMW_LOCKED_BIT	1
2553b381b3SDavid Woodhouse 
264ae10b3aSChris Mason /*
274ae10b3aSChris Mason  * set when this rbio is sitting in the hash, but it is just a cache
284ae10b3aSChris Mason  * of past RMW
294ae10b3aSChris Mason  */
304ae10b3aSChris Mason #define RBIO_CACHE_BIT		2
314ae10b3aSChris Mason 
324ae10b3aSChris Mason /*
334ae10b3aSChris Mason  * set when it is safe to trust the stripe_pages for caching
344ae10b3aSChris Mason  */
354ae10b3aSChris Mason #define RBIO_CACHE_READY_BIT	3
364ae10b3aSChris Mason 
374ae10b3aSChris Mason #define RBIO_CACHE_SIZE 1024
384ae10b3aSChris Mason 
398a953348SDavid Sterba #define BTRFS_STRIPE_HASH_TABLE_BITS				11
408a953348SDavid Sterba 
418a953348SDavid Sterba /* Used by the raid56 code to lock stripes for read/modify/write */
428a953348SDavid Sterba struct btrfs_stripe_hash {
438a953348SDavid Sterba 	struct list_head hash_list;
448a953348SDavid Sterba 	spinlock_t lock;
458a953348SDavid Sterba };
468a953348SDavid Sterba 
478a953348SDavid Sterba /* Used by the raid56 code to lock stripes for read/modify/write */
488a953348SDavid Sterba struct btrfs_stripe_hash_table {
498a953348SDavid Sterba 	struct list_head stripe_cache;
508a953348SDavid Sterba 	spinlock_t cache_lock;
518a953348SDavid Sterba 	int cache_size;
528a953348SDavid Sterba 	struct btrfs_stripe_hash table[];
538a953348SDavid Sterba };
548a953348SDavid Sterba 
55eb357060SQu Wenruo /*
56eb357060SQu Wenruo  * A bvec-like structure to represent a sector inside a page.
57eb357060SQu Wenruo  *
58eb357060SQu Wenruo  * Unlike bvec we don't need bv_len, as it's fixed to the sectorsize.
59eb357060SQu Wenruo  */
60eb357060SQu Wenruo struct sector_ptr {
61eb357060SQu Wenruo 	struct page *page;
6200425dd9SQu Wenruo 	unsigned int pgoff:24;
6300425dd9SQu Wenruo 	unsigned int uptodate:8;
64eb357060SQu Wenruo };
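
/*
 * Worked example of the layout above (illustrative only, assuming a 4K
 * sectorsize): on a machine with 4K pages each sector_ptr maps a whole
 * page at pgoff 0, while on a 64K page machine sixteen sector_ptrs share
 * one page at pgoff 0, 0x1000, 0x2000, ... 0xf000.  24 bits of pgoff are
 * plenty, since a page offset never exceeds PAGE_SIZE.
 */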
65eb357060SQu Wenruo 
6653b381b3SDavid Woodhouse static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
6753b381b3SDavid Woodhouse static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
68385de0efSChristoph Hellwig static void rmw_work(struct work_struct *work);
69385de0efSChristoph Hellwig static void read_rebuild_work(struct work_struct *work);
7053b381b3SDavid Woodhouse static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
7153b381b3SDavid Woodhouse static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
7253b381b3SDavid Woodhouse static void __free_raid_bio(struct btrfs_raid_bio *rbio);
7353b381b3SDavid Woodhouse static void index_rbio_pages(struct btrfs_raid_bio *rbio);
7453b381b3SDavid Woodhouse static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
7553b381b3SDavid Woodhouse 
765a6ac9eaSMiao Xie static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
775a6ac9eaSMiao Xie 					 int need_check);
78385de0efSChristoph Hellwig static void scrub_parity_work(struct work_struct *work);
795a6ac9eaSMiao Xie 
80385de0efSChristoph Hellwig static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func)
81ac638859SDavid Sterba {
82385de0efSChristoph Hellwig 	INIT_WORK(&rbio->work, work_func);
83385de0efSChristoph Hellwig 	queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
84ac638859SDavid Sterba }
85ac638859SDavid Sterba 
8653b381b3SDavid Woodhouse /*
8753b381b3SDavid Woodhouse  * the stripe hash table is used for locking, and to collect
8853b381b3SDavid Woodhouse  * bios in hopes of making a full stripe
8953b381b3SDavid Woodhouse  */
9053b381b3SDavid Woodhouse int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
9153b381b3SDavid Woodhouse {
9253b381b3SDavid Woodhouse 	struct btrfs_stripe_hash_table *table;
9353b381b3SDavid Woodhouse 	struct btrfs_stripe_hash_table *x;
9453b381b3SDavid Woodhouse 	struct btrfs_stripe_hash *cur;
9553b381b3SDavid Woodhouse 	struct btrfs_stripe_hash *h;
9653b381b3SDavid Woodhouse 	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
9753b381b3SDavid Woodhouse 	int i;
9853b381b3SDavid Woodhouse 
9953b381b3SDavid Woodhouse 	if (info->stripe_hash_table)
10053b381b3SDavid Woodhouse 		return 0;
10153b381b3SDavid Woodhouse 
10283c8266aSDavid Sterba 	/*
10383c8266aSDavid Sterba 	 * The table is large, starting with order 4 and can go as high as
10483c8266aSDavid Sterba 	 * order 7 in case lock debugging is turned on.
10583c8266aSDavid Sterba 	 *
10683c8266aSDavid Sterba 	 * Try harder to allocate and fall back to vmalloc to lower the chance
10783c8266aSDavid Sterba 	 * of a failing mount.
10883c8266aSDavid Sterba 	 */
109ee787f95SDavid Sterba 	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
11053b381b3SDavid Woodhouse 	if (!table)
11153b381b3SDavid Woodhouse 		return -ENOMEM;
11253b381b3SDavid Woodhouse 
1134ae10b3aSChris Mason 	spin_lock_init(&table->cache_lock);
1144ae10b3aSChris Mason 	INIT_LIST_HEAD(&table->stripe_cache);
1154ae10b3aSChris Mason 
11653b381b3SDavid Woodhouse 	h = table->table;
11753b381b3SDavid Woodhouse 
11853b381b3SDavid Woodhouse 	for (i = 0; i < num_entries; i++) {
11953b381b3SDavid Woodhouse 		cur = h + i;
12053b381b3SDavid Woodhouse 		INIT_LIST_HEAD(&cur->hash_list);
12153b381b3SDavid Woodhouse 		spin_lock_init(&cur->lock);
12253b381b3SDavid Woodhouse 	}
12353b381b3SDavid Woodhouse 
12453b381b3SDavid Woodhouse 	x = cmpxchg(&info->stripe_hash_table, NULL, table);
125f749303bSWang Shilong 	kvfree(x);
12653b381b3SDavid Woodhouse 	return 0;
12753b381b3SDavid Woodhouse }
12853b381b3SDavid Woodhouse 
12953b381b3SDavid Woodhouse /*
1304ae10b3aSChris Mason  * Caching an rbio means copying everything from the
131ac26df8bSQu Wenruo  * bio_sectors array into the stripe_pages array.  We
1324ae10b3aSChris Mason  * use the sector uptodate bit in the stripe_sectors array
1334ae10b3aSChris Mason  * to indicate if it has valid data.
1344ae10b3aSChris Mason  *
1354ae10b3aSChris Mason  * Once the caching is done, we set the cache ready
1364ae10b3aSChris Mason  * bit.
1374ae10b3aSChris Mason  */
1384ae10b3aSChris Mason static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
1394ae10b3aSChris Mason {
1404ae10b3aSChris Mason 	int i;
1414ae10b3aSChris Mason 	int ret;
1424ae10b3aSChris Mason 
1434ae10b3aSChris Mason 	ret = alloc_rbio_pages(rbio);
1444ae10b3aSChris Mason 	if (ret)
1454ae10b3aSChris Mason 		return;
1464ae10b3aSChris Mason 
14700425dd9SQu Wenruo 	for (i = 0; i < rbio->nr_sectors; i++) {
14800425dd9SQu Wenruo 		/* Some range not covered by bio (partial write), skip it */
14900425dd9SQu Wenruo 		if (!rbio->bio_sectors[i].page)
15000425dd9SQu Wenruo 			continue;
15100425dd9SQu Wenruo 
15200425dd9SQu Wenruo 		ASSERT(rbio->stripe_sectors[i].page);
15300425dd9SQu Wenruo 		memcpy_page(rbio->stripe_sectors[i].page,
15400425dd9SQu Wenruo 			    rbio->stripe_sectors[i].pgoff,
15500425dd9SQu Wenruo 			    rbio->bio_sectors[i].page,
15600425dd9SQu Wenruo 			    rbio->bio_sectors[i].pgoff,
15700425dd9SQu Wenruo 			    rbio->bioc->fs_info->sectorsize);
15800425dd9SQu Wenruo 		rbio->stripe_sectors[i].uptodate = 1;
15900425dd9SQu Wenruo 	}
1604ae10b3aSChris Mason 	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
1614ae10b3aSChris Mason }
1624ae10b3aSChris Mason 
1634ae10b3aSChris Mason /*
16453b381b3SDavid Woodhouse  * we hash on the first logical address of the stripe
16553b381b3SDavid Woodhouse  */
16653b381b3SDavid Woodhouse static int rbio_bucket(struct btrfs_raid_bio *rbio)
16753b381b3SDavid Woodhouse {
1684c664611SQu Wenruo 	u64 num = rbio->bioc->raid_map[0];
16953b381b3SDavid Woodhouse 
17053b381b3SDavid Woodhouse 	/*
17153b381b3SDavid Woodhouse 	 * we shift down quite a bit.  We're using byte
17253b381b3SDavid Woodhouse 	 * addressing, and most of the lower bits are zeros.
17353b381b3SDavid Woodhouse 	 * This tends to upset hash_64, and it consistently
17453b381b3SDavid Woodhouse 	 * returns just one or two different values.
17553b381b3SDavid Woodhouse 	 *
17653b381b3SDavid Woodhouse 	 * shifting off the lower bits fixes things.
17753b381b3SDavid Woodhouse 	 */
17853b381b3SDavid Woodhouse 	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
17953b381b3SDavid Woodhouse }
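
/*
 * Example of the bucketing above (numbers illustrative only): with
 * BTRFS_STRIPE_HASH_TABLE_BITS == 11 the table has 2048 buckets.
 * raid_map[0] is a byte address, so for a full stripe starting at
 * logical 1GiB the value fed to hash_64() is 0x40000000 >> 16 == 0x4000;
 * without the shift most inputs would share their low zero bits and
 * crowd into only a few buckets.
 */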
18053b381b3SDavid Woodhouse 
181d4e28d9bSQu Wenruo static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
182d4e28d9bSQu Wenruo 				       unsigned int page_nr)
183d4e28d9bSQu Wenruo {
184d4e28d9bSQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
185d4e28d9bSQu Wenruo 	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
186d4e28d9bSQu Wenruo 	int i;
187d4e28d9bSQu Wenruo 
188d4e28d9bSQu Wenruo 	ASSERT(page_nr < rbio->nr_pages);
189d4e28d9bSQu Wenruo 
190d4e28d9bSQu Wenruo 	for (i = sectors_per_page * page_nr;
191d4e28d9bSQu Wenruo 	     i < sectors_per_page * page_nr + sectors_per_page;
192d4e28d9bSQu Wenruo 	     i++) {
193d4e28d9bSQu Wenruo 		if (!rbio->stripe_sectors[i].uptodate)
194d4e28d9bSQu Wenruo 			return false;
195d4e28d9bSQu Wenruo 	}
196d4e28d9bSQu Wenruo 	return true;
197d4e28d9bSQu Wenruo }
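
/*
 * Example (illustrative, assuming 4K sectorsize and 64K pages):
 * sectors_per_page is 16, so page_nr 1 covers stripe_sectors[16..31] and
 * the helper returns true only when all sixteen of them are uptodate.
 * With 4K pages this degenerates to a single-sector check.
 */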
198d4e28d9bSQu Wenruo 
19953b381b3SDavid Woodhouse /*
200eb357060SQu Wenruo  * Update the stripe_sectors[] array to use correct page and pgoff
201eb357060SQu Wenruo  *
202eb357060SQu Wenruo  * Should be called every time any page pointer in stripes_pages[] got modified.
203eb357060SQu Wenruo  */
204eb357060SQu Wenruo static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
205eb357060SQu Wenruo {
206eb357060SQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
207eb357060SQu Wenruo 	u32 offset;
208eb357060SQu Wenruo 	int i;
209eb357060SQu Wenruo 
210eb357060SQu Wenruo 	for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
211eb357060SQu Wenruo 		int page_index = offset >> PAGE_SHIFT;
212eb357060SQu Wenruo 
213eb357060SQu Wenruo 		ASSERT(page_index < rbio->nr_pages);
214eb357060SQu Wenruo 		rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
215eb357060SQu Wenruo 		rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
216eb357060SQu Wenruo 	}
217eb357060SQu Wenruo }
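
/*
 * Example of the indexing above (illustrative, assuming a 4K sectorsize):
 * sector i sits at byte offset i * 4K from the start of the stripe pages,
 * so with 4K pages sector 5 points at stripe_pages[5] with pgoff 0, while
 * with 64K pages it points at stripe_pages[0] with pgoff 0x5000.
 */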
218eb357060SQu Wenruo 
2194d100466SQu Wenruo static void steal_rbio_page(struct btrfs_raid_bio *src,
2204d100466SQu Wenruo 			    struct btrfs_raid_bio *dest, int page_nr)
2214d100466SQu Wenruo {
2224d100466SQu Wenruo 	const u32 sectorsize = src->bioc->fs_info->sectorsize;
2234d100466SQu Wenruo 	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
2244d100466SQu Wenruo 	int i;
2254d100466SQu Wenruo 
2264d100466SQu Wenruo 	if (dest->stripe_pages[page_nr])
2274d100466SQu Wenruo 		__free_page(dest->stripe_pages[page_nr]);
2284d100466SQu Wenruo 	dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
2294d100466SQu Wenruo 	src->stripe_pages[page_nr] = NULL;
2304d100466SQu Wenruo 
2314d100466SQu Wenruo 	/* Also update the sector->uptodate bits. */
2324d100466SQu Wenruo 	for (i = sectors_per_page * page_nr;
2334d100466SQu Wenruo 	     i < sectors_per_page * page_nr + sectors_per_page; i++)
2344d100466SQu Wenruo 		dest->stripe_sectors[i].uptodate = true;
2354d100466SQu Wenruo }
2364d100466SQu Wenruo 
237eb357060SQu Wenruo /*
238d4e28d9bSQu Wenruo  * Stealing an rbio means taking all the uptodate pages from the stripe array
239d4e28d9bSQu Wenruo  * in the source rbio and putting them into the destination rbio.
240d4e28d9bSQu Wenruo  *
241d4e28d9bSQu Wenruo  * This will also update the involved stripe_sectors[] which are referring to
242d4e28d9bSQu Wenruo  * the old pages.
2434ae10b3aSChris Mason  */
2444ae10b3aSChris Mason static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
2454ae10b3aSChris Mason {
2464ae10b3aSChris Mason 	int i;
2474ae10b3aSChris Mason 	struct page *s;
2484ae10b3aSChris Mason 
2494ae10b3aSChris Mason 	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
2504ae10b3aSChris Mason 		return;
2514ae10b3aSChris Mason 
2524ae10b3aSChris Mason 	for (i = 0; i < dest->nr_pages; i++) {
2534ae10b3aSChris Mason 		s = src->stripe_pages[i];
254d4e28d9bSQu Wenruo 		if (!s || !full_page_sectors_uptodate(src, i))
2554ae10b3aSChris Mason 			continue;
2564ae10b3aSChris Mason 
2574d100466SQu Wenruo 		steal_rbio_page(src, dest, i);
2584ae10b3aSChris Mason 	}
259eb357060SQu Wenruo 	index_stripe_sectors(dest);
260eb357060SQu Wenruo 	index_stripe_sectors(src);
2614ae10b3aSChris Mason }
2624ae10b3aSChris Mason 
2634ae10b3aSChris Mason /*
26453b381b3SDavid Woodhouse  * merging means we take the bio_list from the victim and
26553b381b3SDavid Woodhouse  * splice it into the destination.  The victim should
26653b381b3SDavid Woodhouse  * be discarded afterwards.
26753b381b3SDavid Woodhouse  *
26853b381b3SDavid Woodhouse  * must be called with dest->bio_list_lock held
26953b381b3SDavid Woodhouse  */
27053b381b3SDavid Woodhouse static void merge_rbio(struct btrfs_raid_bio *dest,
27153b381b3SDavid Woodhouse 		       struct btrfs_raid_bio *victim)
27253b381b3SDavid Woodhouse {
27353b381b3SDavid Woodhouse 	bio_list_merge(&dest->bio_list, &victim->bio_list);
27453b381b3SDavid Woodhouse 	dest->bio_list_bytes += victim->bio_list_bytes;
275bd8f7e62SQu Wenruo 	/* Also inherit the bitmaps from @victim. */
276bd8f7e62SQu Wenruo 	bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
277bd8f7e62SQu Wenruo 		  dest->stripe_nsectors);
2784245215dSMiao Xie 	dest->generic_bio_cnt += victim->generic_bio_cnt;
27953b381b3SDavid Woodhouse 	bio_list_init(&victim->bio_list);
28053b381b3SDavid Woodhouse }
28153b381b3SDavid Woodhouse 
28253b381b3SDavid Woodhouse /*
2834ae10b3aSChris Mason  * used to prune items that are in the cache.  The caller
2844ae10b3aSChris Mason  * must hold the hash table lock.
2854ae10b3aSChris Mason  */
2864ae10b3aSChris Mason static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
2874ae10b3aSChris Mason {
2884ae10b3aSChris Mason 	int bucket = rbio_bucket(rbio);
2894ae10b3aSChris Mason 	struct btrfs_stripe_hash_table *table;
2904ae10b3aSChris Mason 	struct btrfs_stripe_hash *h;
2914ae10b3aSChris Mason 	int freeit = 0;
2924ae10b3aSChris Mason 
2934ae10b3aSChris Mason 	/*
2944ae10b3aSChris Mason 	 * check the bit again under the hash table lock.
2954ae10b3aSChris Mason 	 */
2964ae10b3aSChris Mason 	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
2974ae10b3aSChris Mason 		return;
2984ae10b3aSChris Mason 
2996a258d72SQu Wenruo 	table = rbio->bioc->fs_info->stripe_hash_table;
3004ae10b3aSChris Mason 	h = table->table + bucket;
3014ae10b3aSChris Mason 
3024ae10b3aSChris Mason 	/* hold the lock for the bucket because we may be
3034ae10b3aSChris Mason 	 * removing it from the hash table
3044ae10b3aSChris Mason 	 */
3054ae10b3aSChris Mason 	spin_lock(&h->lock);
3064ae10b3aSChris Mason 
3074ae10b3aSChris Mason 	/*
3084ae10b3aSChris Mason 	 * hold the lock for the bio list because we need
3094ae10b3aSChris Mason 	 * to make sure the bio list is empty
3104ae10b3aSChris Mason 	 */
3114ae10b3aSChris Mason 	spin_lock(&rbio->bio_list_lock);
3124ae10b3aSChris Mason 
3134ae10b3aSChris Mason 	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
3144ae10b3aSChris Mason 		list_del_init(&rbio->stripe_cache);
3154ae10b3aSChris Mason 		table->cache_size -= 1;
3164ae10b3aSChris Mason 		freeit = 1;
3174ae10b3aSChris Mason 
3184ae10b3aSChris Mason 		/* if the bio list isn't empty, this rbio is
3194ae10b3aSChris Mason 		 * still involved in an IO.  We take it out
3204ae10b3aSChris Mason 		 * of the cache list, and drop the ref that
3214ae10b3aSChris Mason 		 * was held for the list.
3224ae10b3aSChris Mason 		 *
3234ae10b3aSChris Mason 		 * If the bio_list was empty, we also remove
3244ae10b3aSChris Mason 		 * the rbio from the hash_table, and drop
3254ae10b3aSChris Mason 		 * the corresponding ref
3264ae10b3aSChris Mason 		 */
3274ae10b3aSChris Mason 		if (bio_list_empty(&rbio->bio_list)) {
3284ae10b3aSChris Mason 			if (!list_empty(&rbio->hash_list)) {
3294ae10b3aSChris Mason 				list_del_init(&rbio->hash_list);
330dec95574SElena Reshetova 				refcount_dec(&rbio->refs);
3314ae10b3aSChris Mason 				BUG_ON(!list_empty(&rbio->plug_list));
3324ae10b3aSChris Mason 			}
3334ae10b3aSChris Mason 		}
3344ae10b3aSChris Mason 	}
3354ae10b3aSChris Mason 
3364ae10b3aSChris Mason 	spin_unlock(&rbio->bio_list_lock);
3374ae10b3aSChris Mason 	spin_unlock(&h->lock);
3384ae10b3aSChris Mason 
3394ae10b3aSChris Mason 	if (freeit)
3404ae10b3aSChris Mason 		__free_raid_bio(rbio);
3414ae10b3aSChris Mason }
3424ae10b3aSChris Mason 
3434ae10b3aSChris Mason /*
3444ae10b3aSChris Mason  * prune a given rbio from the cache
3454ae10b3aSChris Mason  */
3464ae10b3aSChris Mason static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
3474ae10b3aSChris Mason {
3484ae10b3aSChris Mason 	struct btrfs_stripe_hash_table *table;
3494ae10b3aSChris Mason 	unsigned long flags;
3504ae10b3aSChris Mason 
3514ae10b3aSChris Mason 	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
3524ae10b3aSChris Mason 		return;
3534ae10b3aSChris Mason 
3546a258d72SQu Wenruo 	table = rbio->bioc->fs_info->stripe_hash_table;
3554ae10b3aSChris Mason 
3564ae10b3aSChris Mason 	spin_lock_irqsave(&table->cache_lock, flags);
3574ae10b3aSChris Mason 	__remove_rbio_from_cache(rbio);
3584ae10b3aSChris Mason 	spin_unlock_irqrestore(&table->cache_lock, flags);
3594ae10b3aSChris Mason }
3604ae10b3aSChris Mason 
3614ae10b3aSChris Mason /*
3624ae10b3aSChris Mason  * remove everything in the cache
3634ae10b3aSChris Mason  */
36448a3b636SEric Sandeen static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
3654ae10b3aSChris Mason {
3664ae10b3aSChris Mason 	struct btrfs_stripe_hash_table *table;
3674ae10b3aSChris Mason 	unsigned long flags;
3684ae10b3aSChris Mason 	struct btrfs_raid_bio *rbio;
3694ae10b3aSChris Mason 
3704ae10b3aSChris Mason 	table = info->stripe_hash_table;
3714ae10b3aSChris Mason 
3724ae10b3aSChris Mason 	spin_lock_irqsave(&table->cache_lock, flags);
3734ae10b3aSChris Mason 	while (!list_empty(&table->stripe_cache)) {
3744ae10b3aSChris Mason 		rbio = list_entry(table->stripe_cache.next,
3754ae10b3aSChris Mason 				  struct btrfs_raid_bio,
3764ae10b3aSChris Mason 				  stripe_cache);
3774ae10b3aSChris Mason 		__remove_rbio_from_cache(rbio);
3784ae10b3aSChris Mason 	}
3794ae10b3aSChris Mason 	spin_unlock_irqrestore(&table->cache_lock, flags);
3804ae10b3aSChris Mason }
3814ae10b3aSChris Mason 
3824ae10b3aSChris Mason /*
3834ae10b3aSChris Mason  * remove all cached entries and free the hash table
3844ae10b3aSChris Mason  * used by unmount
38553b381b3SDavid Woodhouse  */
38653b381b3SDavid Woodhouse void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
38753b381b3SDavid Woodhouse {
38853b381b3SDavid Woodhouse 	if (!info->stripe_hash_table)
38953b381b3SDavid Woodhouse 		return;
3904ae10b3aSChris Mason 	btrfs_clear_rbio_cache(info);
391f749303bSWang Shilong 	kvfree(info->stripe_hash_table);
39253b381b3SDavid Woodhouse 	info->stripe_hash_table = NULL;
39353b381b3SDavid Woodhouse }
39453b381b3SDavid Woodhouse 
39553b381b3SDavid Woodhouse /*
3964ae10b3aSChris Mason  * insert an rbio into the stripe cache.  It
3974ae10b3aSChris Mason  * must have already been prepared by calling
3984ae10b3aSChris Mason  * cache_rbio_pages
3994ae10b3aSChris Mason  *
4004ae10b3aSChris Mason  * If this rbio was already cached, it gets
4014ae10b3aSChris Mason  * moved to the front of the lru.
4024ae10b3aSChris Mason  *
4034ae10b3aSChris Mason  * If the size of the rbio cache is too big, we
4044ae10b3aSChris Mason  * prune an item.
4054ae10b3aSChris Mason  */
4064ae10b3aSChris Mason static void cache_rbio(struct btrfs_raid_bio *rbio)
4074ae10b3aSChris Mason {
4084ae10b3aSChris Mason 	struct btrfs_stripe_hash_table *table;
4094ae10b3aSChris Mason 	unsigned long flags;
4104ae10b3aSChris Mason 
4114ae10b3aSChris Mason 	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
4124ae10b3aSChris Mason 		return;
4134ae10b3aSChris Mason 
4146a258d72SQu Wenruo 	table = rbio->bioc->fs_info->stripe_hash_table;
4154ae10b3aSChris Mason 
4164ae10b3aSChris Mason 	spin_lock_irqsave(&table->cache_lock, flags);
4174ae10b3aSChris Mason 	spin_lock(&rbio->bio_list_lock);
4184ae10b3aSChris Mason 
4194ae10b3aSChris Mason 	/* bump our ref if we were not in the list before */
4204ae10b3aSChris Mason 	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
421dec95574SElena Reshetova 		refcount_inc(&rbio->refs);
4224ae10b3aSChris Mason 
4234ae10b3aSChris Mason 	if (!list_empty(&rbio->stripe_cache)) {
4244ae10b3aSChris Mason 		list_move(&rbio->stripe_cache, &table->stripe_cache);
4254ae10b3aSChris Mason 	} else {
4264ae10b3aSChris Mason 		list_add(&rbio->stripe_cache, &table->stripe_cache);
4274ae10b3aSChris Mason 		table->cache_size += 1;
4284ae10b3aSChris Mason 	}
4294ae10b3aSChris Mason 
4304ae10b3aSChris Mason 	spin_unlock(&rbio->bio_list_lock);
4314ae10b3aSChris Mason 
4324ae10b3aSChris Mason 	if (table->cache_size > RBIO_CACHE_SIZE) {
4334ae10b3aSChris Mason 		struct btrfs_raid_bio *found;
4344ae10b3aSChris Mason 
4354ae10b3aSChris Mason 		found = list_entry(table->stripe_cache.prev,
4364ae10b3aSChris Mason 				  struct btrfs_raid_bio,
4374ae10b3aSChris Mason 				  stripe_cache);
4384ae10b3aSChris Mason 
4394ae10b3aSChris Mason 		if (found != rbio)
4404ae10b3aSChris Mason 			__remove_rbio_from_cache(found);
4414ae10b3aSChris Mason 	}
4424ae10b3aSChris Mason 
4434ae10b3aSChris Mason 	spin_unlock_irqrestore(&table->cache_lock, flags);
4444ae10b3aSChris Mason }
4454ae10b3aSChris Mason 
4464ae10b3aSChris Mason /*
44753b381b3SDavid Woodhouse  * helper function to run the xor_blocks api.  It is only
44853b381b3SDavid Woodhouse  * able to do MAX_XOR_BLOCKS at a time, so we need to
44953b381b3SDavid Woodhouse  * loop through.
45053b381b3SDavid Woodhouse  */
45153b381b3SDavid Woodhouse static void run_xor(void **pages, int src_cnt, ssize_t len)
45253b381b3SDavid Woodhouse {
45353b381b3SDavid Woodhouse 	int src_off = 0;
45453b381b3SDavid Woodhouse 	int xor_src_cnt = 0;
45553b381b3SDavid Woodhouse 	void *dest = pages[src_cnt];
45653b381b3SDavid Woodhouse 
45753b381b3SDavid Woodhouse 	while (src_cnt > 0) {
45853b381b3SDavid Woodhouse 		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
45953b381b3SDavid Woodhouse 		xor_blocks(xor_src_cnt, len, dest, pages + src_off);
46053b381b3SDavid Woodhouse 
46153b381b3SDavid Woodhouse 		src_cnt -= xor_src_cnt;
46253b381b3SDavid Woodhouse 		src_off += xor_src_cnt;
46353b381b3SDavid Woodhouse 	}
46453b381b3SDavid Woodhouse }
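
/*
 * Sketch of how the loop above behaves (illustrative only, assuming
 * MAX_XOR_BLOCKS is 4 and the pointers are already mapped):
 *
 *	void *pointers[7];	// 6 sources + 1 destination in the last slot
 *	...
 *	run_xor(pointers, 6, sectorsize);
 *
 * The first xor_blocks() call folds sources 0-3 into pointers[6], the
 * second folds sources 4-5, so the destination accumulates the XOR of
 * all six sources on top of its previous contents.
 */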
46553b381b3SDavid Woodhouse 
46653b381b3SDavid Woodhouse /*
467176571a1SDavid Sterba  * Returns true if the bio list inside this rbio covers an entire stripe (no
468176571a1SDavid Sterba  * rmw required).
46953b381b3SDavid Woodhouse  */
47053b381b3SDavid Woodhouse static int rbio_is_full(struct btrfs_raid_bio *rbio)
47153b381b3SDavid Woodhouse {
47253b381b3SDavid Woodhouse 	unsigned long flags;
473176571a1SDavid Sterba 	unsigned long size = rbio->bio_list_bytes;
474176571a1SDavid Sterba 	int ret = 1;
47553b381b3SDavid Woodhouse 
47653b381b3SDavid Woodhouse 	spin_lock_irqsave(&rbio->bio_list_lock, flags);
477176571a1SDavid Sterba 	if (size != rbio->nr_data * rbio->stripe_len)
478176571a1SDavid Sterba 		ret = 0;
479176571a1SDavid Sterba 	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
48053b381b3SDavid Woodhouse 	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
481176571a1SDavid Sterba 
48253b381b3SDavid Woodhouse 	return ret;
48353b381b3SDavid Woodhouse }
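
/*
 * Example (illustrative): for a RAID5 full stripe with 3 data stripes
 * and a 64K stripe_len, rbio_is_full() returns 1 only when the queued
 * bios cover exactly 3 * 64K == 192K of data; anything less means a
 * read/modify/write cycle is still required.
 */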
48453b381b3SDavid Woodhouse 
48553b381b3SDavid Woodhouse /*
48653b381b3SDavid Woodhouse  * returns 1 if it is safe to merge two rbios together.
48753b381b3SDavid Woodhouse  * The merging is safe if the two rbios correspond to
48853b381b3SDavid Woodhouse  * the same stripe and if they are both going in the same
48953b381b3SDavid Woodhouse  * direction (read vs write), and if neither one is
49053b381b3SDavid Woodhouse  * locked for final IO
49153b381b3SDavid Woodhouse  *
49253b381b3SDavid Woodhouse  * The caller is responsible for locking such that
49353b381b3SDavid Woodhouse  * rmw_locked is safe to test
49453b381b3SDavid Woodhouse  */
49553b381b3SDavid Woodhouse static int rbio_can_merge(struct btrfs_raid_bio *last,
49653b381b3SDavid Woodhouse 			  struct btrfs_raid_bio *cur)
49753b381b3SDavid Woodhouse {
49853b381b3SDavid Woodhouse 	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
49953b381b3SDavid Woodhouse 	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
50053b381b3SDavid Woodhouse 		return 0;
50153b381b3SDavid Woodhouse 
5024ae10b3aSChris Mason 	/*
5034ae10b3aSChris Mason 	 * we can't merge with cached rbios, since the
5044ae10b3aSChris Mason 	 * idea is that when we merge the destination
5054ae10b3aSChris Mason 	 * rbio is going to run our IO for us.  We can
50601327610SNicholas D Steeves 	 * steal from cached rbios though, other functions
5074ae10b3aSChris Mason 	 * handle that.
5084ae10b3aSChris Mason 	 */
5094ae10b3aSChris Mason 	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
5104ae10b3aSChris Mason 	    test_bit(RBIO_CACHE_BIT, &cur->flags))
5114ae10b3aSChris Mason 		return 0;
5124ae10b3aSChris Mason 
5134c664611SQu Wenruo 	if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
51453b381b3SDavid Woodhouse 		return 0;
51553b381b3SDavid Woodhouse 
5165a6ac9eaSMiao Xie 	/* we can't merge with different operations */
5175a6ac9eaSMiao Xie 	if (last->operation != cur->operation)
51853b381b3SDavid Woodhouse 		return 0;
5195a6ac9eaSMiao Xie 	/*
5205a6ac9eaSMiao Xie 	 * A parity scrub has to read the full stripe from the drive,
5215a6ac9eaSMiao Xie 	 * check and repair the parity, and write the new results back.
5225a6ac9eaSMiao Xie 	 *
5235a6ac9eaSMiao Xie 	 * We're not allowed to add any new bios to the
5245a6ac9eaSMiao Xie 	 * bio list here; anyone else who wants to
5255a6ac9eaSMiao Xie 	 * change this stripe needs to do their own rmw.
5265a6ac9eaSMiao Xie 	 */
527db34be19SLiu Bo 	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
5285a6ac9eaSMiao Xie 		return 0;
52953b381b3SDavid Woodhouse 
530db34be19SLiu Bo 	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
531b4ee1782SOmar Sandoval 		return 0;
532b4ee1782SOmar Sandoval 
533cc54ff62SLiu Bo 	if (last->operation == BTRFS_RBIO_READ_REBUILD) {
534cc54ff62SLiu Bo 		int fa = last->faila;
535cc54ff62SLiu Bo 		int fb = last->failb;
536cc54ff62SLiu Bo 		int cur_fa = cur->faila;
537cc54ff62SLiu Bo 		int cur_fb = cur->failb;
538cc54ff62SLiu Bo 
539cc54ff62SLiu Bo 		if (last->faila >= last->failb) {
540cc54ff62SLiu Bo 			fa = last->failb;
541cc54ff62SLiu Bo 			fb = last->faila;
542cc54ff62SLiu Bo 		}
543cc54ff62SLiu Bo 
544cc54ff62SLiu Bo 		if (cur->faila >= cur->failb) {
545cc54ff62SLiu Bo 			cur_fa = cur->failb;
546cc54ff62SLiu Bo 			cur_fb = cur->faila;
547cc54ff62SLiu Bo 		}
548cc54ff62SLiu Bo 
549cc54ff62SLiu Bo 		if (fa != cur_fa || fb != cur_fb)
550cc54ff62SLiu Bo 			return 0;
551cc54ff62SLiu Bo 	}
55253b381b3SDavid Woodhouse 	return 1;
55353b381b3SDavid Woodhouse }
55453b381b3SDavid Woodhouse 
5553e77605dSQu Wenruo static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
5563e77605dSQu Wenruo 					     unsigned int stripe_nr,
5573e77605dSQu Wenruo 					     unsigned int sector_nr)
5583e77605dSQu Wenruo {
5593e77605dSQu Wenruo 	ASSERT(stripe_nr < rbio->real_stripes);
5603e77605dSQu Wenruo 	ASSERT(sector_nr < rbio->stripe_nsectors);
5613e77605dSQu Wenruo 
5623e77605dSQu Wenruo 	return stripe_nr * rbio->stripe_nsectors + sector_nr;
5633e77605dSQu Wenruo }
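
/*
 * Example of the linear layout above (illustrative, assuming a 64K
 * stripe_len and 4K sectorsize, i.e. stripe_nsectors == 16): the sector
 * at (stripe_nr == 2, sector_nr == 3) lives at stripe_sectors[2 * 16 + 3]
 * == stripe_sectors[35].
 */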
5643e77605dSQu Wenruo 
5653e77605dSQu Wenruo /* Return a sector from rbio->stripe_sectors, not from the bio list */
5663e77605dSQu Wenruo static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio,
5673e77605dSQu Wenruo 					     unsigned int stripe_nr,
5683e77605dSQu Wenruo 					     unsigned int sector_nr)
5693e77605dSQu Wenruo {
5703e77605dSQu Wenruo 	return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr,
5713e77605dSQu Wenruo 							      sector_nr)];
5723e77605dSQu Wenruo }
5733e77605dSQu Wenruo 
5741145059aSQu Wenruo /* Grab a sector inside P stripe */
5751145059aSQu Wenruo static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio,
5761145059aSQu Wenruo 					      unsigned int sector_nr)
577b7178a5fSZhao Lei {
5781145059aSQu Wenruo 	return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr);
579b7178a5fSZhao Lei }
580b7178a5fSZhao Lei 
5811145059aSQu Wenruo /* Grab a sector inside Q stripe, return NULL if not RAID6 */
5821145059aSQu Wenruo static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio,
5831145059aSQu Wenruo 					      unsigned int sector_nr)
58453b381b3SDavid Woodhouse {
5852c8cdd6eSMiao Xie 	if (rbio->nr_data + 1 == rbio->real_stripes)
58653b381b3SDavid Woodhouse 		return NULL;
5871145059aSQu Wenruo 	return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr);
5881145059aSQu Wenruo }
5891145059aSQu Wenruo 
59053b381b3SDavid Woodhouse /*
59153b381b3SDavid Woodhouse  * The first stripe in the table for a logical address
59253b381b3SDavid Woodhouse  * has the lock.  rbios are added in one of three ways:
59353b381b3SDavid Woodhouse  *
59453b381b3SDavid Woodhouse  * 1) Nobody has the stripe locked yet.  The rbio is given
59553b381b3SDavid Woodhouse  * the lock and 0 is returned.  The caller must start the IO
59653b381b3SDavid Woodhouse  * themselves.
59753b381b3SDavid Woodhouse  *
59853b381b3SDavid Woodhouse  * 2) Someone has the stripe locked, but we're able to merge
59953b381b3SDavid Woodhouse  * with the lock owner.  The rbio is freed and the IO will
60053b381b3SDavid Woodhouse  * start automatically along with the existing rbio.  1 is returned.
60153b381b3SDavid Woodhouse  *
60253b381b3SDavid Woodhouse  * 3) Someone has the stripe locked, but we're not able to merge.
60353b381b3SDavid Woodhouse  * The rbio is added to the lock owner's plug list, or merged into
60453b381b3SDavid Woodhouse  * an rbio already on the plug list.  When the lock owner unlocks,
60553b381b3SDavid Woodhouse  * the next rbio on the list is run and the IO is started automatically.
60653b381b3SDavid Woodhouse  * 1 is returned
60753b381b3SDavid Woodhouse  *
60853b381b3SDavid Woodhouse  * If we return 0, the caller still owns the rbio and must continue with
60953b381b3SDavid Woodhouse  * IO submission.  If we return 1, the caller must assume the rbio has
61053b381b3SDavid Woodhouse  * already been freed.
61153b381b3SDavid Woodhouse  */
61253b381b3SDavid Woodhouse static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
61353b381b3SDavid Woodhouse {
614721860d5SJohannes Thumshirn 	struct btrfs_stripe_hash *h;
61553b381b3SDavid Woodhouse 	struct btrfs_raid_bio *cur;
61653b381b3SDavid Woodhouse 	struct btrfs_raid_bio *pending;
61753b381b3SDavid Woodhouse 	unsigned long flags;
61853b381b3SDavid Woodhouse 	struct btrfs_raid_bio *freeit = NULL;
6194ae10b3aSChris Mason 	struct btrfs_raid_bio *cache_drop = NULL;
62053b381b3SDavid Woodhouse 	int ret = 0;
62153b381b3SDavid Woodhouse 
6226a258d72SQu Wenruo 	h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
623721860d5SJohannes Thumshirn 
62453b381b3SDavid Woodhouse 	spin_lock_irqsave(&h->lock, flags);
62553b381b3SDavid Woodhouse 	list_for_each_entry(cur, &h->hash_list, hash_list) {
6264c664611SQu Wenruo 		if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
6279d6cb1b0SJohannes Thumshirn 			continue;
6289d6cb1b0SJohannes Thumshirn 
62953b381b3SDavid Woodhouse 		spin_lock(&cur->bio_list_lock);
63053b381b3SDavid Woodhouse 
6319d6cb1b0SJohannes Thumshirn 		/* Can we steal this cached rbio's pages? */
6324ae10b3aSChris Mason 		if (bio_list_empty(&cur->bio_list) &&
6334ae10b3aSChris Mason 		    list_empty(&cur->plug_list) &&
6344ae10b3aSChris Mason 		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
6354ae10b3aSChris Mason 		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
6364ae10b3aSChris Mason 			list_del_init(&cur->hash_list);
637dec95574SElena Reshetova 			refcount_dec(&cur->refs);
6384ae10b3aSChris Mason 
6394ae10b3aSChris Mason 			steal_rbio(cur, rbio);
6404ae10b3aSChris Mason 			cache_drop = cur;
6414ae10b3aSChris Mason 			spin_unlock(&cur->bio_list_lock);
6424ae10b3aSChris Mason 
6434ae10b3aSChris Mason 			goto lockit;
6444ae10b3aSChris Mason 		}
6454ae10b3aSChris Mason 
6469d6cb1b0SJohannes Thumshirn 		/* Can we merge into the lock owner? */
64753b381b3SDavid Woodhouse 		if (rbio_can_merge(cur, rbio)) {
64853b381b3SDavid Woodhouse 			merge_rbio(cur, rbio);
64953b381b3SDavid Woodhouse 			spin_unlock(&cur->bio_list_lock);
65053b381b3SDavid Woodhouse 			freeit = rbio;
65153b381b3SDavid Woodhouse 			ret = 1;
65253b381b3SDavid Woodhouse 			goto out;
65353b381b3SDavid Woodhouse 		}
65453b381b3SDavid Woodhouse 
65653b381b3SDavid Woodhouse 		/*
6579d6cb1b0SJohannes Thumshirn 		 * We couldn't merge with the running rbio, see if we can merge
6589d6cb1b0SJohannes Thumshirn 		 * with the pending ones.  We don't have to check for rmw_locked
6599d6cb1b0SJohannes Thumshirn 		 * because there is no way they are inside finish_rmw right now
66053b381b3SDavid Woodhouse 		 */
6619d6cb1b0SJohannes Thumshirn 		list_for_each_entry(pending, &cur->plug_list, plug_list) {
66253b381b3SDavid Woodhouse 			if (rbio_can_merge(pending, rbio)) {
66353b381b3SDavid Woodhouse 				merge_rbio(pending, rbio);
66453b381b3SDavid Woodhouse 				spin_unlock(&cur->bio_list_lock);
66553b381b3SDavid Woodhouse 				freeit = rbio;
66653b381b3SDavid Woodhouse 				ret = 1;
66753b381b3SDavid Woodhouse 				goto out;
66853b381b3SDavid Woodhouse 			}
66953b381b3SDavid Woodhouse 		}
67053b381b3SDavid Woodhouse 
6719d6cb1b0SJohannes Thumshirn 		/*
6729d6cb1b0SJohannes Thumshirn 		 * No merging, put us on the tail of the plug list; our rbio
6739d6cb1b0SJohannes Thumshirn 		 * will be started when the currently running rbio unlocks.
67453b381b3SDavid Woodhouse 		 */
67553b381b3SDavid Woodhouse 		list_add_tail(&rbio->plug_list, &cur->plug_list);
67653b381b3SDavid Woodhouse 		spin_unlock(&cur->bio_list_lock);
67753b381b3SDavid Woodhouse 		ret = 1;
67853b381b3SDavid Woodhouse 		goto out;
67953b381b3SDavid Woodhouse 	}
6804ae10b3aSChris Mason lockit:
681dec95574SElena Reshetova 	refcount_inc(&rbio->refs);
68253b381b3SDavid Woodhouse 	list_add(&rbio->hash_list, &h->hash_list);
68353b381b3SDavid Woodhouse out:
68453b381b3SDavid Woodhouse 	spin_unlock_irqrestore(&h->lock, flags);
6854ae10b3aSChris Mason 	if (cache_drop)
6864ae10b3aSChris Mason 		remove_rbio_from_cache(cache_drop);
68753b381b3SDavid Woodhouse 	if (freeit)
68853b381b3SDavid Woodhouse 		__free_raid_bio(freeit);
68953b381b3SDavid Woodhouse 	return ret;
69053b381b3SDavid Woodhouse }
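
/*
 * Simplified sketch of how a write-path caller might use the lock above
 * (illustrative only, not a verbatim copy of the callers in this file):
 *
 *	if (lock_stripe_add(rbio) == 0)
 *		finish_rmw(rbio);	// we own the lock, start the IO
 *	// a return of 1 means the rbio was merged or queued on the plug
 *	// list and must not be touched again by the caller
 */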
69153b381b3SDavid Woodhouse 
69253b381b3SDavid Woodhouse /*
69353b381b3SDavid Woodhouse  * called as rmw or parity rebuild is completed.  If the plug list has more
69453b381b3SDavid Woodhouse  * rbios waiting for this stripe, the next one on the list will be started
69553b381b3SDavid Woodhouse  */
69653b381b3SDavid Woodhouse static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
69753b381b3SDavid Woodhouse {
69853b381b3SDavid Woodhouse 	int bucket;
69953b381b3SDavid Woodhouse 	struct btrfs_stripe_hash *h;
70053b381b3SDavid Woodhouse 	unsigned long flags;
7014ae10b3aSChris Mason 	int keep_cache = 0;
70253b381b3SDavid Woodhouse 
70353b381b3SDavid Woodhouse 	bucket = rbio_bucket(rbio);
7046a258d72SQu Wenruo 	h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;
70553b381b3SDavid Woodhouse 
7064ae10b3aSChris Mason 	if (list_empty(&rbio->plug_list))
7074ae10b3aSChris Mason 		cache_rbio(rbio);
7084ae10b3aSChris Mason 
70953b381b3SDavid Woodhouse 	spin_lock_irqsave(&h->lock, flags);
71053b381b3SDavid Woodhouse 	spin_lock(&rbio->bio_list_lock);
71153b381b3SDavid Woodhouse 
71253b381b3SDavid Woodhouse 	if (!list_empty(&rbio->hash_list)) {
7134ae10b3aSChris Mason 		/*
7144ae10b3aSChris Mason 		 * if we're still cached and there is no other IO
7154ae10b3aSChris Mason 		 * to perform, just leave this rbio here for others
7164ae10b3aSChris Mason 		 * to steal from later
7174ae10b3aSChris Mason 		 */
7184ae10b3aSChris Mason 		if (list_empty(&rbio->plug_list) &&
7194ae10b3aSChris Mason 		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
7204ae10b3aSChris Mason 			keep_cache = 1;
7214ae10b3aSChris Mason 			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
7224ae10b3aSChris Mason 			BUG_ON(!bio_list_empty(&rbio->bio_list));
7234ae10b3aSChris Mason 			goto done;
7244ae10b3aSChris Mason 		}
72553b381b3SDavid Woodhouse 
72653b381b3SDavid Woodhouse 		list_del_init(&rbio->hash_list);
727dec95574SElena Reshetova 		refcount_dec(&rbio->refs);
72853b381b3SDavid Woodhouse 
72953b381b3SDavid Woodhouse 		/*
73053b381b3SDavid Woodhouse 		 * we use the plug list to hold all the rbios
73153b381b3SDavid Woodhouse 		 * waiting for the chance to lock this stripe.
73253b381b3SDavid Woodhouse 		 * hand the lock over to one of them.
73353b381b3SDavid Woodhouse 		 */
73453b381b3SDavid Woodhouse 		if (!list_empty(&rbio->plug_list)) {
73553b381b3SDavid Woodhouse 			struct btrfs_raid_bio *next;
73653b381b3SDavid Woodhouse 			struct list_head *head = rbio->plug_list.next;
73753b381b3SDavid Woodhouse 
73853b381b3SDavid Woodhouse 			next = list_entry(head, struct btrfs_raid_bio,
73953b381b3SDavid Woodhouse 					  plug_list);
74053b381b3SDavid Woodhouse 
74153b381b3SDavid Woodhouse 			list_del_init(&rbio->plug_list);
74253b381b3SDavid Woodhouse 
74353b381b3SDavid Woodhouse 			list_add(&next->hash_list, &h->hash_list);
744dec95574SElena Reshetova 			refcount_inc(&next->refs);
74553b381b3SDavid Woodhouse 			spin_unlock(&rbio->bio_list_lock);
74653b381b3SDavid Woodhouse 			spin_unlock_irqrestore(&h->lock, flags);
74753b381b3SDavid Woodhouse 
7481b94b556SMiao Xie 			if (next->operation == BTRFS_RBIO_READ_REBUILD)
749e66d8d5aSDavid Sterba 				start_async_work(next, read_rebuild_work);
750b4ee1782SOmar Sandoval 			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
751b4ee1782SOmar Sandoval 				steal_rbio(rbio, next);
752e66d8d5aSDavid Sterba 				start_async_work(next, read_rebuild_work);
753b4ee1782SOmar Sandoval 			} else if (next->operation == BTRFS_RBIO_WRITE) {
7544ae10b3aSChris Mason 				steal_rbio(rbio, next);
755cf6a4a75SDavid Sterba 				start_async_work(next, rmw_work);
7565a6ac9eaSMiao Xie 			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
7575a6ac9eaSMiao Xie 				steal_rbio(rbio, next);
758a81b747dSDavid Sterba 				start_async_work(next, scrub_parity_work);
7594ae10b3aSChris Mason 			}
76053b381b3SDavid Woodhouse 
76153b381b3SDavid Woodhouse 			goto done_nolock;
76253b381b3SDavid Woodhouse 		}
76353b381b3SDavid Woodhouse 	}
7644ae10b3aSChris Mason done:
76553b381b3SDavid Woodhouse 	spin_unlock(&rbio->bio_list_lock);
76653b381b3SDavid Woodhouse 	spin_unlock_irqrestore(&h->lock, flags);
76753b381b3SDavid Woodhouse 
76853b381b3SDavid Woodhouse done_nolock:
7694ae10b3aSChris Mason 	if (!keep_cache)
7704ae10b3aSChris Mason 		remove_rbio_from_cache(rbio);
77153b381b3SDavid Woodhouse }
77253b381b3SDavid Woodhouse 
77353b381b3SDavid Woodhouse static void __free_raid_bio(struct btrfs_raid_bio *rbio)
77453b381b3SDavid Woodhouse {
77553b381b3SDavid Woodhouse 	int i;
77653b381b3SDavid Woodhouse 
777dec95574SElena Reshetova 	if (!refcount_dec_and_test(&rbio->refs))
77853b381b3SDavid Woodhouse 		return;
77953b381b3SDavid Woodhouse 
7804ae10b3aSChris Mason 	WARN_ON(!list_empty(&rbio->stripe_cache));
78153b381b3SDavid Woodhouse 	WARN_ON(!list_empty(&rbio->hash_list));
78253b381b3SDavid Woodhouse 	WARN_ON(!bio_list_empty(&rbio->bio_list));
78353b381b3SDavid Woodhouse 
78453b381b3SDavid Woodhouse 	for (i = 0; i < rbio->nr_pages; i++) {
78553b381b3SDavid Woodhouse 		if (rbio->stripe_pages[i]) {
78653b381b3SDavid Woodhouse 			__free_page(rbio->stripe_pages[i]);
78753b381b3SDavid Woodhouse 			rbio->stripe_pages[i] = NULL;
78853b381b3SDavid Woodhouse 		}
78953b381b3SDavid Woodhouse 	}
790af8e2d1dSMiao Xie 
7914c664611SQu Wenruo 	btrfs_put_bioc(rbio->bioc);
79253b381b3SDavid Woodhouse 	kfree(rbio);
79353b381b3SDavid Woodhouse }
79453b381b3SDavid Woodhouse 
7957583d8d0SLiu Bo static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
79653b381b3SDavid Woodhouse {
7977583d8d0SLiu Bo 	struct bio *next;
7987583d8d0SLiu Bo 
7997583d8d0SLiu Bo 	while (cur) {
8007583d8d0SLiu Bo 		next = cur->bi_next;
8017583d8d0SLiu Bo 		cur->bi_next = NULL;
8027583d8d0SLiu Bo 		cur->bi_status = err;
8037583d8d0SLiu Bo 		bio_endio(cur);
8047583d8d0SLiu Bo 		cur = next;
8057583d8d0SLiu Bo 	}
80653b381b3SDavid Woodhouse }
80753b381b3SDavid Woodhouse 
80853b381b3SDavid Woodhouse /*
80953b381b3SDavid Woodhouse  * this frees the rbio and runs through all the bios in the
81053b381b3SDavid Woodhouse  * bio_list and calls end_io on them
81153b381b3SDavid Woodhouse  */
8124e4cbee9SChristoph Hellwig static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
81353b381b3SDavid Woodhouse {
81453b381b3SDavid Woodhouse 	struct bio *cur = bio_list_get(&rbio->bio_list);
8157583d8d0SLiu Bo 	struct bio *extra;
8164245215dSMiao Xie 
8174245215dSMiao Xie 	if (rbio->generic_bio_cnt)
8186a258d72SQu Wenruo 		btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
819bd8f7e62SQu Wenruo 	/*
820bd8f7e62SQu Wenruo 	 * Clear the data bitmap, as the rbio may be cached for later usage.
821bd8f7e62SQu Wenruo 	 * Do this before unlock_stripe() so there will be no new bio
822bd8f7e62SQu Wenruo 	 * for this rbio.
823bd8f7e62SQu Wenruo 	 */
824bd8f7e62SQu Wenruo 	bitmap_clear(&rbio->dbitmap, 0, rbio->stripe_nsectors);
8254245215dSMiao Xie 
8267583d8d0SLiu Bo 	/*
8277583d8d0SLiu Bo 	 * At this moment, rbio->bio_list is empty, however since rbio does not
8287583d8d0SLiu Bo 	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
8297583d8d0SLiu Bo 	 * hash list, rbio may be merged with others so that rbio->bio_list
8307583d8d0SLiu Bo 	 * becomes non-empty.
8317583d8d0SLiu Bo 	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any
8327583d8d0SLiu Bo 	 * more and we can call bio_endio() on all queued bios.
8337583d8d0SLiu Bo 	 */
8347583d8d0SLiu Bo 	unlock_stripe(rbio);
8357583d8d0SLiu Bo 	extra = bio_list_get(&rbio->bio_list);
8367583d8d0SLiu Bo 	__free_raid_bio(rbio);
83753b381b3SDavid Woodhouse 
8387583d8d0SLiu Bo 	rbio_endio_bio_list(cur, err);
8397583d8d0SLiu Bo 	if (extra)
8407583d8d0SLiu Bo 		rbio_endio_bio_list(extra, err);
84153b381b3SDavid Woodhouse }
84253b381b3SDavid Woodhouse 
84353b381b3SDavid Woodhouse /*
84453b381b3SDavid Woodhouse  * end io function used by finish_rmw.  When we finally
84553b381b3SDavid Woodhouse  * get here, we've written a full stripe
84653b381b3SDavid Woodhouse  */
8474246a0b6SChristoph Hellwig static void raid_write_end_io(struct bio *bio)
84853b381b3SDavid Woodhouse {
84953b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio = bio->bi_private;
8504e4cbee9SChristoph Hellwig 	blk_status_t err = bio->bi_status;
851a6111d11SZhao Lei 	int max_errors;
85253b381b3SDavid Woodhouse 
85353b381b3SDavid Woodhouse 	if (err)
85453b381b3SDavid Woodhouse 		fail_bio_stripe(rbio, bio);
85553b381b3SDavid Woodhouse 
85653b381b3SDavid Woodhouse 	bio_put(bio);
85753b381b3SDavid Woodhouse 
858b89e1b01SMiao Xie 	if (!atomic_dec_and_test(&rbio->stripes_pending))
85953b381b3SDavid Woodhouse 		return;
86053b381b3SDavid Woodhouse 
86158efbc9fSOmar Sandoval 	err = BLK_STS_OK;
86253b381b3SDavid Woodhouse 
86353b381b3SDavid Woodhouse 	/* OK, we have read all the stripes we need to. */
864a6111d11SZhao Lei 	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
8654c664611SQu Wenruo 		     0 : rbio->bioc->max_errors;
866a6111d11SZhao Lei 	if (atomic_read(&rbio->error) > max_errors)
8674e4cbee9SChristoph Hellwig 		err = BLK_STS_IOERR;
86853b381b3SDavid Woodhouse 
8694246a0b6SChristoph Hellwig 	rbio_orig_end_io(rbio, err);
87053b381b3SDavid Woodhouse }
87153b381b3SDavid Woodhouse 
8723e77605dSQu Wenruo /**
8733e77605dSQu Wenruo  * sector_in_rbio - Get a sector pointer specified by its @stripe_nr and @sector_nr
8743e77605dSQu Wenruo  *
8753e77605dSQu Wenruo  * @rbio:               The raid bio
8763e77605dSQu Wenruo  * @stripe_nr:          Stripe number, valid range [0, real_stripes)
8773e77605dSQu Wenruo  * @sector_nr:		Sector number inside the stripe,
8783e77605dSQu Wenruo  *			valid range [0, stripe_nsectors)
8793e77605dSQu Wenruo  * @bio_list_only:      Whether to use sectors inside the bio list only.
8803e77605dSQu Wenruo  *
8813e77605dSQu Wenruo  * The read/modify/write code wants to reuse the original bio page as much
8823e77605dSQu Wenruo  * as possible, and only use stripe_sectors as fallback.
8833e77605dSQu Wenruo  */
8843e77605dSQu Wenruo static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
8853e77605dSQu Wenruo 					 int stripe_nr, int sector_nr,
8863e77605dSQu Wenruo 					 bool bio_list_only)
8873e77605dSQu Wenruo {
8883e77605dSQu Wenruo 	struct sector_ptr *sector;
8893e77605dSQu Wenruo 	int index;
8903e77605dSQu Wenruo 
8913e77605dSQu Wenruo 	ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes);
8923e77605dSQu Wenruo 	ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
8933e77605dSQu Wenruo 
8943e77605dSQu Wenruo 	index = stripe_nr * rbio->stripe_nsectors + sector_nr;
8953e77605dSQu Wenruo 	ASSERT(index >= 0 && index < rbio->nr_sectors);
8963e77605dSQu Wenruo 
8973e77605dSQu Wenruo 	spin_lock_irq(&rbio->bio_list_lock);
8983e77605dSQu Wenruo 	sector = &rbio->bio_sectors[index];
8993e77605dSQu Wenruo 	if (sector->page || bio_list_only) {
9003e77605dSQu Wenruo 		/* Don't return sector without a valid page pointer */
9013e77605dSQu Wenruo 		if (!sector->page)
9023e77605dSQu Wenruo 			sector = NULL;
9033e77605dSQu Wenruo 		spin_unlock_irq(&rbio->bio_list_lock);
9043e77605dSQu Wenruo 		return sector;
9053e77605dSQu Wenruo 	}
9063e77605dSQu Wenruo 	spin_unlock_irq(&rbio->bio_list_lock);
9073e77605dSQu Wenruo 
9083e77605dSQu Wenruo 	return &rbio->stripe_sectors[index];
9093e77605dSQu Wenruo }
9103e77605dSQu Wenruo 
91153b381b3SDavid Woodhouse /*
91253b381b3SDavid Woodhouse  * Allocation and initial setup for the btrfs_raid_bio.  Note that
91353b381b3SDavid Woodhouse  * this does not allocate any pages for rbio->stripe_pages.
91453b381b3SDavid Woodhouse  */
9152ff7e61eSJeff Mahoney static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
9164c664611SQu Wenruo 					 struct btrfs_io_context *bioc,
917cc353a8bSQu Wenruo 					 u32 stripe_len)
91853b381b3SDavid Woodhouse {
919843de58bSQu Wenruo 	const unsigned int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
920843de58bSQu Wenruo 	const unsigned int stripe_npages = stripe_len >> PAGE_SHIFT;
921843de58bSQu Wenruo 	const unsigned int num_pages = stripe_npages * real_stripes;
92294efbe19SQu Wenruo 	const unsigned int stripe_nsectors = stripe_len >> fs_info->sectorsize_bits;
92394efbe19SQu Wenruo 	const unsigned int num_sectors = stripe_nsectors * real_stripes;
92453b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio;
92553b381b3SDavid Woodhouse 	int nr_data = 0;
92653b381b3SDavid Woodhouse 	void *p;
92753b381b3SDavid Woodhouse 
928843de58bSQu Wenruo 	ASSERT(IS_ALIGNED(stripe_len, PAGE_SIZE));
92994efbe19SQu Wenruo 	/* PAGE_SIZE must also be aligned to sectorsize for subpage support */
93094efbe19SQu Wenruo 	ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
931c67c68ebSQu Wenruo 	/*
932c67c68ebSQu Wenruo 	 * Our current stripe len should be fixed to 64k thus stripe_nsectors
933c67c68ebSQu Wenruo 	 * (at most 16) should be no larger than BITS_PER_LONG.
934c67c68ebSQu Wenruo 	 */
935c67c68ebSQu Wenruo 	ASSERT(stripe_nsectors <= BITS_PER_LONG);
936843de58bSQu Wenruo 
9371389053eSKees Cook 	rbio = kzalloc(sizeof(*rbio) +
9381389053eSKees Cook 		       sizeof(*rbio->stripe_pages) * num_pages +
93900425dd9SQu Wenruo 		       sizeof(*rbio->bio_sectors) * num_sectors +
940eb357060SQu Wenruo 		       sizeof(*rbio->stripe_sectors) * num_sectors +
941c67c68ebSQu Wenruo 		       sizeof(*rbio->finish_pointers) * real_stripes,
9421389053eSKees Cook 		       GFP_NOFS);
943af8e2d1dSMiao Xie 	if (!rbio)
94453b381b3SDavid Woodhouse 		return ERR_PTR(-ENOMEM);
94553b381b3SDavid Woodhouse 
94653b381b3SDavid Woodhouse 	bio_list_init(&rbio->bio_list);
94753b381b3SDavid Woodhouse 	INIT_LIST_HEAD(&rbio->plug_list);
94853b381b3SDavid Woodhouse 	spin_lock_init(&rbio->bio_list_lock);
9494ae10b3aSChris Mason 	INIT_LIST_HEAD(&rbio->stripe_cache);
95053b381b3SDavid Woodhouse 	INIT_LIST_HEAD(&rbio->hash_list);
9514c664611SQu Wenruo 	rbio->bioc = bioc;
95253b381b3SDavid Woodhouse 	rbio->stripe_len = stripe_len;
95353b381b3SDavid Woodhouse 	rbio->nr_pages = num_pages;
95494efbe19SQu Wenruo 	rbio->nr_sectors = num_sectors;
9552c8cdd6eSMiao Xie 	rbio->real_stripes = real_stripes;
9565a6ac9eaSMiao Xie 	rbio->stripe_npages = stripe_npages;
95794efbe19SQu Wenruo 	rbio->stripe_nsectors = stripe_nsectors;
95853b381b3SDavid Woodhouse 	rbio->faila = -1;
95953b381b3SDavid Woodhouse 	rbio->failb = -1;
960dec95574SElena Reshetova 	refcount_set(&rbio->refs, 1);
961b89e1b01SMiao Xie 	atomic_set(&rbio->error, 0);
962b89e1b01SMiao Xie 	atomic_set(&rbio->stripes_pending, 0);
96353b381b3SDavid Woodhouse 
96453b381b3SDavid Woodhouse 	/*
965ac26df8bSQu Wenruo 	 * The stripe_pages, bio_sectors, etc arrays point to the extra memory
966ac26df8bSQu Wenruo 	 * we allocated past the end of the rbio.
96753b381b3SDavid Woodhouse 	 */
96853b381b3SDavid Woodhouse 	p = rbio + 1;
9691389053eSKees Cook #define CONSUME_ALLOC(ptr, count)	do {				\
9701389053eSKees Cook 		ptr = p;						\
9711389053eSKees Cook 		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
9721389053eSKees Cook 	} while (0)
9731389053eSKees Cook 	CONSUME_ALLOC(rbio->stripe_pages, num_pages);
97400425dd9SQu Wenruo 	CONSUME_ALLOC(rbio->bio_sectors, num_sectors);
975eb357060SQu Wenruo 	CONSUME_ALLOC(rbio->stripe_sectors, num_sectors);
9761389053eSKees Cook 	CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
9771389053eSKees Cook #undef  CONSUME_ALLOC
97853b381b3SDavid Woodhouse 
9794c664611SQu Wenruo 	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
98010f11900SZhao Lei 		nr_data = real_stripes - 1;
9814c664611SQu Wenruo 	else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
9822c8cdd6eSMiao Xie 		nr_data = real_stripes - 2;
98353b381b3SDavid Woodhouse 	else
98410f11900SZhao Lei 		BUG();
98553b381b3SDavid Woodhouse 
98653b381b3SDavid Woodhouse 	rbio->nr_data = nr_data;
98753b381b3SDavid Woodhouse 	return rbio;
98853b381b3SDavid Woodhouse }
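
/*
 * Example of the sizing above (illustrative, assuming 4K sectorsize and
 * 4K pages): a RAID6 bioc with 4 stripes, no replace target and a 64K
 * stripe_len gives real_stripes == 4, stripe_npages == 16 and
 * stripe_nsectors == 16, hence num_pages == 64 and num_sectors == 64,
 * all carved out of the single allocation trailing the struct via the
 * CONSUME_ALLOC() markers above.
 */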
98953b381b3SDavid Woodhouse 
99053b381b3SDavid Woodhouse /* allocate pages for all the stripes in the bio, including parity */
99153b381b3SDavid Woodhouse static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
99253b381b3SDavid Woodhouse {
993eb357060SQu Wenruo 	int ret;
994eb357060SQu Wenruo 
995eb357060SQu Wenruo 	ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
996eb357060SQu Wenruo 	if (ret < 0)
997eb357060SQu Wenruo 		return ret;
998eb357060SQu Wenruo 	/* Mapping all sectors */
999eb357060SQu Wenruo 	index_stripe_sectors(rbio);
1000eb357060SQu Wenruo 	return 0;
100153b381b3SDavid Woodhouse }
100253b381b3SDavid Woodhouse 
1003b7178a5fSZhao Lei /* only allocate pages for p/q stripes */
100453b381b3SDavid Woodhouse static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
100553b381b3SDavid Woodhouse {
1006f77183dcSQu Wenruo 	const int data_pages = rbio->nr_data * rbio->stripe_npages;
1007eb357060SQu Wenruo 	int ret;
100853b381b3SDavid Woodhouse 
1009eb357060SQu Wenruo 	ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
1010dd137dd1SSweet Tea Dorminy 				     rbio->stripe_pages + data_pages);
1011eb357060SQu Wenruo 	if (ret < 0)
1012eb357060SQu Wenruo 		return ret;
1013eb357060SQu Wenruo 
1014eb357060SQu Wenruo 	index_stripe_sectors(rbio);
1015eb357060SQu Wenruo 	return 0;
101653b381b3SDavid Woodhouse }
101753b381b3SDavid Woodhouse 
101853b381b3SDavid Woodhouse /*
10193e77605dSQu Wenruo  * Add a single sector @sector into our list of bios for IO.
10203e77605dSQu Wenruo  *
10213e77605dSQu Wenruo  * Return 0 if everything went well.
10223e77605dSQu Wenruo  * Return <0 for error.
102353b381b3SDavid Woodhouse  */
10243e77605dSQu Wenruo static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
102553b381b3SDavid Woodhouse 			      struct bio_list *bio_list,
10263e77605dSQu Wenruo 			      struct sector_ptr *sector,
10273e77605dSQu Wenruo 			      unsigned int stripe_nr,
10283e77605dSQu Wenruo 			      unsigned int sector_nr,
1029e01bf588SChristoph Hellwig 			      unsigned long bio_max_len,
1030e01bf588SChristoph Hellwig 			      unsigned int opf)
103153b381b3SDavid Woodhouse {
10323e77605dSQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
103353b381b3SDavid Woodhouse 	struct bio *last = bio_list->tail;
103453b381b3SDavid Woodhouse 	int ret;
103553b381b3SDavid Woodhouse 	struct bio *bio;
10364c664611SQu Wenruo 	struct btrfs_io_stripe *stripe;
103753b381b3SDavid Woodhouse 	u64 disk_start;
103853b381b3SDavid Woodhouse 
10393e77605dSQu Wenruo 	/*
10403e77605dSQu Wenruo 	 * Note: here stripe_nr has taken device replace into consideration,
10413e77605dSQu Wenruo 	 * thus it can be larger than rbio->real_stripes.
10423e77605dSQu Wenruo 	 * So here we check against bioc->num_stripes, not rbio->real_stripes.
10433e77605dSQu Wenruo 	 */
10443e77605dSQu Wenruo 	ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes);
10453e77605dSQu Wenruo 	ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
10463e77605dSQu Wenruo 	ASSERT(sector->page);
10473e77605dSQu Wenruo 
10484c664611SQu Wenruo 	stripe = &rbio->bioc->stripes[stripe_nr];
10493e77605dSQu Wenruo 	disk_start = stripe->physical + sector_nr * sectorsize;
105053b381b3SDavid Woodhouse 
105153b381b3SDavid Woodhouse 	/* if the device is missing, just fail this stripe */
105253b381b3SDavid Woodhouse 	if (!stripe->dev->bdev)
105353b381b3SDavid Woodhouse 		return fail_rbio_index(rbio, stripe_nr);
105453b381b3SDavid Woodhouse 
105553b381b3SDavid Woodhouse 	/* see if we can add this page onto our existing bio */
105653b381b3SDavid Woodhouse 	if (last) {
10571201b58bSDavid Sterba 		u64 last_end = last->bi_iter.bi_sector << 9;
10584f024f37SKent Overstreet 		last_end += last->bi_iter.bi_size;
105953b381b3SDavid Woodhouse 
106053b381b3SDavid Woodhouse 		/*
106153b381b3SDavid Woodhouse 		 * we can't merge these if they are from different
106253b381b3SDavid Woodhouse 		 * devices or if they are not contiguous
106353b381b3SDavid Woodhouse 		 */
1064f90ae76aSNikolay Borisov 		if (last_end == disk_start && !last->bi_status &&
1065309dca30SChristoph Hellwig 		    last->bi_bdev == stripe->dev->bdev) {
10663e77605dSQu Wenruo 			ret = bio_add_page(last, sector->page, sectorsize,
10673e77605dSQu Wenruo 					   sector->pgoff);
10683e77605dSQu Wenruo 			if (ret == sectorsize)
106953b381b3SDavid Woodhouse 				return 0;
107053b381b3SDavid Woodhouse 		}
107153b381b3SDavid Woodhouse 	}
107253b381b3SDavid Woodhouse 
107353b381b3SDavid Woodhouse 	/* put a new bio on the list */
1074e1b4b44eSChristoph Hellwig 	bio = bio_alloc(stripe->dev->bdev, max(bio_max_len >> PAGE_SHIFT, 1UL),
1075e1b4b44eSChristoph Hellwig 			opf, GFP_NOFS);
10764f024f37SKent Overstreet 	bio->bi_iter.bi_sector = disk_start >> 9;
1077e01bf588SChristoph Hellwig 	bio->bi_private = rbio;
107853b381b3SDavid Woodhouse 
10793e77605dSQu Wenruo 	bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
108053b381b3SDavid Woodhouse 	bio_list_add(bio_list, bio);
108153b381b3SDavid Woodhouse 	return 0;
108253b381b3SDavid Woodhouse }
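
/*
 * Example of the placement math above (illustrative, assuming a 4K
 * sectorsize): sector_nr 3 of a stripe whose bioc stripe starts at
 * physical byte P is submitted at disk_start == P + 12K, and the bio's
 * bi_sector is that value shifted down by 9 (512-byte units).
 */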
108353b381b3SDavid Woodhouse 
108453b381b3SDavid Woodhouse /*
108553b381b3SDavid Woodhouse  * while we're doing the read/modify/write cycle, we could
108653b381b3SDavid Woodhouse  * have errors in reading pages off the disk.  This checks
108753b381b3SDavid Woodhouse  * for errors, and if we're not able to read a page it'll
108853b381b3SDavid Woodhouse  * trigger parity reconstruction.  The rmw will be finished
108953b381b3SDavid Woodhouse  * after we've reconstructed the failed stripes.
109053b381b3SDavid Woodhouse  */
109153b381b3SDavid Woodhouse static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
109253b381b3SDavid Woodhouse {
109353b381b3SDavid Woodhouse 	if (rbio->faila >= 0 || rbio->failb >= 0) {
10942c8cdd6eSMiao Xie 		BUG_ON(rbio->faila == rbio->real_stripes - 1);
109553b381b3SDavid Woodhouse 		__raid56_parity_recover(rbio);
109653b381b3SDavid Woodhouse 	} else {
109753b381b3SDavid Woodhouse 		finish_rmw(rbio);
109853b381b3SDavid Woodhouse 	}
109953b381b3SDavid Woodhouse }
110053b381b3SDavid Woodhouse 
110100425dd9SQu Wenruo static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
110200425dd9SQu Wenruo {
110300425dd9SQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
110400425dd9SQu Wenruo 	struct bio_vec bvec;
110500425dd9SQu Wenruo 	struct bvec_iter iter;
110600425dd9SQu Wenruo 	u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
110700425dd9SQu Wenruo 		     rbio->bioc->raid_map[0];
110800425dd9SQu Wenruo 
110900425dd9SQu Wenruo 	bio_for_each_segment(bvec, bio, iter) {
111000425dd9SQu Wenruo 		u32 bvec_offset;
111100425dd9SQu Wenruo 
111200425dd9SQu Wenruo 		for (bvec_offset = 0; bvec_offset < bvec.bv_len;
111300425dd9SQu Wenruo 		     bvec_offset += sectorsize, offset += sectorsize) {
111400425dd9SQu Wenruo 			int index = offset / sectorsize;
111500425dd9SQu Wenruo 			struct sector_ptr *sector = &rbio->bio_sectors[index];
111600425dd9SQu Wenruo 
111700425dd9SQu Wenruo 			sector->page = bvec.bv_page;
111800425dd9SQu Wenruo 			sector->pgoff = bvec.bv_offset + bvec_offset;
111900425dd9SQu Wenruo 			ASSERT(sector->pgoff < PAGE_SIZE);
112000425dd9SQu Wenruo 		}
112100425dd9SQu Wenruo 	}
112200425dd9SQu Wenruo }
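
/*
 * A minimal sketch of the index math behind index_one_bio().
 * demo_bio_sector_index() is a hypothetical helper, not used by the code: a
 * logical address inside the full stripe maps to a slot in
 * rbio->bio_sectors[] by taking its byte offset from the start of the full
 * stripe and dividing by the sector size.
 */
static inline unsigned int demo_bio_sector_index(u64 logical,
						 u64 full_stripe_start,
						 u32 sectorsize)
{
	/* The offset inside one full stripe always fits in 32 bits. */
	u32 offset = (u32)(logical - full_stripe_start);

	return offset / sectorsize;
}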
112300425dd9SQu Wenruo 
112453b381b3SDavid Woodhouse /*
112553b381b3SDavid Woodhouse  * helper function to walk our bio list and populate the bio_sectors array
112653b381b3SDavid Woodhouse  * with the result.  This seems expensive, but it is faster than constantly
112753b381b3SDavid Woodhouse  * searching through the bio list as we set up the IO in finish_rmw or stripe
112853b381b3SDavid Woodhouse  * reconstruction.
112953b381b3SDavid Woodhouse  *
113053b381b3SDavid Woodhouse  * This must be called before you trust the answers from sector_in_rbio()
113153b381b3SDavid Woodhouse  */
113253b381b3SDavid Woodhouse static void index_rbio_pages(struct btrfs_raid_bio *rbio)
113353b381b3SDavid Woodhouse {
113453b381b3SDavid Woodhouse 	struct bio *bio;
113553b381b3SDavid Woodhouse 
113653b381b3SDavid Woodhouse 	spin_lock_irq(&rbio->bio_list_lock);
113700425dd9SQu Wenruo 	bio_list_for_each(bio, &rbio->bio_list)
113800425dd9SQu Wenruo 		index_one_bio(rbio, bio);
113900425dd9SQu Wenruo 
114053b381b3SDavid Woodhouse 	spin_unlock_irq(&rbio->bio_list_lock);
114153b381b3SDavid Woodhouse }
114253b381b3SDavid Woodhouse 
1143b8bea09aSQu Wenruo static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio,
1144b8bea09aSQu Wenruo 			       struct raid56_bio_trace_info *trace_info)
1145b8bea09aSQu Wenruo {
1146b8bea09aSQu Wenruo 	const struct btrfs_io_context *bioc = rbio->bioc;
1147b8bea09aSQu Wenruo 	int i;
1148b8bea09aSQu Wenruo 
1149b8bea09aSQu Wenruo 	ASSERT(bioc);
1150b8bea09aSQu Wenruo 
1151b8bea09aSQu Wenruo 	/* We rely on bio->bi_bdev to find the stripe number. */
1152b8bea09aSQu Wenruo 	if (!bio->bi_bdev)
1153b8bea09aSQu Wenruo 		goto not_found;
1154b8bea09aSQu Wenruo 
1155b8bea09aSQu Wenruo 	for (i = 0; i < bioc->num_stripes; i++) {
1156b8bea09aSQu Wenruo 		if (bio->bi_bdev != bioc->stripes[i].dev->bdev)
1157b8bea09aSQu Wenruo 			continue;
1158b8bea09aSQu Wenruo 		trace_info->stripe_nr = i;
1159b8bea09aSQu Wenruo 		trace_info->devid = bioc->stripes[i].dev->devid;
1160b8bea09aSQu Wenruo 		trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1161b8bea09aSQu Wenruo 				     bioc->stripes[i].physical;
1162b8bea09aSQu Wenruo 		return;
1163b8bea09aSQu Wenruo 	}
1164b8bea09aSQu Wenruo 
1165b8bea09aSQu Wenruo not_found:
1166b8bea09aSQu Wenruo 	trace_info->devid = -1;
1167b8bea09aSQu Wenruo 	trace_info->offset = -1;
1168b8bea09aSQu Wenruo 	trace_info->stripe_nr = -1;
1169b8bea09aSQu Wenruo }
1170b8bea09aSQu Wenruo 
117153b381b3SDavid Woodhouse /*
117253b381b3SDavid Woodhouse  * this is called from one of two situations.  We either
117353b381b3SDavid Woodhouse  * have a full stripe from the higher layers, or we've read all
117453b381b3SDavid Woodhouse  * the missing bits off disk.
117553b381b3SDavid Woodhouse  *
117653b381b3SDavid Woodhouse  * This will calculate the parity and then send down any
117753b381b3SDavid Woodhouse  * changed blocks.
117853b381b3SDavid Woodhouse  */
117953b381b3SDavid Woodhouse static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
118053b381b3SDavid Woodhouse {
11814c664611SQu Wenruo 	struct btrfs_io_context *bioc = rbio->bioc;
11821145059aSQu Wenruo 	const u32 sectorsize = bioc->fs_info->sectorsize;
11831389053eSKees Cook 	void **pointers = rbio->finish_pointers;
118453b381b3SDavid Woodhouse 	int nr_data = rbio->nr_data;
118536920044SQu Wenruo 	/* The total sector number inside the full stripe. */
118636920044SQu Wenruo 	int total_sector_nr;
118753b381b3SDavid Woodhouse 	int stripe;
118836920044SQu Wenruo 	/* Sector number inside a stripe. */
11893e77605dSQu Wenruo 	int sectornr;
1190c17af965SDavid Sterba 	bool has_qstripe;
119153b381b3SDavid Woodhouse 	struct bio_list bio_list;
119253b381b3SDavid Woodhouse 	struct bio *bio;
119353b381b3SDavid Woodhouse 	int ret;
119453b381b3SDavid Woodhouse 
119553b381b3SDavid Woodhouse 	bio_list_init(&bio_list);
119653b381b3SDavid Woodhouse 
1197c17af965SDavid Sterba 	if (rbio->real_stripes - rbio->nr_data == 1)
1198c17af965SDavid Sterba 		has_qstripe = false;
1199c17af965SDavid Sterba 	else if (rbio->real_stripes - rbio->nr_data == 2)
1200c17af965SDavid Sterba 		has_qstripe = true;
1201c17af965SDavid Sterba 	else
120253b381b3SDavid Woodhouse 		BUG();
120353b381b3SDavid Woodhouse 
1204bd8f7e62SQu Wenruo 	/* We should have at least one data sector. */
1205bd8f7e62SQu Wenruo 	ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));
1206bd8f7e62SQu Wenruo 
120753b381b3SDavid Woodhouse 	/* at this point we either have a full stripe,
120853b381b3SDavid Woodhouse 	 * or we've read the full stripe from the drive.
120953b381b3SDavid Woodhouse 	 * recalculate the parity and write the new results.
121053b381b3SDavid Woodhouse 	 *
121153b381b3SDavid Woodhouse 	 * We're not allowed to add any new bios to the
121253b381b3SDavid Woodhouse 	 * bio list here, anyone else that wants to
121353b381b3SDavid Woodhouse 	 * change this stripe needs to do their own rmw.
121453b381b3SDavid Woodhouse 	 */
121553b381b3SDavid Woodhouse 	spin_lock_irq(&rbio->bio_list_lock);
121653b381b3SDavid Woodhouse 	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
121753b381b3SDavid Woodhouse 	spin_unlock_irq(&rbio->bio_list_lock);
121853b381b3SDavid Woodhouse 
1219b89e1b01SMiao Xie 	atomic_set(&rbio->error, 0);
122053b381b3SDavid Woodhouse 
122153b381b3SDavid Woodhouse 	/*
122253b381b3SDavid Woodhouse 	 * now that we've set rmw_locked, run through the
122353b381b3SDavid Woodhouse 	 * bio list one last time and map the page pointers
12244ae10b3aSChris Mason 	 *
12254ae10b3aSChris Mason 	 * We don't cache full rbios because we're assuming
12264ae10b3aSChris Mason 	 * the higher layers are unlikely to use this area of
12274ae10b3aSChris Mason 	 * the disk again soon.  If they do use it again,
12284ae10b3aSChris Mason 	 * hopefully they will send another full bio.
122953b381b3SDavid Woodhouse 	 */
123053b381b3SDavid Woodhouse 	index_rbio_pages(rbio);
12314ae10b3aSChris Mason 	if (!rbio_is_full(rbio))
12324ae10b3aSChris Mason 		cache_rbio_pages(rbio);
12334ae10b3aSChris Mason 	else
12344ae10b3aSChris Mason 		clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
123553b381b3SDavid Woodhouse 
12363e77605dSQu Wenruo 	for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
12371145059aSQu Wenruo 		struct sector_ptr *sector;
12381145059aSQu Wenruo 
12391145059aSQu Wenruo 		/* First collect one sector from each data stripe */
124053b381b3SDavid Woodhouse 		for (stripe = 0; stripe < nr_data; stripe++) {
12411145059aSQu Wenruo 			sector = sector_in_rbio(rbio, stripe, sectornr, 0);
12421145059aSQu Wenruo 			pointers[stripe] = kmap_local_page(sector->page) +
12431145059aSQu Wenruo 					   sector->pgoff;
124453b381b3SDavid Woodhouse 		}
124553b381b3SDavid Woodhouse 
12461145059aSQu Wenruo 		/* Then add the parity stripe */
12471145059aSQu Wenruo 		sector = rbio_pstripe_sector(rbio, sectornr);
12481145059aSQu Wenruo 		sector->uptodate = 1;
12491145059aSQu Wenruo 		pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
125053b381b3SDavid Woodhouse 
1251c17af965SDavid Sterba 		if (has_qstripe) {
125253b381b3SDavid Woodhouse 			/*
12531145059aSQu Wenruo 			 * RAID6, add the qstripe and call the library function
12541145059aSQu Wenruo 			 * to fill in our p/q
125553b381b3SDavid Woodhouse 			 */
12561145059aSQu Wenruo 			sector = rbio_qstripe_sector(rbio, sectornr);
12571145059aSQu Wenruo 			sector->uptodate = 1;
12581145059aSQu Wenruo 			pointers[stripe++] = kmap_local_page(sector->page) +
12591145059aSQu Wenruo 					     sector->pgoff;
126053b381b3SDavid Woodhouse 
12611145059aSQu Wenruo 			raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
126253b381b3SDavid Woodhouse 						pointers);
126353b381b3SDavid Woodhouse 		} else {
126453b381b3SDavid Woodhouse 			/* raid5 */
12651145059aSQu Wenruo 			memcpy(pointers[nr_data], pointers[0], sectorsize);
12661145059aSQu Wenruo 			run_xor(pointers + 1, nr_data - 1, sectorsize);
126753b381b3SDavid Woodhouse 		}
126894a0b58dSIra Weiny 		for (stripe = stripe - 1; stripe >= 0; stripe--)
126994a0b58dSIra Weiny 			kunmap_local(pointers[stripe]);
127053b381b3SDavid Woodhouse 	}
127153b381b3SDavid Woodhouse 
127253b381b3SDavid Woodhouse 	/*
127336920044SQu Wenruo 	 * Start writing.  Make bios for everything from the higher layers (the
127436920044SQu Wenruo 	 * bio_list in our rbio) and our P/Q.  Ignore everything else.
127553b381b3SDavid Woodhouse 	 */
127636920044SQu Wenruo 	for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
127736920044SQu Wenruo 	     total_sector_nr++) {
12783e77605dSQu Wenruo 		struct sector_ptr *sector;
12793e77605dSQu Wenruo 
128036920044SQu Wenruo 		stripe = total_sector_nr / rbio->stripe_nsectors;
128136920044SQu Wenruo 		sectornr = total_sector_nr % rbio->stripe_nsectors;
128236920044SQu Wenruo 
1283bd8f7e62SQu Wenruo 		/* This vertical stripe has no data, skip it. */
1284bd8f7e62SQu Wenruo 		if (!test_bit(sectornr, &rbio->dbitmap))
1285bd8f7e62SQu Wenruo 			continue;
1286bd8f7e62SQu Wenruo 
128753b381b3SDavid Woodhouse 		if (stripe < rbio->nr_data) {
12883e77605dSQu Wenruo 			sector = sector_in_rbio(rbio, stripe, sectornr, 1);
12893e77605dSQu Wenruo 			if (!sector)
129053b381b3SDavid Woodhouse 				continue;
129153b381b3SDavid Woodhouse 		} else {
12923e77605dSQu Wenruo 			sector = rbio_stripe_sector(rbio, stripe, sectornr);
129353b381b3SDavid Woodhouse 		}
129453b381b3SDavid Woodhouse 
12953e77605dSQu Wenruo 		ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
12963e77605dSQu Wenruo 					 sectornr, rbio->stripe_len,
1297e01bf588SChristoph Hellwig 					 REQ_OP_WRITE);
129853b381b3SDavid Woodhouse 		if (ret)
129953b381b3SDavid Woodhouse 			goto cleanup;
130053b381b3SDavid Woodhouse 	}
130153b381b3SDavid Woodhouse 
13024c664611SQu Wenruo 	if (likely(!bioc->num_tgtdevs))
13032c8cdd6eSMiao Xie 		goto write_data;
13042c8cdd6eSMiao Xie 
130536920044SQu Wenruo 	for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
130636920044SQu Wenruo 	     total_sector_nr++) {
13073e77605dSQu Wenruo 		struct sector_ptr *sector;
13083e77605dSQu Wenruo 
130936920044SQu Wenruo 		stripe = total_sector_nr / rbio->stripe_nsectors;
131036920044SQu Wenruo 		sectornr = total_sector_nr % rbio->stripe_nsectors;
131136920044SQu Wenruo 
131236920044SQu Wenruo 		if (!bioc->tgtdev_map[stripe]) {
131336920044SQu Wenruo 			/*
131436920044SQu Wenruo 			 * We can skip the whole stripe completely; note that
131536920044SQu Wenruo 			 * total_sector_nr will be increased by one anyway.
131636920044SQu Wenruo 			 */
131736920044SQu Wenruo 			ASSERT(sectornr == 0);
131836920044SQu Wenruo 			total_sector_nr += rbio->stripe_nsectors - 1;
131936920044SQu Wenruo 			continue;
132036920044SQu Wenruo 		}
132136920044SQu Wenruo 
1322bd8f7e62SQu Wenruo 		/* This vertical stripe has no data, skip it. */
1323bd8f7e62SQu Wenruo 		if (!test_bit(sectornr, &rbio->dbitmap))
1324bd8f7e62SQu Wenruo 			continue;
1325bd8f7e62SQu Wenruo 
13262c8cdd6eSMiao Xie 		if (stripe < rbio->nr_data) {
13273e77605dSQu Wenruo 			sector = sector_in_rbio(rbio, stripe, sectornr, 1);
13283e77605dSQu Wenruo 			if (!sector)
13292c8cdd6eSMiao Xie 				continue;
13302c8cdd6eSMiao Xie 		} else {
13313e77605dSQu Wenruo 			sector = rbio_stripe_sector(rbio, stripe, sectornr);
13322c8cdd6eSMiao Xie 		}
13332c8cdd6eSMiao Xie 
13343e77605dSQu Wenruo 		ret = rbio_add_io_sector(rbio, &bio_list, sector,
13354c664611SQu Wenruo 					 rbio->bioc->tgtdev_map[stripe],
13363e77605dSQu Wenruo 					 sectornr, rbio->stripe_len,
1337e01bf588SChristoph Hellwig 					 REQ_OP_WRITE);
13382c8cdd6eSMiao Xie 		if (ret)
13392c8cdd6eSMiao Xie 			goto cleanup;
13402c8cdd6eSMiao Xie 	}
13412c8cdd6eSMiao Xie 
13422c8cdd6eSMiao Xie write_data:
1343b89e1b01SMiao Xie 	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
1344b89e1b01SMiao Xie 	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
134553b381b3SDavid Woodhouse 
1346bf28a605SNikolay Borisov 	while ((bio = bio_list_pop(&bio_list))) {
134753b381b3SDavid Woodhouse 		bio->bi_end_io = raid_write_end_io;
13484e49ea4aSMike Christie 
1349b8bea09aSQu Wenruo 		if (trace_raid56_write_stripe_enabled()) {
1350b8bea09aSQu Wenruo 			struct raid56_bio_trace_info trace_info = { 0 };
1351b8bea09aSQu Wenruo 
1352b8bea09aSQu Wenruo 			bio_get_trace_info(rbio, bio, &trace_info);
1353b8bea09aSQu Wenruo 			trace_raid56_write_stripe(rbio, bio, &trace_info);
1354b8bea09aSQu Wenruo 		}
13554e49ea4aSMike Christie 		submit_bio(bio);
135653b381b3SDavid Woodhouse 	}
135753b381b3SDavid Woodhouse 	return;
135853b381b3SDavid Woodhouse 
135953b381b3SDavid Woodhouse cleanup:
136058efbc9fSOmar Sandoval 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
1361785884fcSLiu Bo 
1362785884fcSLiu Bo 	while ((bio = bio_list_pop(&bio_list)))
1363785884fcSLiu Bo 		bio_put(bio);
136453b381b3SDavid Woodhouse }
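
/*
 * A minimal sketch of the RAID5 parity math used by finish_rmw() above (the
 * memcpy() + run_xor() pair): P is the byte-wise XOR of one sector from each
 * data stripe.  demo_raid5_gen_parity() is a hypothetical helper for
 * illustration only; the real code operates on the kmap'd pointers array and
 * hands the RAID6 case to raid6_call.gen_syndrome() instead.
 */
static inline void demo_raid5_gen_parity(void **data, void *parity,
					 int nr_data, u32 sectorsize)
{
	u8 *p = parity;
	int d;
	u32 i;

	memcpy(p, data[0], sectorsize);
	for (d = 1; d < nr_data; d++) {
		const u8 *in = data[d];

		for (i = 0; i < sectorsize; i++)
			p[i] ^= in[i];
	}
}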
136553b381b3SDavid Woodhouse 
136653b381b3SDavid Woodhouse /*
136753b381b3SDavid Woodhouse  * helper to find the stripe number for a given bio.  Used to figure out which
136853b381b3SDavid Woodhouse  * stripe has failed.  This expects the bio to correspond to a physical disk,
136953b381b3SDavid Woodhouse  * so it looks up based on physical sector numbers.
137053b381b3SDavid Woodhouse  */
137153b381b3SDavid Woodhouse static int find_bio_stripe(struct btrfs_raid_bio *rbio,
137253b381b3SDavid Woodhouse 			   struct bio *bio)
137353b381b3SDavid Woodhouse {
13744f024f37SKent Overstreet 	u64 physical = bio->bi_iter.bi_sector;
137553b381b3SDavid Woodhouse 	int i;
13764c664611SQu Wenruo 	struct btrfs_io_stripe *stripe;
137753b381b3SDavid Woodhouse 
137853b381b3SDavid Woodhouse 	physical <<= 9;
137953b381b3SDavid Woodhouse 
13804c664611SQu Wenruo 	for (i = 0; i < rbio->bioc->num_stripes; i++) {
13814c664611SQu Wenruo 		stripe = &rbio->bioc->stripes[i];
138283025863SNikolay Borisov 		if (in_range(physical, stripe->physical, rbio->stripe_len) &&
1383309dca30SChristoph Hellwig 		    stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
138453b381b3SDavid Woodhouse 			return i;
138553b381b3SDavid Woodhouse 		}
138653b381b3SDavid Woodhouse 	}
138753b381b3SDavid Woodhouse 	return -1;
138853b381b3SDavid Woodhouse }
138953b381b3SDavid Woodhouse 
139053b381b3SDavid Woodhouse /*
139153b381b3SDavid Woodhouse  * helper to find the stripe number for a given
139253b381b3SDavid Woodhouse  * bio (before mapping).  Used to figure out which stripe has
139353b381b3SDavid Woodhouse  * failed.  This looks up based on logical block numbers.
139453b381b3SDavid Woodhouse  */
139553b381b3SDavid Woodhouse static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
139653b381b3SDavid Woodhouse 				   struct bio *bio)
139753b381b3SDavid Woodhouse {
13981201b58bSDavid Sterba 	u64 logical = bio->bi_iter.bi_sector << 9;
139953b381b3SDavid Woodhouse 	int i;
140053b381b3SDavid Woodhouse 
140153b381b3SDavid Woodhouse 	for (i = 0; i < rbio->nr_data; i++) {
14024c664611SQu Wenruo 		u64 stripe_start = rbio->bioc->raid_map[i];
140383025863SNikolay Borisov 
140483025863SNikolay Borisov 		if (in_range(logical, stripe_start, rbio->stripe_len))
140553b381b3SDavid Woodhouse 			return i;
140653b381b3SDavid Woodhouse 	}
140753b381b3SDavid Woodhouse 	return -1;
140853b381b3SDavid Woodhouse }
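
/*
 * Hypothetical illustration, not used by the code: both find_bio_stripe()
 * and find_logical_bio_stripe() boil down to the same range test, i.e.
 * in_range(addr, start, stripe_len) is true when
 * start <= addr < start + stripe_len.
 */
static inline bool demo_addr_in_stripe(u64 addr, u64 stripe_start,
				       u32 stripe_len)
{
	return addr >= stripe_start && addr < stripe_start + stripe_len;
}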
140953b381b3SDavid Woodhouse 
141053b381b3SDavid Woodhouse /*
141153b381b3SDavid Woodhouse  * returns -EIO if we had too many failures
141253b381b3SDavid Woodhouse  */
141353b381b3SDavid Woodhouse static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
141453b381b3SDavid Woodhouse {
141553b381b3SDavid Woodhouse 	unsigned long flags;
141653b381b3SDavid Woodhouse 	int ret = 0;
141753b381b3SDavid Woodhouse 
141853b381b3SDavid Woodhouse 	spin_lock_irqsave(&rbio->bio_list_lock, flags);
141953b381b3SDavid Woodhouse 
142053b381b3SDavid Woodhouse 	/* we already know this stripe is bad, move on */
142153b381b3SDavid Woodhouse 	if (rbio->faila == failed || rbio->failb == failed)
142253b381b3SDavid Woodhouse 		goto out;
142353b381b3SDavid Woodhouse 
142453b381b3SDavid Woodhouse 	if (rbio->faila == -1) {
142553b381b3SDavid Woodhouse 		/* first failure on this rbio */
142653b381b3SDavid Woodhouse 		rbio->faila = failed;
1427b89e1b01SMiao Xie 		atomic_inc(&rbio->error);
142853b381b3SDavid Woodhouse 	} else if (rbio->failb == -1) {
142953b381b3SDavid Woodhouse 		/* second failure on this rbio */
143053b381b3SDavid Woodhouse 		rbio->failb = failed;
1431b89e1b01SMiao Xie 		atomic_inc(&rbio->error);
143253b381b3SDavid Woodhouse 	} else {
143353b381b3SDavid Woodhouse 		ret = -EIO;
143453b381b3SDavid Woodhouse 	}
143553b381b3SDavid Woodhouse out:
143653b381b3SDavid Woodhouse 	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
143753b381b3SDavid Woodhouse 
143853b381b3SDavid Woodhouse 	return ret;
143953b381b3SDavid Woodhouse }
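
/*
 * Sketch for illustration only: demo_max_tolerated_failures() is a
 * hypothetical helper that states the failure budget the faila/failb
 * accounting above is built around, assuming map_type is one of the RAID56
 * profiles.  A RAID5 full stripe can tolerate one lost stripe and RAID6 two,
 * which is why recording a third failure makes fail_rbio_index() return -EIO.
 */
static inline int demo_max_tolerated_failures(u64 map_type)
{
	return (map_type & BTRFS_BLOCK_GROUP_RAID6) ? 2 : 1;
}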
144053b381b3SDavid Woodhouse 
144153b381b3SDavid Woodhouse /*
144253b381b3SDavid Woodhouse  * helper to fail a stripe based on a physical disk
144353b381b3SDavid Woodhouse  * bio.
144453b381b3SDavid Woodhouse  */
144553b381b3SDavid Woodhouse static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
144653b381b3SDavid Woodhouse 			   struct bio *bio)
144753b381b3SDavid Woodhouse {
144853b381b3SDavid Woodhouse 	int failed = find_bio_stripe(rbio, bio);
144953b381b3SDavid Woodhouse 
145053b381b3SDavid Woodhouse 	if (failed < 0)
145153b381b3SDavid Woodhouse 		return -EIO;
145253b381b3SDavid Woodhouse 
145353b381b3SDavid Woodhouse 	return fail_rbio_index(rbio, failed);
145453b381b3SDavid Woodhouse }
145553b381b3SDavid Woodhouse 
145653b381b3SDavid Woodhouse /*
14575fdb7afcSQu Wenruo  * For the subpage case, we can no longer set the page Uptodate directly for
14585fdb7afcSQu Wenruo  * stripe_pages[], thus we need to locate the sector.
14595fdb7afcSQu Wenruo  */
14605fdb7afcSQu Wenruo static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
14615fdb7afcSQu Wenruo 					     struct page *page,
14625fdb7afcSQu Wenruo 					     unsigned int pgoff)
14635fdb7afcSQu Wenruo {
14645fdb7afcSQu Wenruo 	int i;
14655fdb7afcSQu Wenruo 
14665fdb7afcSQu Wenruo 	for (i = 0; i < rbio->nr_sectors; i++) {
14675fdb7afcSQu Wenruo 		struct sector_ptr *sector = &rbio->stripe_sectors[i];
14685fdb7afcSQu Wenruo 
14695fdb7afcSQu Wenruo 		if (sector->page == page && sector->pgoff == pgoff)
14705fdb7afcSQu Wenruo 			return sector;
14715fdb7afcSQu Wenruo 	}
14725fdb7afcSQu Wenruo 	return NULL;
14735fdb7afcSQu Wenruo }
14745fdb7afcSQu Wenruo 
14755fdb7afcSQu Wenruo /*
147653b381b3SDavid Woodhouse  * this sets each sector covered by the bio uptodate.  It should only be used
147753b381b3SDavid Woodhouse  * on private rbio pages, nothing that comes in from the higher layers
147853b381b3SDavid Woodhouse  */
14795fdb7afcSQu Wenruo static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
148053b381b3SDavid Woodhouse {
14815fdb7afcSQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
14820198e5b7SLiu Bo 	struct bio_vec *bvec;
14836dc4f100SMing Lei 	struct bvec_iter_all iter_all;
148453b381b3SDavid Woodhouse 
14850198e5b7SLiu Bo 	ASSERT(!bio_flagged(bio, BIO_CLONED));
14866592e58cSFilipe Manana 
14875fdb7afcSQu Wenruo 	bio_for_each_segment_all(bvec, bio, iter_all) {
14885fdb7afcSQu Wenruo 		struct sector_ptr *sector;
14895fdb7afcSQu Wenruo 		int pgoff;
14905fdb7afcSQu Wenruo 
14915fdb7afcSQu Wenruo 		for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
14925fdb7afcSQu Wenruo 		     pgoff += sectorsize) {
14935fdb7afcSQu Wenruo 			sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
14945fdb7afcSQu Wenruo 			ASSERT(sector);
14955fdb7afcSQu Wenruo 			if (sector)
14965fdb7afcSQu Wenruo 				sector->uptodate = 1;
14975fdb7afcSQu Wenruo 		}
14985fdb7afcSQu Wenruo 	}
149953b381b3SDavid Woodhouse }
150053b381b3SDavid Woodhouse 
1501d34e123dSChristoph Hellwig static void raid56_bio_end_io(struct bio *bio)
150253b381b3SDavid Woodhouse {
150353b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio = bio->bi_private;
150453b381b3SDavid Woodhouse 
15054e4cbee9SChristoph Hellwig 	if (bio->bi_status)
150653b381b3SDavid Woodhouse 		fail_bio_stripe(rbio, bio);
150753b381b3SDavid Woodhouse 	else
15085fdb7afcSQu Wenruo 		set_bio_pages_uptodate(rbio, bio);
150953b381b3SDavid Woodhouse 
151053b381b3SDavid Woodhouse 	bio_put(bio);
151153b381b3SDavid Woodhouse 
1512d34e123dSChristoph Hellwig 	if (atomic_dec_and_test(&rbio->stripes_pending))
1513d34e123dSChristoph Hellwig 		queue_work(rbio->bioc->fs_info->endio_raid56_workers,
1514d34e123dSChristoph Hellwig 			   &rbio->end_io_work);
1515d34e123dSChristoph Hellwig }
151653b381b3SDavid Woodhouse 
151753b381b3SDavid Woodhouse /*
1518d34e123dSChristoph Hellwig  * End io handler for the read phase of the RMW cycle.  All the bios here are
1519d34e123dSChristoph Hellwig  * physical stripe bios we've read from the disk so we can recalculate the
1520d34e123dSChristoph Hellwig  * parity of the stripe.
1521d34e123dSChristoph Hellwig  *
1522d34e123dSChristoph Hellwig  * This will usually kick off finish_rmw once all the bios are read in, but it
1523d34e123dSChristoph Hellwig  * may trigger parity reconstruction if we had any errors along the way
1524d34e123dSChristoph Hellwig  */
1525d34e123dSChristoph Hellwig static void raid56_rmw_end_io_work(struct work_struct *work)
1526d34e123dSChristoph Hellwig {
1527d34e123dSChristoph Hellwig 	struct btrfs_raid_bio *rbio =
1528d34e123dSChristoph Hellwig 		container_of(work, struct btrfs_raid_bio, end_io_work);
1529d34e123dSChristoph Hellwig 
1530d34e123dSChristoph Hellwig 	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
1531d34e123dSChristoph Hellwig 		rbio_orig_end_io(rbio, BLK_STS_IOERR);
1532d34e123dSChristoph Hellwig 		return;
1533d34e123dSChristoph Hellwig 	}
1534d34e123dSChristoph Hellwig 
1535d34e123dSChristoph Hellwig 	/*
1536d34e123dSChristoph Hellwig 	 * This will normally call finish_rmw to start our write but if there
1537d34e123dSChristoph Hellwig 	 * are any failed stripes we'll reconstruct from parity first.
153853b381b3SDavid Woodhouse 	 */
153953b381b3SDavid Woodhouse 	validate_rbio_for_rmw(rbio);
154053b381b3SDavid Woodhouse }
154153b381b3SDavid Woodhouse 
154253b381b3SDavid Woodhouse /*
154353b381b3SDavid Woodhouse  * the stripe must be locked by the caller.  It will
154453b381b3SDavid Woodhouse  * unlock after all the writes are done
154553b381b3SDavid Woodhouse  */
154653b381b3SDavid Woodhouse static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
154753b381b3SDavid Woodhouse {
154853b381b3SDavid Woodhouse 	int bios_to_read = 0;
154953b381b3SDavid Woodhouse 	struct bio_list bio_list;
155053b381b3SDavid Woodhouse 	int ret;
15513e77605dSQu Wenruo 	int sectornr;
155253b381b3SDavid Woodhouse 	int stripe;
155353b381b3SDavid Woodhouse 	struct bio *bio;
155453b381b3SDavid Woodhouse 
155553b381b3SDavid Woodhouse 	bio_list_init(&bio_list);
155653b381b3SDavid Woodhouse 
155753b381b3SDavid Woodhouse 	ret = alloc_rbio_pages(rbio);
155853b381b3SDavid Woodhouse 	if (ret)
155953b381b3SDavid Woodhouse 		goto cleanup;
156053b381b3SDavid Woodhouse 
156153b381b3SDavid Woodhouse 	index_rbio_pages(rbio);
156253b381b3SDavid Woodhouse 
1563b89e1b01SMiao Xie 	atomic_set(&rbio->error, 0);
156453b381b3SDavid Woodhouse 	/*
156553b381b3SDavid Woodhouse 	 * build a list of bios to read all the missing parts of this
156653b381b3SDavid Woodhouse 	 * stripe
156753b381b3SDavid Woodhouse 	 */
156853b381b3SDavid Woodhouse 	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
15693e77605dSQu Wenruo 		for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
15703e77605dSQu Wenruo 			struct sector_ptr *sector;
15713e77605dSQu Wenruo 
157253b381b3SDavid Woodhouse 			/*
15733e77605dSQu Wenruo 			 * We want to find all the sectors missing from the
15743e77605dSQu Wenruo 			 * rbio and read them from the disk.  If sector_in_rbio()
15753e77605dSQu Wenruo 			 * finds a page in the bio list we don't need to read
15763e77605dSQu Wenruo 			 * it off the stripe.
157753b381b3SDavid Woodhouse 			 */
15783e77605dSQu Wenruo 			sector = sector_in_rbio(rbio, stripe, sectornr, 1);
15793e77605dSQu Wenruo 			if (sector)
158053b381b3SDavid Woodhouse 				continue;
158153b381b3SDavid Woodhouse 
15823e77605dSQu Wenruo 			sector = rbio_stripe_sector(rbio, stripe, sectornr);
15834ae10b3aSChris Mason 			/*
15843e77605dSQu Wenruo 			 * The bio cache may have handed us an uptodate page.
15853e77605dSQu Wenruo 			 * If so, be happy and use it.
15864ae10b3aSChris Mason 			 */
15873e77605dSQu Wenruo 			if (sector->uptodate)
15884ae10b3aSChris Mason 				continue;
15894ae10b3aSChris Mason 
15903e77605dSQu Wenruo 			ret = rbio_add_io_sector(rbio, &bio_list, sector,
15913e77605dSQu Wenruo 				       stripe, sectornr, rbio->stripe_len,
1592e01bf588SChristoph Hellwig 				       REQ_OP_READ);
159353b381b3SDavid Woodhouse 			if (ret)
159453b381b3SDavid Woodhouse 				goto cleanup;
159553b381b3SDavid Woodhouse 		}
159653b381b3SDavid Woodhouse 	}
159753b381b3SDavid Woodhouse 
159853b381b3SDavid Woodhouse 	bios_to_read = bio_list_size(&bio_list);
159953b381b3SDavid Woodhouse 	if (!bios_to_read) {
160053b381b3SDavid Woodhouse 		/*
160153b381b3SDavid Woodhouse 		 * this can happen if others have merged with
160253b381b3SDavid Woodhouse 		 * us; it means there is nothing left to read.
160353b381b3SDavid Woodhouse 		 * But if there are missing devices it may not be
160453b381b3SDavid Woodhouse 		 * safe to do the full stripe write yet.
160553b381b3SDavid Woodhouse 		 */
160653b381b3SDavid Woodhouse 		goto finish;
160753b381b3SDavid Woodhouse 	}
160853b381b3SDavid Woodhouse 
160953b381b3SDavid Woodhouse 	/*
16104c664611SQu Wenruo 	 * The bioc may be freed once we submit the last bio. Make sure not to
16114c664611SQu Wenruo 	 * touch it after that.
161253b381b3SDavid Woodhouse 	 */
1613b89e1b01SMiao Xie 	atomic_set(&rbio->stripes_pending, bios_to_read);
1614d34e123dSChristoph Hellwig 	INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
1615bf28a605SNikolay Borisov 	while ((bio = bio_list_pop(&bio_list))) {
1616d34e123dSChristoph Hellwig 		bio->bi_end_io = raid56_bio_end_io;
161753b381b3SDavid Woodhouse 
1618b8bea09aSQu Wenruo 		if (trace_raid56_read_partial_enabled()) {
1619b8bea09aSQu Wenruo 			struct raid56_bio_trace_info trace_info = { 0 };
1620b8bea09aSQu Wenruo 
1621b8bea09aSQu Wenruo 			bio_get_trace_info(rbio, bio, &trace_info);
1622b8bea09aSQu Wenruo 			trace_raid56_read_partial(rbio, bio, &trace_info);
1623b8bea09aSQu Wenruo 		}
16244e49ea4aSMike Christie 		submit_bio(bio);
162553b381b3SDavid Woodhouse 	}
162653b381b3SDavid Woodhouse 	/* the actual write will happen once the reads are done */
162753b381b3SDavid Woodhouse 	return 0;
162853b381b3SDavid Woodhouse 
162953b381b3SDavid Woodhouse cleanup:
163058efbc9fSOmar Sandoval 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
1631785884fcSLiu Bo 
1632785884fcSLiu Bo 	while ((bio = bio_list_pop(&bio_list)))
1633785884fcSLiu Bo 		bio_put(bio);
1634785884fcSLiu Bo 
163553b381b3SDavid Woodhouse 	return -EIO;
163653b381b3SDavid Woodhouse 
163753b381b3SDavid Woodhouse finish:
163853b381b3SDavid Woodhouse 	validate_rbio_for_rmw(rbio);
163953b381b3SDavid Woodhouse 	return 0;
164053b381b3SDavid Woodhouse }
164153b381b3SDavid Woodhouse 
164253b381b3SDavid Woodhouse /*
164353b381b3SDavid Woodhouse  * if the upper layers pass in a full stripe, we thank them by only allocating
164453b381b3SDavid Woodhouse  * enough pages to hold the parity, and sending it all down quickly.
164553b381b3SDavid Woodhouse  */
164653b381b3SDavid Woodhouse static int full_stripe_write(struct btrfs_raid_bio *rbio)
164753b381b3SDavid Woodhouse {
164853b381b3SDavid Woodhouse 	int ret;
164953b381b3SDavid Woodhouse 
165053b381b3SDavid Woodhouse 	ret = alloc_rbio_parity_pages(rbio);
16513cd846d1SMiao Xie 	if (ret) {
16523cd846d1SMiao Xie 		__free_raid_bio(rbio);
165353b381b3SDavid Woodhouse 		return ret;
16543cd846d1SMiao Xie 	}
165553b381b3SDavid Woodhouse 
165653b381b3SDavid Woodhouse 	ret = lock_stripe_add(rbio);
165753b381b3SDavid Woodhouse 	if (ret == 0)
165853b381b3SDavid Woodhouse 		finish_rmw(rbio);
165953b381b3SDavid Woodhouse 	return 0;
166053b381b3SDavid Woodhouse }
166153b381b3SDavid Woodhouse 
166253b381b3SDavid Woodhouse /*
166353b381b3SDavid Woodhouse  * partial stripe writes get handed over to async helpers.
166453b381b3SDavid Woodhouse  * We're really hoping to merge a few more writes into this
166553b381b3SDavid Woodhouse  * rbio before calculating new parity
166653b381b3SDavid Woodhouse  */
166753b381b3SDavid Woodhouse static int partial_stripe_write(struct btrfs_raid_bio *rbio)
166853b381b3SDavid Woodhouse {
166953b381b3SDavid Woodhouse 	int ret;
167053b381b3SDavid Woodhouse 
167153b381b3SDavid Woodhouse 	ret = lock_stripe_add(rbio);
167253b381b3SDavid Woodhouse 	if (ret == 0)
1673cf6a4a75SDavid Sterba 		start_async_work(rbio, rmw_work);
167453b381b3SDavid Woodhouse 	return 0;
167553b381b3SDavid Woodhouse }
167653b381b3SDavid Woodhouse 
167753b381b3SDavid Woodhouse /*
167853b381b3SDavid Woodhouse  * sometimes while we're reading from the drive to
167953b381b3SDavid Woodhouse  * recalculate parity, enough new bios come in to create
168053b381b3SDavid Woodhouse  * a full stripe.  So we do a check here to see if we can
168153b381b3SDavid Woodhouse  * go directly to finish_rmw
168253b381b3SDavid Woodhouse  */
168353b381b3SDavid Woodhouse static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
168453b381b3SDavid Woodhouse {
168553b381b3SDavid Woodhouse 	/* head off into rmw land if we don't have a full stripe */
168653b381b3SDavid Woodhouse 	if (!rbio_is_full(rbio))
168753b381b3SDavid Woodhouse 		return partial_stripe_write(rbio);
168853b381b3SDavid Woodhouse 	return full_stripe_write(rbio);
168953b381b3SDavid Woodhouse }
169053b381b3SDavid Woodhouse 
169153b381b3SDavid Woodhouse /*
16926ac0f488SChris Mason  * We use plugging callbacks to collect full stripes.
16936ac0f488SChris Mason  * Any time we get a partial stripe write while plugged
16946ac0f488SChris Mason  * we collect it into a list.  When the unplug comes down,
16956ac0f488SChris Mason  * we sort the list by logical block number and merge
16966ac0f488SChris Mason  * everything we can into the same rbios
16976ac0f488SChris Mason  */
16986ac0f488SChris Mason struct btrfs_plug_cb {
16996ac0f488SChris Mason 	struct blk_plug_cb cb;
17006ac0f488SChris Mason 	struct btrfs_fs_info *info;
17016ac0f488SChris Mason 	struct list_head rbio_list;
1702385de0efSChristoph Hellwig 	struct work_struct work;
17036ac0f488SChris Mason };
17046ac0f488SChris Mason 
17056ac0f488SChris Mason /*
17066ac0f488SChris Mason  * rbios on the plug list are sorted for easier merging.
17076ac0f488SChris Mason  */
17084f0f586bSSami Tolvanen static int plug_cmp(void *priv, const struct list_head *a,
17094f0f586bSSami Tolvanen 		    const struct list_head *b)
17106ac0f488SChris Mason {
1711214cc184SDavid Sterba 	const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
17126ac0f488SChris Mason 						       plug_list);
1713214cc184SDavid Sterba 	const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
17146ac0f488SChris Mason 						       plug_list);
17154f024f37SKent Overstreet 	u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
17164f024f37SKent Overstreet 	u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
17176ac0f488SChris Mason 
17186ac0f488SChris Mason 	if (a_sector < b_sector)
17196ac0f488SChris Mason 		return -1;
17206ac0f488SChris Mason 	if (a_sector > b_sector)
17216ac0f488SChris Mason 		return 1;
17226ac0f488SChris Mason 	return 0;
17236ac0f488SChris Mason }
17246ac0f488SChris Mason 
17256ac0f488SChris Mason static void run_plug(struct btrfs_plug_cb *plug)
17266ac0f488SChris Mason {
17276ac0f488SChris Mason 	struct btrfs_raid_bio *cur;
17286ac0f488SChris Mason 	struct btrfs_raid_bio *last = NULL;
17296ac0f488SChris Mason 
17306ac0f488SChris Mason 	/*
17316ac0f488SChris Mason 	 * sort our plug list then try to merge
17326ac0f488SChris Mason 	 * everything we can in hopes of creating full
17336ac0f488SChris Mason 	 * stripes.
17346ac0f488SChris Mason 	 */
17356ac0f488SChris Mason 	list_sort(NULL, &plug->rbio_list, plug_cmp);
17366ac0f488SChris Mason 	while (!list_empty(&plug->rbio_list)) {
17376ac0f488SChris Mason 		cur = list_entry(plug->rbio_list.next,
17386ac0f488SChris Mason 				 struct btrfs_raid_bio, plug_list);
17396ac0f488SChris Mason 		list_del_init(&cur->plug_list);
17406ac0f488SChris Mason 
17416ac0f488SChris Mason 		if (rbio_is_full(cur)) {
1742c7b562c5SDavid Sterba 			int ret;
1743c7b562c5SDavid Sterba 
17446ac0f488SChris Mason 			/* we have a full stripe, send it down */
1745c7b562c5SDavid Sterba 			ret = full_stripe_write(cur);
1746c7b562c5SDavid Sterba 			BUG_ON(ret);
17476ac0f488SChris Mason 			continue;
17486ac0f488SChris Mason 		}
17496ac0f488SChris Mason 		if (last) {
17506ac0f488SChris Mason 			if (rbio_can_merge(last, cur)) {
17516ac0f488SChris Mason 				merge_rbio(last, cur);
17526ac0f488SChris Mason 				__free_raid_bio(cur);
17536ac0f488SChris Mason 				continue;
17546ac0f488SChris Mason 
17556ac0f488SChris Mason 			}
17566ac0f488SChris Mason 			__raid56_parity_write(last);
17576ac0f488SChris Mason 		}
17586ac0f488SChris Mason 		last = cur;
17596ac0f488SChris Mason 	}
17606ac0f488SChris Mason 	if (last) {
17616ac0f488SChris Mason 		__raid56_parity_write(last);
17626ac0f488SChris Mason 	}
17636ac0f488SChris Mason 	kfree(plug);
17646ac0f488SChris Mason }
17656ac0f488SChris Mason 
17666ac0f488SChris Mason /*
17676ac0f488SChris Mason  * if the unplug comes from schedule, we have to push the
17686ac0f488SChris Mason  * work off to a helper thread
17696ac0f488SChris Mason  */
1770385de0efSChristoph Hellwig static void unplug_work(struct work_struct *work)
17716ac0f488SChris Mason {
17726ac0f488SChris Mason 	struct btrfs_plug_cb *plug;
17736ac0f488SChris Mason 	plug = container_of(work, struct btrfs_plug_cb, work);
17746ac0f488SChris Mason 	run_plug(plug);
17756ac0f488SChris Mason }
17766ac0f488SChris Mason 
17776ac0f488SChris Mason static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
17786ac0f488SChris Mason {
17796ac0f488SChris Mason 	struct btrfs_plug_cb *plug;
17806ac0f488SChris Mason 	plug = container_of(cb, struct btrfs_plug_cb, cb);
17816ac0f488SChris Mason 
17826ac0f488SChris Mason 	if (from_schedule) {
1783385de0efSChristoph Hellwig 		INIT_WORK(&plug->work, unplug_work);
1784385de0efSChristoph Hellwig 		queue_work(plug->info->rmw_workers, &plug->work);
17856ac0f488SChris Mason 		return;
17866ac0f488SChris Mason 	}
17876ac0f488SChris Mason 	run_plug(plug);
17886ac0f488SChris Mason }
17896ac0f488SChris Mason 
1790bd8f7e62SQu Wenruo /* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
1791bd8f7e62SQu Wenruo static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
1792bd8f7e62SQu Wenruo {
1793bd8f7e62SQu Wenruo 	const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1794bd8f7e62SQu Wenruo 	const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
1795bd8f7e62SQu Wenruo 	const u64 full_stripe_start = rbio->bioc->raid_map[0];
1796bd8f7e62SQu Wenruo 	const u32 orig_len = orig_bio->bi_iter.bi_size;
1797bd8f7e62SQu Wenruo 	const u32 sectorsize = fs_info->sectorsize;
1798bd8f7e62SQu Wenruo 	u64 cur_logical;
1799bd8f7e62SQu Wenruo 
1800bd8f7e62SQu Wenruo 	ASSERT(orig_logical >= full_stripe_start &&
1801bd8f7e62SQu Wenruo 	       orig_logical + orig_len <= full_stripe_start +
1802bd8f7e62SQu Wenruo 	       rbio->nr_data * rbio->stripe_len);
1803bd8f7e62SQu Wenruo 
1804bd8f7e62SQu Wenruo 	bio_list_add(&rbio->bio_list, orig_bio);
1805bd8f7e62SQu Wenruo 	rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
1806bd8f7e62SQu Wenruo 
1807bd8f7e62SQu Wenruo 	/* Update the dbitmap. */
1808bd8f7e62SQu Wenruo 	for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
1809bd8f7e62SQu Wenruo 	     cur_logical += sectorsize) {
1810bd8f7e62SQu Wenruo 		int bit = ((u32)(cur_logical - full_stripe_start) >>
1811bd8f7e62SQu Wenruo 			   fs_info->sectorsize_bits) % rbio->stripe_nsectors;
1812bd8f7e62SQu Wenruo 
1813bd8f7e62SQu Wenruo 		set_bit(bit, &rbio->dbitmap);
1814bd8f7e62SQu Wenruo 	}
1815bd8f7e62SQu Wenruo }
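
/*
 * A minimal sketch of the dbitmap index math used in rbio_add_bio() above.
 * demo_dbitmap_bit() is a hypothetical helper: the sector number counted
 * from the start of the full stripe, taken modulo the number of sectors per
 * stripe, selects the vertical stripe (column) a logical address lands in.
 */
static inline int demo_dbitmap_bit(u64 logical, u64 full_stripe_start,
				   u32 sectorsize_bits, u32 stripe_nsectors)
{
	u32 offset = (u32)(logical - full_stripe_start);

	return (offset >> sectorsize_bits) % stripe_nsectors;
}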
1816bd8f7e62SQu Wenruo 
18176ac0f488SChris Mason /*
181853b381b3SDavid Woodhouse  * our main entry point for writes from the rest of the FS.
181953b381b3SDavid Woodhouse  */
1820cc353a8bSQu Wenruo int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stripe_len)
182153b381b3SDavid Woodhouse {
18226a258d72SQu Wenruo 	struct btrfs_fs_info *fs_info = bioc->fs_info;
182353b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio;
18246ac0f488SChris Mason 	struct btrfs_plug_cb *plug = NULL;
18256ac0f488SChris Mason 	struct blk_plug_cb *cb;
18264245215dSMiao Xie 	int ret;
182753b381b3SDavid Woodhouse 
18284c664611SQu Wenruo 	rbio = alloc_rbio(fs_info, bioc, stripe_len);
1829af8e2d1dSMiao Xie 	if (IS_ERR(rbio)) {
18304c664611SQu Wenruo 		btrfs_put_bioc(bioc);
183153b381b3SDavid Woodhouse 		return PTR_ERR(rbio);
1832af8e2d1dSMiao Xie 	}
18331b94b556SMiao Xie 	rbio->operation = BTRFS_RBIO_WRITE;
1834bd8f7e62SQu Wenruo 	rbio_add_bio(rbio, bio);
18356ac0f488SChris Mason 
18360b246afaSJeff Mahoney 	btrfs_bio_counter_inc_noblocked(fs_info);
18374245215dSMiao Xie 	rbio->generic_bio_cnt = 1;
18384245215dSMiao Xie 
18396ac0f488SChris Mason 	/*
18406ac0f488SChris Mason 	 * don't plug on full rbios, just get them out the door
18416ac0f488SChris Mason 	 * as quickly as we can
18426ac0f488SChris Mason 	 */
18434245215dSMiao Xie 	if (rbio_is_full(rbio)) {
18444245215dSMiao Xie 		ret = full_stripe_write(rbio);
18454245215dSMiao Xie 		if (ret)
18460b246afaSJeff Mahoney 			btrfs_bio_counter_dec(fs_info);
18474245215dSMiao Xie 		return ret;
18484245215dSMiao Xie 	}
18496ac0f488SChris Mason 
18500b246afaSJeff Mahoney 	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
18516ac0f488SChris Mason 	if (cb) {
18526ac0f488SChris Mason 		plug = container_of(cb, struct btrfs_plug_cb, cb);
18536ac0f488SChris Mason 		if (!plug->info) {
18540b246afaSJeff Mahoney 			plug->info = fs_info;
18556ac0f488SChris Mason 			INIT_LIST_HEAD(&plug->rbio_list);
18566ac0f488SChris Mason 		}
18576ac0f488SChris Mason 		list_add_tail(&rbio->plug_list, &plug->rbio_list);
18584245215dSMiao Xie 		ret = 0;
18596ac0f488SChris Mason 	} else {
18604245215dSMiao Xie 		ret = __raid56_parity_write(rbio);
18614245215dSMiao Xie 		if (ret)
18620b246afaSJeff Mahoney 			btrfs_bio_counter_dec(fs_info);
186353b381b3SDavid Woodhouse 	}
18644245215dSMiao Xie 	return ret;
18656ac0f488SChris Mason }
186653b381b3SDavid Woodhouse 
186753b381b3SDavid Woodhouse /*
186853b381b3SDavid Woodhouse  * all parity reconstruction happens here.  We've read in everything
186953b381b3SDavid Woodhouse  * we can find from the drives and this does the heavy lifting of
187053b381b3SDavid Woodhouse  * sorting the good from the bad.
187153b381b3SDavid Woodhouse  */
187253b381b3SDavid Woodhouse static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
187353b381b3SDavid Woodhouse {
187407e4d380SQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
187507e4d380SQu Wenruo 	int sectornr, stripe;
187653b381b3SDavid Woodhouse 	void **pointers;
187794a0b58dSIra Weiny 	void **unmap_array;
187853b381b3SDavid Woodhouse 	int faila = -1, failb = -1;
187958efbc9fSOmar Sandoval 	blk_status_t err;
188053b381b3SDavid Woodhouse 	int i;
188153b381b3SDavid Woodhouse 
188207e4d380SQu Wenruo 	/*
188307e4d380SQu Wenruo 	 * This array stores the pointer for each sector, thus each pointer
188407e4d380SQu Wenruo 	 * already has the sector's pgoff added to it.
188507e4d380SQu Wenruo 	 */
188631e818feSDavid Sterba 	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
188753b381b3SDavid Woodhouse 	if (!pointers) {
188858efbc9fSOmar Sandoval 		err = BLK_STS_RESOURCE;
188953b381b3SDavid Woodhouse 		goto cleanup_io;
189053b381b3SDavid Woodhouse 	}
189153b381b3SDavid Woodhouse 
189294a0b58dSIra Weiny 	/*
189394a0b58dSIra Weiny 	 * Store a copy of the pointers that does not get reordered during
189494a0b58dSIra Weiny 	 * reconstruction so that kunmap_local works.
189594a0b58dSIra Weiny 	 */
189694a0b58dSIra Weiny 	unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
189794a0b58dSIra Weiny 	if (!unmap_array) {
189894a0b58dSIra Weiny 		err = BLK_STS_RESOURCE;
189994a0b58dSIra Weiny 		goto cleanup_pointers;
190094a0b58dSIra Weiny 	}
190194a0b58dSIra Weiny 
190253b381b3SDavid Woodhouse 	faila = rbio->faila;
190353b381b3SDavid Woodhouse 	failb = rbio->failb;
190453b381b3SDavid Woodhouse 
1905b4ee1782SOmar Sandoval 	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1906b4ee1782SOmar Sandoval 	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
190753b381b3SDavid Woodhouse 		spin_lock_irq(&rbio->bio_list_lock);
190853b381b3SDavid Woodhouse 		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
190953b381b3SDavid Woodhouse 		spin_unlock_irq(&rbio->bio_list_lock);
191053b381b3SDavid Woodhouse 	}
191153b381b3SDavid Woodhouse 
191253b381b3SDavid Woodhouse 	index_rbio_pages(rbio);
191353b381b3SDavid Woodhouse 
191407e4d380SQu Wenruo 	for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
191507e4d380SQu Wenruo 		struct sector_ptr *sector;
191607e4d380SQu Wenruo 
19175a6ac9eaSMiao Xie 		/*
19185a6ac9eaSMiao Xie 		 * Now we just use the bitmap to mark the horizontal stripes in
19195a6ac9eaSMiao Xie 		 * which we have data when doing parity scrub.
19205a6ac9eaSMiao Xie 		 */
19215a6ac9eaSMiao Xie 		if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
1922c67c68ebSQu Wenruo 		    !test_bit(sectornr, &rbio->dbitmap))
19235a6ac9eaSMiao Xie 			continue;
19245a6ac9eaSMiao Xie 
192594a0b58dSIra Weiny 		/*
192607e4d380SQu Wenruo 		 * Set up our array of pointers with sectors from each stripe
192794a0b58dSIra Weiny 		 *
192894a0b58dSIra Weiny 		 * NOTE: store a duplicate array of pointers to preserve the
192994a0b58dSIra Weiny 		 * pointer order
193053b381b3SDavid Woodhouse 		 */
19312c8cdd6eSMiao Xie 		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
193253b381b3SDavid Woodhouse 			/*
193307e4d380SQu Wenruo 			 * If we're rebuilding a read, we have to use
193453b381b3SDavid Woodhouse 			 * pages from the bio list
193553b381b3SDavid Woodhouse 			 */
1936b4ee1782SOmar Sandoval 			if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1937b4ee1782SOmar Sandoval 			     rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
193853b381b3SDavid Woodhouse 			    (stripe == faila || stripe == failb)) {
193907e4d380SQu Wenruo 				sector = sector_in_rbio(rbio, stripe, sectornr, 0);
194053b381b3SDavid Woodhouse 			} else {
194107e4d380SQu Wenruo 				sector = rbio_stripe_sector(rbio, stripe, sectornr);
194253b381b3SDavid Woodhouse 			}
194307e4d380SQu Wenruo 			ASSERT(sector->page);
194407e4d380SQu Wenruo 			pointers[stripe] = kmap_local_page(sector->page) +
194507e4d380SQu Wenruo 					   sector->pgoff;
194694a0b58dSIra Weiny 			unmap_array[stripe] = pointers[stripe];
194753b381b3SDavid Woodhouse 		}
194853b381b3SDavid Woodhouse 
194907e4d380SQu Wenruo 		/* All raid6 handling here */
19504c664611SQu Wenruo 		if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
195107e4d380SQu Wenruo 			/* Single failure, rebuild from parity raid5 style */
195253b381b3SDavid Woodhouse 			if (failb < 0) {
195353b381b3SDavid Woodhouse 				if (faila == rbio->nr_data) {
195453b381b3SDavid Woodhouse 					/*
195553b381b3SDavid Woodhouse 					 * Just the P stripe has failed, without
195653b381b3SDavid Woodhouse 					 * a bad data or Q stripe.
195753b381b3SDavid Woodhouse 					 * TODO, we should redo the xor here.
195853b381b3SDavid Woodhouse 					 */
195958efbc9fSOmar Sandoval 					err = BLK_STS_IOERR;
196053b381b3SDavid Woodhouse 					goto cleanup;
196153b381b3SDavid Woodhouse 				}
196253b381b3SDavid Woodhouse 				/*
196353b381b3SDavid Woodhouse 				 * a single failure in raid6 is rebuilt
196453b381b3SDavid Woodhouse 				 * in the pstripe code below
196553b381b3SDavid Woodhouse 				 */
196653b381b3SDavid Woodhouse 				goto pstripe;
196753b381b3SDavid Woodhouse 			}
196853b381b3SDavid Woodhouse 
196953b381b3SDavid Woodhouse 			/* make sure our ps and qs are in order */
1970b7d2083aSNikolay Borisov 			if (faila > failb)
1971b7d2083aSNikolay Borisov 				swap(faila, failb);
197253b381b3SDavid Woodhouse 
197353b381b3SDavid Woodhouse 			/* if the q stripe has failed, do a pstripe reconstruction
197453b381b3SDavid Woodhouse 			 * from the xors.
197553b381b3SDavid Woodhouse 			 * If both the q stripe and the P stripe have failed, we're
197653b381b3SDavid Woodhouse 			 * here due to a crc mismatch and we can't give them the
197753b381b3SDavid Woodhouse 			 * data they want
197853b381b3SDavid Woodhouse 			 */
19794c664611SQu Wenruo 			if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
19804c664611SQu Wenruo 				if (rbio->bioc->raid_map[faila] ==
19818e5cfb55SZhao Lei 				    RAID5_P_STRIPE) {
198258efbc9fSOmar Sandoval 					err = BLK_STS_IOERR;
198353b381b3SDavid Woodhouse 					goto cleanup;
198453b381b3SDavid Woodhouse 				}
198553b381b3SDavid Woodhouse 				/*
198653b381b3SDavid Woodhouse 				 * otherwise we have one bad data stripe and
198753b381b3SDavid Woodhouse 				 * a good P stripe.  raid5!
198853b381b3SDavid Woodhouse 				 */
198953b381b3SDavid Woodhouse 				goto pstripe;
199053b381b3SDavid Woodhouse 			}
199153b381b3SDavid Woodhouse 
19924c664611SQu Wenruo 			if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
19932c8cdd6eSMiao Xie 				raid6_datap_recov(rbio->real_stripes,
199407e4d380SQu Wenruo 						  sectorsize, faila, pointers);
199553b381b3SDavid Woodhouse 			} else {
19962c8cdd6eSMiao Xie 				raid6_2data_recov(rbio->real_stripes,
199707e4d380SQu Wenruo 						  sectorsize, faila, failb,
199853b381b3SDavid Woodhouse 						  pointers);
199953b381b3SDavid Woodhouse 			}
200053b381b3SDavid Woodhouse 		} else {
200153b381b3SDavid Woodhouse 			void *p;
200253b381b3SDavid Woodhouse 
200353b381b3SDavid Woodhouse 			/* rebuild from P stripe here (raid5 or raid6) */
200453b381b3SDavid Woodhouse 			BUG_ON(failb != -1);
200553b381b3SDavid Woodhouse pstripe:
200653b381b3SDavid Woodhouse 			/* Copy parity block into failed block to start with */
200707e4d380SQu Wenruo 			memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize);
200853b381b3SDavid Woodhouse 
200953b381b3SDavid Woodhouse 			/* rearrange the pointer array */
201053b381b3SDavid Woodhouse 			p = pointers[faila];
201153b381b3SDavid Woodhouse 			for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
201253b381b3SDavid Woodhouse 				pointers[stripe] = pointers[stripe + 1];
201353b381b3SDavid Woodhouse 			pointers[rbio->nr_data - 1] = p;
201453b381b3SDavid Woodhouse 
201553b381b3SDavid Woodhouse 			/* xor in the rest */
201607e4d380SQu Wenruo 			run_xor(pointers, rbio->nr_data - 1, sectorsize);
201753b381b3SDavid Woodhouse 		}
201853b381b3SDavid Woodhouse 		/* if we're doing this rebuild as part of an rmw, go through
201953b381b3SDavid Woodhouse 		 * and set all of our private rbio pages in the
202053b381b3SDavid Woodhouse 		 * failed stripes as uptodate.  This way finish_rmw will
202153b381b3SDavid Woodhouse 		 * know they can be trusted.  If this was a read reconstruction,
202253b381b3SDavid Woodhouse 		 * other endio functions will fiddle the uptodate bits
202353b381b3SDavid Woodhouse 		 */
20241b94b556SMiao Xie 		if (rbio->operation == BTRFS_RBIO_WRITE) {
202507e4d380SQu Wenruo 			for (i = 0;  i < rbio->stripe_nsectors; i++) {
202653b381b3SDavid Woodhouse 				if (faila != -1) {
202707e4d380SQu Wenruo 					sector = rbio_stripe_sector(rbio, faila, i);
202807e4d380SQu Wenruo 					sector->uptodate = 1;
202953b381b3SDavid Woodhouse 				}
203053b381b3SDavid Woodhouse 				if (failb != -1) {
203107e4d380SQu Wenruo 					sector = rbio_stripe_sector(rbio, failb, i);
203207e4d380SQu Wenruo 					sector->uptodate = 1;
203353b381b3SDavid Woodhouse 				}
203453b381b3SDavid Woodhouse 			}
203553b381b3SDavid Woodhouse 		}
203694a0b58dSIra Weiny 		for (stripe = rbio->real_stripes - 1; stripe >= 0; stripe--)
203794a0b58dSIra Weiny 			kunmap_local(unmap_array[stripe]);
203853b381b3SDavid Woodhouse 	}
203953b381b3SDavid Woodhouse 
204058efbc9fSOmar Sandoval 	err = BLK_STS_OK;
204153b381b3SDavid Woodhouse cleanup:
204294a0b58dSIra Weiny 	kfree(unmap_array);
204394a0b58dSIra Weiny cleanup_pointers:
204453b381b3SDavid Woodhouse 	kfree(pointers);
204553b381b3SDavid Woodhouse 
204653b381b3SDavid Woodhouse cleanup_io:
2047580c6efaSLiu Bo 	/*
2048580c6efaSLiu Bo 	 * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
2049580c6efaSLiu Bo 	 * valid rbio which is consistent with ondisk content, thus such a
2050580c6efaSLiu Bo 	 * valid rbio can be cached to avoid further disk reads.
2051580c6efaSLiu Bo 	 */
2052580c6efaSLiu Bo 	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
2053580c6efaSLiu Bo 	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
205444ac474dSLiu Bo 		/*
205544ac474dSLiu Bo 		 * - In case of two failures, where rbio->failb != -1:
205644ac474dSLiu Bo 		 *
205744ac474dSLiu Bo 		 *   Do not cache this rbio since the above read reconstruction
205844ac474dSLiu Bo 		 *   (raid6_datap_recov() or raid6_2data_recov()) may have
205944ac474dSLiu Bo 		 *   changed some content of stripes which are not identical to
206044ac474dSLiu Bo 		 *   on-disk content any more, otherwise, a later write/recover
206144ac474dSLiu Bo 		 *   may steal stripe_pages from this rbio and end up with
206244ac474dSLiu Bo 		 *   corruptions or rebuild failures.
206344ac474dSLiu Bo 		 *
206444ac474dSLiu Bo 		 * - In case of single failure, where rbio->failb == -1:
206544ac474dSLiu Bo 		 *
206644ac474dSLiu Bo 		 *   Cache this rbio iff the above read reconstruction is
206752042d8eSAndrea Gelmini 		 *   executed without problems.
206844ac474dSLiu Bo 		 */
206944ac474dSLiu Bo 		if (err == BLK_STS_OK && rbio->failb < 0)
20704ae10b3aSChris Mason 			cache_rbio_pages(rbio);
20714ae10b3aSChris Mason 		else
20724ae10b3aSChris Mason 			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
20734ae10b3aSChris Mason 
20744246a0b6SChristoph Hellwig 		rbio_orig_end_io(rbio, err);
207558efbc9fSOmar Sandoval 	} else if (err == BLK_STS_OK) {
207653b381b3SDavid Woodhouse 		rbio->faila = -1;
207753b381b3SDavid Woodhouse 		rbio->failb = -1;
20785a6ac9eaSMiao Xie 
20795a6ac9eaSMiao Xie 		if (rbio->operation == BTRFS_RBIO_WRITE)
208053b381b3SDavid Woodhouse 			finish_rmw(rbio);
20815a6ac9eaSMiao Xie 		else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
20825a6ac9eaSMiao Xie 			finish_parity_scrub(rbio, 0);
20835a6ac9eaSMiao Xie 		else
20845a6ac9eaSMiao Xie 			BUG();
208553b381b3SDavid Woodhouse 	} else {
20864246a0b6SChristoph Hellwig 		rbio_orig_end_io(rbio, err);
208753b381b3SDavid Woodhouse 	}
208853b381b3SDavid Woodhouse }
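
/*
 * A simplified sketch of the single-failure rebuild done in the pstripe path
 * of __raid_recover_end_io() above.  demo_raid5_rebuild_one() is a
 * hypothetical helper for illustration: because P is the XOR of all data
 * sectors, the missing sector is the XOR of P with every surviving data
 * sector.  The real code gets the same effect by rotating the pointers array
 * and reusing run_xor().
 */
static inline void demo_raid5_rebuild_one(void **data, const void *parity,
					  int nr_data, int faila, u32 sectorsize)
{
	u8 *out = data[faila];
	const u8 *in;
	int d;
	u32 i;

	memcpy(out, parity, sectorsize);
	for (d = 0; d < nr_data; d++) {
		if (d == faila)
			continue;
		in = data[d];
		for (i = 0; i < sectorsize; i++)
			out[i] ^= in[i];
	}
}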
208953b381b3SDavid Woodhouse 
209053b381b3SDavid Woodhouse /*
2091d34e123dSChristoph Hellwig  * This is called only for stripes we've read from disk to reconstruct the
2092d34e123dSChristoph Hellwig  * parity.
209353b381b3SDavid Woodhouse  */
2094d34e123dSChristoph Hellwig static void raid_recover_end_io_work(struct work_struct *work)
209553b381b3SDavid Woodhouse {
2096d34e123dSChristoph Hellwig 	struct btrfs_raid_bio *rbio =
2097d34e123dSChristoph Hellwig 		container_of(work, struct btrfs_raid_bio, end_io_work);
209853b381b3SDavid Woodhouse 
20994c664611SQu Wenruo 	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
210058efbc9fSOmar Sandoval 		rbio_orig_end_io(rbio, BLK_STS_IOERR);
210153b381b3SDavid Woodhouse 	else
210253b381b3SDavid Woodhouse 		__raid_recover_end_io(rbio);
210353b381b3SDavid Woodhouse }
210453b381b3SDavid Woodhouse 
210553b381b3SDavid Woodhouse /*
210653b381b3SDavid Woodhouse  * reads everything we need off the disk to reconstruct
210753b381b3SDavid Woodhouse  * the parity. endio handlers trigger final reconstruction
210853b381b3SDavid Woodhouse  * when the IO is done.
210953b381b3SDavid Woodhouse  *
211053b381b3SDavid Woodhouse  * This is used both for reads from the higher layers and for
211153b381b3SDavid Woodhouse  * parity construction required to finish a rmw cycle.
211253b381b3SDavid Woodhouse  */
211353b381b3SDavid Woodhouse static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
211453b381b3SDavid Woodhouse {
211553b381b3SDavid Woodhouse 	int bios_to_read = 0;
211653b381b3SDavid Woodhouse 	struct bio_list bio_list;
211753b381b3SDavid Woodhouse 	int ret;
2118*ef340fccSQu Wenruo 	int total_sector_nr;
211953b381b3SDavid Woodhouse 	struct bio *bio;
212053b381b3SDavid Woodhouse 
212153b381b3SDavid Woodhouse 	bio_list_init(&bio_list);
212253b381b3SDavid Woodhouse 
212353b381b3SDavid Woodhouse 	ret = alloc_rbio_pages(rbio);
212453b381b3SDavid Woodhouse 	if (ret)
212553b381b3SDavid Woodhouse 		goto cleanup;
212653b381b3SDavid Woodhouse 
2127b89e1b01SMiao Xie 	atomic_set(&rbio->error, 0);
212853b381b3SDavid Woodhouse 
212953b381b3SDavid Woodhouse 	/*
21304ae10b3aSChris Mason 	 * read everything that hasn't failed.  Thanks to the
21314ae10b3aSChris Mason 	 * stripe cache, it is possible that some or all of these
21324ae10b3aSChris Mason 	 * pages are going to be uptodate.
213353b381b3SDavid Woodhouse 	 */
2134*ef340fccSQu Wenruo 	for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2135*ef340fccSQu Wenruo 	     total_sector_nr++) {
2136*ef340fccSQu Wenruo 		int stripe = total_sector_nr / rbio->stripe_nsectors;
2137*ef340fccSQu Wenruo 		int sectornr = total_sector_nr % rbio->stripe_nsectors;
21383e77605dSQu Wenruo 		struct sector_ptr *sector;
213953b381b3SDavid Woodhouse 
2140*ef340fccSQu Wenruo 		if (rbio->faila == stripe || rbio->failb == stripe) {
2141*ef340fccSQu Wenruo 			atomic_inc(&rbio->error);
2142*ef340fccSQu Wenruo 			/* Skip the current stripe. */
2143*ef340fccSQu Wenruo 			ASSERT(sectornr == 0);
2144*ef340fccSQu Wenruo 			total_sector_nr += rbio->stripe_nsectors - 1;
2145*ef340fccSQu Wenruo 			continue;
2146*ef340fccSQu Wenruo 		}
2147*ef340fccSQu Wenruo 		/* The RMW code may have already read this page in. */
21483e77605dSQu Wenruo 		sector = rbio_stripe_sector(rbio, stripe, sectornr);
21493e77605dSQu Wenruo 		if (sector->uptodate)
215053b381b3SDavid Woodhouse 			continue;
215153b381b3SDavid Woodhouse 
2152*ef340fccSQu Wenruo 		ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
2153*ef340fccSQu Wenruo 					 sectornr, rbio->stripe_len,
2154e01bf588SChristoph Hellwig 					 REQ_OP_READ);
215553b381b3SDavid Woodhouse 		if (ret < 0)
215653b381b3SDavid Woodhouse 			goto cleanup;
215753b381b3SDavid Woodhouse 	}
215853b381b3SDavid Woodhouse 
215953b381b3SDavid Woodhouse 	bios_to_read = bio_list_size(&bio_list);
216053b381b3SDavid Woodhouse 	if (!bios_to_read) {
216153b381b3SDavid Woodhouse 		/*
216253b381b3SDavid Woodhouse 		 * we might have no bios to read just because the pages
216353b381b3SDavid Woodhouse 		 * were up to date, or we might have no bios to read because
216453b381b3SDavid Woodhouse 		 * the devices were gone.
216553b381b3SDavid Woodhouse 		 */
21664c664611SQu Wenruo 		if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
216753b381b3SDavid Woodhouse 			__raid_recover_end_io(rbio);
2168813f8a0eSNikolay Borisov 			return 0;
216953b381b3SDavid Woodhouse 		} else {
217053b381b3SDavid Woodhouse 			goto cleanup;
217153b381b3SDavid Woodhouse 		}
217253b381b3SDavid Woodhouse 	}
217353b381b3SDavid Woodhouse 
217453b381b3SDavid Woodhouse 	/*
21754c664611SQu Wenruo 	 * The bioc may be freed once we submit the last bio. Make sure not to
21764c664611SQu Wenruo 	 * touch it after that.
217753b381b3SDavid Woodhouse 	 */
2178b89e1b01SMiao Xie 	atomic_set(&rbio->stripes_pending, bios_to_read);
2179d34e123dSChristoph Hellwig 	INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
2180bf28a605SNikolay Borisov 	while ((bio = bio_list_pop(&bio_list))) {
2181d34e123dSChristoph Hellwig 		bio->bi_end_io = raid56_bio_end_io;
218253b381b3SDavid Woodhouse 
2183b8bea09aSQu Wenruo 		if (trace_raid56_scrub_read_recover_enabled()) {
2184b8bea09aSQu Wenruo 			struct raid56_bio_trace_info trace_info = { 0 };
2185b8bea09aSQu Wenruo 
2186b8bea09aSQu Wenruo 			bio_get_trace_info(rbio, bio, &trace_info);
2187b8bea09aSQu Wenruo 			trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
2188b8bea09aSQu Wenruo 		}
21894e49ea4aSMike Christie 		submit_bio(bio);
219053b381b3SDavid Woodhouse 	}
2191813f8a0eSNikolay Borisov 
219253b381b3SDavid Woodhouse 	return 0;
219353b381b3SDavid Woodhouse 
219453b381b3SDavid Woodhouse cleanup:
2195b4ee1782SOmar Sandoval 	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
2196b4ee1782SOmar Sandoval 	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
219758efbc9fSOmar Sandoval 		rbio_orig_end_io(rbio, BLK_STS_IOERR);
2198785884fcSLiu Bo 
2199785884fcSLiu Bo 	while ((bio = bio_list_pop(&bio_list)))
2200785884fcSLiu Bo 		bio_put(bio);
2201785884fcSLiu Bo 
220253b381b3SDavid Woodhouse 	return -EIO;
220353b381b3SDavid Woodhouse }
220453b381b3SDavid Woodhouse 
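/*
 * Illustrative sketch, not part of the original file: how the flat
 * total_sector_nr used by __raid56_parity_recover() above decomposes into a
 * (stripe, sectornr) pair.  When a whole stripe is known to have failed, the
 * loop above skips it by adding stripe_nsectors - 1 so that the loop
 * increment lands on the first sector of the next stripe.  The helper name
 * is hypothetical and only mirrors the arithmetic in the function above.
 */
static inline void demo_decompose_sector_nr(int total_sector_nr,
					    int stripe_nsectors,
					    int *stripe, int *sectornr)
{
	/* Which on-disk stripe this flat sector index belongs to. */
	*stripe = total_sector_nr / stripe_nsectors;
	/* Offset of the sector inside that stripe. */
	*sectornr = total_sector_nr % stripe_nsectors;
}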
220553b381b3SDavid Woodhouse /*
220653b381b3SDavid Woodhouse  * the main entry point for reads from the higher layers.  This
220753b381b3SDavid Woodhouse  * is really only called when the normal read path had a failure,
220853b381b3SDavid Woodhouse  * so we assume the bio they send down corresponds to a failed part
220953b381b3SDavid Woodhouse  * of the drive.
221053b381b3SDavid Woodhouse  */
22116a258d72SQu Wenruo int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
2212cc353a8bSQu Wenruo 			  u32 stripe_len, int mirror_num, int generic_io)
221353b381b3SDavid Woodhouse {
22146a258d72SQu Wenruo 	struct btrfs_fs_info *fs_info = bioc->fs_info;
221553b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio;
221653b381b3SDavid Woodhouse 	int ret;
221753b381b3SDavid Woodhouse 
2218abad60c6SLiu Bo 	if (generic_io) {
22194c664611SQu Wenruo 		ASSERT(bioc->mirror_num == mirror_num);
2220c3a3b19bSQu Wenruo 		btrfs_bio(bio)->mirror_num = mirror_num;
2221abad60c6SLiu Bo 	}
2222abad60c6SLiu Bo 
22234c664611SQu Wenruo 	rbio = alloc_rbio(fs_info, bioc, stripe_len);
2224af8e2d1dSMiao Xie 	if (IS_ERR(rbio)) {
22256e9606d2SZhao Lei 		if (generic_io)
22264c664611SQu Wenruo 			btrfs_put_bioc(bioc);
222753b381b3SDavid Woodhouse 		return PTR_ERR(rbio);
2228af8e2d1dSMiao Xie 	}
222953b381b3SDavid Woodhouse 
22301b94b556SMiao Xie 	rbio->operation = BTRFS_RBIO_READ_REBUILD;
2231bd8f7e62SQu Wenruo 	rbio_add_bio(rbio, bio);
223253b381b3SDavid Woodhouse 
223353b381b3SDavid Woodhouse 	rbio->faila = find_logical_bio_stripe(rbio, bio);
223453b381b3SDavid Woodhouse 	if (rbio->faila == -1) {
22350b246afaSJeff Mahoney 		btrfs_warn(fs_info,
22364c664611SQu Wenruo "%s could not find the bad stripe in raid56 so we cannot recover it (bio has logical %llu len %llu, bioc has map_type %llu)",
22371201b58bSDavid Sterba 			   __func__, bio->bi_iter.bi_sector << 9,
22384c664611SQu Wenruo 			   (u64)bio->bi_iter.bi_size, bioc->map_type);
22396e9606d2SZhao Lei 		if (generic_io)
22404c664611SQu Wenruo 			btrfs_put_bioc(bioc);
224153b381b3SDavid Woodhouse 		kfree(rbio);
224253b381b3SDavid Woodhouse 		return -EIO;
224353b381b3SDavid Woodhouse 	}
224453b381b3SDavid Woodhouse 
22454245215dSMiao Xie 	if (generic_io) {
22460b246afaSJeff Mahoney 		btrfs_bio_counter_inc_noblocked(fs_info);
22474245215dSMiao Xie 		rbio->generic_bio_cnt = 1;
22484245215dSMiao Xie 	} else {
22494c664611SQu Wenruo 		btrfs_get_bioc(bioc);
22504245215dSMiao Xie 	}
22514245215dSMiao Xie 
225253b381b3SDavid Woodhouse 	/*
22538810f751SLiu Bo 	 * Loop retry:
22548810f751SLiu Bo 	 * for mirror_num == 2, reconstruct from all other stripes.
22558810f751SLiu Bo 	 * for mirror_num > 2, pick a different stripe to fail on every retry.
225653b381b3SDavid Woodhouse 	 */
22578810f751SLiu Bo 	if (mirror_num > 2) {
22588810f751SLiu Bo 		/*
22598810f751SLiu Bo 		 * mirror_num == 3 fails the P stripe and rebuilds from Q;
22608810f751SLiu Bo 		 * mirror_num > 3 fails a data stripe and rebuilds from P+Q
22618810f751SLiu Bo 		 * (this mapping is sketched after the function).
22628810f751SLiu Bo 		 */
22638810f751SLiu Bo 		rbio->failb = rbio->real_stripes - (mirror_num - 1);
22648810f751SLiu Bo 		ASSERT(rbio->failb > 0);
22658810f751SLiu Bo 		if (rbio->failb <= rbio->faila)
22668810f751SLiu Bo 			rbio->failb--;
22678810f751SLiu Bo 	}
226853b381b3SDavid Woodhouse 
226953b381b3SDavid Woodhouse 	ret = lock_stripe_add(rbio);
227053b381b3SDavid Woodhouse 
227153b381b3SDavid Woodhouse 	/*
227253b381b3SDavid Woodhouse 	 * __raid56_parity_recover() will end the bio with any
227353b381b3SDavid Woodhouse 	 * errors it hits.  We don't want to return its error value
227453b381b3SDavid Woodhouse 	 * up the stack because our caller will end up calling
227553b381b3SDavid Woodhouse 	 * bio_endio() with any nonzero return.
227753b381b3SDavid Woodhouse 	 */
227853b381b3SDavid Woodhouse 	if (ret == 0)
227953b381b3SDavid Woodhouse 		__raid56_parity_recover(rbio);
228053b381b3SDavid Woodhouse 	/*
228153b381b3SDavid Woodhouse 	 * Our rbio has been added to the list of rbios that will
228253b381b3SDavid Woodhouse 	 * be handled once the current lock owner is done.
228453b381b3SDavid Woodhouse 	 */
228553b381b3SDavid Woodhouse 	return 0;
228653b381b3SDavid Woodhouse 
228753b381b3SDavid Woodhouse }
228853b381b3SDavid Woodhouse 
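/*
 * Illustrative sketch, not part of the original file: the mirror_num ->
 * failb mapping used by raid56_parity_recover() above.  mirror_num == 3
 * fails the P stripe (index real_stripes - 2), larger mirror numbers fail a
 * data stripe, and the index is shifted down by one when it would collide
 * with the stripe already recorded in faila.  The helper name is
 * hypothetical.
 */
static inline int demo_pick_failb(int mirror_num, int faila, int real_stripes)
{
	/* Same formula as above: mirror 3 -> P, mirror 4 -> last data stripe, ... */
	int failb = real_stripes - (mirror_num - 1);

	/* Never fail the same stripe twice. */
	if (failb <= faila)
		failb--;
	return failb;
}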
2289385de0efSChristoph Hellwig static void rmw_work(struct work_struct *work)
229053b381b3SDavid Woodhouse {
229153b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio;
229253b381b3SDavid Woodhouse 
229353b381b3SDavid Woodhouse 	rbio = container_of(work, struct btrfs_raid_bio, work);
229453b381b3SDavid Woodhouse 	raid56_rmw_stripe(rbio);
229553b381b3SDavid Woodhouse }
229653b381b3SDavid Woodhouse 
2297385de0efSChristoph Hellwig static void read_rebuild_work(struct work_struct *work)
229853b381b3SDavid Woodhouse {
229953b381b3SDavid Woodhouse 	struct btrfs_raid_bio *rbio;
230053b381b3SDavid Woodhouse 
230153b381b3SDavid Woodhouse 	rbio = container_of(work, struct btrfs_raid_bio, work);
230253b381b3SDavid Woodhouse 	__raid56_parity_recover(rbio);
230353b381b3SDavid Woodhouse }
23045a6ac9eaSMiao Xie 
23055a6ac9eaSMiao Xie /*
23065a6ac9eaSMiao Xie  * The following code is used to scrub/replace the parity stripe
23075a6ac9eaSMiao Xie  *
23084c664611SQu Wenruo  * Caller must have already increased bio_counter for getting @bioc.
2309ae6529c3SQu Wenruo  *
23115a6ac9eaSMiao Xie  * Note: We must make sure all the pages added to the scrub/replace raid bio
23125a6ac9eaSMiao Xie  * are correct and will not change during the scrub/replace, i.e. those pages
23135a6ac9eaSMiao Xie  * only hold metadata or file data protected by a checksum.
23135a6ac9eaSMiao Xie  */
23145a6ac9eaSMiao Xie 
23156a258d72SQu Wenruo struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
23166a258d72SQu Wenruo 				struct btrfs_io_context *bioc,
2317cc353a8bSQu Wenruo 				u32 stripe_len, struct btrfs_device *scrub_dev,
23185a6ac9eaSMiao Xie 				unsigned long *dbitmap, int stripe_nsectors)
23195a6ac9eaSMiao Xie {
23206a258d72SQu Wenruo 	struct btrfs_fs_info *fs_info = bioc->fs_info;
23215a6ac9eaSMiao Xie 	struct btrfs_raid_bio *rbio;
23225a6ac9eaSMiao Xie 	int i;
23235a6ac9eaSMiao Xie 
23244c664611SQu Wenruo 	rbio = alloc_rbio(fs_info, bioc, stripe_len);
23255a6ac9eaSMiao Xie 	if (IS_ERR(rbio))
23265a6ac9eaSMiao Xie 		return NULL;
23275a6ac9eaSMiao Xie 	bio_list_add(&rbio->bio_list, bio);
23285a6ac9eaSMiao Xie 	/*
23295a6ac9eaSMiao Xie 	 * This is a special bio which is used to hold the completion handler
23305a6ac9eaSMiao Xie 	 * and make the scrub rbio similar to the other rbio types.
23315a6ac9eaSMiao Xie 	 */
23325a6ac9eaSMiao Xie 	ASSERT(!bio->bi_iter.bi_size);
23335a6ac9eaSMiao Xie 	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
23345a6ac9eaSMiao Xie 
23359cd3a7ebSLiu Bo 	/*
23364c664611SQu Wenruo 	 * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
23379cd3a7ebSLiu Bo 	 * to the end position, so this search can start from the first parity
23389cd3a7ebSLiu Bo 	 * stripe.
23399cd3a7ebSLiu Bo 	 */
23409cd3a7ebSLiu Bo 	for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
23414c664611SQu Wenruo 		if (bioc->stripes[i].dev == scrub_dev) {
23425a6ac9eaSMiao Xie 			rbio->scrubp = i;
23435a6ac9eaSMiao Xie 			break;
23445a6ac9eaSMiao Xie 		}
23455a6ac9eaSMiao Xie 	}
23469cd3a7ebSLiu Bo 	ASSERT(i < rbio->real_stripes);
23475a6ac9eaSMiao Xie 
2348c67c68ebSQu Wenruo 	bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
23495a6ac9eaSMiao Xie 
2350ae6529c3SQu Wenruo 	/*
23514c664611SQu Wenruo 	 * We have already increased bio_counter when getting the bioc; record
2352ae6529c3SQu Wenruo 	 * that so we can release it at rbio_orig_end_io().
2353ae6529c3SQu Wenruo 	 */
2354ae6529c3SQu Wenruo 	rbio->generic_bio_cnt = 1;
2355ae6529c3SQu Wenruo 
23565a6ac9eaSMiao Xie 	return rbio;
23575a6ac9eaSMiao Xie }
23585a6ac9eaSMiao Xie 
2359b4ee1782SOmar Sandoval /* Used for both parity scrub and rebuilding a missing device. */
2360b4ee1782SOmar Sandoval void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
23616346f6bfSQu Wenruo 			    unsigned int pgoff, u64 logical)
23625a6ac9eaSMiao Xie {
23636346f6bfSQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
23645a6ac9eaSMiao Xie 	int stripe_offset;
23655a6ac9eaSMiao Xie 	int index;
23665a6ac9eaSMiao Xie 
23674c664611SQu Wenruo 	ASSERT(logical >= rbio->bioc->raid_map[0]);
23686346f6bfSQu Wenruo 	ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] +
23695a6ac9eaSMiao Xie 				rbio->stripe_len * rbio->nr_data);
23704c664611SQu Wenruo 	stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
23716346f6bfSQu Wenruo 	index = stripe_offset / sectorsize;
23726346f6bfSQu Wenruo 	rbio->bio_sectors[index].page = page;
23736346f6bfSQu Wenruo 	rbio->bio_sectors[index].pgoff = pgoff;
23745a6ac9eaSMiao Xie }
23755a6ac9eaSMiao Xie 
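/*
 * Illustrative sketch, not part of the original file: how
 * raid56_add_scrub_pages() above turns a logical address into an index into
 * the bio_sectors array.  The offset from the start of the full stripe
 * (bioc->raid_map[0]) is simply counted in sectors.  The helper name and the
 * full_stripe_start parameter are hypothetical.
 */
static inline int demo_scrub_sector_index(u64 logical, u64 full_stripe_start,
					  u32 sectorsize)
{
	/* Offset inside the full stripe, small enough to fit in an int. */
	int stripe_offset = (int)(logical - full_stripe_start);

	return stripe_offset / (int)sectorsize;
}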
23765a6ac9eaSMiao Xie /*
23775a6ac9eaSMiao Xie  * We only scrub the parity for the horizontal stripes where we have correct
23785a6ac9eaSMiao Xie  * data, so we don't need to allocate pages for all the stripes.
23795a6ac9eaSMiao Xie  */
23805a6ac9eaSMiao Xie static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
23815a6ac9eaSMiao Xie {
23823907ce29SQu Wenruo 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
23833907ce29SQu Wenruo 	int stripe;
23843907ce29SQu Wenruo 	int sectornr;
23855a6ac9eaSMiao Xie 
2386c67c68ebSQu Wenruo 	for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
23873907ce29SQu Wenruo 		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
23883907ce29SQu Wenruo 			struct page *page;
23893907ce29SQu Wenruo 			int index = (stripe * rbio->stripe_nsectors + sectornr) *
23903907ce29SQu Wenruo 				    sectorsize >> PAGE_SHIFT;
23913907ce29SQu Wenruo 
23925a6ac9eaSMiao Xie 			if (rbio->stripe_pages[index])
23935a6ac9eaSMiao Xie 				continue;
23945a6ac9eaSMiao Xie 
2395b0ee5e1eSDavid Sterba 			page = alloc_page(GFP_NOFS);
23965a6ac9eaSMiao Xie 			if (!page)
23975a6ac9eaSMiao Xie 				return -ENOMEM;
23985a6ac9eaSMiao Xie 			rbio->stripe_pages[index] = page;
23995a6ac9eaSMiao Xie 		}
24005a6ac9eaSMiao Xie 	}
2401eb357060SQu Wenruo 	index_stripe_sectors(rbio);
24025a6ac9eaSMiao Xie 	return 0;
24035a6ac9eaSMiao Xie }
24045a6ac9eaSMiao Xie 
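/*
 * Illustrative sketch, not part of the original file: the sector -> page
 * index mapping used by alloc_rbio_essential_pages() above.  Because
 * sectorsize can be smaller than PAGE_SIZE (subpage support), several
 * consecutive sectors of the same stripe may share one stripe page.  The
 * helper name is hypothetical.
 */
static inline int demo_sector_to_page_index(int stripe, int sectornr,
					    int stripe_nsectors, u32 sectorsize)
{
	/* Flat sector index across all stripes, scaled to bytes, then to pages. */
	return (stripe * stripe_nsectors + sectornr) * sectorsize >> PAGE_SHIFT;
}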
24055a6ac9eaSMiao Xie static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
24065a6ac9eaSMiao Xie 					 int need_check)
24075a6ac9eaSMiao Xie {
24084c664611SQu Wenruo 	struct btrfs_io_context *bioc = rbio->bioc;
240946900662SQu Wenruo 	const u32 sectorsize = bioc->fs_info->sectorsize;
24101389053eSKees Cook 	void **pointers = rbio->finish_pointers;
2411c67c68ebSQu Wenruo 	unsigned long *pbitmap = &rbio->finish_pbitmap;
24125a6ac9eaSMiao Xie 	int nr_data = rbio->nr_data;
24135a6ac9eaSMiao Xie 	int stripe;
24143e77605dSQu Wenruo 	int sectornr;
2415c17af965SDavid Sterba 	bool has_qstripe;
241646900662SQu Wenruo 	struct sector_ptr p_sector = { 0 };
241746900662SQu Wenruo 	struct sector_ptr q_sector = { 0 };
24185a6ac9eaSMiao Xie 	struct bio_list bio_list;
24195a6ac9eaSMiao Xie 	struct bio *bio;
242076035976SMiao Xie 	int is_replace = 0;
24215a6ac9eaSMiao Xie 	int ret;
24225a6ac9eaSMiao Xie 
24235a6ac9eaSMiao Xie 	bio_list_init(&bio_list);
24245a6ac9eaSMiao Xie 
2425c17af965SDavid Sterba 	if (rbio->real_stripes - rbio->nr_data == 1)
2426c17af965SDavid Sterba 		has_qstripe = false;
2427c17af965SDavid Sterba 	else if (rbio->real_stripes - rbio->nr_data == 2)
2428c17af965SDavid Sterba 		has_qstripe = true;
2429c17af965SDavid Sterba 	else
24305a6ac9eaSMiao Xie 		BUG();
24315a6ac9eaSMiao Xie 
24324c664611SQu Wenruo 	if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
243376035976SMiao Xie 		is_replace = 1;
2434c67c68ebSQu Wenruo 		bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors);
243576035976SMiao Xie 	}
243676035976SMiao Xie 
24375a6ac9eaSMiao Xie 	/*
24385a6ac9eaSMiao Xie 	 * The higher layers (the scrubber) are unlikely to use this
24395a6ac9eaSMiao Xie 	 * area of the disk again soon, so don't cache it.
24415a6ac9eaSMiao Xie 	 */
24425a6ac9eaSMiao Xie 	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
24435a6ac9eaSMiao Xie 
24445a6ac9eaSMiao Xie 	if (!need_check)
24455a6ac9eaSMiao Xie 		goto writeback;
24465a6ac9eaSMiao Xie 
244746900662SQu Wenruo 	p_sector.page = alloc_page(GFP_NOFS);
244846900662SQu Wenruo 	if (!p_sector.page)
24495a6ac9eaSMiao Xie 		goto cleanup;
245046900662SQu Wenruo 	p_sector.pgoff = 0;
245146900662SQu Wenruo 	p_sector.uptodate = 1;
24525a6ac9eaSMiao Xie 
2453c17af965SDavid Sterba 	if (has_qstripe) {
2454d70cef0dSIra Weiny 		/* RAID6, allocate and map temp space for the Q stripe */
245546900662SQu Wenruo 		q_sector.page = alloc_page(GFP_NOFS);
245646900662SQu Wenruo 		if (!q_sector.page) {
245746900662SQu Wenruo 			__free_page(p_sector.page);
245846900662SQu Wenruo 			p_sector.page = NULL;
24595a6ac9eaSMiao Xie 			goto cleanup;
24605a6ac9eaSMiao Xie 		}
246146900662SQu Wenruo 		q_sector.pgoff = 0;
246246900662SQu Wenruo 		q_sector.uptodate = 1;
246346900662SQu Wenruo 		pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
24645a6ac9eaSMiao Xie 	}
24655a6ac9eaSMiao Xie 
24665a6ac9eaSMiao Xie 	atomic_set(&rbio->error, 0);
24675a6ac9eaSMiao Xie 
2468d70cef0dSIra Weiny 	/* Map the parity stripe just once */
246946900662SQu Wenruo 	pointers[nr_data] = kmap_local_page(p_sector.page);
2470d70cef0dSIra Weiny 
2471c67c68ebSQu Wenruo 	for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
247246900662SQu Wenruo 		struct sector_ptr *sector;
24735a6ac9eaSMiao Xie 		void *parity;
247446900662SQu Wenruo 
24755a6ac9eaSMiao Xie 		/* First collect one sector from each data stripe. */
24765a6ac9eaSMiao Xie 		for (stripe = 0; stripe < nr_data; stripe++) {
247746900662SQu Wenruo 			sector = sector_in_rbio(rbio, stripe, sectornr, 0);
247846900662SQu Wenruo 			pointers[stripe] = kmap_local_page(sector->page) +
247946900662SQu Wenruo 					   sector->pgoff;
24805a6ac9eaSMiao Xie 		}
24815a6ac9eaSMiao Xie 
2482c17af965SDavid Sterba 		if (has_qstripe) {
2483d70cef0dSIra Weiny 			/* RAID6, call the library function to fill in our P/Q */
248446900662SQu Wenruo 			raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
24855a6ac9eaSMiao Xie 						pointers);
24865a6ac9eaSMiao Xie 		} else {
24875a6ac9eaSMiao Xie 			/* raid5 */
248846900662SQu Wenruo 			memcpy(pointers[nr_data], pointers[0], sectorsize);
248946900662SQu Wenruo 			run_xor(pointers + 1, nr_data - 1, sectorsize);
24905a6ac9eaSMiao Xie 		}
24915a6ac9eaSMiao Xie 
249201327610SNicholas D Steeves 		/* Check the parity being scrubbed and repair it if it is wrong */
249346900662SQu Wenruo 		sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
249446900662SQu Wenruo 		parity = kmap_local_page(sector->page) + sector->pgoff;
249546900662SQu Wenruo 		if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
249646900662SQu Wenruo 			memcpy(parity, pointers[rbio->scrubp], sectorsize);
24975a6ac9eaSMiao Xie 		else
24985a6ac9eaSMiao Xie 			/* The parity is correct, no need to write it back */
2499c67c68ebSQu Wenruo 			bitmap_clear(&rbio->dbitmap, sectornr, 1);
250058c1a35cSIra Weiny 		kunmap_local(parity);
25015a6ac9eaSMiao Xie 
250294a0b58dSIra Weiny 		for (stripe = nr_data - 1; stripe >= 0; stripe--)
250394a0b58dSIra Weiny 			kunmap_local(pointers[stripe]);
25045a6ac9eaSMiao Xie 	}
25055a6ac9eaSMiao Xie 
250694a0b58dSIra Weiny 	kunmap_local(pointers[nr_data]);
250746900662SQu Wenruo 	__free_page(p_sector.page);
250846900662SQu Wenruo 	p_sector.page = NULL;
250946900662SQu Wenruo 	if (q_sector.page) {
251094a0b58dSIra Weiny 		kunmap_local(pointers[rbio->real_stripes - 1]);
251146900662SQu Wenruo 		__free_page(q_sector.page);
251246900662SQu Wenruo 		q_sector.page = NULL;
2513d70cef0dSIra Weiny 	}
25145a6ac9eaSMiao Xie 
25155a6ac9eaSMiao Xie writeback:
25165a6ac9eaSMiao Xie 	/*
25175a6ac9eaSMiao Xie 	 * Time to start writing.  Make bios for the sectors of the scrubbed
25185a6ac9eaSMiao Xie 	 * parity stripe that still need repair (tracked in dbitmap) and
25195a6ac9eaSMiao Xie 	 * ignore everything else.
25205a6ac9eaSMiao Xie 	 */
2521c67c68ebSQu Wenruo 	for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
25223e77605dSQu Wenruo 		struct sector_ptr *sector;
25235a6ac9eaSMiao Xie 
25243e77605dSQu Wenruo 		sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
25253e77605dSQu Wenruo 		ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp,
25263e77605dSQu Wenruo 					 sectornr, rbio->stripe_len, REQ_OP_WRITE);
25275a6ac9eaSMiao Xie 		if (ret)
25285a6ac9eaSMiao Xie 			goto cleanup;
25295a6ac9eaSMiao Xie 	}
25305a6ac9eaSMiao Xie 
253176035976SMiao Xie 	if (!is_replace)
253276035976SMiao Xie 		goto submit_write;
253376035976SMiao Xie 
25343e77605dSQu Wenruo 	for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
25353e77605dSQu Wenruo 		struct sector_ptr *sector;
253676035976SMiao Xie 
25373e77605dSQu Wenruo 		sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
25383e77605dSQu Wenruo 		ret = rbio_add_io_sector(rbio, &bio_list, sector,
25394c664611SQu Wenruo 				       bioc->tgtdev_map[rbio->scrubp],
25403e77605dSQu Wenruo 				       sectornr, rbio->stripe_len, REQ_OP_WRITE);
254176035976SMiao Xie 		if (ret)
254276035976SMiao Xie 			goto cleanup;
254376035976SMiao Xie 	}
254476035976SMiao Xie 
254576035976SMiao Xie submit_write:
25465a6ac9eaSMiao Xie 	nr_data = bio_list_size(&bio_list);
25475a6ac9eaSMiao Xie 	if (!nr_data) {
25485a6ac9eaSMiao Xie 		/* Every parity sector was correct, nothing to write back */
254958efbc9fSOmar Sandoval 		rbio_orig_end_io(rbio, BLK_STS_OK);
25505a6ac9eaSMiao Xie 		return;
25515a6ac9eaSMiao Xie 	}
25525a6ac9eaSMiao Xie 
25535a6ac9eaSMiao Xie 	atomic_set(&rbio->stripes_pending, nr_data);
25545a6ac9eaSMiao Xie 
2555bf28a605SNikolay Borisov 	while ((bio = bio_list_pop(&bio_list))) {
2556a6111d11SZhao Lei 		bio->bi_end_io = raid_write_end_io;
25574e49ea4aSMike Christie 
2558b8bea09aSQu Wenruo 		if (trace_raid56_scrub_write_stripe_enabled()) {
2559b8bea09aSQu Wenruo 			struct raid56_bio_trace_info trace_info = { 0 };
2560b8bea09aSQu Wenruo 
2561b8bea09aSQu Wenruo 			bio_get_trace_info(rbio, bio, &trace_info);
2562b8bea09aSQu Wenruo 			trace_raid56_scrub_write_stripe(rbio, bio, &trace_info);
2563b8bea09aSQu Wenruo 		}
25644e49ea4aSMike Christie 		submit_bio(bio);
25655a6ac9eaSMiao Xie 	}
25665a6ac9eaSMiao Xie 	return;
25675a6ac9eaSMiao Xie 
25685a6ac9eaSMiao Xie cleanup:
256958efbc9fSOmar Sandoval 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
2570785884fcSLiu Bo 
2571785884fcSLiu Bo 	while ((bio = bio_list_pop(&bio_list)))
2572785884fcSLiu Bo 		bio_put(bio);
25735a6ac9eaSMiao Xie }
25745a6ac9eaSMiao Xie 
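/*
 * Illustrative sketch, not part of the original file: the RAID5 branch of
 * the parity check in finish_parity_scrub() above, written with plain byte
 * loops instead of run_xor() and memcmp().  The expected parity is the XOR
 * of all data sectors; the on-disk parity is rewritten only where it
 * differs.  Returns true when the sector needs to be written back.  The
 * helper name is hypothetical.
 */
static inline bool demo_check_raid5_parity_sector(void **data, int nr_data,
						  u8 *parity, u32 sectorsize)
{
	bool mismatch = false;
	u32 i;

	for (i = 0; i < sectorsize; i++) {
		u8 expected = 0;
		int stripe;

		/* XOR the i-th byte of every data sector. */
		for (stripe = 0; stripe < nr_data; stripe++)
			expected ^= ((const u8 *)data[stripe])[i];

		if (parity[i] != expected) {
			/* Repair in place and remember to write it back. */
			parity[i] = expected;
			mismatch = true;
		}
	}
	return mismatch;
}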
25755a6ac9eaSMiao Xie static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
25765a6ac9eaSMiao Xie {
25775a6ac9eaSMiao Xie 	if (stripe >= 0 && stripe < rbio->nr_data)
25785a6ac9eaSMiao Xie 		return 1;
25795a6ac9eaSMiao Xie 	return 0;
25805a6ac9eaSMiao Xie }
25815a6ac9eaSMiao Xie 
25825a6ac9eaSMiao Xie /*
25835a6ac9eaSMiao Xie  * While we're doing the parity check and repair, we could have errors
25845a6ac9eaSMiao Xie  * in reading pages off the disk.  This checks for errors and if we're
25855a6ac9eaSMiao Xie  * not able to read the page it'll trigger parity reconstruction.  The
25865a6ac9eaSMiao Xie  * parity scrub will be finished after we've reconstructed the failed
25875a6ac9eaSMiao Xie  * stripes.
25885a6ac9eaSMiao Xie  */
25895a6ac9eaSMiao Xie static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
25905a6ac9eaSMiao Xie {
25914c664611SQu Wenruo 	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
25925a6ac9eaSMiao Xie 		goto cleanup;
25935a6ac9eaSMiao Xie 
25945a6ac9eaSMiao Xie 	if (rbio->faila >= 0 || rbio->failb >= 0) {
25955a6ac9eaSMiao Xie 		int dfail = 0, failp = -1;
25965a6ac9eaSMiao Xie 
25975a6ac9eaSMiao Xie 		if (is_data_stripe(rbio, rbio->faila))
25985a6ac9eaSMiao Xie 			dfail++;
25995a6ac9eaSMiao Xie 		else if (is_parity_stripe(rbio->faila))
26005a6ac9eaSMiao Xie 			failp = rbio->faila;
26015a6ac9eaSMiao Xie 
26025a6ac9eaSMiao Xie 		if (is_data_stripe(rbio, rbio->failb))
26035a6ac9eaSMiao Xie 			dfail++;
26045a6ac9eaSMiao Xie 		else if (is_parity_stripe(rbio->failb))
26055a6ac9eaSMiao Xie 			failp = rbio->failb;
26065a6ac9eaSMiao Xie 
26075a6ac9eaSMiao Xie 		/*
26085a6ac9eaSMiao Xie 		 * Because the parity being scrubbed cannot be used to
26095a6ac9eaSMiao Xie 		 * repair data, our repair capability is reduced by one.
26105a6ac9eaSMiao Xie 		 * (For RAID5 this means we cannot repair anything.)
26115a6ac9eaSMiao Xie 		 */
26124c664611SQu Wenruo 		if (dfail > rbio->bioc->max_errors - 1)
26135a6ac9eaSMiao Xie 			goto cleanup;
26145a6ac9eaSMiao Xie 
26155a6ac9eaSMiao Xie 		/*
26165a6ac9eaSMiao Xie 		 * If all the data stripes are good, only the parity is bad;
26175a6ac9eaSMiao Xie 		 * just repair the parity.
26185a6ac9eaSMiao Xie 		 */
26195a6ac9eaSMiao Xie 		if (dfail == 0) {
26205a6ac9eaSMiao Xie 			finish_parity_scrub(rbio, 0);
26215a6ac9eaSMiao Xie 			return;
26225a6ac9eaSMiao Xie 		}
26235a6ac9eaSMiao Xie 
26245a6ac9eaSMiao Xie 		/*
26255a6ac9eaSMiao Xie 		 * Reaching here means one corrupted data stripe and one
26265a6ac9eaSMiao Xie 		 * corrupted parity on RAID6.  If the corrupted parity is the
262701327610SNicholas D Steeves 		 * one being scrubbed, use the other parity to repair the data;
26285a6ac9eaSMiao Xie 		 * otherwise we cannot repair the data (see the sketch after this function).
26295a6ac9eaSMiao Xie 		 */
26305a6ac9eaSMiao Xie 		if (failp != rbio->scrubp)
26315a6ac9eaSMiao Xie 			goto cleanup;
26325a6ac9eaSMiao Xie 
26335a6ac9eaSMiao Xie 		__raid_recover_end_io(rbio);
26345a6ac9eaSMiao Xie 	} else {
26355a6ac9eaSMiao Xie 		finish_parity_scrub(rbio, 1);
26365a6ac9eaSMiao Xie 	}
26375a6ac9eaSMiao Xie 	return;
26385a6ac9eaSMiao Xie 
26395a6ac9eaSMiao Xie cleanup:
264058efbc9fSOmar Sandoval 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
26415a6ac9eaSMiao Xie }
26425a6ac9eaSMiao Xie 
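/*
 * Illustrative sketch, not part of the original file: the decision tree of
 * validate_rbio_for_parity_scrub() above reduced to a pure function.  A
 * stripe index in [0, nr_data) is a data stripe, anything >= nr_data is a
 * parity stripe, and -1 means "no failure".  The enum and helper names are
 * hypothetical.
 */
enum demo_scrub_action {
	DEMO_SCRUB_GIVE_UP,		/* too many errors, end with an I/O error */
	DEMO_SCRUB_PARITY_ONLY,		/* data is fine, just repair the parity */
	DEMO_SCRUB_RECOVER_FIRST,	/* rebuild the failed data, then finish the scrub */
	DEMO_SCRUB_CHECK_AND_REPAIR,	/* no failures, do the full check and repair */
};

static inline enum demo_scrub_action demo_pick_scrub_action(int faila, int failb,
							    int nr_data, int scrubp,
							    int max_errors)
{
	int dfail = 0, failp = -1;

	if (faila < 0 && failb < 0)
		return DEMO_SCRUB_CHECK_AND_REPAIR;

	if (faila >= 0 && faila < nr_data)
		dfail++;
	else if (faila >= nr_data)
		failp = faila;

	if (failb >= 0 && failb < nr_data)
		dfail++;
	else if (failb >= nr_data)
		failp = failb;

	/* The scrubbed parity cannot also be used to rebuild data. */
	if (dfail > max_errors - 1)
		return DEMO_SCRUB_GIVE_UP;
	if (dfail == 0)
		return DEMO_SCRUB_PARITY_ONLY;
	/*
	 * Mirrors the failp != scrubp check above: the failed parity must be
	 * the one being scrubbed, or the data stripe cannot be rebuilt.
	 */
	if (failp != scrubp)
		return DEMO_SCRUB_GIVE_UP;
	return DEMO_SCRUB_RECOVER_FIRST;
}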
26435a6ac9eaSMiao Xie /*
26445a6ac9eaSMiao Xie  * End io work for the read phase of the parity scrub.  All the bios here are
26455a6ac9eaSMiao Xie  * physical stripe bios we've read from the disk so we can recheck the parity
26465a6ac9eaSMiao Xie  * of the stripe.
26475a6ac9eaSMiao Xie  *
26485a6ac9eaSMiao Xie  * This will usually kick off finish_parity_scrub() once all the bios are read
26495a6ac9eaSMiao Xie  * in, but it may trigger parity reconstruction if we had any read errors.
26505a6ac9eaSMiao Xie  */
2651d34e123dSChristoph Hellwig static void raid56_parity_scrub_end_io_work(struct work_struct *work)
26525a6ac9eaSMiao Xie {
2653d34e123dSChristoph Hellwig 	struct btrfs_raid_bio *rbio =
2654d34e123dSChristoph Hellwig 		container_of(work, struct btrfs_raid_bio, end_io_work);
26555a6ac9eaSMiao Xie 
26565a6ac9eaSMiao Xie 	/*
2657d34e123dSChristoph Hellwig 	 * This will normally call finish_parity_scrub() to start our write,
2658d34e123dSChristoph Hellwig 	 * but if there are any failed stripes we'll reconstruct from parity first.
26595a6ac9eaSMiao Xie 	 */
26605a6ac9eaSMiao Xie 	validate_rbio_for_parity_scrub(rbio);
26615a6ac9eaSMiao Xie }
26625a6ac9eaSMiao Xie 
26635a6ac9eaSMiao Xie static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
26645a6ac9eaSMiao Xie {
26655a6ac9eaSMiao Xie 	int bios_to_read = 0;
26665a6ac9eaSMiao Xie 	struct bio_list bio_list;
26675a6ac9eaSMiao Xie 	int ret;
26683e77605dSQu Wenruo 	int sectornr;
26695a6ac9eaSMiao Xie 	int stripe;
26705a6ac9eaSMiao Xie 	struct bio *bio;
26715a6ac9eaSMiao Xie 
2672785884fcSLiu Bo 	bio_list_init(&bio_list);
2673785884fcSLiu Bo 
26745a6ac9eaSMiao Xie 	ret = alloc_rbio_essential_pages(rbio);
26755a6ac9eaSMiao Xie 	if (ret)
26765a6ac9eaSMiao Xie 		goto cleanup;
26775a6ac9eaSMiao Xie 
26785a6ac9eaSMiao Xie 	atomic_set(&rbio->error, 0);
26795a6ac9eaSMiao Xie 	/*
26805a6ac9eaSMiao Xie 	 * Build a list of bios to read all the missing parts of this
26815a6ac9eaSMiao Xie 	 * stripe.
26825a6ac9eaSMiao Xie 	 */
26832c8cdd6eSMiao Xie 	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
2684c67c68ebSQu Wenruo 		for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
26853e77605dSQu Wenruo 			struct sector_ptr *sector;
26865a6ac9eaSMiao Xie 			/*
26873e77605dSQu Wenruo 			 * We want to find all the sectors missing from the
26883e77605dSQu Wenruo 			 * rbio and read them from the disk.  If sector_in_rbio()
26893e77605dSQu Wenruo 			 * finds a sector in the bio list we don't need to read
26903e77605dSQu Wenruo 			 * it off the stripe.
26915a6ac9eaSMiao Xie 			 */
26923e77605dSQu Wenruo 			sector = sector_in_rbio(rbio, stripe, sectornr, 1);
26933e77605dSQu Wenruo 			if (sector)
26945a6ac9eaSMiao Xie 				continue;
26955a6ac9eaSMiao Xie 
26963e77605dSQu Wenruo 			sector = rbio_stripe_sector(rbio, stripe, sectornr);
26975a6ac9eaSMiao Xie 			/*
26983e77605dSQu Wenruo 			 * The bio cache may have handed us an uptodate sector.
26993e77605dSQu Wenruo 			 * If so, be happy and use it.
27005a6ac9eaSMiao Xie 			 */
27013e77605dSQu Wenruo 			if (sector->uptodate)
27025a6ac9eaSMiao Xie 				continue;
27035a6ac9eaSMiao Xie 
27043e77605dSQu Wenruo 			ret = rbio_add_io_sector(rbio, &bio_list, sector,
27053e77605dSQu Wenruo 						 stripe, sectornr, rbio->stripe_len,
27063e77605dSQu Wenruo 						 REQ_OP_READ);
27075a6ac9eaSMiao Xie 			if (ret)
27085a6ac9eaSMiao Xie 				goto cleanup;
27095a6ac9eaSMiao Xie 		}
27105a6ac9eaSMiao Xie 	}
27115a6ac9eaSMiao Xie 
27125a6ac9eaSMiao Xie 	bios_to_read = bio_list_size(&bio_list);
27135a6ac9eaSMiao Xie 	if (!bios_to_read) {
27145a6ac9eaSMiao Xie 		/*
27155a6ac9eaSMiao Xie 		 * This can happen if others have merged with us; it
27165a6ac9eaSMiao Xie 		 * means there is nothing left to read.
27175a6ac9eaSMiao Xie 		 * But if there are missing devices it may not be
27185a6ac9eaSMiao Xie 		 * safe to do the full stripe write yet.
27195a6ac9eaSMiao Xie 		 */
27205a6ac9eaSMiao Xie 		goto finish;
27215a6ac9eaSMiao Xie 	}
27225a6ac9eaSMiao Xie 
27235a6ac9eaSMiao Xie 	/*
27244c664611SQu Wenruo 	 * The bioc may be freed once we submit the last bio. Make sure not to
27254c664611SQu Wenruo 	 * touch it after that.
27265a6ac9eaSMiao Xie 	 */
27275a6ac9eaSMiao Xie 	atomic_set(&rbio->stripes_pending, bios_to_read);
2728d34e123dSChristoph Hellwig 	INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work);
2729bf28a605SNikolay Borisov 	while ((bio = bio_list_pop(&bio_list))) {
2730d34e123dSChristoph Hellwig 		bio->bi_end_io = raid56_bio_end_io;
27315a6ac9eaSMiao Xie 
2732b8bea09aSQu Wenruo 		if (trace_raid56_scrub_read_enabled()) {
2733b8bea09aSQu Wenruo 			struct raid56_bio_trace_info trace_info = { 0 };
2734b8bea09aSQu Wenruo 
2735b8bea09aSQu Wenruo 			bio_get_trace_info(rbio, bio, &trace_info);
2736b8bea09aSQu Wenruo 			trace_raid56_scrub_read(rbio, bio, &trace_info);
2737b8bea09aSQu Wenruo 		}
27384e49ea4aSMike Christie 		submit_bio(bio);
27395a6ac9eaSMiao Xie 	}
27405a6ac9eaSMiao Xie 	/* the actual write will happen once the reads are done */
27415a6ac9eaSMiao Xie 	return;
27425a6ac9eaSMiao Xie 
27435a6ac9eaSMiao Xie cleanup:
274458efbc9fSOmar Sandoval 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
2745785884fcSLiu Bo 
2746785884fcSLiu Bo 	while ((bio = bio_list_pop(&bio_list)))
2747785884fcSLiu Bo 		bio_put(bio);
2748785884fcSLiu Bo 
27495a6ac9eaSMiao Xie 	return;
27505a6ac9eaSMiao Xie 
27515a6ac9eaSMiao Xie finish:
27525a6ac9eaSMiao Xie 	validate_rbio_for_parity_scrub(rbio);
27535a6ac9eaSMiao Xie }
27545a6ac9eaSMiao Xie 
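/*
 * Illustrative sketch, not part of the original file: the submission pattern
 * shared by __raid56_parity_recover() and raid56_parity_scrub_stripe() above.
 * stripes_pending is set to the total number of bios *before* the first one
 * is submitted, because the endio of the last completing bio may drop the
 * final reference and free the shared context; touching it after the last
 * submit_bio() would be a use-after-free.  The struct and helper names are
 * hypothetical.
 */
struct demo_pending_ctx {
	atomic_t pending;
};

static inline void demo_submit_all(struct demo_pending_ctx *ctx,
				   struct bio_list *bios,
				   void (*endio)(struct bio *bio))
{
	struct bio *bio;

	/* Account for every bio up front, before any submission. */
	atomic_set(&ctx->pending, bio_list_size(bios));
	while ((bio = bio_list_pop(bios))) {
		bio->bi_end_io = endio;
		submit_bio(bio);
	}
	/* Do not dereference ctx here: the last endio may already have freed it. */
}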
2755385de0efSChristoph Hellwig static void scrub_parity_work(struct work_struct *work)
27565a6ac9eaSMiao Xie {
27575a6ac9eaSMiao Xie 	struct btrfs_raid_bio *rbio;
27585a6ac9eaSMiao Xie 
27595a6ac9eaSMiao Xie 	rbio = container_of(work, struct btrfs_raid_bio, work);
27605a6ac9eaSMiao Xie 	raid56_parity_scrub_stripe(rbio);
27615a6ac9eaSMiao Xie }
27625a6ac9eaSMiao Xie 
27635a6ac9eaSMiao Xie void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
27645a6ac9eaSMiao Xie {
27655a6ac9eaSMiao Xie 	if (!lock_stripe_add(rbio))
2766a81b747dSDavid Sterba 		start_async_work(rbio, scrub_parity_work);
27675a6ac9eaSMiao Xie }
2768b4ee1782SOmar Sandoval 
2769b4ee1782SOmar Sandoval /* The following code is used for dev replace of a missing RAID 5/6 device. */
2770b4ee1782SOmar Sandoval 
2771b4ee1782SOmar Sandoval struct btrfs_raid_bio *
27726a258d72SQu Wenruo raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
27736a258d72SQu Wenruo 			  u64 length)
2774b4ee1782SOmar Sandoval {
27756a258d72SQu Wenruo 	struct btrfs_fs_info *fs_info = bioc->fs_info;
2776b4ee1782SOmar Sandoval 	struct btrfs_raid_bio *rbio;
2777b4ee1782SOmar Sandoval 
27784c664611SQu Wenruo 	rbio = alloc_rbio(fs_info, bioc, length);
2779b4ee1782SOmar Sandoval 	if (IS_ERR(rbio))
2780b4ee1782SOmar Sandoval 		return NULL;
2781b4ee1782SOmar Sandoval 
2782b4ee1782SOmar Sandoval 	rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
2783b4ee1782SOmar Sandoval 	bio_list_add(&rbio->bio_list, bio);
2784b4ee1782SOmar Sandoval 	/*
2785b4ee1782SOmar Sandoval 	 * This is a special bio which is used to hold the completion handler
2786b4ee1782SOmar Sandoval 	 * and make this rbio similar to the other rbio types.
2787b4ee1782SOmar Sandoval 	 */
2788b4ee1782SOmar Sandoval 	ASSERT(!bio->bi_iter.bi_size);
2789b4ee1782SOmar Sandoval 
2790b4ee1782SOmar Sandoval 	rbio->faila = find_logical_bio_stripe(rbio, bio);
2791b4ee1782SOmar Sandoval 	if (rbio->faila == -1) {
2792b4ee1782SOmar Sandoval 		BUG();
2793b4ee1782SOmar Sandoval 		kfree(rbio);
2794b4ee1782SOmar Sandoval 		return NULL;
2795b4ee1782SOmar Sandoval 	}
2796b4ee1782SOmar Sandoval 
2797ae6529c3SQu Wenruo 	/*
27984c664611SQu Wenruo 	 * When we got the bioc, the bio_counter was already increased; record
2799ae6529c3SQu Wenruo 	 * that so we can release it at rbio_orig_end_io().
2800ae6529c3SQu Wenruo 	 */
2801ae6529c3SQu Wenruo 	rbio->generic_bio_cnt = 1;
2802ae6529c3SQu Wenruo 
2803b4ee1782SOmar Sandoval 	return rbio;
2804b4ee1782SOmar Sandoval }
2805b4ee1782SOmar Sandoval 
2806b4ee1782SOmar Sandoval void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
2807b4ee1782SOmar Sandoval {
2808b4ee1782SOmar Sandoval 	if (!lock_stripe_add(rbio))
2809e66d8d5aSDavid Sterba 		start_async_work(rbio, read_rebuild_work);
2810b4ee1782SOmar Sandoval }
2811