// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2012 Fusion-io  All rights reserved.
 * Copyright (C) 2012 Intel Corp. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"

/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT	1

/*
 * set when this rbio is sitting in the hash, but it is just a cache
 * of past RMW
 */
#define RBIO_CACHE_BIT		2

/*
 * set when it is safe to trust the stripe_pages for caching
 */
#define RBIO_CACHE_READY_BIT	3

#define RBIO_CACHE_SIZE 1024

#define BTRFS_STRIPE_HASH_TABLE_BITS	11

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
	struct list_head hash_list;
	spinlock_t lock;
};

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash_table {
	struct list_head stripe_cache;
	spinlock_t cache_lock;
	int cache_size;
	struct btrfs_stripe_hash table[];
};

enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};

struct btrfs_raid_bio {
	struct btrfs_io_context *bioc;

	/* while we're doing rmw on a stripe
	 * we put it into a hash table so we can
	 * lock the stripe and merge more rbios
	 * into it.
	 */
	struct list_head hash_list;

	/*
	 * LRU list for the stripe cache
	 */
	struct list_head stripe_cache;

	/*
	 * for scheduling work in the helper threads
	 */
	struct btrfs_work work;

	/*
	 * bio list and bio_list_lock are used
	 * to add more bios into the stripe
	 * in hopes of avoiding the full rmw
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/* also protected by the bio_list_lock, the
	 * plug list is used by the plugging code
	 * to collect partial bios while plugged.  The
	 * stripe locking code also uses it to hand off
	 * the stripe lock to the next pending IO
	 */
	struct list_head plug_list;

	/*
	 * flags that tell us if it is safe to
	 * merge with this bio
	 */
	unsigned long flags;

	/* size of each individual stripe on disk */
	int stripe_len;

	/* number of data stripes (no p/q) */
	int nr_data;

	int real_stripes;

	int stripe_npages;
	/*
	 * set if we're doing a parity rebuild
	 * for a read from higher up, which is handled
	 * differently from a parity rebuild as part of
	 * rmw
	 */
	enum btrfs_rbio_ops operation;

	/* first bad stripe */
	int faila;

	/* second bad stripe (for raid6 use) */
	int failb;

	int scrubp;
	/*
	 * number of pages needed to represent the full
	 * stripe
	 */
	int nr_pages;

	/*
	 * size of all the bios in the bio_list.  This
	 * helps us decide if the rbio maps to a full
	 * stripe or not
	 */
	int bio_list_bytes;

	int generic_bio_cnt;

	refcount_t refs;

	atomic_t stripes_pending;

	atomic_t error;
	/*
	 * these are two arrays of pointers.  We allocate the
	 * rbio big enough to hold them both and setup their
	 * locations when the rbio is allocated
	 */

	/* pointers to pages that we allocated for
	 * reading/writing stripes directly from the disk (including P/Q)
	 */
	struct page **stripe_pages;

	/*
	 * pointers to the pages in the bio_list.  Stored
	 * here for faster lookup
	 */
	struct page **bio_pages;

	/*
	 * bitmap to record which horizontal stripe has data
	 */
	unsigned long *dbitmap;

	/* allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;

	/* allocated with stripe_npages-many bits for finish_*() calls */
	unsigned long *finish_pbitmap;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
					 int need_check);
static void scrub_parity_work(struct btrfs_work *work);

static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
{
	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
	btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
}

/*
 * the stripe hash table is used for locking, and to collect
 * bios in hopes of making a full stripe
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash_table *x;
	struct btrfs_stripe_hash *cur;
	struct btrfs_stripe_hash *h;
	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	int i;

	if (info->stripe_hash_table)
		return 0;

	/*
	 * The table is large, starting with order 4 and can go as high as
	 * order 7 in case lock debugging is turned on.
	 *
	 * Try harder to allocate and fallback to vmalloc to lower the chance
	 * of a failing mount.
	 */
	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	spin_lock_init(&table->cache_lock);
	INIT_LIST_HEAD(&table->stripe_cache);

	h = table->table;

	for (i = 0; i < num_entries; i++) {
		cur = h + i;
		INIT_LIST_HEAD(&cur->hash_list);
		spin_lock_init(&cur->lock);
	}

	x = cmpxchg(&info->stripe_hash_table, NULL, table);
	kvfree(x);
	return 0;
}

/*
 * caching an rbio means to copy anything from the
 * bio_pages array into the stripe_pages array.  We
 * use the page uptodate bit in the stripe cache array
 * to indicate if it has valid data
 *
 * once the caching is done, we set the cache ready
 * bit.
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	int ret;

	ret = alloc_rbio_pages(rbio);
	if (ret)
		return;

	for (i = 0; i < rbio->nr_pages; i++) {
		if (!rbio->bio_pages[i])
			continue;

		copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]);
		SetPageUptodate(rbio->stripe_pages[i]);
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}

/*
 * we hash on the first logical address of the stripe
 */
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
	u64 num = rbio->bioc->raid_map[0];

	/*
	 * we shift down quite a bit.  We're using byte
	 * addressing, and most of the lower bits are zeros.
	 * This tends to upset hash_64, and it consistently
	 * returns just one or two different values.
	 *
	 * shifting off the lower bits fixes things.
	 */
	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}

/*
 * stealing an rbio means taking all the uptodate pages from the stripe
 * array in the source rbio and putting them into the destination rbio
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;
	struct page *s;
	struct page *d;

	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		s = src->stripe_pages[i];
		if (!s || !PageUptodate(s))
			continue;

		d = dest->stripe_pages[i];
		if (d)
			__free_page(d);

		dest->stripe_pages[i] = s;
		src->stripe_pages[i] = NULL;
	}
}

/*
 * merging means we take the bio_list from the victim and
 * splice it into the destination.  The victim should
 * be discarded afterwards.
 *
 * must be called with dest->bio_list_lock held
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
	dest->generic_bio_cnt += victim->generic_bio_cnt;
	bio_list_init(&victim->bio_list);
}

/*
 * used to prune items that are in the cache.  The caller
 * must hold the hash table lock.
 */
static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash *h;
	int freeit = 0;

	/*
	 * check the bit again under the hash table lock.
	 */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;
	h = table->table + bucket;

	/* hold the lock for the bucket because we may be
	 * removing it from the hash table
	 */
	spin_lock(&h->lock);

	/*
	 * hold the lock for the bio list because we need
	 * to make sure the bio list is empty
	 */
	spin_lock(&rbio->bio_list_lock);

	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
		list_del_init(&rbio->stripe_cache);
		table->cache_size -= 1;
		freeit = 1;

		/* if the bio list isn't empty, this rbio is
		 * still involved in an IO.  We take it out
		 * of the cache list, and drop the ref that
		 * was held for the list.
		 *
		 * If the bio_list was empty, we also remove
		 * the rbio from the hash_table, and drop
		 * the corresponding ref
		 */
		if (bio_list_empty(&rbio->bio_list)) {
			if (!list_empty(&rbio->hash_list)) {
				list_del_init(&rbio->hash_list);
				refcount_dec(&rbio->refs);
				BUG_ON(!list_empty(&rbio->plug_list));
			}
		}
	}

	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

	if (freeit)
		__free_raid_bio(rbio);
}

/*
 * prune a given rbio from the cache
 */
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	__remove_rbio_from_cache(rbio);
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove everything in the cache
 */
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;
	struct btrfs_raid_bio *rbio;

	table = info->stripe_hash_table;

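	/*
	 * Drain the LRU under cache_lock: __remove_rbio_from_cache() drops
	 * each entry's cache reference and frees the rbio once that was the
	 * last reference held.
	 */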
	spin_lock_irqsave(&table->cache_lock, flags);
	while (!list_empty(&table->stripe_cache)) {
		rbio = list_entry(table->stripe_cache.next,
				  struct btrfs_raid_bio,
				  stripe_cache);
		__remove_rbio_from_cache(rbio);
	}
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove all cached entries and free the hash table
 * used by unmount
 */
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
{
	if (!info->stripe_hash_table)
		return;
	btrfs_clear_rbio_cache(info);
	kvfree(info->stripe_hash_table);
	info->stripe_hash_table = NULL;
}

/*
 * insert an rbio into the stripe cache.  It
 * must have already been prepared by calling
 * cache_rbio_pages
 *
 * If this rbio was already cached, it gets
 * moved to the front of the lru.
 *
 * If the size of the rbio cache is too big, we
 * prune an item.
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	spin_lock(&rbio->bio_list_lock);

	/* bump our ref if we were not in the list before */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		refcount_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)) {
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				   struct btrfs_raid_bio,
				   stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * helper function to run the xor_blocks api.  It is only
 * able to do MAX_XOR_BLOCKS at a time, so we need to
 * loop through.
 */
static void run_xor(void **pages, int src_cnt, ssize_t len)
{
	int src_off = 0;
	int xor_src_cnt = 0;
	void *dest = pages[src_cnt];

	while (src_cnt > 0) {
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest, pages + src_off);

		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}
}

/*
 * Returns true if the bio list inside this rbio covers an entire stripe (no
 * rmw required).
 */
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
	unsigned long flags;
	unsigned long size = rbio->bio_list_bytes;
	int ret = 1;

	spin_lock_irqsave(&rbio->bio_list_lock, flags);
	if (size != rbio->nr_data * rbio->stripe_len)
		ret = 0;
	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);

	return ret;
}

/*
 * returns 1 if it is safe to merge two rbios together.
 * The merging is safe if the two rbios correspond to
 * the same stripe and if they are both going in the same
 * direction (read vs write), and if neither one is
 * locked for final IO
 *
 * The caller is responsible for locking such that
 * rmw_locked is safe to test
 */
static int rbio_can_merge(struct btrfs_raid_bio *last,
			  struct btrfs_raid_bio *cur)
{
	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
		return 0;

	/*
	 * we can't merge with cached rbios, since the
	 * idea is that when we merge the destination
	 * rbio is going to run our IO for us.  We can
	 * steal from cached rbios though, other functions
	 * handle that.
	 */
	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
	    test_bit(RBIO_CACHE_BIT, &cur->flags))
		return 0;

	if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
		return 0;

	/* we can't merge with different operations */
	if (last->operation != cur->operation)
		return 0;
	/*
	 * A parity scrub needs to read the full stripe from the drive, then
	 * check and repair the parity and write the new results.
	 *
	 * We're not allowed to add any new bios to the
	 * bio list here, anyone else that wants to
	 * change this stripe needs to do their own rmw.
	 */
	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
		return 0;

	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
		return 0;

	if (last->operation == BTRFS_RBIO_READ_REBUILD) {
		int fa = last->faila;
		int fb = last->failb;
		int cur_fa = cur->faila;
		int cur_fb = cur->failb;

		if (last->faila >= last->failb) {
			fa = last->failb;
			fb = last->faila;
		}

		if (cur->faila >= cur->failb) {
			cur_fa = cur->failb;
			cur_fb = cur->faila;
		}

		if (fa != cur_fa || fb != cur_fb)
			return 0;
	}
	return 1;
}

static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
				  int index)
{
	return stripe * rbio->stripe_npages + index;
}

/*
 * these are just the pages from the rbio array, not from anything
 * the FS sent down to us
 */
static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
				     int index)
{
	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
}

/*
 * helper to index into the pstripe
 */
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	return rbio_stripe_page(rbio, rbio->nr_data, index);
}

/*
 * helper to index into the qstripe, returns null
 * if there is no qstripe
 */
static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	if (rbio->nr_data + 1 == rbio->real_stripes)
		return NULL;
	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}

/*
 * The first stripe in the table for a logical address
 * has the lock.  rbios are added in one of three ways:
 *
 * 1) Nobody has the stripe locked yet.  The rbio is given
 * the lock and 0 is returned.  The caller must start the IO
 * themselves.
 *
 * 2) Someone has the stripe locked, but we're able to merge
 * with the lock owner.  The rbio is freed and the IO will
 * start automatically along with the existing rbio.  1 is returned.
 *
 * 3) Someone has the stripe locked, but we're not able to merge.
 * The rbio is added to the lock owner's plug list, or merged into
 * an rbio already on the plug list.  When the lock owner unlocks,
 * the next rbio on the list is run and the IO is started automatically.
 * 1 is returned
 *
 * If we return 0, the caller still owns the rbio and must continue with
 * IO submission.  If we return 1, the caller must assume the rbio has
 * already been freed.
 */
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash *h;
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *pending;
	unsigned long flags;
	struct btrfs_raid_bio *freeit = NULL;
	struct btrfs_raid_bio *cache_drop = NULL;
	int ret = 0;

	h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);

	spin_lock_irqsave(&h->lock, flags);
	list_for_each_entry(cur, &h->hash_list, hash_list) {
		if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
			continue;

		spin_lock(&cur->bio_list_lock);

		/* Can we steal this cached rbio's pages? */
		if (bio_list_empty(&cur->bio_list) &&
		    list_empty(&cur->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
			list_del_init(&cur->hash_list);
			refcount_dec(&cur->refs);

			steal_rbio(cur, rbio);
			cache_drop = cur;
			spin_unlock(&cur->bio_list_lock);

			goto lockit;
		}

		/* Can we merge into the lock owner? */
		if (rbio_can_merge(cur, rbio)) {
			merge_rbio(cur, rbio);
			spin_unlock(&cur->bio_list_lock);
			freeit = rbio;
			ret = 1;
			goto out;
		}

		/*
		 * We couldn't merge with the running rbio, see if we can merge
		 * with the pending ones.  We don't have to check for rmw_locked
		 * because there is no way they are inside finish_rmw right now
		 */
		list_for_each_entry(pending, &cur->plug_list, plug_list) {
			if (rbio_can_merge(pending, rbio)) {
				merge_rbio(pending, rbio);
				spin_unlock(&cur->bio_list_lock);
				freeit = rbio;
				ret = 1;
				goto out;
			}
		}

		/*
		 * No merging, put us on the tail of the plug list, our rbio
		 * will be started when the currently running rbio unlocks
		 */
		list_add_tail(&rbio->plug_list, &cur->plug_list);
		spin_unlock(&cur->bio_list_lock);
		ret = 1;
		goto out;
	}
lockit:
	refcount_inc(&rbio->refs);
	list_add(&rbio->hash_list, &h->hash_list);
out:
	spin_unlock_irqrestore(&h->lock, flags);
	if (cache_drop)
		remove_rbio_from_cache(cache_drop);
	if (freeit)
		__free_raid_bio(freeit);
	return ret;
}

/*
 * called as rmw or parity rebuild is completed.  If the plug list has more
 * rbios waiting for this stripe, the next one on the list will be started
 */
static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
	int bucket;
	struct btrfs_stripe_hash *h;
	unsigned long flags;
	int keep_cache = 0;

	bucket = rbio_bucket(rbio);
	h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;

	if (list_empty(&rbio->plug_list))
		cache_rbio(rbio);

	spin_lock_irqsave(&h->lock, flags);
	spin_lock(&rbio->bio_list_lock);

	if (!list_empty(&rbio->hash_list)) {
		/*
		 * if we're still cached and there is no other IO
		 * to perform, just leave this rbio here for others
		 * to steal from later
		 */
		if (list_empty(&rbio->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
			keep_cache = 1;
			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
			BUG_ON(!bio_list_empty(&rbio->bio_list));
			goto done;
		}

		list_del_init(&rbio->hash_list);
		refcount_dec(&rbio->refs);

		/*
		 * we use the plug list to hold all the rbios
		 * waiting for the chance to lock this stripe.
		 * hand the lock over to one of them.
		 */
		if (!list_empty(&rbio->plug_list)) {
			struct btrfs_raid_bio *next;
			struct list_head *head = rbio->plug_list.next;

			next = list_entry(head, struct btrfs_raid_bio,
					  plug_list);

			list_del_init(&rbio->plug_list);

			list_add(&next->hash_list, &h->hash_list);
			refcount_inc(&next->refs);
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);

			if (next->operation == BTRFS_RBIO_READ_REBUILD)
				start_async_work(next, read_rebuild_work);
			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
				steal_rbio(rbio, next);
				start_async_work(next, read_rebuild_work);
			} else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				start_async_work(next, rmw_work);
			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				steal_rbio(rbio, next);
				start_async_work(next, scrub_parity_work);
			}

			goto done_nolock;
		}
	}
done:
	spin_unlock(&rbio->bio_list_lock);
	spin_unlock_irqrestore(&h->lock, flags);

done_nolock:
	if (!keep_cache)
		remove_rbio_from_cache(rbio);
}

static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
	int i;

	if (!refcount_dec_and_test(&rbio->refs))
		return;

	WARN_ON(!list_empty(&rbio->stripe_cache));
	WARN_ON(!list_empty(&rbio->hash_list));
	WARN_ON(!bio_list_empty(&rbio->bio_list));

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i]) {
			__free_page(rbio->stripe_pages[i]);
			rbio->stripe_pages[i] = NULL;
		}
	}

	btrfs_put_bioc(rbio->bioc);
	kfree(rbio);
}

static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
	struct bio *next;

	while (cur) {
		next = cur->bi_next;
		cur->bi_next = NULL;
		cur->bi_status = err;
		bio_endio(cur);
		cur = next;
	}
}

/*
 * this frees the rbio and runs through all the bios in the
 * bio_list and calls end_io on them
 */
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
	struct bio *cur = bio_list_get(&rbio->bio_list);
	struct bio *extra;

	if (rbio->generic_bio_cnt)
		btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);

	/*
	 * At this moment, rbio->bio_list is empty, however since rbio does not
	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
	 * hash list, rbio may be merged with others so that rbio->bio_list
	 * becomes non-empty.
	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any
	 * more and we can call bio_endio() on all queued bios.
	 */
	unlock_stripe(rbio);
	extra = bio_list_get(&rbio->bio_list);
	__free_raid_bio(rbio);

	rbio_endio_bio_list(cur, err);
	if (extra)
		rbio_endio_bio_list(extra, err);
}

/*
 * end io function used by finish_rmw.  When we finally
 * get here, we've written a full stripe
 */
static void raid_write_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;
	blk_status_t err = bio->bi_status;
	int max_errors;

	if (err)
		fail_bio_stripe(rbio, bio);

	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	err = BLK_STS_OK;

	/* OK, we have written all the stripes we need to. */
	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
		     0 : rbio->bioc->max_errors;
	if (atomic_read(&rbio->error) > max_errors)
		err = BLK_STS_IOERR;

	rbio_orig_end_io(rbio, err);
}

/*
 * the read/modify/write code wants to use the original bio for
 * any pages it included, and then use the rbio for everything
 * else.  This function decides if a given index (stripe number)
 * and page number in that stripe fall inside the original bio
 * or the rbio.
 *
 * if you set bio_list_only, you'll get a NULL back for any ranges
 * that are outside the bio_list
 *
 * This doesn't take any refs on anything, you get a bare page pointer
 * and the caller must bump refs as required.
 *
 * You must call index_rbio_pages once before you can trust
 * the answers from this function.
 */
static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
				 int index, int pagenr, int bio_list_only)
{
	int chunk_page;
	struct page *p = NULL;

	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;

	spin_lock_irq(&rbio->bio_list_lock);
	p = rbio->bio_pages[chunk_page];
	spin_unlock_irq(&rbio->bio_list_lock);

	if (p || bio_list_only)
		return p;

	return rbio->stripe_pages[chunk_page];
}

/*
 * number of pages we need for the entire stripe across all the
 * drives
 */
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
}

/*
 * allocation and initial setup for the btrfs_raid_bio.  Note that
 * this does not allocate any pages for rbio->stripe_pages.
 */
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
					 struct btrfs_io_context *bioc,
					 u64 stripe_len)
{
	struct btrfs_raid_bio *rbio;
	int nr_data = 0;
	int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
	void *p;

	rbio = kzalloc(sizeof(*rbio) +
		       sizeof(*rbio->stripe_pages) * num_pages +
		       sizeof(*rbio->bio_pages) * num_pages +
		       sizeof(*rbio->finish_pointers) * real_stripes +
		       sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
		       sizeof(*rbio->finish_pbitmap) *
		       BITS_TO_LONGS(stripe_npages),
		       GFP_NOFS);
	if (!rbio)
		return ERR_PTR(-ENOMEM);

	bio_list_init(&rbio->bio_list);
	INIT_LIST_HEAD(&rbio->plug_list);
	spin_lock_init(&rbio->bio_list_lock);
	INIT_LIST_HEAD(&rbio->stripe_cache);
	INIT_LIST_HEAD(&rbio->hash_list);
	rbio->bioc = bioc;
	rbio->stripe_len = stripe_len;
	rbio->nr_pages = num_pages;
	rbio->real_stripes = real_stripes;
	rbio->stripe_npages = stripe_npages;
	rbio->faila = -1;
	rbio->failb = -1;
	refcount_set(&rbio->refs, 1);
	atomic_set(&rbio->error, 0);
	atomic_set(&rbio->stripes_pending, 0);

	/*
	 * the stripe_pages, bio_pages, etc arrays point to the extra
	 * memory we allocated past the end of the rbio
	 */
	p = rbio + 1;
#define CONSUME_ALLOC(ptr, count)	do {				\
		ptr = p;						\
		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
	} while (0)
	CONSUME_ALLOC(rbio->stripe_pages, num_pages);
	CONSUME_ALLOC(rbio->bio_pages, num_pages);
	CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
	CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
	CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
#undef CONSUME_ALLOC

	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
		nr_data = real_stripes - 1;
	else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
		nr_data = real_stripes - 2;
	else
		BUG();

	rbio->nr_data = nr_data;
	return rbio;
}

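/*
 * Illustrative sketch (not upstream documentation) of the single allocation
 * made by alloc_rbio() above: the trailing arrays are carved out of the same
 * kzalloc() block by CONSUME_ALLOC() in this order:
 *
 *   struct btrfs_raid_bio
 *   stripe_pages[num_pages]                        (struct page *)
 *   bio_pages[num_pages]                           (struct page *)
 *   finish_pointers[real_stripes]                  (void *)
 *   dbitmap[BITS_TO_LONGS(stripe_npages)]          (unsigned long)
 *   finish_pbitmap[BITS_TO_LONGS(stripe_npages)]   (unsigned long)
 */
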
/* allocate pages for all the stripes in the bio, including parity */ 102753b381b3SDavid Woodhouse static int alloc_rbio_pages(struct btrfs_raid_bio *rbio) 102853b381b3SDavid Woodhouse { 1029dd137dd1SSweet Tea Dorminy return btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages); 103053b381b3SDavid Woodhouse } 103153b381b3SDavid Woodhouse 1032b7178a5fSZhao Lei /* only allocate pages for p/q stripes */ 103353b381b3SDavid Woodhouse static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio) 103453b381b3SDavid Woodhouse { 1035dd137dd1SSweet Tea Dorminy int data_pages = rbio_stripe_page_index(rbio, rbio->nr_data, 0); 103653b381b3SDavid Woodhouse 1037dd137dd1SSweet Tea Dorminy return btrfs_alloc_page_array(rbio->nr_pages - data_pages, 1038dd137dd1SSweet Tea Dorminy rbio->stripe_pages + data_pages); 103953b381b3SDavid Woodhouse } 104053b381b3SDavid Woodhouse 104153b381b3SDavid Woodhouse /* 104253b381b3SDavid Woodhouse * add a single page from a specific stripe into our list of bios for IO 104353b381b3SDavid Woodhouse * this will try to merge into existing bios if possible, and returns 104453b381b3SDavid Woodhouse * zero if all went well. 104553b381b3SDavid Woodhouse */ 104648a3b636SEric Sandeen static int rbio_add_io_page(struct btrfs_raid_bio *rbio, 104753b381b3SDavid Woodhouse struct bio_list *bio_list, 104853b381b3SDavid Woodhouse struct page *page, 104953b381b3SDavid Woodhouse int stripe_nr, 105053b381b3SDavid Woodhouse unsigned long page_index, 1051*e01bf588SChristoph Hellwig unsigned long bio_max_len, 1052*e01bf588SChristoph Hellwig unsigned int opf) 105353b381b3SDavid Woodhouse { 105453b381b3SDavid Woodhouse struct bio *last = bio_list->tail; 105553b381b3SDavid Woodhouse int ret; 105653b381b3SDavid Woodhouse struct bio *bio; 10574c664611SQu Wenruo struct btrfs_io_stripe *stripe; 105853b381b3SDavid Woodhouse u64 disk_start; 105953b381b3SDavid Woodhouse 10604c664611SQu Wenruo stripe = &rbio->bioc->stripes[stripe_nr]; 106109cbfeafSKirill A. Shutemov disk_start = stripe->physical + (page_index << PAGE_SHIFT); 106253b381b3SDavid Woodhouse 106353b381b3SDavid Woodhouse /* if the device is missing, just fail this stripe */ 106453b381b3SDavid Woodhouse if (!stripe->dev->bdev) 106553b381b3SDavid Woodhouse return fail_rbio_index(rbio, stripe_nr); 106653b381b3SDavid Woodhouse 106753b381b3SDavid Woodhouse /* see if we can add this page onto our existing bio */ 106853b381b3SDavid Woodhouse if (last) { 10691201b58bSDavid Sterba u64 last_end = last->bi_iter.bi_sector << 9; 10704f024f37SKent Overstreet last_end += last->bi_iter.bi_size; 107153b381b3SDavid Woodhouse 107253b381b3SDavid Woodhouse /* 107353b381b3SDavid Woodhouse * we can't merge these if they are from different 107453b381b3SDavid Woodhouse * devices or if they are not contiguous 107553b381b3SDavid Woodhouse */ 1076f90ae76aSNikolay Borisov if (last_end == disk_start && !last->bi_status && 1077309dca30SChristoph Hellwig last->bi_bdev == stripe->dev->bdev) { 107809cbfeafSKirill A. Shutemov ret = bio_add_page(last, page, PAGE_SIZE, 0); 107909cbfeafSKirill A. 
Shutemov if (ret == PAGE_SIZE) 108053b381b3SDavid Woodhouse return 0; 108153b381b3SDavid Woodhouse } 108253b381b3SDavid Woodhouse } 108353b381b3SDavid Woodhouse 108453b381b3SDavid Woodhouse /* put a new bio on the list */ 1085c3a3b19bSQu Wenruo bio = btrfs_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1); 1086c3a3b19bSQu Wenruo btrfs_bio(bio)->device = stripe->dev; 10874f024f37SKent Overstreet bio->bi_iter.bi_size = 0; 108874d46992SChristoph Hellwig bio_set_dev(bio, stripe->dev->bdev); 1089*e01bf588SChristoph Hellwig bio->bi_opf = opf; 10904f024f37SKent Overstreet bio->bi_iter.bi_sector = disk_start >> 9; 1091*e01bf588SChristoph Hellwig bio->bi_private = rbio; 109253b381b3SDavid Woodhouse 109309cbfeafSKirill A. Shutemov bio_add_page(bio, page, PAGE_SIZE, 0); 109453b381b3SDavid Woodhouse bio_list_add(bio_list, bio); 109553b381b3SDavid Woodhouse return 0; 109653b381b3SDavid Woodhouse } 109753b381b3SDavid Woodhouse 109853b381b3SDavid Woodhouse /* 109953b381b3SDavid Woodhouse * while we're doing the read/modify/write cycle, we could 110053b381b3SDavid Woodhouse * have errors in reading pages off the disk. This checks 110153b381b3SDavid Woodhouse * for errors and if we're not able to read the page it'll 110253b381b3SDavid Woodhouse * trigger parity reconstruction. The rmw will be finished 110353b381b3SDavid Woodhouse * after we've reconstructed the failed stripes 110453b381b3SDavid Woodhouse */ 110553b381b3SDavid Woodhouse static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) 110653b381b3SDavid Woodhouse { 110753b381b3SDavid Woodhouse if (rbio->faila >= 0 || rbio->failb >= 0) { 11082c8cdd6eSMiao Xie BUG_ON(rbio->faila == rbio->real_stripes - 1); 110953b381b3SDavid Woodhouse __raid56_parity_recover(rbio); 111053b381b3SDavid Woodhouse } else { 111153b381b3SDavid Woodhouse finish_rmw(rbio); 111253b381b3SDavid Woodhouse } 111353b381b3SDavid Woodhouse } 111453b381b3SDavid Woodhouse 111553b381b3SDavid Woodhouse /* 111653b381b3SDavid Woodhouse * helper function to walk our bio list and populate the bio_pages array with 111753b381b3SDavid Woodhouse * the result. This seems expensive, but it is faster than constantly 111853b381b3SDavid Woodhouse * searching through the bio list as we setup the IO in finish_rmw or stripe 111953b381b3SDavid Woodhouse * reconstruction. 112053b381b3SDavid Woodhouse * 112153b381b3SDavid Woodhouse * This must be called before you trust the answers from page_in_rbio 112253b381b3SDavid Woodhouse */ 112353b381b3SDavid Woodhouse static void index_rbio_pages(struct btrfs_raid_bio *rbio) 112453b381b3SDavid Woodhouse { 112553b381b3SDavid Woodhouse struct bio *bio; 112653b381b3SDavid Woodhouse u64 start; 112753b381b3SDavid Woodhouse unsigned long stripe_offset; 112853b381b3SDavid Woodhouse unsigned long page_index; 112953b381b3SDavid Woodhouse 113053b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 113153b381b3SDavid Woodhouse bio_list_for_each(bio, &rbio->bio_list) { 11326592e58cSFilipe Manana struct bio_vec bvec; 11336592e58cSFilipe Manana struct bvec_iter iter; 11346592e58cSFilipe Manana int i = 0; 11356592e58cSFilipe Manana 11361201b58bSDavid Sterba start = bio->bi_iter.bi_sector << 9; 11374c664611SQu Wenruo stripe_offset = start - rbio->bioc->raid_map[0]; 113809cbfeafSKirill A. 
Shutemov page_index = stripe_offset >> PAGE_SHIFT; 113953b381b3SDavid Woodhouse 11406592e58cSFilipe Manana if (bio_flagged(bio, BIO_CLONED)) 1141c3a3b19bSQu Wenruo bio->bi_iter = btrfs_bio(bio)->iter; 11426592e58cSFilipe Manana 11436592e58cSFilipe Manana bio_for_each_segment(bvec, bio, iter) { 11446592e58cSFilipe Manana rbio->bio_pages[page_index + i] = bvec.bv_page; 11456592e58cSFilipe Manana i++; 11466592e58cSFilipe Manana } 114753b381b3SDavid Woodhouse } 114853b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 114953b381b3SDavid Woodhouse } 115053b381b3SDavid Woodhouse 115153b381b3SDavid Woodhouse /* 115253b381b3SDavid Woodhouse * this is called from one of two situations. We either 115353b381b3SDavid Woodhouse * have a full stripe from the higher layers, or we've read all 115453b381b3SDavid Woodhouse * the missing bits off disk. 115553b381b3SDavid Woodhouse * 115653b381b3SDavid Woodhouse * This will calculate the parity and then send down any 115753b381b3SDavid Woodhouse * changed blocks. 115853b381b3SDavid Woodhouse */ 115953b381b3SDavid Woodhouse static noinline void finish_rmw(struct btrfs_raid_bio *rbio) 116053b381b3SDavid Woodhouse { 11614c664611SQu Wenruo struct btrfs_io_context *bioc = rbio->bioc; 11621389053eSKees Cook void **pointers = rbio->finish_pointers; 116353b381b3SDavid Woodhouse int nr_data = rbio->nr_data; 116453b381b3SDavid Woodhouse int stripe; 116553b381b3SDavid Woodhouse int pagenr; 1166c17af965SDavid Sterba bool has_qstripe; 116753b381b3SDavid Woodhouse struct bio_list bio_list; 116853b381b3SDavid Woodhouse struct bio *bio; 116953b381b3SDavid Woodhouse int ret; 117053b381b3SDavid Woodhouse 117153b381b3SDavid Woodhouse bio_list_init(&bio_list); 117253b381b3SDavid Woodhouse 1173c17af965SDavid Sterba if (rbio->real_stripes - rbio->nr_data == 1) 1174c17af965SDavid Sterba has_qstripe = false; 1175c17af965SDavid Sterba else if (rbio->real_stripes - rbio->nr_data == 2) 1176c17af965SDavid Sterba has_qstripe = true; 1177c17af965SDavid Sterba else 117853b381b3SDavid Woodhouse BUG(); 117953b381b3SDavid Woodhouse 118053b381b3SDavid Woodhouse /* at this point we either have a full stripe, 118153b381b3SDavid Woodhouse * or we've read the full stripe from the drive. 118253b381b3SDavid Woodhouse * recalculate the parity and write the new results. 118353b381b3SDavid Woodhouse * 118453b381b3SDavid Woodhouse * We're not allowed to add any new bios to the 118553b381b3SDavid Woodhouse * bio list here, anyone else that wants to 118653b381b3SDavid Woodhouse * change this stripe needs to do their own rmw. 118753b381b3SDavid Woodhouse */ 118853b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 118953b381b3SDavid Woodhouse set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 119053b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 119153b381b3SDavid Woodhouse 1192b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 119353b381b3SDavid Woodhouse 119453b381b3SDavid Woodhouse /* 119553b381b3SDavid Woodhouse * now that we've set rmw_locked, run through the 119653b381b3SDavid Woodhouse * bio list one last time and map the page pointers 11974ae10b3aSChris Mason * 11984ae10b3aSChris Mason * We don't cache full rbios because we're assuming 11994ae10b3aSChris Mason * the higher layers are unlikely to use this area of 12004ae10b3aSChris Mason * the disk again soon. If they do use it again, 12014ae10b3aSChris Mason * hopefully they will send another full bio. 
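 *
 * Partial rbios, on the other hand, keep their stripe_pages uptodate in
 * the stripe cache, so a later rmw on the same stripe can hopefully skip
 * the read phase entirely.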
120253b381b3SDavid Woodhouse */ 120353b381b3SDavid Woodhouse index_rbio_pages(rbio); 12044ae10b3aSChris Mason if (!rbio_is_full(rbio)) 12054ae10b3aSChris Mason cache_rbio_pages(rbio); 12064ae10b3aSChris Mason else 12074ae10b3aSChris Mason clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 120853b381b3SDavid Woodhouse 1209915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 121053b381b3SDavid Woodhouse struct page *p; 121153b381b3SDavid Woodhouse /* first collect one page from each data stripe */ 121253b381b3SDavid Woodhouse for (stripe = 0; stripe < nr_data; stripe++) { 121353b381b3SDavid Woodhouse p = page_in_rbio(rbio, stripe, pagenr, 0); 121494a0b58dSIra Weiny pointers[stripe] = kmap_local_page(p); 121553b381b3SDavid Woodhouse } 121653b381b3SDavid Woodhouse 121753b381b3SDavid Woodhouse /* then add the parity stripe */ 121853b381b3SDavid Woodhouse p = rbio_pstripe_page(rbio, pagenr); 121953b381b3SDavid Woodhouse SetPageUptodate(p); 122094a0b58dSIra Weiny pointers[stripe++] = kmap_local_page(p); 122153b381b3SDavid Woodhouse 1222c17af965SDavid Sterba if (has_qstripe) { 122353b381b3SDavid Woodhouse 122453b381b3SDavid Woodhouse /* 122553b381b3SDavid Woodhouse * raid6, add the qstripe and call the 122653b381b3SDavid Woodhouse * library function to fill in our p/q 122753b381b3SDavid Woodhouse */ 122853b381b3SDavid Woodhouse p = rbio_qstripe_page(rbio, pagenr); 122953b381b3SDavid Woodhouse SetPageUptodate(p); 123094a0b58dSIra Weiny pointers[stripe++] = kmap_local_page(p); 123153b381b3SDavid Woodhouse 12322c8cdd6eSMiao Xie raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, 123353b381b3SDavid Woodhouse pointers); 123453b381b3SDavid Woodhouse } else { 123553b381b3SDavid Woodhouse /* raid5 */ 123669d24804SDavid Sterba copy_page(pointers[nr_data], pointers[0]); 123709cbfeafSKirill A. Shutemov run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 123853b381b3SDavid Woodhouse } 123994a0b58dSIra Weiny for (stripe = stripe - 1; stripe >= 0; stripe--) 124094a0b58dSIra Weiny kunmap_local(pointers[stripe]); 124153b381b3SDavid Woodhouse } 124253b381b3SDavid Woodhouse 124353b381b3SDavid Woodhouse /* 124453b381b3SDavid Woodhouse * time to start writing. Make bios for everything from the 124553b381b3SDavid Woodhouse * higher layers (the bio_list in our rbio) and our p/q. Ignore 124653b381b3SDavid Woodhouse * everything else. 
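 *
 * Data pages come straight from the bio_list (page_in_rbio() with
 * bio_list_only set returns NULL for pages no bio touched, and those are
 * skipped), while the p/q pages always come from our private
 * stripe_pages.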
124753b381b3SDavid Woodhouse */ 12482c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 1249915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 125053b381b3SDavid Woodhouse struct page *page; 125153b381b3SDavid Woodhouse if (stripe < rbio->nr_data) { 125253b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 1); 125353b381b3SDavid Woodhouse if (!page) 125453b381b3SDavid Woodhouse continue; 125553b381b3SDavid Woodhouse } else { 125653b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 125753b381b3SDavid Woodhouse } 125853b381b3SDavid Woodhouse 125953b381b3SDavid Woodhouse ret = rbio_add_io_page(rbio, &bio_list, 1260*e01bf588SChristoph Hellwig page, stripe, pagenr, rbio->stripe_len, 1261*e01bf588SChristoph Hellwig REQ_OP_WRITE); 126253b381b3SDavid Woodhouse if (ret) 126353b381b3SDavid Woodhouse goto cleanup; 126453b381b3SDavid Woodhouse } 126553b381b3SDavid Woodhouse } 126653b381b3SDavid Woodhouse 12674c664611SQu Wenruo if (likely(!bioc->num_tgtdevs)) 12682c8cdd6eSMiao Xie goto write_data; 12692c8cdd6eSMiao Xie 12702c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 12714c664611SQu Wenruo if (!bioc->tgtdev_map[stripe]) 12722c8cdd6eSMiao Xie continue; 12732c8cdd6eSMiao Xie 1274915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 12752c8cdd6eSMiao Xie struct page *page; 12762c8cdd6eSMiao Xie if (stripe < rbio->nr_data) { 12772c8cdd6eSMiao Xie page = page_in_rbio(rbio, stripe, pagenr, 1); 12782c8cdd6eSMiao Xie if (!page) 12792c8cdd6eSMiao Xie continue; 12802c8cdd6eSMiao Xie } else { 12812c8cdd6eSMiao Xie page = rbio_stripe_page(rbio, stripe, pagenr); 12822c8cdd6eSMiao Xie } 12832c8cdd6eSMiao Xie 12842c8cdd6eSMiao Xie ret = rbio_add_io_page(rbio, &bio_list, page, 12854c664611SQu Wenruo rbio->bioc->tgtdev_map[stripe], 1286*e01bf588SChristoph Hellwig pagenr, rbio->stripe_len, 1287*e01bf588SChristoph Hellwig REQ_OP_WRITE); 12882c8cdd6eSMiao Xie if (ret) 12892c8cdd6eSMiao Xie goto cleanup; 12902c8cdd6eSMiao Xie } 12912c8cdd6eSMiao Xie } 12922c8cdd6eSMiao Xie 12932c8cdd6eSMiao Xie write_data: 1294b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); 1295b89e1b01SMiao Xie BUG_ON(atomic_read(&rbio->stripes_pending) == 0); 129653b381b3SDavid Woodhouse 1297bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 129853b381b3SDavid Woodhouse bio->bi_end_io = raid_write_end_io; 12994e49ea4aSMike Christie 13004e49ea4aSMike Christie submit_bio(bio); 130153b381b3SDavid Woodhouse } 130253b381b3SDavid Woodhouse return; 130353b381b3SDavid Woodhouse 130453b381b3SDavid Woodhouse cleanup: 130558efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 1306785884fcSLiu Bo 1307785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 1308785884fcSLiu Bo bio_put(bio); 130953b381b3SDavid Woodhouse } 131053b381b3SDavid Woodhouse 131153b381b3SDavid Woodhouse /* 131253b381b3SDavid Woodhouse * helper to find the stripe number for a given bio. Used to figure out which 131353b381b3SDavid Woodhouse * stripe has failed. This expects the bio to correspond to a physical disk, 131453b381b3SDavid Woodhouse * so it looks up based on physical sector numbers. 
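 *
 * A stripe only matches if the bio's device is the stripe's bdev and the
 * bio's physical start falls inside
 * [stripe->physical, stripe->physical + stripe_len).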
131553b381b3SDavid Woodhouse */ 131653b381b3SDavid Woodhouse static int find_bio_stripe(struct btrfs_raid_bio *rbio, 131753b381b3SDavid Woodhouse struct bio *bio) 131853b381b3SDavid Woodhouse { 13194f024f37SKent Overstreet u64 physical = bio->bi_iter.bi_sector; 132053b381b3SDavid Woodhouse int i; 13214c664611SQu Wenruo struct btrfs_io_stripe *stripe; 132253b381b3SDavid Woodhouse 132353b381b3SDavid Woodhouse physical <<= 9; 132453b381b3SDavid Woodhouse 13254c664611SQu Wenruo for (i = 0; i < rbio->bioc->num_stripes; i++) { 13264c664611SQu Wenruo stripe = &rbio->bioc->stripes[i]; 132783025863SNikolay Borisov if (in_range(physical, stripe->physical, rbio->stripe_len) && 1328309dca30SChristoph Hellwig stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) { 132953b381b3SDavid Woodhouse return i; 133053b381b3SDavid Woodhouse } 133153b381b3SDavid Woodhouse } 133253b381b3SDavid Woodhouse return -1; 133353b381b3SDavid Woodhouse } 133453b381b3SDavid Woodhouse 133553b381b3SDavid Woodhouse /* 133653b381b3SDavid Woodhouse * helper to find the stripe number for a given 133753b381b3SDavid Woodhouse * bio (before mapping). Used to figure out which stripe has 133853b381b3SDavid Woodhouse * failed. This looks up based on logical block numbers. 133953b381b3SDavid Woodhouse */ 134053b381b3SDavid Woodhouse static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, 134153b381b3SDavid Woodhouse struct bio *bio) 134253b381b3SDavid Woodhouse { 13431201b58bSDavid Sterba u64 logical = bio->bi_iter.bi_sector << 9; 134453b381b3SDavid Woodhouse int i; 134553b381b3SDavid Woodhouse 134653b381b3SDavid Woodhouse for (i = 0; i < rbio->nr_data; i++) { 13474c664611SQu Wenruo u64 stripe_start = rbio->bioc->raid_map[i]; 134883025863SNikolay Borisov 134983025863SNikolay Borisov if (in_range(logical, stripe_start, rbio->stripe_len)) 135053b381b3SDavid Woodhouse return i; 135153b381b3SDavid Woodhouse } 135253b381b3SDavid Woodhouse return -1; 135353b381b3SDavid Woodhouse } 135453b381b3SDavid Woodhouse 135553b381b3SDavid Woodhouse /* 135653b381b3SDavid Woodhouse * returns -EIO if we had too many failures 135753b381b3SDavid Woodhouse */ 135853b381b3SDavid Woodhouse static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed) 135953b381b3SDavid Woodhouse { 136053b381b3SDavid Woodhouse unsigned long flags; 136153b381b3SDavid Woodhouse int ret = 0; 136253b381b3SDavid Woodhouse 136353b381b3SDavid Woodhouse spin_lock_irqsave(&rbio->bio_list_lock, flags); 136453b381b3SDavid Woodhouse 136553b381b3SDavid Woodhouse /* we already know this stripe is bad, move on */ 136653b381b3SDavid Woodhouse if (rbio->faila == failed || rbio->failb == failed) 136753b381b3SDavid Woodhouse goto out; 136853b381b3SDavid Woodhouse 136953b381b3SDavid Woodhouse if (rbio->faila == -1) { 137053b381b3SDavid Woodhouse /* first failure on this rbio */ 137153b381b3SDavid Woodhouse rbio->faila = failed; 1372b89e1b01SMiao Xie atomic_inc(&rbio->error); 137353b381b3SDavid Woodhouse } else if (rbio->failb == -1) { 137453b381b3SDavid Woodhouse /* second failure on this rbio */ 137553b381b3SDavid Woodhouse rbio->failb = failed; 1376b89e1b01SMiao Xie atomic_inc(&rbio->error); 137753b381b3SDavid Woodhouse } else { 137853b381b3SDavid Woodhouse ret = -EIO; 137953b381b3SDavid Woodhouse } 138053b381b3SDavid Woodhouse out: 138153b381b3SDavid Woodhouse spin_unlock_irqrestore(&rbio->bio_list_lock, flags); 138253b381b3SDavid Woodhouse 138353b381b3SDavid Woodhouse return ret; 138453b381b3SDavid Woodhouse } 138553b381b3SDavid Woodhouse 138653b381b3SDavid Woodhouse /* 
138753b381b3SDavid Woodhouse * helper to fail a stripe based on a physical disk 138853b381b3SDavid Woodhouse * bio. 138953b381b3SDavid Woodhouse */ 139053b381b3SDavid Woodhouse static int fail_bio_stripe(struct btrfs_raid_bio *rbio, 139153b381b3SDavid Woodhouse struct bio *bio) 139253b381b3SDavid Woodhouse { 139353b381b3SDavid Woodhouse int failed = find_bio_stripe(rbio, bio); 139453b381b3SDavid Woodhouse 139553b381b3SDavid Woodhouse if (failed < 0) 139653b381b3SDavid Woodhouse return -EIO; 139753b381b3SDavid Woodhouse 139853b381b3SDavid Woodhouse return fail_rbio_index(rbio, failed); 139953b381b3SDavid Woodhouse } 140053b381b3SDavid Woodhouse 140153b381b3SDavid Woodhouse /* 140253b381b3SDavid Woodhouse * this sets each page in the bio uptodate. It should only be used on private 140353b381b3SDavid Woodhouse * rbio pages, nothing that comes in from the higher layers 140453b381b3SDavid Woodhouse */ 140553b381b3SDavid Woodhouse static void set_bio_pages_uptodate(struct bio *bio) 140653b381b3SDavid Woodhouse { 14070198e5b7SLiu Bo struct bio_vec *bvec; 14086dc4f100SMing Lei struct bvec_iter_all iter_all; 140953b381b3SDavid Woodhouse 14100198e5b7SLiu Bo ASSERT(!bio_flagged(bio, BIO_CLONED)); 14116592e58cSFilipe Manana 14122b070cfeSChristoph Hellwig bio_for_each_segment_all(bvec, bio, iter_all) 14130198e5b7SLiu Bo SetPageUptodate(bvec->bv_page); 141453b381b3SDavid Woodhouse } 141553b381b3SDavid Woodhouse 141653b381b3SDavid Woodhouse /* 141753b381b3SDavid Woodhouse * end io for the read phase of the rmw cycle. All the bios here are physical 141853b381b3SDavid Woodhouse * stripe bios we've read from the disk so we can recalculate the parity of the 141953b381b3SDavid Woodhouse * stripe. 142053b381b3SDavid Woodhouse * 142153b381b3SDavid Woodhouse * This will usually kick off finish_rmw once all the bios are read in, but it 142253b381b3SDavid Woodhouse * may trigger parity reconstruction if we had any errors along the way 142353b381b3SDavid Woodhouse */ 14244246a0b6SChristoph Hellwig static void raid_rmw_end_io(struct bio *bio) 142553b381b3SDavid Woodhouse { 142653b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio = bio->bi_private; 142753b381b3SDavid Woodhouse 14284e4cbee9SChristoph Hellwig if (bio->bi_status) 142953b381b3SDavid Woodhouse fail_bio_stripe(rbio, bio); 143053b381b3SDavid Woodhouse else 143153b381b3SDavid Woodhouse set_bio_pages_uptodate(bio); 143253b381b3SDavid Woodhouse 143353b381b3SDavid Woodhouse bio_put(bio); 143453b381b3SDavid Woodhouse 1435b89e1b01SMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 143653b381b3SDavid Woodhouse return; 143753b381b3SDavid Woodhouse 14384c664611SQu Wenruo if (atomic_read(&rbio->error) > rbio->bioc->max_errors) 143953b381b3SDavid Woodhouse goto cleanup; 144053b381b3SDavid Woodhouse 144153b381b3SDavid Woodhouse /* 144253b381b3SDavid Woodhouse * this will normally call finish_rmw to start our write 144353b381b3SDavid Woodhouse * but if there are any failed stripes we'll reconstruct 144453b381b3SDavid Woodhouse * from parity first 144553b381b3SDavid Woodhouse */ 144653b381b3SDavid Woodhouse validate_rbio_for_rmw(rbio); 144753b381b3SDavid Woodhouse return; 144853b381b3SDavid Woodhouse 144953b381b3SDavid Woodhouse cleanup: 145053b381b3SDavid Woodhouse 145158efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 145253b381b3SDavid Woodhouse } 145353b381b3SDavid Woodhouse 145453b381b3SDavid Woodhouse /* 145553b381b3SDavid Woodhouse * the stripe must be locked by the caller. 
It will 145653b381b3SDavid Woodhouse * unlock after all the writes are done 145753b381b3SDavid Woodhouse */ 145853b381b3SDavid Woodhouse static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) 145953b381b3SDavid Woodhouse { 146053b381b3SDavid Woodhouse int bios_to_read = 0; 146153b381b3SDavid Woodhouse struct bio_list bio_list; 146253b381b3SDavid Woodhouse int ret; 146353b381b3SDavid Woodhouse int pagenr; 146453b381b3SDavid Woodhouse int stripe; 146553b381b3SDavid Woodhouse struct bio *bio; 146653b381b3SDavid Woodhouse 146753b381b3SDavid Woodhouse bio_list_init(&bio_list); 146853b381b3SDavid Woodhouse 146953b381b3SDavid Woodhouse ret = alloc_rbio_pages(rbio); 147053b381b3SDavid Woodhouse if (ret) 147153b381b3SDavid Woodhouse goto cleanup; 147253b381b3SDavid Woodhouse 147353b381b3SDavid Woodhouse index_rbio_pages(rbio); 147453b381b3SDavid Woodhouse 1475b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 147653b381b3SDavid Woodhouse /* 147753b381b3SDavid Woodhouse * build a list of bios to read all the missing parts of this 147853b381b3SDavid Woodhouse * stripe 147953b381b3SDavid Woodhouse */ 148053b381b3SDavid Woodhouse for (stripe = 0; stripe < rbio->nr_data; stripe++) { 1481915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 148253b381b3SDavid Woodhouse struct page *page; 148353b381b3SDavid Woodhouse /* 148453b381b3SDavid Woodhouse * we want to find all the pages missing from 148553b381b3SDavid Woodhouse * the rbio and read them from the disk. If 148653b381b3SDavid Woodhouse * page_in_rbio finds a page in the bio list 148753b381b3SDavid Woodhouse * we don't need to read it off the stripe. 148853b381b3SDavid Woodhouse */ 148953b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 1); 149053b381b3SDavid Woodhouse if (page) 149153b381b3SDavid Woodhouse continue; 149253b381b3SDavid Woodhouse 149353b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 14944ae10b3aSChris Mason /* 14954ae10b3aSChris Mason * the bio cache may have handed us an uptodate 14964ae10b3aSChris Mason * page. If so, be happy and use it 14974ae10b3aSChris Mason */ 14984ae10b3aSChris Mason if (PageUptodate(page)) 14994ae10b3aSChris Mason continue; 15004ae10b3aSChris Mason 150153b381b3SDavid Woodhouse ret = rbio_add_io_page(rbio, &bio_list, page, 1502*e01bf588SChristoph Hellwig stripe, pagenr, rbio->stripe_len, 1503*e01bf588SChristoph Hellwig REQ_OP_READ); 150453b381b3SDavid Woodhouse if (ret) 150553b381b3SDavid Woodhouse goto cleanup; 150653b381b3SDavid Woodhouse } 150753b381b3SDavid Woodhouse } 150853b381b3SDavid Woodhouse 150953b381b3SDavid Woodhouse bios_to_read = bio_list_size(&bio_list); 151053b381b3SDavid Woodhouse if (!bios_to_read) { 151153b381b3SDavid Woodhouse /* 151253b381b3SDavid Woodhouse * this can happen if others have merged with 151353b381b3SDavid Woodhouse * us, it means there is nothing left to read. 151453b381b3SDavid Woodhouse * But if there are missing devices it may not be 151553b381b3SDavid Woodhouse * safe to do the full stripe write yet. 151653b381b3SDavid Woodhouse */ 151753b381b3SDavid Woodhouse goto finish; 151853b381b3SDavid Woodhouse } 151953b381b3SDavid Woodhouse 152053b381b3SDavid Woodhouse /* 15214c664611SQu Wenruo * The bioc may be freed once we submit the last bio. Make sure not to 15224c664611SQu Wenruo * touch it after that. 
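 * In other words, pull anything we still need out of the bioc before the
 * final submit_bio(); the read completions can finish the rbio and drop
 * its last reference at any point after that.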
152353b381b3SDavid Woodhouse */ 1524b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, bios_to_read); 1525bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 152653b381b3SDavid Woodhouse bio->bi_end_io = raid_rmw_end_io; 152753b381b3SDavid Woodhouse 15286a258d72SQu Wenruo btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 152953b381b3SDavid Woodhouse 15304e49ea4aSMike Christie submit_bio(bio); 153153b381b3SDavid Woodhouse } 153253b381b3SDavid Woodhouse /* the actual write will happen once the reads are done */ 153353b381b3SDavid Woodhouse return 0; 153453b381b3SDavid Woodhouse 153553b381b3SDavid Woodhouse cleanup: 153658efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 1537785884fcSLiu Bo 1538785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 1539785884fcSLiu Bo bio_put(bio); 1540785884fcSLiu Bo 154153b381b3SDavid Woodhouse return -EIO; 154253b381b3SDavid Woodhouse 154353b381b3SDavid Woodhouse finish: 154453b381b3SDavid Woodhouse validate_rbio_for_rmw(rbio); 154553b381b3SDavid Woodhouse return 0; 154653b381b3SDavid Woodhouse } 154753b381b3SDavid Woodhouse 154853b381b3SDavid Woodhouse /* 154953b381b3SDavid Woodhouse * if the upper layers pass in a full stripe, we thank them by only allocating 155053b381b3SDavid Woodhouse * enough pages to hold the parity, and sending it all down quickly. 155153b381b3SDavid Woodhouse */ 155253b381b3SDavid Woodhouse static int full_stripe_write(struct btrfs_raid_bio *rbio) 155353b381b3SDavid Woodhouse { 155453b381b3SDavid Woodhouse int ret; 155553b381b3SDavid Woodhouse 155653b381b3SDavid Woodhouse ret = alloc_rbio_parity_pages(rbio); 15573cd846d1SMiao Xie if (ret) { 15583cd846d1SMiao Xie __free_raid_bio(rbio); 155953b381b3SDavid Woodhouse return ret; 15603cd846d1SMiao Xie } 156153b381b3SDavid Woodhouse 156253b381b3SDavid Woodhouse ret = lock_stripe_add(rbio); 156353b381b3SDavid Woodhouse if (ret == 0) 156453b381b3SDavid Woodhouse finish_rmw(rbio); 156553b381b3SDavid Woodhouse return 0; 156653b381b3SDavid Woodhouse } 156753b381b3SDavid Woodhouse 156853b381b3SDavid Woodhouse /* 156953b381b3SDavid Woodhouse * partial stripe writes get handed over to async helpers. 157053b381b3SDavid Woodhouse * We're really hoping to merge a few more writes into this 157153b381b3SDavid Woodhouse * rbio before calculating new parity 157253b381b3SDavid Woodhouse */ 157353b381b3SDavid Woodhouse static int partial_stripe_write(struct btrfs_raid_bio *rbio) 157453b381b3SDavid Woodhouse { 157553b381b3SDavid Woodhouse int ret; 157653b381b3SDavid Woodhouse 157753b381b3SDavid Woodhouse ret = lock_stripe_add(rbio); 157853b381b3SDavid Woodhouse if (ret == 0) 1579cf6a4a75SDavid Sterba start_async_work(rbio, rmw_work); 158053b381b3SDavid Woodhouse return 0; 158153b381b3SDavid Woodhouse } 158253b381b3SDavid Woodhouse 158353b381b3SDavid Woodhouse /* 158453b381b3SDavid Woodhouse * sometimes while we were reading from the drive to 158553b381b3SDavid Woodhouse * recalculate parity, enough new bios come into create 158653b381b3SDavid Woodhouse * a full stripe. 
So we do a check here to see if we can 158753b381b3SDavid Woodhouse * go directly to finish_rmw 158853b381b3SDavid Woodhouse */ 158953b381b3SDavid Woodhouse static int __raid56_parity_write(struct btrfs_raid_bio *rbio) 159053b381b3SDavid Woodhouse { 159153b381b3SDavid Woodhouse /* head off into rmw land if we don't have a full stripe */ 159253b381b3SDavid Woodhouse if (!rbio_is_full(rbio)) 159353b381b3SDavid Woodhouse return partial_stripe_write(rbio); 159453b381b3SDavid Woodhouse return full_stripe_write(rbio); 159553b381b3SDavid Woodhouse } 159653b381b3SDavid Woodhouse 159753b381b3SDavid Woodhouse /* 15986ac0f488SChris Mason * We use plugging call backs to collect full stripes. 15996ac0f488SChris Mason * Any time we get a partial stripe write while plugged 16006ac0f488SChris Mason * we collect it into a list. When the unplug comes down, 16016ac0f488SChris Mason * we sort the list by logical block number and merge 16026ac0f488SChris Mason * everything we can into the same rbios 16036ac0f488SChris Mason */ 16046ac0f488SChris Mason struct btrfs_plug_cb { 16056ac0f488SChris Mason struct blk_plug_cb cb; 16066ac0f488SChris Mason struct btrfs_fs_info *info; 16076ac0f488SChris Mason struct list_head rbio_list; 16086ac0f488SChris Mason struct btrfs_work work; 16096ac0f488SChris Mason }; 16106ac0f488SChris Mason 16116ac0f488SChris Mason /* 16126ac0f488SChris Mason * rbios on the plug list are sorted for easier merging. 16136ac0f488SChris Mason */ 16144f0f586bSSami Tolvanen static int plug_cmp(void *priv, const struct list_head *a, 16154f0f586bSSami Tolvanen const struct list_head *b) 16166ac0f488SChris Mason { 1617214cc184SDavid Sterba const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio, 16186ac0f488SChris Mason plug_list); 1619214cc184SDavid Sterba const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, 16206ac0f488SChris Mason plug_list); 16214f024f37SKent Overstreet u64 a_sector = ra->bio_list.head->bi_iter.bi_sector; 16224f024f37SKent Overstreet u64 b_sector = rb->bio_list.head->bi_iter.bi_sector; 16236ac0f488SChris Mason 16246ac0f488SChris Mason if (a_sector < b_sector) 16256ac0f488SChris Mason return -1; 16266ac0f488SChris Mason if (a_sector > b_sector) 16276ac0f488SChris Mason return 1; 16286ac0f488SChris Mason return 0; 16296ac0f488SChris Mason } 16306ac0f488SChris Mason 16316ac0f488SChris Mason static void run_plug(struct btrfs_plug_cb *plug) 16326ac0f488SChris Mason { 16336ac0f488SChris Mason struct btrfs_raid_bio *cur; 16346ac0f488SChris Mason struct btrfs_raid_bio *last = NULL; 16356ac0f488SChris Mason 16366ac0f488SChris Mason /* 16376ac0f488SChris Mason * sort our plug list then try to merge 16386ac0f488SChris Mason * everything we can in hopes of creating full 16396ac0f488SChris Mason * stripes. 
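 *
 * plug_cmp() orders the rbios by the starting sector of their first bio,
 * so writes that land in the same full stripe end up adjacent and can be
 * merged before anything is sent down.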
16406ac0f488SChris Mason */ 16416ac0f488SChris Mason list_sort(NULL, &plug->rbio_list, plug_cmp); 16426ac0f488SChris Mason while (!list_empty(&plug->rbio_list)) { 16436ac0f488SChris Mason cur = list_entry(plug->rbio_list.next, 16446ac0f488SChris Mason struct btrfs_raid_bio, plug_list); 16456ac0f488SChris Mason list_del_init(&cur->plug_list); 16466ac0f488SChris Mason 16476ac0f488SChris Mason if (rbio_is_full(cur)) { 1648c7b562c5SDavid Sterba int ret; 1649c7b562c5SDavid Sterba 16506ac0f488SChris Mason /* we have a full stripe, send it down */ 1651c7b562c5SDavid Sterba ret = full_stripe_write(cur); 1652c7b562c5SDavid Sterba BUG_ON(ret); 16536ac0f488SChris Mason continue; 16546ac0f488SChris Mason } 16556ac0f488SChris Mason if (last) { 16566ac0f488SChris Mason if (rbio_can_merge(last, cur)) { 16576ac0f488SChris Mason merge_rbio(last, cur); 16586ac0f488SChris Mason __free_raid_bio(cur); 16596ac0f488SChris Mason continue; 16606ac0f488SChris Mason 16616ac0f488SChris Mason } 16626ac0f488SChris Mason __raid56_parity_write(last); 16636ac0f488SChris Mason } 16646ac0f488SChris Mason last = cur; 16656ac0f488SChris Mason } 16666ac0f488SChris Mason if (last) { 16676ac0f488SChris Mason __raid56_parity_write(last); 16686ac0f488SChris Mason } 16696ac0f488SChris Mason kfree(plug); 16706ac0f488SChris Mason } 16716ac0f488SChris Mason 16726ac0f488SChris Mason /* 16736ac0f488SChris Mason * if the unplug comes from schedule, we have to push the 16746ac0f488SChris Mason * work off to a helper thread 16756ac0f488SChris Mason */ 16766ac0f488SChris Mason static void unplug_work(struct btrfs_work *work) 16776ac0f488SChris Mason { 16786ac0f488SChris Mason struct btrfs_plug_cb *plug; 16796ac0f488SChris Mason plug = container_of(work, struct btrfs_plug_cb, work); 16806ac0f488SChris Mason run_plug(plug); 16816ac0f488SChris Mason } 16826ac0f488SChris Mason 16836ac0f488SChris Mason static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) 16846ac0f488SChris Mason { 16856ac0f488SChris Mason struct btrfs_plug_cb *plug; 16866ac0f488SChris Mason plug = container_of(cb, struct btrfs_plug_cb, cb); 16876ac0f488SChris Mason 16886ac0f488SChris Mason if (from_schedule) { 1689a0cac0ecSOmar Sandoval btrfs_init_work(&plug->work, unplug_work, NULL, NULL); 1690d05a33acSQu Wenruo btrfs_queue_work(plug->info->rmw_workers, 16916ac0f488SChris Mason &plug->work); 16926ac0f488SChris Mason return; 16936ac0f488SChris Mason } 16946ac0f488SChris Mason run_plug(plug); 16956ac0f488SChris Mason } 16966ac0f488SChris Mason 16976ac0f488SChris Mason /* 169853b381b3SDavid Woodhouse * our main entry point for writes from the rest of the FS. 
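 *
 * Full stripes are sent down immediately; partial stripes are parked on
 * the current plug (when there is one) so that more writes can be merged
 * in before we fall back to the rmw path.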
169953b381b3SDavid Woodhouse */ 17006a258d72SQu Wenruo int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, 17016a258d72SQu Wenruo u64 stripe_len) 170253b381b3SDavid Woodhouse { 17036a258d72SQu Wenruo struct btrfs_fs_info *fs_info = bioc->fs_info; 170453b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 17056ac0f488SChris Mason struct btrfs_plug_cb *plug = NULL; 17066ac0f488SChris Mason struct blk_plug_cb *cb; 17074245215dSMiao Xie int ret; 170853b381b3SDavid Woodhouse 17094c664611SQu Wenruo rbio = alloc_rbio(fs_info, bioc, stripe_len); 1710af8e2d1dSMiao Xie if (IS_ERR(rbio)) { 17114c664611SQu Wenruo btrfs_put_bioc(bioc); 171253b381b3SDavid Woodhouse return PTR_ERR(rbio); 1713af8e2d1dSMiao Xie } 171453b381b3SDavid Woodhouse bio_list_add(&rbio->bio_list, bio); 17154f024f37SKent Overstreet rbio->bio_list_bytes = bio->bi_iter.bi_size; 17161b94b556SMiao Xie rbio->operation = BTRFS_RBIO_WRITE; 17176ac0f488SChris Mason 17180b246afaSJeff Mahoney btrfs_bio_counter_inc_noblocked(fs_info); 17194245215dSMiao Xie rbio->generic_bio_cnt = 1; 17204245215dSMiao Xie 17216ac0f488SChris Mason /* 17226ac0f488SChris Mason * don't plug on full rbios, just get them out the door 17236ac0f488SChris Mason * as quickly as we can 17246ac0f488SChris Mason */ 17254245215dSMiao Xie if (rbio_is_full(rbio)) { 17264245215dSMiao Xie ret = full_stripe_write(rbio); 17274245215dSMiao Xie if (ret) 17280b246afaSJeff Mahoney btrfs_bio_counter_dec(fs_info); 17294245215dSMiao Xie return ret; 17304245215dSMiao Xie } 17316ac0f488SChris Mason 17320b246afaSJeff Mahoney cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug)); 17336ac0f488SChris Mason if (cb) { 17346ac0f488SChris Mason plug = container_of(cb, struct btrfs_plug_cb, cb); 17356ac0f488SChris Mason if (!plug->info) { 17360b246afaSJeff Mahoney plug->info = fs_info; 17376ac0f488SChris Mason INIT_LIST_HEAD(&plug->rbio_list); 17386ac0f488SChris Mason } 17396ac0f488SChris Mason list_add_tail(&rbio->plug_list, &plug->rbio_list); 17404245215dSMiao Xie ret = 0; 17416ac0f488SChris Mason } else { 17424245215dSMiao Xie ret = __raid56_parity_write(rbio); 17434245215dSMiao Xie if (ret) 17440b246afaSJeff Mahoney btrfs_bio_counter_dec(fs_info); 174553b381b3SDavid Woodhouse } 17464245215dSMiao Xie return ret; 17476ac0f488SChris Mason } 174853b381b3SDavid Woodhouse 174953b381b3SDavid Woodhouse /* 175053b381b3SDavid Woodhouse * all parity reconstruction happens here. We've read in everything 175153b381b3SDavid Woodhouse * we can find from the drives and this does the heavy lifting of 175253b381b3SDavid Woodhouse * sorting the good from the bad. 
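 *
 * Roughly: a single missing data stripe is rebuilt by xor'ing the parity
 * with the surviving data (D_bad = P ^ D0 ^ D1 ^ ...), while raid6
 * double failures are handed off to raid6_datap_recov() and
 * raid6_2data_recov().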
175353b381b3SDavid Woodhouse */ 175453b381b3SDavid Woodhouse static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) 175553b381b3SDavid Woodhouse { 175653b381b3SDavid Woodhouse int pagenr, stripe; 175753b381b3SDavid Woodhouse void **pointers; 175894a0b58dSIra Weiny void **unmap_array; 175953b381b3SDavid Woodhouse int faila = -1, failb = -1; 176053b381b3SDavid Woodhouse struct page *page; 176158efbc9fSOmar Sandoval blk_status_t err; 176253b381b3SDavid Woodhouse int i; 176353b381b3SDavid Woodhouse 176431e818feSDavid Sterba pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); 176553b381b3SDavid Woodhouse if (!pointers) { 176658efbc9fSOmar Sandoval err = BLK_STS_RESOURCE; 176753b381b3SDavid Woodhouse goto cleanup_io; 176853b381b3SDavid Woodhouse } 176953b381b3SDavid Woodhouse 177094a0b58dSIra Weiny /* 177194a0b58dSIra Weiny * Store copy of pointers that does not get reordered during 177294a0b58dSIra Weiny * reconstruction so that kunmap_local works. 177394a0b58dSIra Weiny */ 177494a0b58dSIra Weiny unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); 177594a0b58dSIra Weiny if (!unmap_array) { 177694a0b58dSIra Weiny err = BLK_STS_RESOURCE; 177794a0b58dSIra Weiny goto cleanup_pointers; 177894a0b58dSIra Weiny } 177994a0b58dSIra Weiny 178053b381b3SDavid Woodhouse faila = rbio->faila; 178153b381b3SDavid Woodhouse failb = rbio->failb; 178253b381b3SDavid Woodhouse 1783b4ee1782SOmar Sandoval if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 1784b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { 178553b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 178653b381b3SDavid Woodhouse set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 178753b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 178853b381b3SDavid Woodhouse } 178953b381b3SDavid Woodhouse 179053b381b3SDavid Woodhouse index_rbio_pages(rbio); 179153b381b3SDavid Woodhouse 1792915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 17935a6ac9eaSMiao Xie /* 17945a6ac9eaSMiao Xie * Now we just use bitmap to mark the horizontal stripes in 17955a6ac9eaSMiao Xie * which we have data when doing parity scrub. 
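 * Pages whose bit is clear in dbitmap are skipped for parity scrub;
 * every other operation rebuilds all stripe_npages pages.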
17965a6ac9eaSMiao Xie */ 17975a6ac9eaSMiao Xie if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && 17985a6ac9eaSMiao Xie !test_bit(pagenr, rbio->dbitmap)) 17995a6ac9eaSMiao Xie continue; 18005a6ac9eaSMiao Xie 180194a0b58dSIra Weiny /* 180294a0b58dSIra Weiny * Setup our array of pointers with pages from each stripe 180394a0b58dSIra Weiny * 180494a0b58dSIra Weiny * NOTE: store a duplicate array of pointers to preserve the 180594a0b58dSIra Weiny * pointer order 180653b381b3SDavid Woodhouse */ 18072c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 180853b381b3SDavid Woodhouse /* 180953b381b3SDavid Woodhouse * if we're rebuilding a read, we have to use 181053b381b3SDavid Woodhouse * pages from the bio list 181153b381b3SDavid Woodhouse */ 1812b4ee1782SOmar Sandoval if ((rbio->operation == BTRFS_RBIO_READ_REBUILD || 1813b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) && 181453b381b3SDavid Woodhouse (stripe == faila || stripe == failb)) { 181553b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 0); 181653b381b3SDavid Woodhouse } else { 181753b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 181853b381b3SDavid Woodhouse } 181994a0b58dSIra Weiny pointers[stripe] = kmap_local_page(page); 182094a0b58dSIra Weiny unmap_array[stripe] = pointers[stripe]; 182153b381b3SDavid Woodhouse } 182253b381b3SDavid Woodhouse 182353b381b3SDavid Woodhouse /* all raid6 handling here */ 18244c664611SQu Wenruo if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) { 182553b381b3SDavid Woodhouse /* 182653b381b3SDavid Woodhouse * single failure, rebuild from parity raid5 182753b381b3SDavid Woodhouse * style 182853b381b3SDavid Woodhouse */ 182953b381b3SDavid Woodhouse if (failb < 0) { 183053b381b3SDavid Woodhouse if (faila == rbio->nr_data) { 183153b381b3SDavid Woodhouse /* 183253b381b3SDavid Woodhouse * Just the P stripe has failed, without 183353b381b3SDavid Woodhouse * a bad data or Q stripe. 183453b381b3SDavid Woodhouse * TODO, we should redo the xor here. 183553b381b3SDavid Woodhouse */ 183658efbc9fSOmar Sandoval err = BLK_STS_IOERR; 183753b381b3SDavid Woodhouse goto cleanup; 183853b381b3SDavid Woodhouse } 183953b381b3SDavid Woodhouse /* 184053b381b3SDavid Woodhouse * a single failure in raid6 is rebuilt 184153b381b3SDavid Woodhouse * in the pstripe code below 184253b381b3SDavid Woodhouse */ 184353b381b3SDavid Woodhouse goto pstripe; 184453b381b3SDavid Woodhouse } 184553b381b3SDavid Woodhouse 184653b381b3SDavid Woodhouse /* make sure our ps and qs are in order */ 1847b7d2083aSNikolay Borisov if (faila > failb) 1848b7d2083aSNikolay Borisov swap(faila, failb); 184953b381b3SDavid Woodhouse 185053b381b3SDavid Woodhouse /* if the q stripe is failed, do a pstripe reconstruction 185153b381b3SDavid Woodhouse * from the xors. 185253b381b3SDavid Woodhouse * If both the q stripe and the P stripe are failed, we're 185353b381b3SDavid Woodhouse * here due to a crc mismatch and we can't give them the 185453b381b3SDavid Woodhouse * data they want 185553b381b3SDavid Woodhouse */ 18564c664611SQu Wenruo if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) { 18574c664611SQu Wenruo if (rbio->bioc->raid_map[faila] == 18588e5cfb55SZhao Lei RAID5_P_STRIPE) { 185958efbc9fSOmar Sandoval err = BLK_STS_IOERR; 186053b381b3SDavid Woodhouse goto cleanup; 186153b381b3SDavid Woodhouse } 186253b381b3SDavid Woodhouse /* 186353b381b3SDavid Woodhouse * otherwise we have one bad data stripe and 186453b381b3SDavid Woodhouse * a good P stripe. raid5! 
186553b381b3SDavid Woodhouse */ 186653b381b3SDavid Woodhouse goto pstripe; 186753b381b3SDavid Woodhouse } 186853b381b3SDavid Woodhouse 18694c664611SQu Wenruo if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) { 18702c8cdd6eSMiao Xie raid6_datap_recov(rbio->real_stripes, 187153b381b3SDavid Woodhouse PAGE_SIZE, faila, pointers); 187253b381b3SDavid Woodhouse } else { 18732c8cdd6eSMiao Xie raid6_2data_recov(rbio->real_stripes, 187453b381b3SDavid Woodhouse PAGE_SIZE, faila, failb, 187553b381b3SDavid Woodhouse pointers); 187653b381b3SDavid Woodhouse } 187753b381b3SDavid Woodhouse } else { 187853b381b3SDavid Woodhouse void *p; 187953b381b3SDavid Woodhouse 188053b381b3SDavid Woodhouse /* rebuild from P stripe here (raid5 or raid6) */ 188153b381b3SDavid Woodhouse BUG_ON(failb != -1); 188253b381b3SDavid Woodhouse pstripe: 188353b381b3SDavid Woodhouse /* Copy parity block into failed block to start with */ 188469d24804SDavid Sterba copy_page(pointers[faila], pointers[rbio->nr_data]); 188553b381b3SDavid Woodhouse 188653b381b3SDavid Woodhouse /* rearrange the pointer array */ 188753b381b3SDavid Woodhouse p = pointers[faila]; 188853b381b3SDavid Woodhouse for (stripe = faila; stripe < rbio->nr_data - 1; stripe++) 188953b381b3SDavid Woodhouse pointers[stripe] = pointers[stripe + 1]; 189053b381b3SDavid Woodhouse pointers[rbio->nr_data - 1] = p; 189153b381b3SDavid Woodhouse 189253b381b3SDavid Woodhouse /* xor in the rest */ 189309cbfeafSKirill A. Shutemov run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE); 189453b381b3SDavid Woodhouse } 189553b381b3SDavid Woodhouse /* if we're doing this rebuild as part of an rmw, go through 189653b381b3SDavid Woodhouse * and set all of our private rbio pages in the 189753b381b3SDavid Woodhouse * failed stripes as uptodate. This way finish_rmw will 189853b381b3SDavid Woodhouse * know they can be trusted. If this was a read reconstruction, 189953b381b3SDavid Woodhouse * other endio functions will fiddle the uptodate bits 190053b381b3SDavid Woodhouse */ 19011b94b556SMiao Xie if (rbio->operation == BTRFS_RBIO_WRITE) { 1902915e2290SZhao Lei for (i = 0; i < rbio->stripe_npages; i++) { 190353b381b3SDavid Woodhouse if (faila != -1) { 190453b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, faila, i); 190553b381b3SDavid Woodhouse SetPageUptodate(page); 190653b381b3SDavid Woodhouse } 190753b381b3SDavid Woodhouse if (failb != -1) { 190853b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, failb, i); 190953b381b3SDavid Woodhouse SetPageUptodate(page); 191053b381b3SDavid Woodhouse } 191153b381b3SDavid Woodhouse } 191253b381b3SDavid Woodhouse } 191394a0b58dSIra Weiny for (stripe = rbio->real_stripes - 1; stripe >= 0; stripe--) 191494a0b58dSIra Weiny kunmap_local(unmap_array[stripe]); 191553b381b3SDavid Woodhouse } 191653b381b3SDavid Woodhouse 191758efbc9fSOmar Sandoval err = BLK_STS_OK; 191853b381b3SDavid Woodhouse cleanup: 191994a0b58dSIra Weiny kfree(unmap_array); 192094a0b58dSIra Weiny cleanup_pointers: 192153b381b3SDavid Woodhouse kfree(pointers); 192253b381b3SDavid Woodhouse 192353b381b3SDavid Woodhouse cleanup_io: 1924580c6efaSLiu Bo /* 1925580c6efaSLiu Bo * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a 1926580c6efaSLiu Bo * valid rbio which is consistent with ondisk content, thus such a 1927580c6efaSLiu Bo * valid rbio can be cached to avoid further disk reads. 
1928580c6efaSLiu Bo */ 1929580c6efaSLiu Bo if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 1930580c6efaSLiu Bo rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { 193144ac474dSLiu Bo /* 193244ac474dSLiu Bo * - In case of two failures, where rbio->failb != -1: 193344ac474dSLiu Bo * 193444ac474dSLiu Bo * Do not cache this rbio since the above read reconstruction 193544ac474dSLiu Bo * (raid6_datap_recov() or raid6_2data_recov()) may have 193644ac474dSLiu Bo * changed some content of stripes which are not identical to 193744ac474dSLiu Bo * on-disk content any more, otherwise, a later write/recover 193844ac474dSLiu Bo * may steal stripe_pages from this rbio and end up with 193944ac474dSLiu Bo * corruptions or rebuild failures. 194044ac474dSLiu Bo * 194144ac474dSLiu Bo * - In case of single failure, where rbio->failb == -1: 194244ac474dSLiu Bo * 194344ac474dSLiu Bo * Cache this rbio iff the above read reconstruction is 194452042d8eSAndrea Gelmini * executed without problems. 194544ac474dSLiu Bo */ 194644ac474dSLiu Bo if (err == BLK_STS_OK && rbio->failb < 0) 19474ae10b3aSChris Mason cache_rbio_pages(rbio); 19484ae10b3aSChris Mason else 19494ae10b3aSChris Mason clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 19504ae10b3aSChris Mason 19514246a0b6SChristoph Hellwig rbio_orig_end_io(rbio, err); 195258efbc9fSOmar Sandoval } else if (err == BLK_STS_OK) { 195353b381b3SDavid Woodhouse rbio->faila = -1; 195453b381b3SDavid Woodhouse rbio->failb = -1; 19555a6ac9eaSMiao Xie 19565a6ac9eaSMiao Xie if (rbio->operation == BTRFS_RBIO_WRITE) 195753b381b3SDavid Woodhouse finish_rmw(rbio); 19585a6ac9eaSMiao Xie else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) 19595a6ac9eaSMiao Xie finish_parity_scrub(rbio, 0); 19605a6ac9eaSMiao Xie else 19615a6ac9eaSMiao Xie BUG(); 196253b381b3SDavid Woodhouse } else { 19634246a0b6SChristoph Hellwig rbio_orig_end_io(rbio, err); 196453b381b3SDavid Woodhouse } 196553b381b3SDavid Woodhouse } 196653b381b3SDavid Woodhouse 196753b381b3SDavid Woodhouse /* 196853b381b3SDavid Woodhouse * This is called only for stripes we've read from disk to 196953b381b3SDavid Woodhouse * reconstruct the parity. 197053b381b3SDavid Woodhouse */ 19714246a0b6SChristoph Hellwig static void raid_recover_end_io(struct bio *bio) 197253b381b3SDavid Woodhouse { 197353b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio = bio->bi_private; 197453b381b3SDavid Woodhouse 197553b381b3SDavid Woodhouse /* 197653b381b3SDavid Woodhouse * we only read stripe pages off the disk, set them 197753b381b3SDavid Woodhouse * up to date if there were no errors 197853b381b3SDavid Woodhouse */ 19794e4cbee9SChristoph Hellwig if (bio->bi_status) 198053b381b3SDavid Woodhouse fail_bio_stripe(rbio, bio); 198153b381b3SDavid Woodhouse else 198253b381b3SDavid Woodhouse set_bio_pages_uptodate(bio); 198353b381b3SDavid Woodhouse bio_put(bio); 198453b381b3SDavid Woodhouse 1985b89e1b01SMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 198653b381b3SDavid Woodhouse return; 198753b381b3SDavid Woodhouse 19884c664611SQu Wenruo if (atomic_read(&rbio->error) > rbio->bioc->max_errors) 198958efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 199053b381b3SDavid Woodhouse else 199153b381b3SDavid Woodhouse __raid_recover_end_io(rbio); 199253b381b3SDavid Woodhouse } 199353b381b3SDavid Woodhouse 199453b381b3SDavid Woodhouse /* 199553b381b3SDavid Woodhouse * reads everything we need off the disk to reconstruct 199653b381b3SDavid Woodhouse * the parity. endio handlers trigger final reconstruction 199753b381b3SDavid Woodhouse * when the IO is done. 
199853b381b3SDavid Woodhouse * 199953b381b3SDavid Woodhouse * This is used both for reads from the higher layers and for 200053b381b3SDavid Woodhouse * parity construction required to finish a rmw cycle. 200153b381b3SDavid Woodhouse */ 200253b381b3SDavid Woodhouse static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) 200353b381b3SDavid Woodhouse { 200453b381b3SDavid Woodhouse int bios_to_read = 0; 200553b381b3SDavid Woodhouse struct bio_list bio_list; 200653b381b3SDavid Woodhouse int ret; 200753b381b3SDavid Woodhouse int pagenr; 200853b381b3SDavid Woodhouse int stripe; 200953b381b3SDavid Woodhouse struct bio *bio; 201053b381b3SDavid Woodhouse 201153b381b3SDavid Woodhouse bio_list_init(&bio_list); 201253b381b3SDavid Woodhouse 201353b381b3SDavid Woodhouse ret = alloc_rbio_pages(rbio); 201453b381b3SDavid Woodhouse if (ret) 201553b381b3SDavid Woodhouse goto cleanup; 201653b381b3SDavid Woodhouse 2017b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 201853b381b3SDavid Woodhouse 201953b381b3SDavid Woodhouse /* 20204ae10b3aSChris Mason * read everything that hasn't failed. Thanks to the 20214ae10b3aSChris Mason * stripe cache, it is possible that some or all of these 20224ae10b3aSChris Mason * pages are going to be uptodate. 202353b381b3SDavid Woodhouse */ 20242c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 20255588383eSLiu Bo if (rbio->faila == stripe || rbio->failb == stripe) { 2026b89e1b01SMiao Xie atomic_inc(&rbio->error); 202753b381b3SDavid Woodhouse continue; 20285588383eSLiu Bo } 202953b381b3SDavid Woodhouse 2030915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 203153b381b3SDavid Woodhouse struct page *p; 203253b381b3SDavid Woodhouse 203353b381b3SDavid Woodhouse /* 203453b381b3SDavid Woodhouse * the rmw code may have already read this 203553b381b3SDavid Woodhouse * page in 203653b381b3SDavid Woodhouse */ 203753b381b3SDavid Woodhouse p = rbio_stripe_page(rbio, stripe, pagenr); 203853b381b3SDavid Woodhouse if (PageUptodate(p)) 203953b381b3SDavid Woodhouse continue; 204053b381b3SDavid Woodhouse 204153b381b3SDavid Woodhouse ret = rbio_add_io_page(rbio, &bio_list, 204253b381b3SDavid Woodhouse rbio_stripe_page(rbio, stripe, pagenr), 2043*e01bf588SChristoph Hellwig stripe, pagenr, rbio->stripe_len, 2044*e01bf588SChristoph Hellwig REQ_OP_READ); 204553b381b3SDavid Woodhouse if (ret < 0) 204653b381b3SDavid Woodhouse goto cleanup; 204753b381b3SDavid Woodhouse } 204853b381b3SDavid Woodhouse } 204953b381b3SDavid Woodhouse 205053b381b3SDavid Woodhouse bios_to_read = bio_list_size(&bio_list); 205153b381b3SDavid Woodhouse if (!bios_to_read) { 205253b381b3SDavid Woodhouse /* 205353b381b3SDavid Woodhouse * we might have no bios to read just because the pages 205453b381b3SDavid Woodhouse * were up to date, or we might have no bios to read because 205553b381b3SDavid Woodhouse * the devices were gone. 205653b381b3SDavid Woodhouse */ 20574c664611SQu Wenruo if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) { 205853b381b3SDavid Woodhouse __raid_recover_end_io(rbio); 2059813f8a0eSNikolay Borisov return 0; 206053b381b3SDavid Woodhouse } else { 206153b381b3SDavid Woodhouse goto cleanup; 206253b381b3SDavid Woodhouse } 206353b381b3SDavid Woodhouse } 206453b381b3SDavid Woodhouse 206553b381b3SDavid Woodhouse /* 20664c664611SQu Wenruo * The bioc may be freed once we submit the last bio. Make sure not to 20674c664611SQu Wenruo * touch it after that. 
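 * stripes_pending is also set before anything goes out, so it is the
 * last read completion, not this loop, that kicks off
 * __raid_recover_end_io().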
206853b381b3SDavid Woodhouse */ 2069b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, bios_to_read); 2070bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 207153b381b3SDavid Woodhouse bio->bi_end_io = raid_recover_end_io; 207253b381b3SDavid Woodhouse 20736a258d72SQu Wenruo btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 207453b381b3SDavid Woodhouse 20754e49ea4aSMike Christie submit_bio(bio); 207653b381b3SDavid Woodhouse } 2077813f8a0eSNikolay Borisov 207853b381b3SDavid Woodhouse return 0; 207953b381b3SDavid Woodhouse 208053b381b3SDavid Woodhouse cleanup: 2081b4ee1782SOmar Sandoval if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 2082b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) 208358efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 2084785884fcSLiu Bo 2085785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 2086785884fcSLiu Bo bio_put(bio); 2087785884fcSLiu Bo 208853b381b3SDavid Woodhouse return -EIO; 208953b381b3SDavid Woodhouse } 209053b381b3SDavid Woodhouse 209153b381b3SDavid Woodhouse /* 209253b381b3SDavid Woodhouse * the main entry point for reads from the higher layers. This 209353b381b3SDavid Woodhouse * is really only called when the normal read path had a failure, 209453b381b3SDavid Woodhouse * so we assume the bio they send down corresponds to a failed part 209553b381b3SDavid Woodhouse * of the drive. 209653b381b3SDavid Woodhouse */ 20976a258d72SQu Wenruo int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, 20986a258d72SQu Wenruo u64 stripe_len, int mirror_num, int generic_io) 209953b381b3SDavid Woodhouse { 21006a258d72SQu Wenruo struct btrfs_fs_info *fs_info = bioc->fs_info; 210153b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 210253b381b3SDavid Woodhouse int ret; 210353b381b3SDavid Woodhouse 2104abad60c6SLiu Bo if (generic_io) { 21054c664611SQu Wenruo ASSERT(bioc->mirror_num == mirror_num); 2106c3a3b19bSQu Wenruo btrfs_bio(bio)->mirror_num = mirror_num; 2107abad60c6SLiu Bo } 2108abad60c6SLiu Bo 21094c664611SQu Wenruo rbio = alloc_rbio(fs_info, bioc, stripe_len); 2110af8e2d1dSMiao Xie if (IS_ERR(rbio)) { 21116e9606d2SZhao Lei if (generic_io) 21124c664611SQu Wenruo btrfs_put_bioc(bioc); 211353b381b3SDavid Woodhouse return PTR_ERR(rbio); 2114af8e2d1dSMiao Xie } 211553b381b3SDavid Woodhouse 21161b94b556SMiao Xie rbio->operation = BTRFS_RBIO_READ_REBUILD; 211753b381b3SDavid Woodhouse bio_list_add(&rbio->bio_list, bio); 21184f024f37SKent Overstreet rbio->bio_list_bytes = bio->bi_iter.bi_size; 211953b381b3SDavid Woodhouse 212053b381b3SDavid Woodhouse rbio->faila = find_logical_bio_stripe(rbio, bio); 212153b381b3SDavid Woodhouse if (rbio->faila == -1) { 21220b246afaSJeff Mahoney btrfs_warn(fs_info, 21234c664611SQu Wenruo "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)", 21241201b58bSDavid Sterba __func__, bio->bi_iter.bi_sector << 9, 21254c664611SQu Wenruo (u64)bio->bi_iter.bi_size, bioc->map_type); 21266e9606d2SZhao Lei if (generic_io) 21274c664611SQu Wenruo btrfs_put_bioc(bioc); 212853b381b3SDavid Woodhouse kfree(rbio); 212953b381b3SDavid Woodhouse return -EIO; 213053b381b3SDavid Woodhouse } 213153b381b3SDavid Woodhouse 21324245215dSMiao Xie if (generic_io) { 21330b246afaSJeff Mahoney btrfs_bio_counter_inc_noblocked(fs_info); 21344245215dSMiao Xie rbio->generic_bio_cnt = 1; 21354245215dSMiao Xie } else { 21364c664611SQu Wenruo btrfs_get_bioc(bioc); 21374245215dSMiao Xie } 21384245215dSMiao Xie 
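/*
 * A note on the accounting above: generic reads are charged to the
 * fs-wide bio counter, while other callers keep their own bioc
 * reference, so the rbio grabs an extra one for its own lifetime.
 */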
213953b381b3SDavid Woodhouse /* 21408810f751SLiu Bo * Loop retry: 21418810f751SLiu Bo * for 'mirror == 2', reconstruct from all other stripes. 21428810f751SLiu Bo * for 'mirror_num > 2', select a stripe to fail on every retry. 214353b381b3SDavid Woodhouse */ 21448810f751SLiu Bo if (mirror_num > 2) { 21458810f751SLiu Bo /* 21468810f751SLiu Bo * 'mirror == 3' is to fail the p stripe and 21478810f751SLiu Bo * reconstruct from the q stripe. 'mirror > 3' is to 21488810f751SLiu Bo * fail a data stripe and reconstruct from p+q stripe. 21498810f751SLiu Bo */ 21508810f751SLiu Bo rbio->failb = rbio->real_stripes - (mirror_num - 1); 21518810f751SLiu Bo ASSERT(rbio->failb > 0); 21528810f751SLiu Bo if (rbio->failb <= rbio->faila) 21538810f751SLiu Bo rbio->failb--; 21548810f751SLiu Bo } 215553b381b3SDavid Woodhouse 215653b381b3SDavid Woodhouse ret = lock_stripe_add(rbio); 215753b381b3SDavid Woodhouse 215853b381b3SDavid Woodhouse /* 215953b381b3SDavid Woodhouse * __raid56_parity_recover will end the bio with 216053b381b3SDavid Woodhouse * any errors it hits. We don't want to return 216153b381b3SDavid Woodhouse * its error value up the stack because our caller 216253b381b3SDavid Woodhouse * will end up calling bio_endio with any nonzero 216353b381b3SDavid Woodhouse * return 216453b381b3SDavid Woodhouse */ 216553b381b3SDavid Woodhouse if (ret == 0) 216653b381b3SDavid Woodhouse __raid56_parity_recover(rbio); 216753b381b3SDavid Woodhouse /* 216853b381b3SDavid Woodhouse * our rbio has been added to the list of 216953b381b3SDavid Woodhouse * rbios that will be handled after the 217053b381b3SDavid Woodhouse * currently lock owner is done 217153b381b3SDavid Woodhouse */ 217253b381b3SDavid Woodhouse return 0; 217353b381b3SDavid Woodhouse 217453b381b3SDavid Woodhouse } 217553b381b3SDavid Woodhouse 217653b381b3SDavid Woodhouse static void rmw_work(struct btrfs_work *work) 217753b381b3SDavid Woodhouse { 217853b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 217953b381b3SDavid Woodhouse 218053b381b3SDavid Woodhouse rbio = container_of(work, struct btrfs_raid_bio, work); 218153b381b3SDavid Woodhouse raid56_rmw_stripe(rbio); 218253b381b3SDavid Woodhouse } 218353b381b3SDavid Woodhouse 218453b381b3SDavid Woodhouse static void read_rebuild_work(struct btrfs_work *work) 218553b381b3SDavid Woodhouse { 218653b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 218753b381b3SDavid Woodhouse 218853b381b3SDavid Woodhouse rbio = container_of(work, struct btrfs_raid_bio, work); 218953b381b3SDavid Woodhouse __raid56_parity_recover(rbio); 219053b381b3SDavid Woodhouse } 21915a6ac9eaSMiao Xie 21925a6ac9eaSMiao Xie /* 21935a6ac9eaSMiao Xie * The following code is used to scrub/replace the parity stripe 21945a6ac9eaSMiao Xie * 21954c664611SQu Wenruo * Caller must have already increased bio_counter for getting @bioc. 2196ae6529c3SQu Wenruo * 21975a6ac9eaSMiao Xie * Note: We need make sure all the pages that add into the scrub/replace 21985a6ac9eaSMiao Xie * raid bio are correct and not be changed during the scrub/replace. That 21995a6ac9eaSMiao Xie * is those pages just hold metadata or file data with checksum. 
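 * The scrub code hands us those pages one at a time through
 * raid56_add_scrub_pages() before the rbio is kicked off.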
22005a6ac9eaSMiao Xie */ 22015a6ac9eaSMiao Xie 22026a258d72SQu Wenruo struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, 22036a258d72SQu Wenruo struct btrfs_io_context *bioc, 22046a258d72SQu Wenruo u64 stripe_len, struct btrfs_device *scrub_dev, 22055a6ac9eaSMiao Xie unsigned long *dbitmap, int stripe_nsectors) 22065a6ac9eaSMiao Xie { 22076a258d72SQu Wenruo struct btrfs_fs_info *fs_info = bioc->fs_info; 22085a6ac9eaSMiao Xie struct btrfs_raid_bio *rbio; 22095a6ac9eaSMiao Xie int i; 22105a6ac9eaSMiao Xie 22114c664611SQu Wenruo rbio = alloc_rbio(fs_info, bioc, stripe_len); 22125a6ac9eaSMiao Xie if (IS_ERR(rbio)) 22135a6ac9eaSMiao Xie return NULL; 22145a6ac9eaSMiao Xie bio_list_add(&rbio->bio_list, bio); 22155a6ac9eaSMiao Xie /* 22165a6ac9eaSMiao Xie * This is a special bio which is used to hold the completion handler 22175a6ac9eaSMiao Xie * and make the scrub rbio is similar to the other types 22185a6ac9eaSMiao Xie */ 22195a6ac9eaSMiao Xie ASSERT(!bio->bi_iter.bi_size); 22205a6ac9eaSMiao Xie rbio->operation = BTRFS_RBIO_PARITY_SCRUB; 22215a6ac9eaSMiao Xie 22229cd3a7ebSLiu Bo /* 22234c664611SQu Wenruo * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted 22249cd3a7ebSLiu Bo * to the end position, so this search can start from the first parity 22259cd3a7ebSLiu Bo * stripe. 22269cd3a7ebSLiu Bo */ 22279cd3a7ebSLiu Bo for (i = rbio->nr_data; i < rbio->real_stripes; i++) { 22284c664611SQu Wenruo if (bioc->stripes[i].dev == scrub_dev) { 22295a6ac9eaSMiao Xie rbio->scrubp = i; 22305a6ac9eaSMiao Xie break; 22315a6ac9eaSMiao Xie } 22325a6ac9eaSMiao Xie } 22339cd3a7ebSLiu Bo ASSERT(i < rbio->real_stripes); 22345a6ac9eaSMiao Xie 22355a6ac9eaSMiao Xie /* Now we just support the sectorsize equals to page size */ 22360b246afaSJeff Mahoney ASSERT(fs_info->sectorsize == PAGE_SIZE); 22375a6ac9eaSMiao Xie ASSERT(rbio->stripe_npages == stripe_nsectors); 22385a6ac9eaSMiao Xie bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); 22395a6ac9eaSMiao Xie 2240ae6529c3SQu Wenruo /* 22414c664611SQu Wenruo * We have already increased bio_counter when getting bioc, record it 2242ae6529c3SQu Wenruo * so we can free it at rbio_orig_end_io(). 2243ae6529c3SQu Wenruo */ 2244ae6529c3SQu Wenruo rbio->generic_bio_cnt = 1; 2245ae6529c3SQu Wenruo 22465a6ac9eaSMiao Xie return rbio; 22475a6ac9eaSMiao Xie } 22485a6ac9eaSMiao Xie 2249b4ee1782SOmar Sandoval /* Used for both parity scrub and missing. */ 2250b4ee1782SOmar Sandoval void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, 2251b4ee1782SOmar Sandoval u64 logical) 22525a6ac9eaSMiao Xie { 22535a6ac9eaSMiao Xie int stripe_offset; 22545a6ac9eaSMiao Xie int index; 22555a6ac9eaSMiao Xie 22564c664611SQu Wenruo ASSERT(logical >= rbio->bioc->raid_map[0]); 22574c664611SQu Wenruo ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] + 22585a6ac9eaSMiao Xie rbio->stripe_len * rbio->nr_data); 22594c664611SQu Wenruo stripe_offset = (int)(logical - rbio->bioc->raid_map[0]); 226009cbfeafSKirill A. Shutemov index = stripe_offset >> PAGE_SHIFT; 22615a6ac9eaSMiao Xie rbio->bio_pages[index] = page; 22625a6ac9eaSMiao Xie } 22635a6ac9eaSMiao Xie 22645a6ac9eaSMiao Xie /* 22655a6ac9eaSMiao Xie * We just scrub the parity that we have correct data on the same horizontal, 22665a6ac9eaSMiao Xie * so we needn't allocate all pages for all the stripes. 
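 * Only pages whose bit is set in dbitmap get allocated here, one page
 * per stripe for each set bit.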
22675a6ac9eaSMiao Xie */ 22685a6ac9eaSMiao Xie static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) 22695a6ac9eaSMiao Xie { 22705a6ac9eaSMiao Xie int i; 22715a6ac9eaSMiao Xie int bit; 22725a6ac9eaSMiao Xie int index; 22735a6ac9eaSMiao Xie struct page *page; 22745a6ac9eaSMiao Xie 22755a6ac9eaSMiao Xie for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { 22762c8cdd6eSMiao Xie for (i = 0; i < rbio->real_stripes; i++) { 22775a6ac9eaSMiao Xie index = i * rbio->stripe_npages + bit; 22785a6ac9eaSMiao Xie if (rbio->stripe_pages[index]) 22795a6ac9eaSMiao Xie continue; 22805a6ac9eaSMiao Xie 2281b0ee5e1eSDavid Sterba page = alloc_page(GFP_NOFS); 22825a6ac9eaSMiao Xie if (!page) 22835a6ac9eaSMiao Xie return -ENOMEM; 22845a6ac9eaSMiao Xie rbio->stripe_pages[index] = page; 22855a6ac9eaSMiao Xie } 22865a6ac9eaSMiao Xie } 22875a6ac9eaSMiao Xie return 0; 22885a6ac9eaSMiao Xie } 22895a6ac9eaSMiao Xie 22905a6ac9eaSMiao Xie static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, 22915a6ac9eaSMiao Xie int need_check) 22925a6ac9eaSMiao Xie { 22934c664611SQu Wenruo struct btrfs_io_context *bioc = rbio->bioc; 22941389053eSKees Cook void **pointers = rbio->finish_pointers; 22951389053eSKees Cook unsigned long *pbitmap = rbio->finish_pbitmap; 22965a6ac9eaSMiao Xie int nr_data = rbio->nr_data; 22975a6ac9eaSMiao Xie int stripe; 22985a6ac9eaSMiao Xie int pagenr; 2299c17af965SDavid Sterba bool has_qstripe; 23005a6ac9eaSMiao Xie struct page *p_page = NULL; 23015a6ac9eaSMiao Xie struct page *q_page = NULL; 23025a6ac9eaSMiao Xie struct bio_list bio_list; 23035a6ac9eaSMiao Xie struct bio *bio; 230476035976SMiao Xie int is_replace = 0; 23055a6ac9eaSMiao Xie int ret; 23065a6ac9eaSMiao Xie 23075a6ac9eaSMiao Xie bio_list_init(&bio_list); 23085a6ac9eaSMiao Xie 2309c17af965SDavid Sterba if (rbio->real_stripes - rbio->nr_data == 1) 2310c17af965SDavid Sterba has_qstripe = false; 2311c17af965SDavid Sterba else if (rbio->real_stripes - rbio->nr_data == 2) 2312c17af965SDavid Sterba has_qstripe = true; 2313c17af965SDavid Sterba else 23145a6ac9eaSMiao Xie BUG(); 23155a6ac9eaSMiao Xie 23164c664611SQu Wenruo if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) { 231776035976SMiao Xie is_replace = 1; 231876035976SMiao Xie bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); 231976035976SMiao Xie } 232076035976SMiao Xie 23215a6ac9eaSMiao Xie /* 23225a6ac9eaSMiao Xie * Because the higher layers(scrubber) are unlikely to 23235a6ac9eaSMiao Xie * use this area of the disk again soon, so don't cache 23245a6ac9eaSMiao Xie * it. 
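 *
 * When need_check is set we recompute P (and Q) into scratch pages,
 * memcmp them against what is on disk, and only leave the dbitmap bits
 * set for pages whose parity actually differs and needs writeback.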
23255a6ac9eaSMiao Xie */ 23265a6ac9eaSMiao Xie clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 23275a6ac9eaSMiao Xie 23285a6ac9eaSMiao Xie if (!need_check) 23295a6ac9eaSMiao Xie goto writeback; 23305a6ac9eaSMiao Xie 2331b0ee5e1eSDavid Sterba p_page = alloc_page(GFP_NOFS); 23325a6ac9eaSMiao Xie if (!p_page) 23335a6ac9eaSMiao Xie goto cleanup; 23345a6ac9eaSMiao Xie SetPageUptodate(p_page); 23355a6ac9eaSMiao Xie 2336c17af965SDavid Sterba if (has_qstripe) { 2337d70cef0dSIra Weiny /* RAID6, allocate and map temp space for the Q stripe */ 2338b0ee5e1eSDavid Sterba q_page = alloc_page(GFP_NOFS); 23395a6ac9eaSMiao Xie if (!q_page) { 23405a6ac9eaSMiao Xie __free_page(p_page); 23415a6ac9eaSMiao Xie goto cleanup; 23425a6ac9eaSMiao Xie } 23435a6ac9eaSMiao Xie SetPageUptodate(q_page); 234494a0b58dSIra Weiny pointers[rbio->real_stripes - 1] = kmap_local_page(q_page); 23455a6ac9eaSMiao Xie } 23465a6ac9eaSMiao Xie 23475a6ac9eaSMiao Xie atomic_set(&rbio->error, 0); 23485a6ac9eaSMiao Xie 2349d70cef0dSIra Weiny /* Map the parity stripe just once */ 235094a0b58dSIra Weiny pointers[nr_data] = kmap_local_page(p_page); 2351d70cef0dSIra Weiny 23525a6ac9eaSMiao Xie for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 23535a6ac9eaSMiao Xie struct page *p; 23545a6ac9eaSMiao Xie void *parity; 23555a6ac9eaSMiao Xie /* first collect one page from each data stripe */ 23565a6ac9eaSMiao Xie for (stripe = 0; stripe < nr_data; stripe++) { 23575a6ac9eaSMiao Xie p = page_in_rbio(rbio, stripe, pagenr, 0); 235894a0b58dSIra Weiny pointers[stripe] = kmap_local_page(p); 23595a6ac9eaSMiao Xie } 23605a6ac9eaSMiao Xie 2361c17af965SDavid Sterba if (has_qstripe) { 2362d70cef0dSIra Weiny /* RAID6, call the library function to fill in our P/Q */ 23632c8cdd6eSMiao Xie raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, 23645a6ac9eaSMiao Xie pointers); 23655a6ac9eaSMiao Xie } else { 23665a6ac9eaSMiao Xie /* raid5 */ 236769d24804SDavid Sterba copy_page(pointers[nr_data], pointers[0]); 236809cbfeafSKirill A. Shutemov run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 23695a6ac9eaSMiao Xie } 23705a6ac9eaSMiao Xie 237101327610SNicholas D Steeves /* Check scrubbing parity and repair it */ 23725a6ac9eaSMiao Xie p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 237358c1a35cSIra Weiny parity = kmap_local_page(p); 237409cbfeafSKirill A. Shutemov if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) 237569d24804SDavid Sterba copy_page(parity, pointers[rbio->scrubp]); 23765a6ac9eaSMiao Xie else 23775a6ac9eaSMiao Xie /* Parity is right, needn't writeback */ 23785a6ac9eaSMiao Xie bitmap_clear(rbio->dbitmap, pagenr, 1); 237958c1a35cSIra Weiny kunmap_local(parity); 23805a6ac9eaSMiao Xie 238194a0b58dSIra Weiny for (stripe = nr_data - 1; stripe >= 0; stripe--) 238294a0b58dSIra Weiny kunmap_local(pointers[stripe]); 23835a6ac9eaSMiao Xie } 23845a6ac9eaSMiao Xie 238594a0b58dSIra Weiny kunmap_local(pointers[nr_data]); 23865a6ac9eaSMiao Xie __free_page(p_page); 2387d70cef0dSIra Weiny if (q_page) { 238894a0b58dSIra Weiny kunmap_local(pointers[rbio->real_stripes - 1]); 23895a6ac9eaSMiao Xie __free_page(q_page); 2390d70cef0dSIra Weiny } 23915a6ac9eaSMiao Xie 23925a6ac9eaSMiao Xie writeback: 23935a6ac9eaSMiao Xie /* 23945a6ac9eaSMiao Xie * time to start writing. Make bios for everything from the 23955a6ac9eaSMiao Xie * higher layers (the bio_list in our rbio) and our p/q. Ignore 23965a6ac9eaSMiao Xie * everything else. 
23975a6ac9eaSMiao Xie */ 23985a6ac9eaSMiao Xie for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 23995a6ac9eaSMiao Xie struct page *page; 24005a6ac9eaSMiao Xie 24015a6ac9eaSMiao Xie page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 2402*e01bf588SChristoph Hellwig ret = rbio_add_io_page(rbio, &bio_list, page, rbio->scrubp, 2403*e01bf588SChristoph Hellwig pagenr, rbio->stripe_len, REQ_OP_WRITE); 24045a6ac9eaSMiao Xie if (ret) 24055a6ac9eaSMiao Xie goto cleanup; 24065a6ac9eaSMiao Xie } 24075a6ac9eaSMiao Xie 240876035976SMiao Xie if (!is_replace) 240976035976SMiao Xie goto submit_write; 241076035976SMiao Xie 241176035976SMiao Xie for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) { 241276035976SMiao Xie struct page *page; 241376035976SMiao Xie 241476035976SMiao Xie page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 241576035976SMiao Xie ret = rbio_add_io_page(rbio, &bio_list, page, 24164c664611SQu Wenruo bioc->tgtdev_map[rbio->scrubp], 2417*e01bf588SChristoph Hellwig pagenr, rbio->stripe_len, REQ_OP_WRITE); 241876035976SMiao Xie if (ret) 241976035976SMiao Xie goto cleanup; 242076035976SMiao Xie } 242176035976SMiao Xie 242276035976SMiao Xie submit_write: 24235a6ac9eaSMiao Xie nr_data = bio_list_size(&bio_list); 24245a6ac9eaSMiao Xie if (!nr_data) { 24255a6ac9eaSMiao Xie /* Every parity is right */ 242658efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_OK); 24275a6ac9eaSMiao Xie return; 24285a6ac9eaSMiao Xie } 24295a6ac9eaSMiao Xie 24305a6ac9eaSMiao Xie atomic_set(&rbio->stripes_pending, nr_data); 24315a6ac9eaSMiao Xie 2432bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 2433a6111d11SZhao Lei bio->bi_end_io = raid_write_end_io; 24344e49ea4aSMike Christie 24354e49ea4aSMike Christie submit_bio(bio); 24365a6ac9eaSMiao Xie } 24375a6ac9eaSMiao Xie return; 24385a6ac9eaSMiao Xie 24395a6ac9eaSMiao Xie cleanup: 244058efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 2441785884fcSLiu Bo 2442785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 2443785884fcSLiu Bo bio_put(bio); 24445a6ac9eaSMiao Xie } 24455a6ac9eaSMiao Xie 24465a6ac9eaSMiao Xie static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) 24475a6ac9eaSMiao Xie { 24485a6ac9eaSMiao Xie if (stripe >= 0 && stripe < rbio->nr_data) 24495a6ac9eaSMiao Xie return 1; 24505a6ac9eaSMiao Xie return 0; 24515a6ac9eaSMiao Xie } 24525a6ac9eaSMiao Xie 24535a6ac9eaSMiao Xie /* 24545a6ac9eaSMiao Xie * While we're doing the parity check and repair, we could have errors 24555a6ac9eaSMiao Xie * in reading pages off the disk. This checks for errors and if we're 24565a6ac9eaSMiao Xie * not able to read the page it'll trigger parity reconstruction. 
The 24575a6ac9eaSMiao Xie * parity scrub will be finished after we've reconstructed the failed 24585a6ac9eaSMiao Xie * stripes. 24595a6ac9eaSMiao Xie */ 24605a6ac9eaSMiao Xie static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) 24615a6ac9eaSMiao Xie { 24624c664611SQu Wenruo if (atomic_read(&rbio->error) > rbio->bioc->max_errors) 24635a6ac9eaSMiao Xie goto cleanup; 24645a6ac9eaSMiao Xie 24655a6ac9eaSMiao Xie if (rbio->faila >= 0 || rbio->failb >= 0) { 24665a6ac9eaSMiao Xie int dfail = 0, failp = -1; 24675a6ac9eaSMiao Xie 24685a6ac9eaSMiao Xie if (is_data_stripe(rbio, rbio->faila)) 24695a6ac9eaSMiao Xie dfail++; 24705a6ac9eaSMiao Xie else if (is_parity_stripe(rbio->faila)) 24715a6ac9eaSMiao Xie failp = rbio->faila; 24725a6ac9eaSMiao Xie 24735a6ac9eaSMiao Xie if (is_data_stripe(rbio, rbio->failb)) 24745a6ac9eaSMiao Xie dfail++; 24755a6ac9eaSMiao Xie else if (is_parity_stripe(rbio->failb)) 24765a6ac9eaSMiao Xie failp = rbio->failb; 24775a6ac9eaSMiao Xie 24785a6ac9eaSMiao Xie /* 24795a6ac9eaSMiao Xie * Because we cannot use the parity being scrubbed to repair 24805a6ac9eaSMiao Xie * the data, our repair capability is reduced by one. 24815a6ac9eaSMiao Xie * (In the RAID5 case, we cannot repair anything.) 24825a6ac9eaSMiao Xie */ 24834c664611SQu Wenruo if (dfail > rbio->bioc->max_errors - 1) 24845a6ac9eaSMiao Xie goto cleanup; 24855a6ac9eaSMiao Xie 24865a6ac9eaSMiao Xie /* 24875a6ac9eaSMiao Xie * If all the data is good and only the parity is wrong, just 24885a6ac9eaSMiao Xie * repair the parity. 24895a6ac9eaSMiao Xie */ 24905a6ac9eaSMiao Xie if (dfail == 0) { 24915a6ac9eaSMiao Xie finish_parity_scrub(rbio, 0); 24925a6ac9eaSMiao Xie return; 24935a6ac9eaSMiao Xie } 24945a6ac9eaSMiao Xie 24955a6ac9eaSMiao Xie /* 24965a6ac9eaSMiao Xie * This means we have one corrupted data stripe and one 24975a6ac9eaSMiao Xie * corrupted parity on RAID6. If the corrupted parity 249801327610SNicholas D Steeves * is the one being scrubbed, we can luckily use the other one 24995a6ac9eaSMiao Xie * to repair the data; otherwise the data stripe cannot be repaired. 25005a6ac9eaSMiao Xie */ 25015a6ac9eaSMiao Xie if (failp != rbio->scrubp) 25025a6ac9eaSMiao Xie goto cleanup; 25035a6ac9eaSMiao Xie 25045a6ac9eaSMiao Xie __raid_recover_end_io(rbio); 25055a6ac9eaSMiao Xie } else { 25065a6ac9eaSMiao Xie finish_parity_scrub(rbio, 1); 25075a6ac9eaSMiao Xie } 25085a6ac9eaSMiao Xie return; 25095a6ac9eaSMiao Xie 25105a6ac9eaSMiao Xie cleanup: 251158efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 25125a6ac9eaSMiao Xie } 25135a6ac9eaSMiao Xie 25145a6ac9eaSMiao Xie /* 25155a6ac9eaSMiao Xie * end io for the read phase of the rmw cycle. All the bios here are physical 25165a6ac9eaSMiao Xie * stripe bios we've read from the disk so we can recalculate the parity of the 25175a6ac9eaSMiao Xie * stripe. 
25185a6ac9eaSMiao Xie * 25195a6ac9eaSMiao Xie * This will usually kick off finish_parity_scrub once all the bios are read in, 25205a6ac9eaSMiao Xie * but it may trigger parity reconstruction if we had any errors along the way 25215a6ac9eaSMiao Xie */ 25224246a0b6SChristoph Hellwig static void raid56_parity_scrub_end_io(struct bio *bio) 25235a6ac9eaSMiao Xie { 25245a6ac9eaSMiao Xie struct btrfs_raid_bio *rbio = bio->bi_private; 25255a6ac9eaSMiao Xie 25264e4cbee9SChristoph Hellwig if (bio->bi_status) 25275a6ac9eaSMiao Xie fail_bio_stripe(rbio, bio); 25285a6ac9eaSMiao Xie else 25295a6ac9eaSMiao Xie set_bio_pages_uptodate(bio); 25305a6ac9eaSMiao Xie 25315a6ac9eaSMiao Xie bio_put(bio); 25325a6ac9eaSMiao Xie 25335a6ac9eaSMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 25345a6ac9eaSMiao Xie return; 25355a6ac9eaSMiao Xie 25365a6ac9eaSMiao Xie /* 25375a6ac9eaSMiao Xie * this will normally call finish_parity_scrub to start our write 25385a6ac9eaSMiao Xie * but if there are any failed stripes we'll reconstruct 25395a6ac9eaSMiao Xie * from parity first 25405a6ac9eaSMiao Xie */ 25415a6ac9eaSMiao Xie validate_rbio_for_parity_scrub(rbio); 25425a6ac9eaSMiao Xie } 25435a6ac9eaSMiao Xie 25445a6ac9eaSMiao Xie static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) 25455a6ac9eaSMiao Xie { 25465a6ac9eaSMiao Xie int bios_to_read = 0; 25475a6ac9eaSMiao Xie struct bio_list bio_list; 25485a6ac9eaSMiao Xie int ret; 25495a6ac9eaSMiao Xie int pagenr; 25505a6ac9eaSMiao Xie int stripe; 25515a6ac9eaSMiao Xie struct bio *bio; 25525a6ac9eaSMiao Xie 2553785884fcSLiu Bo bio_list_init(&bio_list); 2554785884fcSLiu Bo 25555a6ac9eaSMiao Xie ret = alloc_rbio_essential_pages(rbio); 25565a6ac9eaSMiao Xie if (ret) 25575a6ac9eaSMiao Xie goto cleanup; 25585a6ac9eaSMiao Xie 25595a6ac9eaSMiao Xie atomic_set(&rbio->error, 0); 25605a6ac9eaSMiao Xie /* 25615a6ac9eaSMiao Xie * build a list of bios to read all the missing parts of this 25625a6ac9eaSMiao Xie * stripe 25635a6ac9eaSMiao Xie */ 25642c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 25655a6ac9eaSMiao Xie for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 25665a6ac9eaSMiao Xie struct page *page; 25675a6ac9eaSMiao Xie /* 25685a6ac9eaSMiao Xie * we want to find all the pages missing from 25695a6ac9eaSMiao Xie * the rbio and read them from the disk. If 25705a6ac9eaSMiao Xie * page_in_rbio finds a page in the bio list 25715a6ac9eaSMiao Xie * we don't need to read it off the stripe. 25725a6ac9eaSMiao Xie */ 25735a6ac9eaSMiao Xie page = page_in_rbio(rbio, stripe, pagenr, 1); 25745a6ac9eaSMiao Xie if (page) 25755a6ac9eaSMiao Xie continue; 25765a6ac9eaSMiao Xie 25775a6ac9eaSMiao Xie page = rbio_stripe_page(rbio, stripe, pagenr); 25785a6ac9eaSMiao Xie /* 25795a6ac9eaSMiao Xie * the bio cache may have handed us an uptodate 25805a6ac9eaSMiao Xie * page. 
If so, be happy and use it 25815a6ac9eaSMiao Xie */ 25825a6ac9eaSMiao Xie if (PageUptodate(page)) 25835a6ac9eaSMiao Xie continue; 25845a6ac9eaSMiao Xie 2585*e01bf588SChristoph Hellwig ret = rbio_add_io_page(rbio, &bio_list, page, stripe, 2586*e01bf588SChristoph Hellwig pagenr, rbio->stripe_len, REQ_OP_READ); 25875a6ac9eaSMiao Xie if (ret) 25885a6ac9eaSMiao Xie goto cleanup; 25895a6ac9eaSMiao Xie } 25905a6ac9eaSMiao Xie } 25915a6ac9eaSMiao Xie 25925a6ac9eaSMiao Xie bios_to_read = bio_list_size(&bio_list); 25935a6ac9eaSMiao Xie if (!bios_to_read) { 25945a6ac9eaSMiao Xie /* 25955a6ac9eaSMiao Xie * this can happen if others have merged with 25965a6ac9eaSMiao Xie * us, it means there is nothing left to read. 25975a6ac9eaSMiao Xie * But if there are missing devices it may not be 25985a6ac9eaSMiao Xie * safe to do the full stripe write yet. 25995a6ac9eaSMiao Xie */ 26005a6ac9eaSMiao Xie goto finish; 26015a6ac9eaSMiao Xie } 26025a6ac9eaSMiao Xie 26035a6ac9eaSMiao Xie /* 26044c664611SQu Wenruo * The bioc may be freed once we submit the last bio. Make sure not to 26054c664611SQu Wenruo * touch it after that. 26065a6ac9eaSMiao Xie */ 26075a6ac9eaSMiao Xie atomic_set(&rbio->stripes_pending, bios_to_read); 2608bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 26095a6ac9eaSMiao Xie bio->bi_end_io = raid56_parity_scrub_end_io; 26105a6ac9eaSMiao Xie 26116a258d72SQu Wenruo btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 26125a6ac9eaSMiao Xie 26134e49ea4aSMike Christie submit_bio(bio); 26145a6ac9eaSMiao Xie } 26155a6ac9eaSMiao Xie /* the actual write will happen once the reads are done */ 26165a6ac9eaSMiao Xie return; 26175a6ac9eaSMiao Xie 26185a6ac9eaSMiao Xie cleanup: 261958efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 2620785884fcSLiu Bo 2621785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 2622785884fcSLiu Bo bio_put(bio); 2623785884fcSLiu Bo 26245a6ac9eaSMiao Xie return; 26255a6ac9eaSMiao Xie 26265a6ac9eaSMiao Xie finish: 26275a6ac9eaSMiao Xie validate_rbio_for_parity_scrub(rbio); 26285a6ac9eaSMiao Xie } 26295a6ac9eaSMiao Xie 26305a6ac9eaSMiao Xie static void scrub_parity_work(struct btrfs_work *work) 26315a6ac9eaSMiao Xie { 26325a6ac9eaSMiao Xie struct btrfs_raid_bio *rbio; 26335a6ac9eaSMiao Xie 26345a6ac9eaSMiao Xie rbio = container_of(work, struct btrfs_raid_bio, work); 26355a6ac9eaSMiao Xie raid56_parity_scrub_stripe(rbio); 26365a6ac9eaSMiao Xie } 26375a6ac9eaSMiao Xie 26385a6ac9eaSMiao Xie void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) 26395a6ac9eaSMiao Xie { 26405a6ac9eaSMiao Xie if (!lock_stripe_add(rbio)) 2641a81b747dSDavid Sterba start_async_work(rbio, scrub_parity_work); 26425a6ac9eaSMiao Xie } 2643b4ee1782SOmar Sandoval 2644b4ee1782SOmar Sandoval /* The following code is used for dev replace of a missing RAID 5/6 device. 
*/ 2645b4ee1782SOmar Sandoval 2646b4ee1782SOmar Sandoval struct btrfs_raid_bio * 26476a258d72SQu Wenruo raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc, 26486a258d72SQu Wenruo u64 length) 2649b4ee1782SOmar Sandoval { 26506a258d72SQu Wenruo struct btrfs_fs_info *fs_info = bioc->fs_info; 2651b4ee1782SOmar Sandoval struct btrfs_raid_bio *rbio; 2652b4ee1782SOmar Sandoval 26534c664611SQu Wenruo rbio = alloc_rbio(fs_info, bioc, length); 2654b4ee1782SOmar Sandoval if (IS_ERR(rbio)) 2655b4ee1782SOmar Sandoval return NULL; 2656b4ee1782SOmar Sandoval 2657b4ee1782SOmar Sandoval rbio->operation = BTRFS_RBIO_REBUILD_MISSING; 2658b4ee1782SOmar Sandoval bio_list_add(&rbio->bio_list, bio); 2659b4ee1782SOmar Sandoval /* 2660b4ee1782SOmar Sandoval * This is a special bio which is used to hold the completion handler 2661b4ee1782SOmar Sandoval * and to make this rbio similar to the other types 2662b4ee1782SOmar Sandoval */ 2663b4ee1782SOmar Sandoval ASSERT(!bio->bi_iter.bi_size); 2664b4ee1782SOmar Sandoval 2665b4ee1782SOmar Sandoval rbio->faila = find_logical_bio_stripe(rbio, bio); 2666b4ee1782SOmar Sandoval if (rbio->faila == -1) { 2667b4ee1782SOmar Sandoval BUG(); 2668b4ee1782SOmar Sandoval kfree(rbio); 2669b4ee1782SOmar Sandoval return NULL; 2670b4ee1782SOmar Sandoval } 2671b4ee1782SOmar Sandoval 2672ae6529c3SQu Wenruo /* 26734c664611SQu Wenruo * When we get bioc, we have already increased bio_counter, record it 2674ae6529c3SQu Wenruo * so we can free it at rbio_orig_end_io() 2675ae6529c3SQu Wenruo */ 2676ae6529c3SQu Wenruo rbio->generic_bio_cnt = 1; 2677ae6529c3SQu Wenruo 2678b4ee1782SOmar Sandoval return rbio; 2679b4ee1782SOmar Sandoval } 2680b4ee1782SOmar Sandoval 2681b4ee1782SOmar Sandoval void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) 2682b4ee1782SOmar Sandoval { 2683b4ee1782SOmar Sandoval if (!lock_stripe_add(rbio)) 2684e66d8d5aSDavid Sterba start_async_work(rbio, read_rebuild_work); 2685b4ee1782SOmar Sandoval } 2686
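/*
 * Illustrative sketch, not part of raid56.c: a standalone user-space C program
 * showing the page-index arithmetic used by raid56_add_scrub_pages() above
 * (byte offset of the page within the full stripe, shifted down to an index
 * into the flat bio_pages[] array). The values raid_map_start and logical are
 * invented examples, and EXAMPLE_PAGE_SHIFT assumes 4K pages, matching the
 * sectorsize == PAGE_SIZE assumption asserted by the scrub code.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12	/* 4K pages */

int main(void)
{
	uint64_t raid_map_start = 1ULL << 30;		/* logical start of the full stripe */
	uint64_t logical = raid_map_start + 5 * 4096;	/* start of the page being added */

	/* Byte offset into the data portion of the full stripe ... */
	uint64_t stripe_offset = logical - raid_map_start;
	/* ... converted to a page index. */
	unsigned int index = stripe_offset >> EXAMPLE_PAGE_SHIFT;

	assert(index == 5);
	printf("bio_pages index = %u\n", index);
	return 0;
}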
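/*
 * Illustrative sketch, not part of raid56.c: the core idea of the RAID5 branch
 * in finish_parity_scrub() above, redone as a standalone user-space program.
 * The data and parity buffers are invented; the kernel works on mapped stripe
 * pages with copy_page(), run_xor() and memcmp(), and only marks a page for
 * writeback when the on-disk parity differs from the recomputed one.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SKETCH_PAGE_SIZE 4096
#define SKETCH_NR_DATA   3

/*
 * Recompute parity as the XOR of all data pages, compare it with what is
 * "on disk", and rewrite it only when it differs. Returns true when the
 * parity had to be repaired (i.e. would need writeback).
 */
static bool scrub_one_parity_page(uint8_t data[SKETCH_NR_DATA][SKETCH_PAGE_SIZE],
				  uint8_t parity[SKETCH_PAGE_SIZE])
{
	uint8_t expected[SKETCH_PAGE_SIZE];

	memcpy(expected, data[0], SKETCH_PAGE_SIZE);
	for (int stripe = 1; stripe < SKETCH_NR_DATA; stripe++)
		for (int i = 0; i < SKETCH_PAGE_SIZE; i++)
			expected[i] ^= data[stripe][i];

	if (memcmp(parity, expected, SKETCH_PAGE_SIZE) == 0)
		return false;			/* parity is right, no writeback needed */

	memcpy(parity, expected, SKETCH_PAGE_SIZE);	/* repair the parity page */
	return true;
}

int main(void)
{
	static uint8_t data[SKETCH_NR_DATA][SKETCH_PAGE_SIZE];
	static uint8_t parity[SKETCH_PAGE_SIZE];

	data[0][0] = 0xaa;
	data[1][0] = 0x55;
	data[2][0] = 0x0f;
	parity[0] = 0x00;	/* deliberately wrong: correct value is 0xf0 */

	printf("repaired: %d, parity[0] = 0x%02x\n",
	       scrub_one_parity_page(data, parity), parity[0]);
	return 0;
}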
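/*
 * Illustrative sketch, not part of raid56.c: the decision made by
 * validate_rbio_for_parity_scrub() above, restated as a small pure function.
 * "max_errors" is 1 for RAID5 and 2 for RAID6; failp is -1 when no parity
 * stripe failed, mirroring the kernel code. Names and enum values here are
 * invented for illustration only.
 */
#include <stdio.h>

enum scrub_action {
	SCRUB_GIVE_UP,		/* too many failures, or a needed parity is unusable */
	SCRUB_REPAIR_PARITY,	/* all data is good, only rewrite the parity */
	SCRUB_REBUILD_FIRST,	/* reconstruct the failed data, then finish the scrub */
};

static enum scrub_action decide(int dfail, int failp, int scrubp, int max_errors)
{
	/* The parity being scrubbed cannot be used for reconstruction, so one
	 * unit of redundancy is effectively lost while the scrub runs. */
	if (dfail > max_errors - 1)
		return SCRUB_GIVE_UP;
	if (dfail == 0)
		return SCRUB_REPAIR_PARITY;
	/* A failed data stripe is only repairable when the failed parity is
	 * the one being scrubbed, leaving the other parity intact to rebuild. */
	if (failp != scrubp)
		return SCRUB_GIVE_UP;
	return SCRUB_REBUILD_FIRST;
}

int main(void)
{
	printf("%d\n", decide(0, -1, 2, 1));	/* RAID5, parity-only failure: SCRUB_REPAIR_PARITY */
	printf("%d\n", decide(1, -1, 2, 1));	/* RAID5, failed data stripe: SCRUB_GIVE_UP */
	printf("%d\n", decide(1, 3, 3, 2));	/* RAID6, data + scrubbed parity failed: SCRUB_REBUILD_FIRST */
	return 0;
}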